#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xen0
-# Thu May 11 17:06:31 2006
+# Linux kernel version: 2.6.17-xen0
+# Thu Jan 11 10:23:10 2007
#
CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
# Block layer
#
# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
#
# Processor type and features
#
+# CONFIG_SMP is not set
# CONFIG_X86_PC is not set
CONFIG_X86_XEN=y
# CONFIG_X86_ELAN is not set
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
-# CONFIG_SMP is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_ACPI_THERMAL=m
CONFIG_ACPI_ASUS=m
CONFIG_ACPI_IBM=m
+# CONFIG_ACPI_IBM_DOCK is not set
CONFIG_ACPI_TOSHIBA=m
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
CONFIG_XEN_PCIDEV_FRONTEND=y
# CONFIG_XEN_PCIDEV_FE_DEBUG is not set
# CONFIG_PCIEPORTBUS is not set
-CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_DEBUG is not set
CONFIG_ISA_DMA_API=y
# CONFIG_SCx200 is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
#
# CONFIG_IP_VS is not set
# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
# CONFIG_IP_NF_TFTP is not set
# CONFIG_IP_NF_AMANDA is not set
# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_H323 is not set
# CONFIG_IP_NF_QUEUE is not set
#
CONFIG_BRIDGE=y
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
+CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
CONFIG_AIC79XX_DEBUG_MASK=0
CONFIG_AIC79XX_REG_PRETTY_PRINT=y
# CONFIG_SCSI_DPT_I2O is not set
+CONFIG_SCSI_ADVANSYS=y
CONFIG_MEGARAID_NEWGEN=y
# CONFIG_MEGARAID_MM is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
CONFIG_MD_RAID1=y
# CONFIG_MD_RAID10 is not set
CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID5_RESHAPE is not set
# CONFIG_MD_RAID6 is not set
# CONFIG_MD_MULTIPATH is not set
# CONFIG_MD_FAULTY is not set
#
# CONFIG_IBM_ASM is not set
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
+# CONFIG_USB_DABUSB is not set
#
# Graphics support
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
#
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_TOUCHSCREEN is not set
# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
#
# USB Network Adapters
#
#
# CONFIG_MMC is not set
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
#
# InfiniBand support
#
#
# CONFIG_EDAC is not set
+#
+# Real Time Clock
+#
+CONFIG_RTC_LIB=m
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=m
+CONFIG_RTC_INTF_PROC=m
+CONFIG_RTC_INTF_DEV=m
+
+#
+# RTC drivers
+#
+CONFIG_RTC_DRV_M48T86=m
+CONFIG_RTC_DRV_TEST=m
+
#
# File systems
#
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_STACK_BACKTRACE_COLS=2
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_RODATA is not set
# CONFIG_4KSTACKS is not set
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
CONFIG_XEN_PRIVILEGED_GUEST=y
# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
CONFIG_XEN_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND=y
-# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
-CONFIG_XEN_PCIDEV_BACKEND_PASS=y
-# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
-# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
CONFIG_XEN_BLKDEV_BACKEND=y
CONFIG_XEN_BLKDEV_TAP=y
CONFIG_XEN_NETDEV_BACKEND=y
# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_NETDEV_LOOPBACK=y
+CONFIG_XEN_PCIDEV_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
+CONFIG_XEN_PCIDEV_BACKEND_PASS=y
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
+# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
#
# Library routines
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xen0
-# Thu May 11 17:17:19 2006
+# Linux kernel version: 2.6.17-xen0
+# Thu Jan 11 09:46:03 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
# Block layer
#
# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
CONFIG_X86_NO_IDT=y
CONFIG_X86_L1_CACHE_BYTES=128
CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
CONFIG_X86_GOOD_APIC=y
CONFIG_MICROCODE=y
# CONFIG_X86_MSR is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_SWIOTLB=y
CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x100000
+CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+# CONFIG_REORDER is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
CONFIG_ACPI_THERMAL=m
CONFIG_ACPI_ASUS=m
CONFIG_ACPI_IBM=m
+# CONFIG_ACPI_IBM_DOCK is not set
CONFIG_ACPI_TOSHIBA=m
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_HOTPLUG_MEMORY=m
#
# CPU Frequency scaling
# CONFIG_PCI_MMCONFIG is not set
CONFIG_XEN_PCIDEV_FRONTEND=y
# CONFIG_XEN_PCIDEV_FE_DEBUG is not set
-# CONFIG_UNORDERED_IO is not set
# CONFIG_PCIEPORTBUS is not set
-CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_DEBUG is not set
#
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
#
# CONFIG_IP_VS is not set
# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
# CONFIG_IP_NF_TFTP is not set
# CONFIG_IP_NF_AMANDA is not set
# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_H323 is not set
# CONFIG_IP_NF_QUEUE is not set
#
CONFIG_BRIDGE=y
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
+CONFIG_LLC=y
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
CONFIG_AGP=m
CONFIG_AGP_AMD64=m
# CONFIG_AGP_INTEL is not set
+CONFIG_AGP_SIS=m
+CONFIG_AGP_VIA=m
CONFIG_DRM=m
CONFIG_DRM_TDFX=m
CONFIG_DRM_R128=m
#
# CONFIG_IBM_ASM is not set
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
+# CONFIG_USB_DABUSB is not set
#
# Graphics support
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
#
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_TOUCHSCREEN is not set
# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
#
# USB Network Adapters
#
#
# CONFIG_MMC is not set
+#
+# LED devices
+#
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+
#
# InfiniBand support
#
#
# CONFIG_EDAC is not set
+#
+# Real Time Clock
+#
+CONFIG_RTC_LIB=m
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=m
+CONFIG_RTC_INTF_PROC=m
+CONFIG_RTC_INTF_DEV=m
+
+#
+# RTC drivers
+#
+CONFIG_RTC_DRV_M48T86=m
+CONFIG_RTC_DRV_TEST=m
+
#
# Firmware Drivers
#
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_RODATA is not set
# Hardware crypto devices
#
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
CONFIG_XEN_PRIVILEGED_GUEST=y
# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
CONFIG_XEN_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND=y
-# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
-CONFIG_XEN_PCIDEV_BACKEND_PASS=y
-# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
-# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
CONFIG_XEN_BLKDEV_BACKEND=y
CONFIG_XEN_BLKDEV_TAP=y
CONFIG_XEN_NETDEV_BACKEND=y
# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_NETDEV_LOOPBACK=y
+CONFIG_XEN_PCIDEV_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
+CONFIG_XEN_PCIDEV_BACKEND_PASS=y
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
+# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
#
# Library routines
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xenU
-# Thu May 11 17:08:12 2006
+# Linux kernel version: 2.6.17-xenU
+# Thu Jan 11 10:26:13 2007
#
CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
# CONFIG_AUDIT is not set
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
# Block layer
#
# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
#
# Processor type and features
#
+CONFIG_SMP=y
# CONFIG_X86_PC is not set
CONFIG_X86_XEN=y
# CONFIG_X86_ELAN is not set
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
-CONFIG_SMP=y
-CONFIG_SMP_ALTERNATIVES=y
CONFIG_NR_CPUS=8
+# CONFIG_SCHED_MC is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_SELECT_MEMORY_MODEL=y
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
# CONFIG_NETFILTER is not set
#
# Misc devices
#
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
#
#
# CONFIG_USB_ARCH_HAS_HCD is not set
# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
# CONFIG_MMC is not set
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
#
# InfiniBand support
#
#
# CONFIG_EDAC is not set
+#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
#
# File systems
#
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_STACK_BACKTRACE_COLS=2
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_RODATA is not set
# CONFIG_4KSTACKS is not set
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
CONFIG_XEN_UNPRIVILEGED_GUEST=y
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
# CONFIG_XEN_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
+CONFIG_XEN_SMPBOOT=y
#
# Library routines
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xenU
-# Thu May 11 17:17:57 2006
+# Linux kernel version: 2.6.17-xenU
+# Thu Jan 11 10:17:30 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
CONFIG_AUDITSYSCALL=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
# Block layer
#
CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
CONFIG_X86_NO_IDT=y
CONFIG_X86_L1_CACHE_BYTES=128
CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
CONFIG_X86_GOOD_APIC=y
# CONFIG_MICROCODE is not set
# CONFIG_X86_MSR is not set
CONFIG_X86_CPUID=y
CONFIG_X86_XEN_GENAPIC=y
CONFIG_SMP=y
+# CONFIG_SCHED_MC is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_PREEMPT_BKL=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_HOTPLUG_CPU=y
CONFIG_SWIOTLB=y
# CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x100000
+CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+# CONFIG_REORDER is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
# Bus options (PCI etc.)
#
# CONFIG_PCI is not set
-# CONFIG_UNORDERED_IO is not set
#
# PCCARD (PCMCIA/CardBus) support
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_TUNNEL=m
CONFIG_INET_TUNNEL=m
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_IP_VS_FTP=m
CONFIG_IPV6=m
CONFIG_IPV6_PRIVACY=y
+# CONFIG_IPV6_ROUTER_PREF is not set
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_TUNNEL=m
CONFIG_INET6_TUNNEL=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
CONFIG_IP_NF_TFTP=m
CONFIG_IP_NF_AMANDA=m
# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_H323 is not set
CONFIG_IP_NF_QUEUE=m
#
CONFIG_ESI_DONGLE=m
CONFIG_ACTISYS_DONGLE=m
CONFIG_TEKRAM_DONGLE=m
+# CONFIG_TOIM3232_DONGLE is not set
CONFIG_LITELINK_DONGLE=m
CONFIG_MA600_DONGLE=m
CONFIG_GIRBIL_DONGLE=m
CONFIG_BT_HCIUART_BCSP=y
CONFIG_BT_HCIVHCI=m
# CONFIG_IEEE80211 is not set
+CONFIG_WIRELESS_EXT=y
#
# Device Drivers
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID5=m
+# CONFIG_MD_RAID5_RESHAPE is not set
CONFIG_MD_RAID6=m
CONFIG_MD_MULTIPATH=m
# CONFIG_MD_FAULTY is not set
# Wireless LAN (non-hamradio)
#
CONFIG_NET_RADIO=y
+# CONFIG_NET_WIRELESS_RTNETLINK is not set
#
# Obsolete Wireless cards support (pre-802.11)
#
# CONFIG_STRIP is not set
-CONFIG_ATMEL=m
# CONFIG_HOSTAP is not set
#
# Misc devices
#
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
#
# Digital Video Broadcasting Devices
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
#
#
# CONFIG_USB_ARCH_HAS_HCD is not set
# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
# CONFIG_MMC is not set
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
#
# InfiniBand support
#
#
# CONFIG_EDAC is not set
+#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
#
# Firmware Drivers
#
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_RODATA is not set
# Hardware crypto devices
#
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
CONFIG_XEN_UNPRIVILEGED_GUEST=y
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
# CONFIG_XEN_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
+CONFIG_XEN_SMPBOOT=y
#
# Library routines
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xen
-# Thu May 11 17:11:00 2006
+# Linux kernel version: 2.6.17-xen
+# Thu Jan 11 10:45:59 2007
#
CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_DMI=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CPUSETS=y
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
# Block layer
#
CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
#
# Processor type and features
#
+CONFIG_SMP=y
# CONFIG_X86_PC is not set
CONFIG_X86_XEN=y
# CONFIG_X86_ELAN is not set
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
-CONFIG_SMP=y
-CONFIG_SMP_ALTERNATIVES=y
CONFIG_NR_CPUS=32
+# CONFIG_SCHED_MC is not set
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_REGPARM=y
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
-CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=m
CONFIG_ACPI_ASUS=m
CONFIG_ACPI_IBM=m
+# CONFIG_ACPI_IBM_DOCK is not set
CONFIG_ACPI_TOSHIBA=m
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_XEN_PCIDEV_FRONTEND=y
# CONFIG_XEN_PCIDEV_FE_DEBUG is not set
# CONFIG_PCIEPORTBUS is not set
-# CONFIG_PCI_LEGACY_PROC is not set
# CONFIG_PCI_DEBUG is not set
CONFIG_ISA_DMA_API=y
CONFIG_SCx200=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_TUNNEL=m
CONFIG_INET_TUNNEL=m
CONFIG_INET_DIAG=m
CONFIG_INET_TCP_DIAG=m
CONFIG_IP_VS_FTP=m
CONFIG_IPV6=m
CONFIG_IPV6_PRIVACY=y
+# CONFIG_IPV6_ROUTER_PREF is not set
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_TUNNEL=m
CONFIG_INET6_TUNNEL=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_LIMIT=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_IP_NF_TFTP=m
CONFIG_IP_NF_AMANDA=m
CONFIG_IP_NF_PPTP=m
+CONFIG_IP_NF_H323=m
CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_IPRANGE=m
-CONFIG_IP_NF_MATCH_MULTIPORT=m
CONFIG_IP_NF_MATCH_TOS=m
CONFIG_IP_NF_MATCH_RECENT=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_DSCP=m
-CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_MATCH_OWNER=m
CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_HASHLIMIT=m
-CONFIG_IP_NF_MATCH_POLICY=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_LOG=m
CONFIG_IP_NF_NAT_TFTP=m
CONFIG_IP_NF_NAT_AMANDA=m
CONFIG_IP_NF_NAT_PPTP=m
+CONFIG_IP_NF_NAT_H323=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_TOS=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP6_NF_MATCH_OPTS=m
CONFIG_IP6_NF_MATCH_FRAG=m
CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_MULTIPORT=m
CONFIG_IP6_NF_MATCH_OWNER=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_POLICY=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_TARGET_REJECT=m
#
CONFIG_IP_DCCP=m
CONFIG_INET_DCCP_DIAG=m
+CONFIG_IP_DCCP_ACKVEC=y
#
# DCCP CCIDs Configuration (EXPERIMENTAL)
#
+CONFIG_IP_DCCP_CCID2=m
CONFIG_IP_DCCP_CCID3=m
CONFIG_IP_DCCP_TFRC_LIB=m
# DCCP Kernel Hacking
#
# CONFIG_IP_DCCP_DEBUG is not set
-# CONFIG_IP_DCCP_UNLOAD_HACK is not set
#
# SCTP Configuration (EXPERIMENTAL)
CONFIG_ESI_DONGLE=m
CONFIG_ACTISYS_DONGLE=m
CONFIG_TEKRAM_DONGLE=m
+CONFIG_TOIM3232_DONGLE=m
CONFIG_LITELINK_DONGLE=m
CONFIG_MA600_DONGLE=m
CONFIG_GIRBIL_DONGLE=m
CONFIG_IEEE80211_CRYPT_WEP=m
CONFIG_IEEE80211_CRYPT_CCMP=m
CONFIG_IEEE80211_CRYPT_TKIP=m
+CONFIG_IEEE80211_SOFTMAC=m
+# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set
+CONFIG_WIRELESS_EXT=y
#
# Device Drivers
# CONFIG_MTD_OTP is not set
CONFIG_MTD_CFI_INTELEXT=m
CONFIG_MTD_CFI_AMDSTD=m
-CONFIG_MTD_CFI_AMDSTD_RETRY=0
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_CFI_UTIL=m
CONFIG_MTD_RAM=m
CONFIG_MTD_MTDRAM=m
CONFIG_MTDRAM_TOTAL_SIZE=4096
CONFIG_MTDRAM_ERASE_SIZE=128
-CONFIG_MTD_BLKMTD=m
CONFIG_MTD_BLOCK2MTD=m
#
CONFIG_AIC79XX_DEBUG_MASK=0
CONFIG_AIC79XX_REG_PRETTY_PRINT=y
CONFIG_SCSI_DPT_I2O=m
+CONFIG_SCSI_ADVANSYS=m
CONFIG_MEGARAID_NEWGEN=y
CONFIG_MEGARAID_MM=m
CONFIG_MEGARAID_MAILBOX=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_SYM53C8XX_MMIO=y
CONFIG_SCSI_IPR=m
CONFIG_SCSI_IPR_TRACE=y
CONFIG_SCSI_IPR_DUMP=y
-CONFIG_SCSI_QLOGIC_FC=m
-CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
CONFIG_SCSI_QLOGIC_1280=m
CONFIG_SCSI_QLA_FC=m
# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID5=m
+CONFIG_MD_RAID5_RESHAPE=y
CONFIG_MD_RAID6=m
CONFIG_MD_MULTIPATH=m
CONFIG_MD_FAULTY=m
CONFIG_I2O=m
CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y
CONFIG_I2O_EXT_ADAPTEC=y
-CONFIG_I2O_EXT_ADAPTEC_DMA64=y
CONFIG_I2O_CONFIG=m
CONFIG_I2O_CONFIG_OLD_IOCTL=y
CONFIG_I2O_BUS=m
# Wireless LAN (non-hamradio)
#
CONFIG_NET_RADIO=y
+CONFIG_NET_WIRELESS_RTNETLINK=y
#
# Obsolete Wireless cards support (pre-802.11)
CONFIG_IPW2100_MONITOR=y
# CONFIG_IPW2100_DEBUG is not set
CONFIG_IPW2200=m
+CONFIG_IPW2200_MONITOR=y
+CONFIG_IPW_QOS=y
# CONFIG_IPW2200_DEBUG is not set
CONFIG_AIRO=m
CONFIG_HERMES=m
CONFIG_HOSTAP_PLX=m
CONFIG_HOSTAP_PCI=m
CONFIG_HOSTAP_CS=m
+CONFIG_BCM43XX=m
+CONFIG_BCM43XX_DEBUG=y
+CONFIG_BCM43XX_DMA=y
+CONFIG_BCM43XX_PIO=y
+CONFIG_BCM43XX_DMA_AND_PIO_MODE=y
+# CONFIG_BCM43XX_DMA_MODE is not set
+# CONFIG_BCM43XX_PIO_MODE is not set
CONFIG_NET_WIRELESS=y
#
# Active cards
#
+#
+# Siemens Gigaset
+#
+CONFIG_ISDN_DRV_GIGASET=m
+CONFIG_GIGASET_BASE=m
+CONFIG_GIGASET_M105=m
+# CONFIG_GIGASET_DEBUG is not set
+# CONFIG_GIGASET_UNDOCREQ is not set
+
#
# CAPI subsystem
#
# Serial drivers
#
CONFIG_SERIAL_8250=m
+CONFIG_SERIAL_8250_PCI=m
+CONFIG_SERIAL_8250_PNP=m
# CONFIG_SERIAL_8250_CS is not set
-# CONFIG_SERIAL_8250_ACPI is not set
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
CONFIG_SENSORS_PCF8574=m
CONFIG_SENSORS_PCA9539=m
CONFIG_SENSORS_PCF8591=m
-CONFIG_SENSORS_RTC8564=m
CONFIG_SENSORS_MAX6875=m
-CONFIG_RTC_X1205_I2C=m
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# Dallas's 1-wire bus
#
CONFIG_W1=m
-CONFIG_W1_MATROX=m
-CONFIG_W1_DS9490=m
-CONFIG_W1_DS9490_BRIDGE=m
-CONFIG_W1_THERM=m
-CONFIG_W1_SMEM=m
-CONFIG_W1_DS2433=m
-CONFIG_W1_DS2433_CRC=y
+
+#
+# 1-wire Bus Masters
+#
+CONFIG_W1_MASTER_MATROX=m
+CONFIG_W1_MASTER_DS9490=m
+CONFIG_W1_MASTER_DS9490_BRIDGE=m
+CONFIG_W1_MASTER_DS2482=m
+
+#
+# 1-wire Slaves
+#
+CONFIG_W1_SLAVE_THERM=m
+CONFIG_W1_SLAVE_SMEM=m
+CONFIG_W1_SLAVE_DS2433=m
#
# Hardware Monitoring support
#
CONFIG_IBM_ASM=m
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
CONFIG_VIDEO_DEV=m
+CONFIG_VIDEO_V4L1=y
+CONFIG_VIDEO_V4L1_COMPAT=y
+CONFIG_VIDEO_V4L2=m
#
-# Video For Linux
+# Video Capture Adapters
#
#
-# Video Adapters
+# Video Capture Adapters
#
# CONFIG_VIDEO_ADV_DEBUG is not set
+CONFIG_VIDEO_VIVI=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT848_DVB=y
CONFIG_VIDEO_SAA6588=m
CONFIG_VIDEO_CPIA=m
CONFIG_VIDEO_CPIA_PP=m
CONFIG_VIDEO_CPIA_USB=m
+CONFIG_VIDEO_CPIA2=m
CONFIG_VIDEO_SAA5246A=m
CONFIG_VIDEO_SAA5249=m
CONFIG_TUNER_3036=m
CONFIG_VIDEO_DPC=m
CONFIG_VIDEO_HEXIUM_ORION=m
CONFIG_VIDEO_HEXIUM_GEMINI=m
+CONFIG_VIDEO_CX88_VP3054=m
CONFIG_VIDEO_CX88=m
CONFIG_VIDEO_CX88_ALSA=m
CONFIG_VIDEO_CX88_DVB=m
CONFIG_VIDEO_CX88_DVB_ALL_FRONTENDS=y
-CONFIG_VIDEO_CX88_VP3054=m
-CONFIG_VIDEO_EM28XX=m
CONFIG_VIDEO_OVCAMCHIP=m
-CONFIG_VIDEO_AUDIO_DECODER=m
-CONFIG_VIDEO_DECODER=m
+
+#
+# Encoders and Decoders
+#
+CONFIG_VIDEO_MSP3400=m
+CONFIG_VIDEO_CS53L32A=m
+CONFIG_VIDEO_WM8775=m
+CONFIG_VIDEO_WM8739=m
+CONFIG_VIDEO_CX25840=m
+CONFIG_VIDEO_SAA711X=m
+CONFIG_VIDEO_SAA7127=m
+CONFIG_VIDEO_UPD64031A=m
+CONFIG_VIDEO_UPD64083=m
+
+#
+# V4L USB devices
+#
+CONFIG_VIDEO_EM28XX=m
+CONFIG_USB_DSBR=m
+CONFIG_VIDEO_USBVIDEO=m
+CONFIG_USB_VICAM=m
+CONFIG_USB_IBMCAM=m
+CONFIG_USB_KONICAWC=m
+CONFIG_USB_ET61X251=m
+CONFIG_USB_OV511=m
+CONFIG_USB_SE401=m
+CONFIG_USB_SN9C102=m
+CONFIG_USB_STV680=m
+CONFIG_USB_W9968CF=m
+CONFIG_USB_ZC0301=m
+CONFIG_USB_PWC=m
#
# Radio Adapters
CONFIG_DVB_TDA1004X=m
CONFIG_DVB_NXT6000=m
CONFIG_DVB_MT352=m
+CONFIG_DVB_ZL10353=m
CONFIG_DVB_DIB3000MB=m
CONFIG_DVB_DIB3000MC=m
CONFIG_VIDEO_BTCX=m
CONFIG_VIDEO_IR=m
CONFIG_VIDEO_TVEEPROM=m
+CONFIG_USB_DABUSB=m
#
# Graphics support
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_MACMODES is not set
+CONFIG_FB_FIRMWARE_EDID=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_CIRRUS=m
CONFIG_FB_MATROX_G=y
# CONFIG_FB_MATROX_I2C is not set
CONFIG_FB_MATROX_MULTIHEAD=y
-# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=m
CONFIG_FB_RADEON_I2C=y
# CONFIG_FB_RADEON_DEBUG is not set
CONFIG_FB_TRIDENT=m
# CONFIG_FB_TRIDENT_ACCEL is not set
CONFIG_FB_GEODE=y
+CONFIG_FB_GEODE_GX=m
CONFIG_FB_GEODE_GX1=m
CONFIG_FB_VIRTUAL=m
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=m
# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
+CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_RTCTIMER=m
CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
# CONFIG_SND_DYNAMIC_MINORS is not set
CONFIG_SND_SUPPORT_OLD_API=y
+CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
# PCI devices
#
CONFIG_SND_AD1889=m
+CONFIG_SND_ALS300=m
CONFIG_SND_ALS4000=m
CONFIG_SND_ALI5451=m
CONFIG_SND_ATIIXP=m
CONFIG_SND_ES1938=m
CONFIG_SND_ES1968=m
CONFIG_SND_FM801=m
-# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_FM801_TEA575X=m
CONFIG_SND_HDA_INTEL=m
CONFIG_SND_HDSP=m
CONFIG_SND_MIXART=m
CONFIG_SND_NM256=m
CONFIG_SND_PCXHR=m
+CONFIG_SND_RIPTIDE=m
CONFIG_SND_RME32=m
CONFIG_SND_RME96=m
CONFIG_SND_RME9652=m
#
# PCMCIA devices
#
+CONFIG_SND_VXPOCKET=m
+CONFIG_SND_PDAUDIOCF=m
#
# Open Sound System
#
CONFIG_SOUND_PRIME=m
-# CONFIG_OBSOLETE_OSS_DRIVER is not set
+CONFIG_SOUND_BT878=m
+CONFIG_SOUND_EMU10K1=m
+# CONFIG_MIDI_EMU10K1 is not set
CONFIG_SOUND_FUSION=m
+CONFIG_SOUND_ES1371=m
CONFIG_SOUND_ICH=m
CONFIG_SOUND_TRIDENT=m
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_VIA82CXXX=m
+# CONFIG_MIDI_VIA82CXXX is not set
# CONFIG_SOUND_OSS is not set
CONFIG_SOUND_TVMIXER=m
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=m
# CONFIG_USB_DEBUG is not set
#
# USB Device Class drivers
#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=m
CONFIG_USB_ACECAD=m
CONFIG_USB_KBTAB=m
CONFIG_USB_POWERMATE=m
-CONFIG_USB_MTOUCH=m
-CONFIG_USB_ITMTOUCH=m
-CONFIG_USB_EGALAX=m
+CONFIG_USB_TOUCHSCREEN=m
+CONFIG_USB_TOUCHSCREEN_EGALAX=y
+CONFIG_USB_TOUCHSCREEN_PANJIT=y
+CONFIG_USB_TOUCHSCREEN_3M=y
+CONFIG_USB_TOUCHSCREEN_ITM=y
CONFIG_USB_YEALINK=m
CONFIG_USB_XPAD=m
CONFIG_USB_ATI_REMOTE=m
CONFIG_USB_MDC800=m
CONFIG_USB_MICROTEK=m
-#
-# USB Multimedia devices
-#
-CONFIG_USB_DABUSB=m
-CONFIG_USB_VICAM=m
-CONFIG_USB_DSBR=m
-CONFIG_USB_ET61X251=m
-CONFIG_USB_IBMCAM=m
-CONFIG_USB_KONICAWC=m
-CONFIG_USB_OV511=m
-CONFIG_USB_SE401=m
-CONFIG_USB_SN9C102=m
-CONFIG_USB_STV680=m
-CONFIG_USB_W9968CF=m
-CONFIG_USB_PWC=m
-
#
# USB Network Adapters
#
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_AIRPRIME=m
CONFIG_USB_SERIAL_ANYDATA=m
+CONFIG_USB_SERIAL_ARK3116=m
CONFIG_USB_SERIAL_BELKIN=m
CONFIG_USB_SERIAL_WHITEHEAT=m
CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
CONFIG_USB_SERIAL_CYPRESS_M8=m
CONFIG_USB_SERIAL_EMPEG=m
CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_FUNSOFT=m
CONFIG_USB_SERIAL_VISOR=m
CONFIG_USB_SERIAL_IPAQ=m
CONFIG_USB_SERIAL_IR=m
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_NAVMAN=m
CONFIG_USB_SERIAL_PL2303=m
CONFIG_USB_SERIAL_HP4X=m
CONFIG_USB_SERIAL_SAFE=m
# CONFIG_USB_GADGET_GOKU is not set
# CONFIG_USB_GADGET_LH7A40X is not set
# CONFIG_USB_GADGET_OMAP is not set
+# CONFIG_USB_GADGET_AT91 is not set
# CONFIG_USB_GADGET_DUMMY_HCD is not set
CONFIG_USB_GADGET_DUALSPEED=y
CONFIG_USB_ZERO=m
CONFIG_MMC=m
# CONFIG_MMC_DEBUG is not set
CONFIG_MMC_BLOCK=m
+CONFIG_MMC_SDHCI=m
CONFIG_MMC_WBSD=m
+#
+# LED devices
+#
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+
#
# InfiniBand support
#
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set
CONFIG_INFINIBAND_SRP=m
#
CONFIG_EDAC_R82600=m
CONFIG_EDAC_POLL=y
+#
+# Real Time Clock
+#
+CONFIG_RTC_LIB=m
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=m
+CONFIG_RTC_INTF_PROC=m
+CONFIG_RTC_INTF_DEV=m
+
+#
+# RTC drivers
+#
+CONFIG_RTC_DRV_X1205=m
+CONFIG_RTC_DRV_DS1672=m
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_RTC_DRV_RS5C372=m
+CONFIG_RTC_DRV_M48T86=m
+CONFIG_RTC_DRV_TEST=m
+
#
# File systems
#
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
-# CONFIG_PROC_VMCORE is not set
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-CONFIG_RELAYFS_FS=m
CONFIG_CONFIGFS_FS=m
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_STACK_BACKTRACE_COLS=2
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_RODATA is not set
# CONFIG_4KSTACKS is not set
#
# CONFIG_CRYPTO_DEV_PADLOCK is not set
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
CONFIG_XEN_PRIVILEGED_GUEST=y
# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
CONFIG_XEN_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND=m
-CONFIG_XEN_PCIDEV_BACKEND_VPCI=y
-# CONFIG_XEN_PCIDEV_BACKEND_PASS is not set
-# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
-# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
CONFIG_XEN_BLKDEV_BACKEND=y
CONFIG_XEN_BLKDEV_TAP=y
CONFIG_XEN_NETDEV_BACKEND=y
# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_NETDEV_LOOPBACK=y
+CONFIG_XEN_PCIDEV_BACKEND=m
+CONFIG_XEN_PCIDEV_BACKEND_VPCI=y
+# CONFIG_XEN_PCIDEV_BACKEND_PASS is not set
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
+# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
+CONFIG_XEN_SMPBOOT=y
#
# Library routines
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16.13-xen
-# Thu May 11 17:18:58 2006
+# Linux kernel version: 2.6.17-xen
+# Thu Jan 11 10:10:22 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CPUSETS=y
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_UID16=y
CONFIG_VM86=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
# Block layer
#
CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+CONFIG_LSF=y
#
# IO Schedulers
CONFIG_X86_NO_IDT=y
CONFIG_X86_L1_CACHE_BYTES=128
CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
CONFIG_X86_GOOD_APIC=y
CONFIG_MICROCODE=y
CONFIG_X86_MSR=m
CONFIG_X86_LOCAL_APIC=y
CONFIG_MTRR=y
CONFIG_SMP=y
+# CONFIG_SCHED_MC is not set
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
CONFIG_PREEMPT_BKL=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_NR_CPUS=32
CONFIG_HOTPLUG_CPU=y
CONFIG_SWIOTLB=y
+CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x100000
+CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
-CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+# CONFIG_REORDER is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
CONFIG_ACPI_THERMAL=m
CONFIG_ACPI_ASUS=m
CONFIG_ACPI_IBM=m
+# CONFIG_ACPI_IBM_DOCK is not set
CONFIG_ACPI_TOSHIBA=m
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_ACPI_CONTAINER=m
+CONFIG_ACPI_HOTPLUG_MEMORY=m
#
# CPU Frequency scaling
# CONFIG_PCI_MMCONFIG is not set
CONFIG_XEN_PCIDEV_FRONTEND=y
# CONFIG_XEN_PCIDEV_FE_DEBUG is not set
-# CONFIG_UNORDERED_IO is not set
# CONFIG_PCIEPORTBUS is not set
-# CONFIG_PCI_LEGACY_PROC is not set
# CONFIG_PCI_DEBUG is not set
#
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_TUNNEL=m
CONFIG_INET_TUNNEL=m
CONFIG_INET_DIAG=m
CONFIG_INET_TCP_DIAG=m
CONFIG_IP_VS_FTP=m
CONFIG_IPV6=m
CONFIG_IPV6_PRIVACY=y
+# CONFIG_IPV6_ROUTER_PREF is not set
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_TUNNEL=m
CONFIG_INET6_TUNNEL=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_LIMIT=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_IP_NF_TFTP=m
CONFIG_IP_NF_AMANDA=m
CONFIG_IP_NF_PPTP=m
+CONFIG_IP_NF_H323=m
CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_IPRANGE=m
-CONFIG_IP_NF_MATCH_MULTIPORT=m
CONFIG_IP_NF_MATCH_TOS=m
CONFIG_IP_NF_MATCH_RECENT=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_DSCP=m
-CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_MATCH_OWNER=m
CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_HASHLIMIT=m
-CONFIG_IP_NF_MATCH_POLICY=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_LOG=m
CONFIG_IP_NF_NAT_TFTP=m
CONFIG_IP_NF_NAT_AMANDA=m
CONFIG_IP_NF_NAT_PPTP=m
+CONFIG_IP_NF_NAT_H323=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_TOS=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP6_NF_MATCH_OPTS=m
CONFIG_IP6_NF_MATCH_FRAG=m
CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_MULTIPORT=m
CONFIG_IP6_NF_MATCH_OWNER=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_POLICY=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_TARGET_REJECT=m
#
CONFIG_IP_DCCP=m
CONFIG_INET_DCCP_DIAG=m
+CONFIG_IP_DCCP_ACKVEC=y
#
# DCCP CCIDs Configuration (EXPERIMENTAL)
#
+CONFIG_IP_DCCP_CCID2=m
CONFIG_IP_DCCP_CCID3=m
CONFIG_IP_DCCP_TFRC_LIB=m
# DCCP Kernel Hacking
#
# CONFIG_IP_DCCP_DEBUG is not set
-# CONFIG_IP_DCCP_UNLOAD_HACK is not set
#
# SCTP Configuration (EXPERIMENTAL)
CONFIG_ESI_DONGLE=m
CONFIG_ACTISYS_DONGLE=m
CONFIG_TEKRAM_DONGLE=m
+CONFIG_TOIM3232_DONGLE=m
CONFIG_LITELINK_DONGLE=m
CONFIG_MA600_DONGLE=m
CONFIG_GIRBIL_DONGLE=m
CONFIG_IEEE80211_CRYPT_WEP=m
CONFIG_IEEE80211_CRYPT_CCMP=m
CONFIG_IEEE80211_CRYPT_TKIP=m
+CONFIG_IEEE80211_SOFTMAC=m
+# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set
+CONFIG_WIRELESS_EXT=y
#
# Device Drivers
# CONFIG_MTD_OTP is not set
CONFIG_MTD_CFI_INTELEXT=m
CONFIG_MTD_CFI_AMDSTD=m
-CONFIG_MTD_CFI_AMDSTD_RETRY=3
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_CFI_UTIL=m
CONFIG_MTD_RAM=m
CONFIG_MTD_MTDRAM=m
CONFIG_MTDRAM_TOTAL_SIZE=4096
CONFIG_MTDRAM_ERASE_SIZE=128
-# CONFIG_MTD_BLKMTD is not set
CONFIG_MTD_BLOCK2MTD=m
#
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_SYM53C8XX_MMIO=y
# CONFIG_SCSI_IPR is not set
-CONFIG_SCSI_QLOGIC_FC=m
-CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
CONFIG_SCSI_QLOGIC_1280=m
CONFIG_SCSI_QLA_FC=m
# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID5=m
+CONFIG_MD_RAID5_RESHAPE=y
CONFIG_MD_RAID6=m
CONFIG_MD_MULTIPATH=m
CONFIG_MD_FAULTY=m
# Wireless LAN (non-hamradio)
#
CONFIG_NET_RADIO=y
+CONFIG_NET_WIRELESS_RTNETLINK=y
#
# Obsolete Wireless cards support (pre-802.11)
CONFIG_IPW2100_MONITOR=y
# CONFIG_IPW2100_DEBUG is not set
CONFIG_IPW2200=m
+CONFIG_IPW2200_MONITOR=y
+CONFIG_IPW_QOS=y
# CONFIG_IPW2200_DEBUG is not set
CONFIG_AIRO=m
CONFIG_HERMES=m
CONFIG_HOSTAP_PLX=m
CONFIG_HOSTAP_PCI=m
# CONFIG_HOSTAP_CS is not set
+CONFIG_BCM43XX=m
+CONFIG_BCM43XX_DEBUG=y
+CONFIG_BCM43XX_DMA=y
+CONFIG_BCM43XX_PIO=y
+CONFIG_BCM43XX_DMA_AND_PIO_MODE=y
+# CONFIG_BCM43XX_DMA_MODE is not set
+# CONFIG_BCM43XX_PIO_MODE is not set
CONFIG_NET_WIRELESS=y
#
# Active cards
#
+#
+# Siemens Gigaset
+#
+CONFIG_ISDN_DRV_GIGASET=m
+CONFIG_GIGASET_BASE=m
+CONFIG_GIGASET_M105=m
+# CONFIG_GIGASET_DEBUG is not set
+# CONFIG_GIGASET_UNDOCREQ is not set
+
#
# CAPI subsystem
#
CONFIG_AGP=m
CONFIG_AGP_AMD64=m
CONFIG_AGP_INTEL=m
+CONFIG_AGP_SIS=m
+CONFIG_AGP_VIA=m
CONFIG_DRM=m
CONFIG_DRM_TDFX=m
CONFIG_DRM_R128=m
CONFIG_I2C_PARPORT_LIGHT=m
CONFIG_I2C_PROSAVAGE=m
CONFIG_I2C_SAVAGE4=m
-CONFIG_SCx200_ACB=m
CONFIG_I2C_SIS5595=m
CONFIG_I2C_SIS630=m
CONFIG_I2C_SIS96X=m
CONFIG_SENSORS_PCF8574=m
CONFIG_SENSORS_PCA9539=m
CONFIG_SENSORS_PCF8591=m
-CONFIG_SENSORS_RTC8564=m
CONFIG_SENSORS_MAX6875=m
-CONFIG_RTC_X1205_I2C=m
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# Dallas's 1-wire bus
#
CONFIG_W1=m
-CONFIG_W1_MATROX=m
-CONFIG_W1_DS9490=m
-CONFIG_W1_DS9490_BRIDGE=m
-CONFIG_W1_THERM=m
-CONFIG_W1_SMEM=m
-CONFIG_W1_DS2433=m
-CONFIG_W1_DS2433_CRC=y
+
+#
+# 1-wire Bus Masters
+#
+CONFIG_W1_MASTER_MATROX=m
+CONFIG_W1_MASTER_DS9490=m
+CONFIG_W1_MASTER_DS9490_BRIDGE=m
+CONFIG_W1_MASTER_DS2482=m
+
+#
+# 1-wire Slaves
+#
+CONFIG_W1_SLAVE_THERM=m
+CONFIG_W1_SLAVE_SMEM=m
+CONFIG_W1_SLAVE_DS2433=m
#
# Hardware Monitoring support
#
CONFIG_IBM_ASM=m
-#
-# Multimedia Capabilities Port drivers
-#
-
#
# Multimedia devices
#
CONFIG_VIDEO_DEV=m
+CONFIG_VIDEO_V4L1=y
+CONFIG_VIDEO_V4L1_COMPAT=y
+CONFIG_VIDEO_V4L2=m
#
-# Video For Linux
+# Video Capture Adapters
#
#
-# Video Adapters
+# Video Capture Adapters
#
# CONFIG_VIDEO_ADV_DEBUG is not set
+CONFIG_VIDEO_VIVI=m
CONFIG_VIDEO_BT848=m
CONFIG_VIDEO_BT848_DVB=y
CONFIG_VIDEO_SAA6588=m
CONFIG_VIDEO_CPIA=m
CONFIG_VIDEO_CPIA_PP=m
CONFIG_VIDEO_CPIA_USB=m
+CONFIG_VIDEO_CPIA2=m
CONFIG_VIDEO_SAA5246A=m
CONFIG_VIDEO_SAA5249=m
CONFIG_TUNER_3036=m
CONFIG_VIDEO_DPC=m
CONFIG_VIDEO_HEXIUM_ORION=m
CONFIG_VIDEO_HEXIUM_GEMINI=m
+CONFIG_VIDEO_CX88_VP3054=m
CONFIG_VIDEO_CX88=m
CONFIG_VIDEO_CX88_ALSA=m
CONFIG_VIDEO_CX88_DVB=m
CONFIG_VIDEO_CX88_DVB_ALL_FRONTENDS=y
-CONFIG_VIDEO_CX88_VP3054=m
-CONFIG_VIDEO_EM28XX=m
CONFIG_VIDEO_OVCAMCHIP=m
-CONFIG_VIDEO_AUDIO_DECODER=m
-CONFIG_VIDEO_DECODER=m
+
+#
+# Encoders and Decoders
+#
+CONFIG_VIDEO_MSP3400=m
+CONFIG_VIDEO_CS53L32A=m
+CONFIG_VIDEO_WM8775=m
+CONFIG_VIDEO_WM8739=m
+CONFIG_VIDEO_CX25840=m
+CONFIG_VIDEO_SAA711X=m
+CONFIG_VIDEO_SAA7127=m
+CONFIG_VIDEO_UPD64031A=m
+CONFIG_VIDEO_UPD64083=m
+
+#
+# V4L USB devices
+#
+CONFIG_VIDEO_EM28XX=m
+CONFIG_USB_DSBR=m
+CONFIG_VIDEO_USBVIDEO=m
+CONFIG_USB_VICAM=m
+CONFIG_USB_IBMCAM=m
+CONFIG_USB_KONICAWC=m
+CONFIG_USB_ET61X251=m
+CONFIG_USB_OV511=m
+CONFIG_USB_SE401=m
+CONFIG_USB_SN9C102=m
+CONFIG_USB_STV680=m
+CONFIG_USB_W9968CF=m
+CONFIG_USB_ZC0301=m
+CONFIG_USB_PWC=m
#
# Radio Adapters
CONFIG_DVB_TDA1004X=m
CONFIG_DVB_NXT6000=m
CONFIG_DVB_MT352=m
+CONFIG_DVB_ZL10353=m
CONFIG_DVB_DIB3000MB=m
CONFIG_DVB_DIB3000MC=m
CONFIG_VIDEO_BTCX=m
CONFIG_VIDEO_IR=m
CONFIG_VIDEO_TVEEPROM=m
+CONFIG_USB_DABUSB=m
#
# Graphics support
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_MACMODES is not set
+CONFIG_FB_FIRMWARE_EDID=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_CIRRUS=m
CONFIG_FB_MATROX_I2C=m
CONFIG_FB_MATROX_MAVEN=m
CONFIG_FB_MATROX_MULTIHEAD=y
-# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=m
CONFIG_FB_RADEON_I2C=y
# CONFIG_FB_RADEON_DEBUG is not set
CONFIG_FB_TRIDENT=m
CONFIG_FB_TRIDENT_ACCEL=y
CONFIG_FB_GEODE=y
+CONFIG_FB_GEODE_GX=m
CONFIG_FB_GEODE_GX1=m
CONFIG_FB_VIRTUAL=m
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
+CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_RTCTIMER=m
CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
CONFIG_SND_DYNAMIC_MINORS=y
CONFIG_SND_SUPPORT_OLD_API=y
+CONFIG_SND_VERBOSE_PROCFS=y
# CONFIG_SND_VERBOSE_PRINTK is not set
# CONFIG_SND_DEBUG is not set
# PCI devices
#
CONFIG_SND_AD1889=m
+CONFIG_SND_ALS300=m
CONFIG_SND_ALS4000=m
CONFIG_SND_ALI5451=m
CONFIG_SND_ATIIXP=m
CONFIG_SND_ES1938=m
CONFIG_SND_ES1968=m
CONFIG_SND_FM801=m
-# CONFIG_SND_FM801_TEA575X_BOOL is not set
CONFIG_SND_FM801_TEA575X=m
CONFIG_SND_HDA_INTEL=m
CONFIG_SND_HDSP=m
CONFIG_SND_MIXART=m
CONFIG_SND_NM256=m
CONFIG_SND_PCXHR=m
+CONFIG_SND_RIPTIDE=m
CONFIG_SND_RME32=m
CONFIG_SND_RME96=m
CONFIG_SND_RME9652=m
#
# PCMCIA devices
#
+CONFIG_SND_VXPOCKET=m
+CONFIG_SND_PDAUDIOCF=m
#
# Open Sound System
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
CONFIG_USB=m
# CONFIG_USB_DEBUG is not set
#
# USB Device Class drivers
#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=m
CONFIG_USB_ACECAD=m
CONFIG_USB_KBTAB=m
CONFIG_USB_POWERMATE=m
-CONFIG_USB_MTOUCH=m
-CONFIG_USB_ITMTOUCH=m
-CONFIG_USB_EGALAX=m
+CONFIG_USB_TOUCHSCREEN=m
+CONFIG_USB_TOUCHSCREEN_EGALAX=y
+CONFIG_USB_TOUCHSCREEN_PANJIT=y
+CONFIG_USB_TOUCHSCREEN_3M=y
+CONFIG_USB_TOUCHSCREEN_ITM=y
CONFIG_USB_YEALINK=m
CONFIG_USB_XPAD=m
CONFIG_USB_ATI_REMOTE=m
CONFIG_USB_MDC800=m
CONFIG_USB_MICROTEK=m
-#
-# USB Multimedia devices
-#
-CONFIG_USB_DABUSB=m
-CONFIG_USB_VICAM=m
-CONFIG_USB_DSBR=m
-CONFIG_USB_ET61X251=m
-CONFIG_USB_IBMCAM=m
-CONFIG_USB_KONICAWC=m
-CONFIG_USB_OV511=m
-CONFIG_USB_SE401=m
-CONFIG_USB_SN9C102=m
-CONFIG_USB_STV680=m
-CONFIG_USB_W9968CF=m
-CONFIG_USB_PWC=m
-
#
# USB Network Adapters
#
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_AIRPRIME=m
CONFIG_USB_SERIAL_ANYDATA=m
+CONFIG_USB_SERIAL_ARK3116=m
CONFIG_USB_SERIAL_BELKIN=m
CONFIG_USB_SERIAL_WHITEHEAT=m
CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
CONFIG_USB_SERIAL_CYPRESS_M8=m
CONFIG_USB_SERIAL_EMPEG=m
CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_FUNSOFT=m
CONFIG_USB_SERIAL_VISOR=m
CONFIG_USB_SERIAL_IPAQ=m
CONFIG_USB_SERIAL_IR=m
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_NAVMAN=m
CONFIG_USB_SERIAL_PL2303=m
CONFIG_USB_SERIAL_HP4X=m
CONFIG_USB_SERIAL_SAFE=m
CONFIG_MMC=m
# CONFIG_MMC_DEBUG is not set
CONFIG_MMC_BLOCK=m
+CONFIG_MMC_SDHCI=m
CONFIG_MMC_WBSD=m
+#
+# LED devices
+#
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+
#
# InfiniBand support
#
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set
CONFIG_INFINIBAND_SRP=m
#
#
# CONFIG_EDAC_DEBUG is not set
CONFIG_EDAC_MM_EDAC=m
-CONFIG_EDAC_E7XXX=m
CONFIG_EDAC_E752X=m
-CONFIG_EDAC_I82875P=m
-CONFIG_EDAC_I82860=m
-CONFIG_EDAC_R82600=m
CONFIG_EDAC_POLL=y
+#
+# Real Time Clock
+#
+CONFIG_RTC_LIB=m
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=m
+CONFIG_RTC_INTF_PROC=m
+CONFIG_RTC_INTF_DEV=m
+
+#
+# RTC drivers
+#
+CONFIG_RTC_DRV_X1205=m
+CONFIG_RTC_DRV_DS1672=m
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_RTC_DRV_RS5C372=m
+CONFIG_RTC_DRV_M48T86=m
+CONFIG_RTC_DRV_TEST=m
+
#
# Firmware Drivers
#
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
-CONFIG_RELAYFS_FS=m
CONFIG_CONFIGFS_FS=m
#
# CONFIG_DEBUG_FS is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
+# CONFIG_UNWIND_INFO is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_RODATA is not set
# Hardware crypto devices
#
CONFIG_XEN=y
-CONFIG_XEN_INTERFACE_VERSION=0x00030202
+CONFIG_XEN_INTERFACE_VERSION=0x00030203
#
# XEN
#
CONFIG_XEN_PRIVILEGED_GUEST=y
# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_PRIVCMD=y
+CONFIG_XEN_XENBUS_DEV=y
CONFIG_XEN_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND=m
-# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
-CONFIG_XEN_PCIDEV_BACKEND_PASS=y
-# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
-# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
CONFIG_XEN_BLKDEV_BACKEND=y
CONFIG_XEN_BLKDEV_TAP=y
CONFIG_XEN_NETDEV_BACKEND=y
# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_NETDEV_LOOPBACK=y
+CONFIG_XEN_PCIDEV_BACKEND=m
+# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
+CONFIG_XEN_PCIDEV_BACKEND_PASS=y
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
+# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
CONFIG_XEN_TPMDEV_BACKEND=m
-# CONFIG_XEN_TPMDEV_CLOSE_IF_VTPM_FAILS is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_FRAMEBUFFER=y
CONFIG_XEN_COMPAT_030002=y
CONFIG_HAVE_ARCH_ALLOC_SKB=y
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_XEN_UTIL=y
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_DEVMEM=y
+CONFIG_XEN_SKBUFF=y
+CONFIG_XEN_REBOOT=y
+CONFIG_XEN_SMPBOOT=y
#
# Library routines
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.16.33
+LINUX_VER = 2.6.17
EXTRAVERSION ?= xen
bool
default y
+config GENERIC_HWEIGHT
+ bool
+ default y
+
config ARCH_MAY_HAVE_PC_FDC
bool
default y
menu "Processor type and features"
+config SMP
+ bool "Symmetric multi-processing support"
+ ---help---
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say N. If
+ you have a system with more than one CPU, say Y.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on many, but not all,
+ singleprocessor machines. On a singleprocessor machine, the kernel
+ will run faster if you say N here.
+
+ Note that if you say Y here and choose architecture "586" or
+ "Pentium" under "Processor family", the kernel will not work on 486
+ architectures. Similarly, multiprocessor kernels for the "PPro"
+ architecture may not work on all Pentium based boards.
+
+ People using multiprocessor machines who say Y here should also say
+ Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+ Management" code will be disabled if you say Y here.
+
+ See also the <file:Documentation/smp.txt>,
+ <file:Documentation/i386/IO-APIC.txt>,
+ <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+ <http://www.tldp.org/docs.html#howto>.
+
+ If you don't know what to do here, say N.
+
choice
prompt "Subarchitecture Type"
default X86_PC
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
+ select SMP
select NUMA
help
This option is used for getting Linux to run on a (IBM/Sequent) NUMA
depends on HPET_TIMER && RTC=y
default y
-config SMP
- bool "Symmetric multi-processing support"
- ---help---
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
-
- See also the <file:Documentation/smp.txt>,
- <file:Documentation/i386/IO-APIC.txt>,
- <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
- <http://www.tldp.org/docs.html#howto>.
-
- If you don't know what to do here, say N.
-
-config SMP_ALTERNATIVES
- bool "SMP alternatives support (EXPERIMENTAL)"
- depends on SMP && EXPERIMENTAL
- help
- Try to reduce the overhead of running an SMP kernel on a uniprocessor
- host slightly by replacing certain key instruction sequences
- according to whether we currently have more than one CPU available.
- This should provide a noticeable boost to performance when
- running SMP kernels on UP machines, and have negligible impact
- when running on a true SMP host.
-
- If unsure, say N.
-
config NR_CPUS
int "Maximum number of CPUs (2-255)"
range 2 255
cost of slightly increased overhead in some places. If unsure say
N here.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config X86_UP_APIC
config NOHIGHMEM
bool "off"
+ depends on !X86_NUMAQ
---help---
Linux can use up to 64 Gigabytes of physical memory on x86 systems.
However, the address space of 32-bit x86 processors is only 4
config HIGHMEM4G
bool "4GB"
+ depends on !X86_NUMAQ
help
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
choice
depends on EXPERIMENTAL && !X86_PAE
- prompt "Memory split"
+ prompt "Memory split" if EMBEDDED
default VMSPLIT_3G
help
Select the desired split between kernel and user memory.
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
-# Need comments to help the hapless user trying to turn on NUMA support
-comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
- depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
-
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
+config NODES_SHIFT
+ int
+ default "4" if X86_NUMAQ
+ default "3"
+ depends on NEED_MULTIPLE_NODES
+
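+# For orientation: MAX_NUMNODES is 1 << NODES_SHIFT, so the defaults above
+# allow 16 nodes on NUMA-Q and 8 nodes on other NUMA configurations.
+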
config HAVE_ARCH_BOOTMEM_NODE
bool
depends on NUMA
default y
config REGPARM
- bool "Use register arguments (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- default n
+ bool "Use register arguments"
+ default y
help
- Compile the kernel with -mregparm=3. This uses a different ABI
- and passes the first three arguments of a function call in registers.
- This will probably break binary only modules.
+ Compile the kernel with -mregparm=3. This instructs gcc to use
+ a more efficient function call ABI which passes the first three
+ arguments of a function call via registers, which results in denser
+ and faster code.
+
+ If this option is disabled, then the default ABI of passing
+ arguments via the stack is used.
+
+ If unsure, say Y.
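+
+# Illustrative sketch (not part of the kernel sources): with -mregparm=3,
+# gcc passes the first three integer arguments in %eax, %edx and %ecx; the
+# same ABI can be requested for a single function with the regparm attribute:
+#
+#	int __attribute__((regparm(3))) add3(int a, int b, int c)
+#	{
+#		return a + b + c;	/* a in %eax, b in %edx, c in %ecx */
+#	}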
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
---help---
- Say Y here to experiment with turning CPUs off and on. CPUs
- can be controlled through /sys/devices/system/cpu.
-
- Say N.
-
-config DOUBLEFAULT
- default y
- bool "Enable doublefault exception handler" if EMBEDDED
- depends on !X86_NO_TSS
- help
- This option allows trapping of rare doublefault exceptions that
- would otherwise cause a system to silently reboot. Disabling this
- option saves about 4k and might cause you much additional grey
- hair.
+ Say Y here to experiment with turning CPUs off and on, and to
+ enable suspend on SMP systems. CPUs can be controlled through
+ /sys/devices/system/cpu.
endmenu
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
- depends on HIGHMEM
menu "Power management options (ACPI, APM)"
depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
default y
--- /dev/null
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+ bool "Early printk" if EMBEDDED && DEBUG_KERNEL
+ default y
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally say N here,
+ unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
+config STACK_BACKTRACE_COLS
+ int "Stack backtraces per line" if DEBUG_KERNEL
+ range 1 3
+ default 2
+ help
+ Selects how many stack backtrace entries per line to display.
+
+ This can save screen space when displaying traces.
+
+comment "Page alloc debug is incompatible with Software Suspend on i386"
+ depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
+
+config DEBUG_PAGEALLOC
+ bool "Debug page memory allocations"
+ depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND && !HUGETLBFS
+ help
+ Unmap pages from the kernel linear mapping after free_pages().
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
+config DEBUG_RODATA
+ bool "Write protect kernel read-only data structures"
+ depends on DEBUG_KERNEL
+ help
+ Mark the kernel read-only data as write-protected in the pagetables,
+ in order to catch accidental (and incorrect) writes to such const
+ data. This option may have a slight performance impact because a
+ portion of the kernel code won't be covered by a 2MB TLB anymore.
+ If in doubt, say "N".
+
+config 4KSTACKS
+ bool "Use 4Kb for kernel stacks instead of 8Kb"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here the kernel will use a 4Kb stacksize for the
+ kernel stack attached to each process/thread. This facilitates
+ running more threads on a system and also reduces the pressure
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+config X86_FIND_SMP_CONFIG
+ bool
+ depends on X86_LOCAL_APIC || X86_VOYAGER
+ default y
+
+config X86_MPPARSE
+ bool
+ depends on X86_LOCAL_APIC && !X86_VISWS
+ default y
+
+config DOUBLEFAULT
+ default y
+ bool "Enable doublefault exception handler" if EMBEDDED
+ depends on !X86_NO_TSS
+ help
+ This option allows trapping of rare doublefault exceptions that
+ would otherwise cause a system to silently reboot. Disabling this
+ option saves about 4k and might cause you much additional grey
+ hair.
+
+endmenu
cflags-$(CONFIG_REGPARM) += -mregparm=3
+# temporary until string.h is fixed
+cflags-y += -ffreestanding
+
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
boot := arch/i386/boot
-.PHONY: zImage bzImage compressed zlilo bzlilo \
- zdisk bzdisk fdimage fdimage144 fdimage288 install
+PHONY += zImage bzImage compressed zlilo bzlilo \
+ zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
ifdef CONFIG_XEN
CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
zdisk bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-fdimage fdimage144 fdimage288: vmlinux
+fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
install:
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' bzdisk - Create a boot floppy in /dev/fd0'
echo ' fdimage - Create a boot floppy image'
+ echo ' isoimage - Create a boot CD-ROM image'
endef
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
+ arch/$(ARCH)/boot/image.iso \
+ arch/$(ARCH)/boot/mtools.conf
CLEAN_FILES += vmlinuz vmlinux-stripped
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
- pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- quirks.o i8237.o topology.o
+ pci-dma.o i386_ksyms.o i387.o bootflag.o \
+ quirks.o i8237.o topology.o alternative.o
obj-y += cpu/
obj-y += timers/
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o
EXTRA_AFLAGS := -traditional
unsigned long rsdp_phys = 0;
if (efi_enabled) {
- if (efi.acpi20)
- return __pa(efi.acpi20);
- else if (efi.acpi)
- return __pa(efi.acpi);
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+ return efi.acpi;
}
/*
* Scan memory looking for the RSDP signature. First search EBDA (low
{
int count;
+ if (!cpu_has_apic)
+ return -ENODEV;
+
/*
* Note that the LAPIC address is obtained from the MADT (32-bit value)
* and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
return -ENODEV;
}
+ if (!cpu_has_apic)
+ return -ENODEV;
+
/*
* if "noapic" boot option, don't look for IO-APICs
*/
#include <asm/i8253.h>
#include <mach_apic.h>
+#include <mach_apicdef.h>
#include <mach_ipi.h>
#include "io_ports.h"
*/
int apic_verbosity;
+int modern_apic(void)
+{
+ unsigned int lvr, version;
+ /* AMD systems use old APIC versions, so check the CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 0xf)
+ return 1;
+ lvr = apic_read(APIC_LVR);
+ version = GET_APIC_VERSION(lvr);
+ return version >= 0x14;
+}
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
+#else
+#ifdef CONFIG_XEN
+#define phys_pkg_id(a,b) a
+#endif
#endif
#include <asm/hypervisor.h>
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
#endif
-static int cachesize_override __devinitdata = -1;
-static int disable_x86_fxsr __devinitdata = 0;
-static int disable_x86_serial_nr __devinitdata = 1;
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
}
__setup("cachesize=", cachesize_setup);
-int __devinit get_model_name(struct cpuinfo_x86 *c)
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
}
-void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
/* Look up CPU names by table lookup. */
-static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
}
-static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
__setup("nofxsr", x86_fxsr_setup);
+static int __init x86_sep_setup(char * s)
+{
+ disable_x86_sep = 1;
+ return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
/* Standard macro to see if a specific flag is changeable */
static inline int flag_is_changeable_p(u32 flag)
{
/* Probe for the CPUID instruction */
-static int __devinit have_cpuid_p(void)
+static int __cpuinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
}
}
-void __devinit generic_identify(struct cpuinfo_x86 * c)
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
- int junk;
+ int ebx;
if (have_cpuid_p()) {
/* Get vendor name */
/* Intel-defined flags: level 0x00000001 */
if ( c->cpuid_level >= 0x00000001 ) {
u32 capability, excap;
- cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
c->x86_capability[0] = capability;
c->x86_capability[4] = excap;
c->x86 = (tfms >> 8) & 15;
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+#ifdef CONFIG_SMP
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+ c->apicid = (ebx >> 24) & 0xFF;
+#endif
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
#endif
}
-static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __devinit identify_cpu(struct cpuinfo_x86 *c)
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
clear_bit(X86_FEATURE_XMM, c->x86_capability);
}
+ /* SEP disabled? */
+ if (disable_x86_sep)
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
if (disable_pse)
clear_bit(X86_FEATURE_PSE, c->x86_capability);
else
/* Last resort... */
sprintf(c->x86_model_id, "%02x/%02x",
- c->x86_vendor, c->x86_model);
+ c->x86, c->x86_model);
}
/* Now the feature flags better reflect actual CPU features! */
}
#ifdef CONFIG_X86_HT
-void __devinit detect_ht(struct cpuinfo_x86 *c)
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
cpuid(1, &eax, &ebx, &ecx, &edx);
- c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
}
#endif
-void __devinit print_cpu_info(struct cpuinfo_x86 *c)
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
}
#ifdef CONFIG_HOTPLUG_CPU
-void __devinit cpu_uninit(void)
+void __cpuinit cpu_uninit(void)
{
int cpu = raw_smp_processor_id();
cpu_clear(cpu, cpu_initialized);
--- /dev/null
+/*
+ * Routines to identify caches on Intel CPUs.
+ *
+ * Changes:
+ * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
+ * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+#define LVL_1_INST 1
+#define LVL_1_DATA 2
+#define LVL_2 3
+#define LVL_3 4
+#define LVL_TRACE 5
+
+struct _cache_table
+{
+ unsigned char descriptor;
+ char cache_type;
+ short size;
+};
+
+/* all the cache descriptor types we care about (no TLB or trace cache entries) */
+static struct _cache_table cache_table[] __cpuinitdata =
+{
+ { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
+ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
+ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
+ { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
+ { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
+ { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
+ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
+ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
+ { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
+ { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
+ { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
+ { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
+ { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
+ { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
+ { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
+ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
+ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
+ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
+ { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
+ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
+ { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
+ { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
+ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
+ { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
+ { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
+ { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
+ { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
+ { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
+ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
+ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
+ { 0x00, 0, 0}
+};
+
+
+enum _cache_type
+{
+ CACHE_TYPE_NULL = 0,
+ CACHE_TYPE_DATA = 1,
+ CACHE_TYPE_INST = 2,
+ CACHE_TYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+ struct {
+ enum _cache_type type:5;
+ unsigned int level:3;
+ unsigned int is_self_initializing:1;
+ unsigned int is_fully_associative:1;
+ unsigned int reserved:4;
+ unsigned int num_threads_sharing:12;
+ unsigned int num_cores_on_die:6;
+ } split;
+ u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+ struct {
+ unsigned int coherency_line_size:12;
+ unsigned int physical_line_partition:10;
+ unsigned int ways_of_associativity:10;
+ } split;
+ u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+ struct {
+ unsigned int number_of_sets:32;
+ } split;
+ u32 full;
+};
+
+struct _cpuid4_info {
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned long size;
+ cpumask_t shared_cpu_map;
+};
+
+static unsigned short num_cache_leaves;
+
+static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+ unsigned int eax, ebx, ecx, edx;
+ union _cpuid4_leaf_eax cache_eax;
+
+ cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
+ cache_eax.full = eax;
+ if (cache_eax.split.type == CACHE_TYPE_NULL)
+ return -EIO; /* better error ? */
+
+ this_leaf->eax.full = eax;
+ this_leaf->ebx.full = ebx;
+ this_leaf->ecx.full = ecx;
+ this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
+ (this_leaf->ebx.split.coherency_line_size + 1) *
+ (this_leaf->ebx.split.physical_line_partition + 1) *
+ (this_leaf->ebx.split.ways_of_associativity + 1);
+ return 0;
+}
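+
+/*
+ * Worked example with hypothetical leaf values (not from real hardware):
+ * a leaf reporting number_of_sets = 4095, coherency_line_size = 63,
+ * physical_line_partition = 0 and ways_of_associativity = 7 gives
+ * (4095+1) * (63+1) * (0+1) * (7+1) = 2097152 bytes, i.e. a 2MB cache.
+ */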
+
+/* will only be called once; __init is safe here */
+static int __init find_num_cache_leaves(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ union _cpuid4_leaf_eax cache_eax;
+ int i = -1;
+
+ do {
+ ++i;
+ /* Do cpuid(4) loop to find out num_cache_leaves */
+ cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+ cache_eax.full = eax;
+ } while (cache_eax.split.type != CACHE_TYPE_NULL);
+ return i;
+}
+
+unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
+#endif
+
+ if (c->cpuid_level > 3) {
+ static int is_initialized;
+
+ if (is_initialized == 0) {
+ /* Init num_cache_leaves from boot CPU */
+ num_cache_leaves = find_num_cache_leaves();
+ is_initialized++;
+ }
+
+ /*
+ * Whenever possible use cpuid(4), deterministic cache
+ * parameters cpuid leaf to find the cache details
+ */
+ for (i = 0; i < num_cache_leaves; i++) {
+ struct _cpuid4_info this_leaf;
+
+ int retval;
+
+ retval = cpuid4_cache_lookup(i, &this_leaf);
+ if (retval >= 0) {
+ switch(this_leaf.eax.split.level) {
+ case 1:
+ if (this_leaf.eax.split.type ==
+ CACHE_TYPE_DATA)
+ new_l1d = this_leaf.size/1024;
+ else if (this_leaf.eax.split.type ==
+ CACHE_TYPE_INST)
+ new_l1i = this_leaf.size/1024;
+ break;
+ case 2:
+ new_l2 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l2_id = c->apicid >> index_msb;
+ break;
+ case 3:
+ new_l3 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l3_id = c->apicid >> index_msb;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ /*
+ * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+ * trace cache
+ */
+ if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+ /* supports eax=2 call */
+ int i, j, n;
+ int regs[4];
+ unsigned char *dp = (unsigned char *)regs;
+ int only_trace = 0;
+
+ if (num_cache_leaves != 0 && c->x86 == 15)
+ only_trace = 1;
+
+ /* Number of times to iterate */
+ n = cpuid_eax(2) & 0xFF;
+
+ for ( i = 0 ; i < n ; i++ ) {
+ cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+ /* If bit 31 is set, this is an unknown format */
+ for ( j = 0 ; j < 3 ; j++ ) {
+ if ( regs[j] < 0 ) regs[j] = 0;
+ }
+
+ /* Byte 0 is level count, not a descriptor */
+ for ( j = 1 ; j < 16 ; j++ ) {
+ unsigned char des = dp[j];
+ unsigned char k = 0;
+
+ /* look up this descriptor in the table */
+ while (cache_table[k].descriptor != 0)
+ {
+ if (cache_table[k].descriptor == des) {
+ if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+ break;
+ switch (cache_table[k].cache_type) {
+ case LVL_1_INST:
+ l1i += cache_table[k].size;
+ break;
+ case LVL_1_DATA:
+ l1d += cache_table[k].size;
+ break;
+ case LVL_2:
+ l2 += cache_table[k].size;
+ break;
+ case LVL_3:
+ l3 += cache_table[k].size;
+ break;
+ case LVL_TRACE:
+ trace += cache_table[k].size;
+ break;
+ }
+
+ break;
+ }
+
+ k++;
+ }
+ }
+ }
+ }
+
+ if (new_l1d)
+ l1d = new_l1d;
+
+ if (new_l1i)
+ l1i = new_l1i;
+
+ if (new_l2) {
+ l2 = new_l2;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ cpu_llc_id[cpu] = l2_id;
+#endif
+#endif
+ }
+
+ if (new_l3) {
+ l3 = new_l3;
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+ cpu_llc_id[cpu] = l3_id;
+#endif
+#endif
+ }
+
+ if (trace)
+ printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+ else if ( l1i )
+ printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+
+ if (l1d)
+ printk(", L1 D cache: %dK\n", l1d);
+ else
+ printk("\n");
+
+ if (l2)
+ printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+ if (l3)
+ printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+ c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+
+ return l2;
+}
+
+/* pointer to _cpuid4_info array (for each cache leaf) */
+static struct _cpuid4_info *cpuid4_info[NR_CPUS];
+#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
+
+#ifdef CONFIG_SMP
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = cpu_data;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, index);
+ num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
+
+ if (num_threads_sharing == 1)
+ cpu_set(cpu, this_leaf->shared_cpu_map);
+ else {
+ index_msb = get_count_order(num_threads_sharing);
+
+ for_each_online_cpu(i) {
+ if (c[i].apicid >> index_msb ==
+ c[cpu].apicid >> index_msb) {
+ cpu_set(i, this_leaf->shared_cpu_map);
+ if (i != cpu && cpuid4_info[i]) {
+ sibling_leaf = CPUID4_INFO_IDX(i, index);
+ cpu_set(cpu, sibling_leaf->shared_cpu_map);
+ }
+ }
+ }
+ }
+}
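+/*
+ * Sharing-test sketch with assumed numbers: a leaf with
+ * num_threads_sharing = 2 yields index_msb = 1, so two logical CPUs whose
+ * APIC IDs differ only in bit 0 (say 0x04 and 0x05) compare equal after
+ * the shift and are recorded as sharing this cache.
+ */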
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ int sibling;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, index);
+ for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+ sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+ cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+ }
+}
+#else
+static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
+#endif
+
+static void free_cache_attributes(unsigned int cpu)
+{
+ kfree(cpuid4_info[cpu]);
+ cpuid4_info[cpu] = NULL;
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+ struct _cpuid4_info *this_leaf;
+ unsigned long j;
+ int retval;
+ cpumask_t oldmask;
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ cpuid4_info[cpu] = kmalloc(
+ sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+ if (unlikely(cpuid4_info[cpu] == NULL))
+ return -ENOMEM;
+ memset(cpuid4_info[cpu], 0,
+ sizeof(struct _cpuid4_info) * num_cache_leaves);
+
+ oldmask = current->cpus_allowed;
+ retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ if (retval)
+ goto out;
+
+ /* Do cpuid and store the results */
+ retval = 0;
+ for (j = 0; j < num_cache_leaves; j++) {
+ this_leaf = CPUID4_INFO_IDX(cpu, j);
+ retval = cpuid4_cache_lookup(j, this_leaf);
+ if (unlikely(retval < 0))
+ break;
+ cache_shared_cpu_map_setup(cpu, j);
+ }
+ set_cpus_allowed(current, oldmask);
+
+out:
+ if (retval)
+ free_cache_attributes(cpu);
+ return retval;
+}
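+
+/*
+ * Note on the set_cpus_allowed() dance above: CPUID leaf 4 describes the
+ * caches of whichever CPU executes it, so the task is pinned to the target
+ * CPU for the lookups and the previous affinity mask is restored afterwards.
+ */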
+
+#ifdef CONFIG_SYSFS
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+
+/* pointer to kobject for cpuX/cache */
+static struct kobject * cache_kobject[NR_CPUS];
+
+struct _index_kobject {
+ struct kobject kobj;
+ unsigned int cpu;
+ unsigned short index;
+};
+
+/* pointer to array of kobjects for cpuX/cache/indexY */
+static struct _index_kobject *index_kobject[NR_CPUS];
+#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
+
+#define show_one_plus(file_name, object, val) \
+static ssize_t show_##file_name \
+ (struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
+}
+
+show_one_plus(level, eax.split.level, 0);
+show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
+show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
+show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
+show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
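+
+/*
+ * For reference, the first invocation above expands to roughly:
+ *
+ *	static ssize_t show_level(struct _cpuid4_info *this_leaf, char *buf)
+ *	{
+ *		return sprintf(buf, "%lu\n",
+ *			       (unsigned long)this_leaf->eax.split.level + 0);
+ *	}
+ */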
+
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+{
+ return sprintf (buf, "%luK\n", this_leaf->size / 1024);
+}
+
+static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
+{
+ char mask_str[NR_CPUS];
+ cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
+ return sprintf(buf, "%s\n", mask_str);
+}
+
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
+ switch(this_leaf->eax.split.type) {
+ case CACHE_TYPE_DATA:
+ return sprintf(buf, "Data\n");
+ break;
+ case CACHE_TYPE_INST:
+ return sprintf(buf, "Instruction\n");
+ break;
+ case CACHE_TYPE_UNIFIED:
+ return sprintf(buf, "Unified\n");
+ break;
+ default:
+ return sprintf(buf, "Unknown\n");
+ break;
+ }
+}
+
+struct _cache_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct _cpuid4_info *, char *);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#define define_one_ro(_name) \
+static struct _cache_attr _name = \
+ __ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(physical_line_partition);
+define_one_ro(ways_of_associativity);
+define_one_ro(number_of_sets);
+define_one_ro(size);
+define_one_ro(shared_cpu_map);
+
+static struct attribute * default_attrs[] = {
+ &type.attr,
+ &level.attr,
+ &coherency_line_size.attr,
+ &physical_line_partition.attr,
+ &ways_of_associativity.attr,
+ &number_of_sets.attr,
+ &size.attr,
+ &shared_cpu_map.attr,
+ NULL
+};
+
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->show ?
+ fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf) :
+ 0;
+ return ret;
+}
+
+static ssize_t store(struct kobject * kobj, struct attribute * attr,
+ const char * buf, size_t count)
+{
+ return 0;
+}
+
+static struct sysfs_ops sysfs_ops = {
+ .show = show,
+ .store = store,
+};
+
+static struct kobj_type ktype_cache = {
+ .sysfs_ops = &sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+static struct kobj_type ktype_percpu_entry = {
+ .sysfs_ops = &sysfs_ops,
+};
+
+static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+{
+ kfree(cache_kobject[cpu]);
+ kfree(index_kobject[cpu]);
+ cache_kobject[cpu] = NULL;
+ index_kobject[cpu] = NULL;
+ free_cache_attributes(cpu);
+}
+
+static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
+{
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ detect_cache_attributes(cpu);
+ if (cpuid4_info[cpu] == NULL)
+ return -ENOENT;
+
+ /* Allocate all required memory */
+ cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (unlikely(cache_kobject[cpu] == NULL))
+ goto err_out;
+ memset(cache_kobject[cpu], 0, sizeof(struct kobject));
+
+ index_kobject[cpu] = kmalloc(
+ sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
+ if (unlikely(index_kobject[cpu] == NULL))
+ goto err_out;
+ memset(index_kobject[cpu], 0,
+ sizeof(struct _index_kobject) * num_cache_leaves);
+
+ return 0;
+
+err_out:
+ cpuid4_cache_sysfs_exit(cpu);
+ return -ENOMEM;
+}
+
+/* Add/Remove cache interface for CPU device */
+static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i, j;
+ struct _index_kobject *this_object;
+ int retval = 0;
+
+ retval = cpuid4_cache_sysfs_init(cpu);
+ if (unlikely(retval < 0))
+ return retval;
+
+ cache_kobject[cpu]->parent = &sys_dev->kobj;
+ kobject_set_name(cache_kobject[cpu], "%s", "cache");
+ cache_kobject[cpu]->ktype = &ktype_percpu_entry;
+ retval = kobject_register(cache_kobject[cpu]);
+
+ for (i = 0; i < num_cache_leaves; i++) {
+ this_object = INDEX_KOBJECT_PTR(cpu,i);
+ this_object->cpu = cpu;
+ this_object->index = i;
+ this_object->kobj.parent = cache_kobject[cpu];
+ kobject_set_name(&(this_object->kobj), "index%1lu", i);
+ this_object->kobj.ktype = &ktype_cache;
+ retval = kobject_register(&(this_object->kobj));
+ if (unlikely(retval)) {
+ for (j = 0; j < i; j++) {
+ kobject_unregister(
+ &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
+ }
+ kobject_unregister(cache_kobject[cpu]);
+ cpuid4_cache_sysfs_exit(cpu);
+ break;
+ }
+ }
+ return retval;
+}
+
+static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i;
+
+ for (i = 0; i < num_cache_leaves; i++) {
+ cache_remove_shared_cpu_map(cpu, i);
+ kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+ }
+ kobject_unregister(cache_kobject[cpu]);
+ cpuid4_cache_sysfs_exit(cpu);
+ return;
+}
+
+static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct sys_device *sys_dev;
+
+ sys_dev = get_cpu_sysdev(cpu);
+ switch (action) {
+ case CPU_ONLINE:
+ cache_add_dev(sys_dev);
+ break;
+ case CPU_DEAD:
+ cache_remove_dev(sys_dev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cacheinfo_cpu_notifier =
+{
+ .notifier_call = cacheinfo_cpu_callback,
+};
+
+static int __cpuinit cache_sysfs_init(void)
+{
+ int i;
+
+ if (num_cache_leaves == 0)
+ return 0;
+
+ register_cpu_notifier(&cacheinfo_cpu_notifier);
+
+ for_each_online_cpu(i) {
+ cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
+ (void *)(long)i);
+ }
+
+ return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
+#endif
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
+#include <linux/mutex.h>
#include <asm/mtrr.h>
#include "mtrr.h"
-static DECLARE_MUTEX(mtrr_sem);
+static DEFINE_MUTEX(mtrr_mutex);
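+/* mtrr_mutex serialises the MTRR add/del hypercall paths further below. */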
void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned int *size, mtrr_type * type)
int error;
dom0_op_t op;
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
op.cmd = DOM0_ADD_MEMTYPE;
op.u.add_memtype.mfn = base;
op.u.add_memtype.type = type;
error = HYPERVISOR_dom0_op(&op);
if (error) {
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
BUG_ON(error > 0);
return error;
}
if (increment)
++usage_table[op.u.add_memtype.reg];
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
return op.u.add_memtype.reg;
}
int error = -EINVAL;
dom0_op_t op;
- down(&mtrr_sem);
+ mutex_lock(&mtrr_mutex);
if (reg < 0) {
/* Search for existing MTRR */
}
error = reg;
out:
- up(&mtrr_sem);
+ mutex_unlock(&mtrr_mutex);
return error;
}
* for the data I pass, and I need tags
* on the data to indicate what information I have
* squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
+ * all of that, so there is no need to invent something new.
*/
buf = (u32*)per_cpu_ptr(crash_notes, cpu);
if (!buf)
return 1;
local_irq_disable();
- if (!user_mode(regs)) {
+ if (!user_mode_vm(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
pushl %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
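+ # If the hardware trap flag (TF) was set in the saved EFLAGS on entry,
+ # note it as _TIF_SINGLESTEP so the single-step trap is re-armed on the
+ # way back to user space.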
+ testl $TF_MASK,EFLAGS(%esp)
+ jz no_singlestep
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
+#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <xen/interface/xen.h>
/*
* The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
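+ /* Keep the per-CPU GDT cache-line aligned. */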
+ .align L1_CACHE_BYTES
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
{
int i, j;
Dprintk("Rotating IRQs among CPUs.\n");
- for (i = 0; i < NR_CPUS; i++) {
- for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+ for_each_online_cpu(i) {
+ for (j = 0; j < NR_IRQS; j++) {
if (!irq_desc[j].action)
continue;
/* Is it a significant load ? */
unsigned long imbalance = 0;
cpumask_t allowed_mask, target_cpu_mask, tmp;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_possible_cpu(i) {
int package_index;
CPU_IRQ(i) = 0;
if (!cpu_online(i))
}
}
/* Find the least loaded processor package */
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (min_cpu_irq > CPU_IRQ(i)) {
*/
tmp_cpu_irq = 0;
tmp_loaded = -1;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (max_cpu_irq <= CPU_IRQ(i))
if (smp_num_siblings > 1 && !cpus_empty(tmp))
physical_balance = 1;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
+ for_each_online_cpu(i) {
irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
else
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_possible_cpu(i) {
kfree(irq_cpu_data[i].irq_delta);
+ irq_cpu_data[i].irq_delta = NULL;
kfree(irq_cpu_data[i].last_irq);
+ irq_cpu_data[i].last_irq = NULL;
}
return 0;
}
int __init irqbalance_disable(char *str)
{
irqbalance_disabled = 1;
- return 0;
+ return 1;
}
__setup("noirqbalance", irqbalance_disable);
* Don't check I/O APIC IDs for xAPIC systems. They have
* no meaning without the serial APIC bus.
*/
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
return;
/*
* This is broken; anything with a real cpu count has to
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+int timer_uses_ioapic_pin_0;
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
+ if (pin1 == 0)
+ timer_uses_ioapic_pin_0 = 1;
+
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
"report. Then try booting with the 'noapic' option");
}
#else
+int timer_uses_ioapic_pin_0 = 0;
#define check_timer() ((void)0)
#endif
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/syscalls.h>
#include <asm/msr.h>
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DECLARE_MUTEX(microcode_sem);
+static DEFINE_MUTEX(microcode_mutex);
static int microcode_open (struct inode *unused1, struct file *unused2)
{
return -EINVAL;
}
- down(&microcode_sem);
+ mutex_lock(&microcode_mutex);
ret = do_microcode_update(buf, len);
if (!ret)
ret = (ssize_t)len;
- up(&microcode_sem);
+ mutex_unlock(&microcode_mutex);
return ret;
}
-static int microcode_ioctl (struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- /*
- * XXX: will be removed after microcode_ctl
- * is updated to ignore failure of this ioctl()
- */
- case MICROCODE_IOCFREE:
- return 0;
- default:
- return -EINVAL;
- }
- return -EINVAL;
-}
-
static struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
- .ioctl = microcode_ioctl,
.open = microcode_open,
};
static void __exit microcode_exit (void)
{
misc_deregister(&microcode_dev);
- printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n");
}
module_init(microcode_init)
int smp_found_config;
unsigned int __initdata maxcpus = NR_CPUS;
-#ifdef CONFIG_HOTPLUG_CPU
-#define CPU_HOTPLUG_ENABLED (1)
-#else
-#define CPU_HOTPLUG_ENABLED (0)
-#endif
-
/*
* Various Linux-internal data structures created from the
* MP-table.
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
-#ifdef CONFIG_X86_NUMAQ
-static int MP_valid_apicid(int apicid, int version)
-{
- return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
-}
-#elif !defined(CONFIG_XEN)
-static int MP_valid_apicid(int apicid, int version)
-{
- if (version >= 0x14)
- return apicid < 0xff;
- else
- return apicid < 0xf;
-}
-#endif
-
#ifndef CONFIG_XEN
static void __devinit MP_processor_info (struct mpc_config_processor *m)
{
ver = m->mpc_apicver;
- if (!MP_valid_apicid(apicid, ver)) {
- printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- return;
- }
-
/*
* Validate version
*/
cpu_set(num_processors, cpu_possible_map);
num_processors++;
- if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) {
+ /*
+ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+ * but we need to work other dependencies like SMP_SUSPEND etc
+ * before this can be done without some confusion.
+ * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+ * - Ashok Raj <ashok.raj@intel.com>
+ */
+ if (num_processors > 8) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!APIC_XAPIC(ver)) {
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+ if (m->mpc_busid >= MAX_MP_BUSSES) {
+ printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+ " is too large, max. supported is %d\n",
+ m->mpc_busid, str, MAX_MP_BUSSES - 1);
+ return;
+ }
+
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
#endif
}
+int es7000_plat;
+
/* --------------------------------------------------------------------------
ACPI-based MP Configuration
-------------------------------------------------------------------------- */
#ifndef CONFIG_XEN
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
#endif
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
tmpid = io_apic_get_unique_id(idx, id);
else
tmpid = id;
return;
}
-int es7000_plat;
-
void __init mp_config_acpi_legacy_irqs (void)
{
struct mpc_config_intsrc intsrc;
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
- if (gsi > 15)
+ /*
+ * Retain the VIA chipset work-around (gsi > 15), but
+ * avoid a problem where the 8254 timer (IRQ0) is setup
+ * via an override (so it's not on pin 0 of the ioapic),
+ * and at the same time, the pin 0 interrupt is a PCI
+ * type. The gsi > 15 test could cause these two pins
+ * to be shared as IRQ0, and they are not shareable.
+ * So test for this condition, and if necessary, avoid
+ * the pin collision.
+ */
+ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
-#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
print_symbol("EIP is at %s\n", regs->eip);
- if (user_mode(regs))
+ if (user_mode_vm(regs))
printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
printk(" EFLAGS: %08lx %s (%s %.*s)\n",
regs->eflags, print_tainted(), system_utsname.release,
struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
struct physdev_set_iobitmap set_iobitmap = { 0 };
} while (count++ < 16);
return 0;
}
-EXPORT_SYMBOL(get_wchan);
/*
* sys_alloc_thread_area: get a yet unused TLS descriptor index.
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
+#include <linux/platform_device.h>
#include <linux/console.h>
#include <linux/mca.h>
#include <linux/root_dev.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/dmi.h>
+#include <linux/pfn.h>
#include <video/edid.h>
return 0;
}
+ /*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case.
+ */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+ u64 start = s;
+ u64 end = e;
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* does the region overlap (at least partly) with the current range? */
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ /* if the region covers the beginning of <start,end>, advance
+ * start past it, since the range is mapped up to that point
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full
+ * coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
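+
+/*
+ * Example walk (hypothetical, assuming a sorted map): for <start,end> =
+ * <0x0,0xA0000> and E820_RAM entries [0x0,0x60000) and [0x60000,0xA0000),
+ * the first entry advances start to 0x60000 and the second to 0xA0000;
+ * start is then >= end, so the range is reported as fully mapped.
+ */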
+
/*
* Find the highest page frame number we have available
*/
free_available_memory(unsigned long start, unsigned long end, void *arg)
{
/* check max_low_pfn */
- if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ if (start >= (max_low_pfn << PAGE_SHIFT))
return 0;
- if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
- end = (max_low_pfn + 1) << PAGE_SHIFT;
+ if (end >= (max_low_pfn << PAGE_SHIFT))
+ end = max_low_pfn << PAGE_SHIFT;
if (start < end)
free_bootmem(start, end - start);
struct resource *res;
if (e820[i].addr + e820[i].size > 0x100000000ULL)
continue;
- res = alloc_bootmem_low(sizeof(struct resource));
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
switch (e820[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
/*
* Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
*/
-static void __init register_memory(void)
+static int __init request_standard_resources(void)
{
#ifdef CONFIG_XEN
struct xen_memory_map memmap;
/* Nothing to do if not running in dom0. */
if (!is_initial_xendomain())
- return;
+ return 0;
+ printk("Setting up standard PCI resources\n");
#ifdef CONFIG_XEN
memmap.nr_entries = E820MAX;
set_xen_guest_handle(memmap.buffer, machine_e820.map);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
+ return 0;
+}
+
+subsys_initcall(request_standard_resources);
+
+static void __init register_memory(void)
+{
#ifdef CONFIG_XEN
e820_setup_gap(machine_e820.map, machine_e820.nr_map);
#endif
}
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nintelnops: "
- GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
- GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static struct nop {
- int cpuid;
- unsigned char **noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { -1, NULL }
-};
-
-/* Replace instructions with better alternatives for this CPU type.
-
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that assymetric systems where
- APs have less capabilities than the boot processor are not handled.
- Tough. Make sure you disable such features by hand. */
-void apply_alternatives(void *start, void *end)
-{
- struct alt_instr *a;
- int diff, i, k;
- unsigned char **noptable = intel_nops;
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- for (a = start; (void *)a < end; a++) {
- if (!boot_cpu_has(a->cpuid))
- continue;
- BUG_ON(a->replacementlen > a->instrlen);
- memcpy(a->instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
- }
-}
-
-void __init alternative_instructions(void)
-{
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
static char * __init machine_specific_memory_setup(void);
#ifdef CONFIG_MCA
panic_timeout = 1;
/* Register a call for panic conditions. */
- notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable,
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
early_cpu_init();
+#ifdef CONFIG_SMP
+ prefill_possible_map();
+#endif
/*
* FIXME: This isn't an official loader_type right
parse_cmdline_early(cmdline_p);
+#ifdef CONFIG_EARLY_PRINTK
+ {
+ char *s = strstr(*cmdline_p, "earlyprintk=");
+ if (s) {
+ setup_early_printk(strchr(s, '=') + 1);
+ printk("early console enabled\n");
+ }
+ }
+#endif
+
max_low_pfn = setup_memory();
/*
* NOTE: at this point the bootmem allocator is fully available.
*/
-#ifdef CONFIG_EARLY_PRINTK
- {
- char *s = strstr(*cmdline_p, "earlyprintk=");
- if (s) {
- extern void setup_early_printk(char *);
-
- setup_early_printk(strchr(s, '=') + 1);
- printk("early console enabled\n");
- }
- }
-#endif
-
if (is_initial_xendomain())
dmi_scan_machine();
set_iopl.iopl = 1;
HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-#ifdef CONFIG_X86_IO_APIC
- check_acpi_pci(); /* Checks more than just ACPI actually */
-#endif
-
#ifdef CONFIG_ACPI
if (!is_initial_xendomain()) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ check_acpi_pci(); /* Checks more than just ACPI actually */
+#endif
+
+#ifdef CONFIG_ACPI
acpi_boot_init();
#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
return NOTIFY_DONE;
}
+static __init int add_pcspkr(void)
+{
+ struct platform_device *pd;
+ int ret;
+
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
+
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
+}
+device_initcall(add_pcspkr);
+
#include "setup_arch_post.h"
/*
* Local Variables:
spin_unlock_irq(&call_lock);
}
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
+static struct call_data_struct *call_data;
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Does not return until
* remote CPUs are nearly ready to execute <<func>> or have already executed it.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
{
struct call_data_struct data;
int cpus;
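
/*
 * Usage sketch (illustrative, not from this patch): the handler runs on
 * every other online CPU in interrupt context, so it must be fast and
 * non-blocking:
 *
 *	static atomic_t hits = ATOMIC_INIT(0);
 *
 *	static void bump(void *info)
 *	{
 *		atomic_inc((atomic_t *)info);
 *	}
 *
 *	smp_call_function(bump, &hits, 0, 1);
 *
 * With wait == 1 the call blocks until every handler has completed.
 */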
+++ /dev/null
-#include <linux/kernel.h>
-#include <asm/system.h>
-#include <asm/smp_alt.h>
-#include <asm/processor.h>
-#include <asm/string.h>
-
-struct smp_replacement_record {
- unsigned char targ_size;
- unsigned char smp1_size;
- unsigned char smp2_size;
- unsigned char up_size;
- unsigned char feature;
- unsigned char data[0];
-};
-
-struct smp_alternative_record {
- void *targ_start;
- struct smp_replacement_record *repl;
-};
-
-extern struct smp_alternative_record __start_smp_alternatives_table,
- __stop_smp_alternatives_table;
-extern unsigned long __init_begin, __init_end;
-
-void prepare_for_smp(void)
-{
- struct smp_alternative_record *r;
- printk(KERN_INFO "Enabling SMP...\n");
- for (r = &__start_smp_alternatives_table;
- r != &__stop_smp_alternatives_table;
- r++) {
- BUG_ON(r->repl->targ_size < r->repl->smp1_size);
- BUG_ON(r->repl->targ_size < r->repl->smp2_size);
- BUG_ON(r->repl->targ_size < r->repl->up_size);
- if (system_state == SYSTEM_RUNNING &&
- r->targ_start >= (void *)&__init_begin &&
- r->targ_start < (void *)&__init_end)
- continue;
- if (r->repl->feature != (unsigned char)-1 &&
- boot_cpu_has(r->repl->feature)) {
- memcpy(r->targ_start,
- r->repl->data + r->repl->smp1_size,
- r->repl->smp2_size);
- memset(r->targ_start + r->repl->smp2_size,
- 0x90,
- r->repl->targ_size - r->repl->smp2_size);
- } else {
- memcpy(r->targ_start,
- r->repl->data,
- r->repl->smp1_size);
- memset(r->targ_start + r->repl->smp1_size,
- 0x90,
- r->repl->targ_size - r->repl->smp1_size);
- }
- }
- /* Paranoia */
- asm volatile ("jmp 1f\n1:");
- mb();
-}
-
-void unprepare_for_smp(void)
-{
- struct smp_alternative_record *r;
- printk(KERN_INFO "Disabling SMP...\n");
- for (r = &__start_smp_alternatives_table;
- r != &__stop_smp_alternatives_table;
- r++) {
- BUG_ON(r->repl->targ_size < r->repl->smp1_size);
- BUG_ON(r->repl->targ_size < r->repl->smp2_size);
- BUG_ON(r->repl->targ_size < r->repl->up_size);
- if (system_state == SYSTEM_RUNNING &&
- r->targ_start >= (void *)&__init_begin &&
- r->targ_start < (void *)&__init_end)
- continue;
- memcpy(r->targ_start,
- r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
- r->repl->up_size);
- memset(r->targ_start + r->repl->up_size,
- 0x90,
- r->repl->targ_size - r->repl->up_size);
- }
- /* Paranoia */
- asm volatile ("jmp 1f\n1:");
- mb();
-}
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
if (tsc_values[i] < avg)
realdelta = -realdelta;
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ if (realdelta > 0)
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC "
+ "skew, fixed it up.\n", i, realdelta);
}
sum += delta;
static int cpucount;
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+ /*
+ * For perf, we return last level cache shared map.
+ * TBD: when power saving sched policy is added, we will return
+ * cpu_core_map when power saving policy is enabled
+ */
+ return c->llc_shared_map;
+}
+
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
cpu_set(cpu, cpu_sibling_map[i]);
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
}
}
} else {
cpu_set(cpu, cpu_sibling_map[cpu]);
}
+ cpu_set(cpu, c[cpu].llc_shared_map);
+
if (current_cpu_data.x86_max_cores == 1) {
cpu_core_map[cpu] = cpu_sibling_map[cpu];
c[cpu].booted_cores = 1;
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (cpu_llc_id[cpu] != BAD_APICID &&
+ cpu_llc_id[cpu] == cpu_llc_id[i]) {
+ cpu_set(i, c[cpu].llc_shared_map);
+ cpu_set(cpu, c[i].llc_shared_map);
+ }
if (phys_proc_id[cpu] == phys_proc_id[i]) {
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
unsigned short nmi_high = 0, nmi_low = 0;
++cpucount;
+ alternatives_smp_switch(1);
/*
* We can't use kernel_thread since we must avoid to
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
- cpu_clear(cpu, cpu_present_map);
cpu_clear(cpu, smp_commenced_mask);
unmap_cpu_to_logical_apicid(cpu);
int cpu;
};
-static void __devinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(void *p)
{
struct warm_boot_cpu_info *info = p;
do_boot_cpu(info->apicid, info->cpu);
complete(info->complete);
}
-int __devinit smp_prepare_cpu(int cpu)
+static int __cpuinit __smp_prepare_cpu(int cpu)
{
DECLARE_COMPLETION(done);
struct warm_boot_cpu_info info;
struct work_struct task;
int apicid, ret;
- lock_cpu_hotplug();
-
- /*
- * On x86, CPU0 is never offlined. Trying to bring up an
- * already-booted CPU will hang. So check for that case.
- */
- if (cpu_online(cpu)) {
- ret = -EINVAL;
- goto exit;
- }
-
apicid = x86_cpu_to_apicid[cpu];
if (apicid == BAD_APICID) {
ret = -ENODEV;
zap_low_mappings();
ret = 0;
exit:
- unlock_cpu_hotplug();
return ret;
}
#endif
if (max_cpus <= cpucount+1)
continue;
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (kicked == 1)
- prepare_for_smp();
-#endif
-
if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
printk ("CPU %d is now offline\n", cpu);
+		if (num_online_cpus() == 1)
+ alternatives_smp_switch(0);
return;
}
msleep(100);
int __devinit __cpu_up(unsigned int cpu)
{
+#ifdef CONFIG_HOTPLUG_CPU
+	int ret = 0;
+
+ /*
+	 * We do a warm boot only on CPUs that have booted earlier;
+	 * otherwise cold boot is handled entirely from smp_boot_cpus().
+	 * cpu_callin_map is set during the AP kickstart process. It's
+	 * reset when a CPU is taken offline, from cpu_exit_clear().
+ */
+ if (!cpu_isset(cpu, cpu_callin_map))
+ ret = __smp_prepare_cpu(cpu);
+
+ if (ret)
+ return -EIO;
+#endif
+
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
return -EIO;
}
-#ifdef CONFIG_SMP_ALTERNATIVES
- if (num_online_cpus() == 1)
- prepare_for_smp();
-#endif
-
local_irq_enable();
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&i386die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&i386die_chain, nb);
}
EXPORT_SYMBOL(register_die_notifier);
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&i386die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
return p > (void *)tinfo &&
p < (void *)tinfo + THREAD_SIZE - 3;
}
-static void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+/*
+ * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ */
+static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
+ int printed)
{
- printk(log_lvl);
+ if (!printed)
+ printk(log_lvl);
+
+#if CONFIG_STACK_BACKTRACE_COLS == 1
printk(" [<%08lx>] ", addr);
+#else
+ printk(" <%08lx> ", addr);
+#endif
print_symbol("%s", addr);
- printk("\n");
+
+ printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
+ if (printed)
+ printk(" ");
+ else
+ printk("\n");
+
+ return printed;
}
static inline unsigned long print_context_stack(struct thread_info *tinfo,
char *log_lvl)
{
unsigned long addr;
+ int printed = 0; /* nr of entries already printed on current line */
#ifdef CONFIG_FRAME_POINTER
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
ebp = *(unsigned long *)ebp;
}
#else
while (valid_stack_ptr(tinfo, stack)) {
addr = *stack++;
if (__kernel_text_address(addr))
- print_addr_and_symbol(addr, log_lvl);
+ printed = print_addr_and_symbol(addr, log_lvl, printed);
}
#endif
+ if (printed)
+ printk("\n");
+
return ebp;
}
stack = (unsigned long*)context->previous_esp;
if (!stack)
break;
- printk(log_lvl);
- printk(" =======================\n");
+ printk("%s =======================\n", log_lvl);
}
}
}
stack = esp;
- printk(log_lvl);
for(i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
- if (i && ((i % 8) == 0)) {
- printk("\n");
- printk(log_lvl);
- printk(" ");
- }
+ if (i && ((i % 8) == 0))
+ printk("\n%s ", log_lvl);
printk("%08lx ", *stack++);
}
- printk("\n");
- printk(log_lvl);
- printk("Call Trace:\n");
+ printk("\n%sCall Trace:\n", log_lvl);
show_trace_log_lvl(task, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
+ printk(" ");
show_stack_log_lvl(task, esp, "");
}
esp = (unsigned long) (&regs->esp);
savesegment(ss, ss);
- if (user_mode(regs)) {
+ if (user_mode_vm(regs)) {
in_kernel = 0;
esp = regs->esp;
ss = regs->xss & 0xffff;
static int die_counter;
unsigned long flags;
+ oops_enter();
+
if (die.lock_owner != raw_smp_processor_id()) {
console_verbose();
spin_lock_irqsave(&die.lock, flags);
if (++die.lock_owner_depth < 3) {
int nl = 0;
+ unsigned long esp;
+ unsigned short ss;
+
handle_BUG(regs);
printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
#endif
if (nl)
printk("\n");
- notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_registers(regs);
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) !=
+ NOTIFY_STOP) {
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+			esp = (unsigned long) (&regs->esp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
+ print_symbol("%s", regs->eip);
+ printk(" SS:ESP %04x:%08lx\n", ss, esp);
+ }
+ else
+ regs = NULL;
} else
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
die.lock_owner = -1;
spin_unlock_irqrestore(&die.lock, flags);
+ if (!regs)
+ return;
+
if (kexec_should_crash(current))
crash_kexec(regs);
ssleep(5);
panic("Fatal exception");
}
+ oops_exit();
do_exit(SIGSEGV);
}
void die_nmi (struct pt_regs *regs, const char *msg)
{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
NOTIFY_STOP)
return;
/* If we are in kernel we are probably nested up pretty bad
* and might as well get out now while we still can.
*/
- if (!user_mode(regs)) {
+ if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
crash_kexec(regs);
}
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
#ifdef CONFIG_X86_LOCAL_APIC
unknown_nmi_error(reason, regs);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
void set_nmi_callback(nmi_callback_t callback)
{
+ vmalloc_sync_all();
rcu_assign_pointer(nmi_callback, callback);
}
EXPORT_SYMBOL_GPL(set_nmi_callback);
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("kstack=", kstack_setup);
/* call audit_syscall_exit since we do not exit via the normal paths */
if (unlikely(current->audit_context))
- audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+ audit_syscall_exit(AUDITSC_RESULT(eax), eax);
__asm__ __volatile__(
"movl %0,%%esp\n\t"
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/cache.h>
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
- . = ALIGN(16);
- __start_smp_alternatives_table = .;
- __smp_alternatives : AT(ADDR(__smp_alternatives) - LOAD_OFFSET) { *(__smp_alternatives) }
- __stop_smp_alternatives_table = .;
-
- __smp_replacements : AT(ADDR(__smp_replacements) - LOAD_OFFSET) { *(__smp_replacements) }
-
RODATA
/* writeable */
*(.data.init_task)
}
+ /* might get freed after init */
+ . = ALIGN(4096);
+ __smp_alt_begin = .;
+ __smp_alt_instructions = .;
+ .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+ *(.smp_altinstructions)
+ }
+ __smp_alt_instructions_end = .;
+ . = ALIGN(4);
+ __smp_locks = .;
+ .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ *(.smp_locks)
+ }
+ __smp_locks_end = .;
+ .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+ *(.smp_altinstr_replacement)
+ }
+ . = ALIGN(4096);
+ __smp_alt_end = .;
+
/* will be freed after init */
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
__initramfs_start = .;
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
- . = ALIGN(32);
+ . = ALIGN(L1_CACHE_BYTES);
__per_cpu_start = .;
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
- machine_to_phys(page));
+ if (oops_may_print())
+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+ machine_to_phys(page));
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
- if (page & 1) {
+ if ((page & 1) && oops_may_print()) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = machine_to_phys(page);
return 1;
}
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+ if (!pmd_present(*pmd))
+#ifndef CONFIG_XEN
+ set_pmd(pmd, *pmd_k);
+#else
+ /*
+ * When running on Xen we must launder *pmd_k through
+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
+ */
+ set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
+#endif
+ else
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
+ */
+static inline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+ return 0;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* bit 0 == 0 means no page found, 1 means protection fault
* bit 1 == 0 means read, 1 means write
* bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
*/
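/*
 * Illustrative sketch (not part of this patch): decoding the error_code
 * bits listed above. Helper names are hypothetical.
 */
static inline int fault_was_write(unsigned long error_code)
{
	return error_code & 2;	/* bit 1: write access */
}
static inline int fault_in_user_mode(unsigned long error_code)
{
	return error_code & 4;	/* bit 2: fault raised in user mode */
}
static inline int fault_used_reserved_bit(unsigned long error_code)
{
	return error_code & 8;	/* bit 3: reserved bit set in a page table entry */
}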
fastcall void __kprobes do_page_fault(struct pt_regs *regs,
unsigned long error_code)
if (regs->eflags & X86_EFLAGS_VM)
error_code |= 4;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
- /* It's safe to allow irq's after cr2 has been saved */
- if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
- local_irq_enable();
-
tsk = current;
si_code = SEGV_MAPERR;
*
* This verifies that the fault happens in kernel space
* (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 1) == 0.
+ * protection error (error_code & 9) == 0.
*/
- if (unlikely(address >= TASK_SIZE)) {
+ if (unlikely(address >= TASK_SIZE)) {
#ifdef CONFIG_XEN
/* Faults in hypervisor area can never be patched up. */
if (address >= hypervisor_virt_start)
goto bad_area_nosemaphore;
#endif
- if (!(error_code & 5))
- goto vmalloc_fault;
+ if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+ return;
/* Can take a spurious fault if mapping changes R/O -> R/W. */
if (spurious_fault(regs, address, error_code))
return;
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
*/
goto bad_area_nosemaphore;
- }
+ }
+
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+	/* It's safe to allow IRQs after cr2 has been saved and the vmalloc
+	   fault has been handled. */
+ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
+ local_irq_enable();
mm = tsk->mm;
bust_spinlocks(1);
-#ifdef CONFIG_X86_PAE
- if (error_code & 16) {
- pte_t *pte = lookup_address(address);
+ if (oops_may_print()) {
+ #ifdef CONFIG_X86_PAE
+ if (error_code & 16) {
+ pte_t *pte = lookup_address(address);
- if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
- printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+ printk(KERN_CRIT "kernel tried to execute "
+ "NX-protected page - exploit attempt? "
+ "(uid: %d)\n", current->uid);
+ }
+ #endif
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+ "pointer dereference");
+ else
+ printk(KERN_ALERT "BUG: unable to handle kernel paging"
+ " request");
+ printk(" at virtual address %08lx\n",address);
+ printk(KERN_ALERT " printing eip:\n");
+ printk("%08lx\n", regs->eip);
}
-#endif
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at virtual address %08lx\n",address);
- printk(KERN_ALERT " printing eip:\n");
- printk("%08lx\n", regs->eip);
dump_fault_path(address);
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
- return;
-
-vmalloc_fault:
- {
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- int index = pgd_index(address);
- unsigned long pgd_paddr;
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
-
- pgd_paddr = read_cr3();
- pgd = index + (pgd_t *)__va(pgd_paddr);
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- goto no_context;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
+}
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- goto no_context;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- goto no_context;
-#ifndef CONFIG_XEN
- set_pmd(pmd, *pmd_k);
-#else
- /*
- * When running on Xen we must launder *pmd_k through
- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
- */
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
-#endif
+#ifndef CONFIG_X86_PAE
+void vmalloc_sync_all(void)
+{
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = TASK_SIZE;
+ unsigned long address;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- goto no_context;
- return;
+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+ for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ for (page = pgd_list; page; page =
+ (struct page *)page->index)
+ if (!vmalloc_sync_one(page_address(page),
+ address)) {
+ BUG_ON(page != pgd_list);
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ if (!page)
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start && test_bit(pgd_index(address), insync))
+ start = address + PGDIR_SIZE;
}
}
+#endif
static void __meminit free_new_highpage(struct page *page, int pfn)
{
- set_page_count(page, 1);
+ init_page_count(page);
if (pfn < xen_start_info->nr_pages)
__free_page(page);
totalhigh_pages++;
/* XEN: init and count low-mem pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
ClearPageReserved(pfn_to_page(pfn));
- set_page_count(pfn_to_page(pfn), 1);
+ init_page_count(pfn_to_page(pfn));
totalram_pages++;
}
* Specifically, in the case of x86, we will always add
* memory to the highmem for now.
*/
+#ifdef CONFIG_MEMORY_HOTPLUG
#ifndef CONFIG_NEED_MULTIPLE_NODES
int add_memory(u64 start, u64 size)
{
return -EINVAL;
}
#endif
+#endif
kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
return flag;
}
-void free_initmem(void)
-{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
- memset((void *)addr, 0xcc, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
-}
-
#ifdef CONFIG_DEBUG_RODATA
extern char __start_rodata, __end_rodata;
}
#endif
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ init_page_count(virt_to_page(addr));
+ memset((void *)addr, 0xcc, PAGE_SIZE);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
+}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < end)
- printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
- free_page(start);
- totalram_pages++;
- }
+ free_init_pages("initrd memory", start, end);
}
#endif
+
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
if (pte) {
SetPageForeign(pte, pte_free);
- set_page_count(pte, 1);
+ init_page_count(pte);
}
#endif
return pte;
va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
ClearPageForeign(pte);
- set_page_count(pte, 1);
+ init_page_count(pte);
__free_page(pte);
}
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
-obj-y := i386.o
+obj-y := i386.o init.o
obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
*/
static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
case PCI_DEVICE_ID_VIA_82C596:
case PCI_DEVICE_ID_VIA_82C686:
case PCI_DEVICE_ID_VIA_8231:
+ case PCI_DEVICE_ID_VIA_8233A:
case PCI_DEVICE_ID_VIA_8235:
+ case PCI_DEVICE_ID_VIA_8237:
/* FIXME: add new ones for 8233/5 */
r->name = "VIA";
r->get = pirq_via_get;
bool
default y
+config GENERIC_FIND_NEXT_BIT
+ bool
+ default y
+
config GENERIC_CALIBRATE_DELAY
bool
default y
bool
default y
+config DMI
+ bool
+ default y
+
config EFI
bool
default y
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
config SCHED_SMT
bool "SMT scheduler support"
depends on SMP
Intel IA64 chips with MultiThreading at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PERMIT_BSP_REMOVE
+ bool "Support removal of Bootstrap Processor"
+ depends on HOTPLUG_CPU
+ default n
+ ---help---
+	  Say Y here if your platform SAL supports removal of the BSP
+	  when HOTPLUG_CPU support is enabled.
+
+config FORCE_CPEI_RETARGET
+ bool "Force assumption that CPEI can be re-targetted"
+ depends on PERMIT_BSP_REMOVE
+ default n
+ ---help---
+	  Say Y if you need to force the assumption that CPEI can be re-targeted to
+	  any CPU in the system. This hint is available via the ACPI 3.0 specification.
+	  Tiger4 systems are capable of redirecting CPEI to any CPU other than the BSP.
+	  This option is useful for enabling this feature on older BIOSes as well.
+	  You can also enable this with the boot command line option force_cpei=1.
+
config PREEMPT
bool "Preemptible Kernel"
help
Access). This option is for configuring high-end multiprocessor
server systems. If in doubt, say N.
+config NODES_SHIFT
+ int "Max num nodes shift(3-10)"
+ range 3 10
+ default "8"
+ depends on NEED_MULTIPLE_NODES
+ help
+ This option specifies the maximum number of nodes in your SSI system.
+	  MAX_NUMNODES will be 2^(this value).
+ If in doubt, use the default.
+
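As a worked example of the help text above, and assuming the usual relation
where MAX_NUMNODES is 1 << NODES_SHIFT (a sketch, not part of this patch):
the default of 8 yields 2^8 = 256 possible nodes.

/* Sketch only: assumed relation between the option and MAX_NUMNODES. */
#define EXAMPLE_NODES_SHIFT	8
#define EXAMPLE_MAX_NUMNODES	(1 << EXAMPLE_NODES_SHIFT)	/* 256 */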
# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
# VIRTUAL_MEM_MAP has been retained for historical reasons.
config VIRTUAL_MEM_MAP
config SGI_SN
def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+source "drivers/sn/Kconfig"
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
#
# ia64/Makefile
#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
boot := arch/ia64/hp/sim/boot
-.PHONY: boot compressed check
+PHONY += boot compressed check
all: compressed unwcheck
xen_start_info->console.domU.evtchn = 0;
#endif
}
-
-void __init
-dig_irq_init (void)
-{
-}
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
data8 sys_ni_syscall // reserved for pselect
data8 sys_ni_syscall // 1295 reserved for ppoll
data8 sys_unshare
+ data8 sys_splice
+ data8 sys_set_robust_list
+ data8 sys_get_robust_list
+ data8 sys_sync_file_range // 1300
+ data8 sys_tee
+ data8 sys_vmsplice
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(__ex_table)
+ *(__mca_table)
}
}
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
*
- * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
- * In particular, we now have separate handlers for edge
- * and level triggered interrupts.
- * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
- * PCI to vector mapping, shared PCI interrupts.
- * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
- * Clean up much of the old IOSAPIC cruft.
- * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
- * ACPI S5(SoftOff) support.
+ * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O
+ * APIC code. In particular, we now have separate
+ * handlers for edge and level triggered
+ * interrupts.
+ * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ *	00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ *			allocation, PCI to vector mapping, shared PCI
+ * interrupts.
+ * 00/10/27 D. Mosberger Document things a bit more to make them more
+ * understandable. Clean up much of the old
+ * IOSAPIC cruft.
+ * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts
+ * and fixes for ACPI S5(SoftOff) support.
* 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
- * iosapic_set_affinity(), initializations for
- * /proc/irq/#/smp_affinity
+ * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt
+ * vectors in iosapic_set_affinity(),
+ * initializations for /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
- * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
- * error
+ * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to
+ * IOSAPIC mapping error
* 02/07/29 T. Kochi Allocate interrupt vectors dynamically
- * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
- * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
+ * 02/08/04 T. Kochi Cleaned up terminology (irq, global system
+ * interrupt, vector, etc.)
+ * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's
+ * pci_irq code.
* 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
- * Remove iosapic_address & gsi_base from external interfaces.
- * Rationalize __init/__devinit attributes.
+ * Remove iosapic_address & gsi_base from
+ * external interfaces. Rationalize
+ * __init/__devinit attributes.
* 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
- * Updated to work with irq migration necessary for CPU Hotplug
+ * Updated to work with irq migration necessary
+ * for CPU Hotplug
*/
/*
- * Here is what the interrupt logic between a PCI device and the kernel looks like:
+ * Here is what the interrupt logic between a PCI device and the kernel looks
+ * like:
*
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
- * device is uniquely identified by its bus--, and slot-number (the function
- * number does not matter here because all functions share the same interrupt
- * lines).
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
+ * INTD). The device is uniquely identified by its bus and slot number
+ * (the function number does not matter here because all functions share
+ * the same interrupt lines).
*
- * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
- * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
- * triggered and use the same polarity). Each interrupt line has a unique Global
- * System Interrupt (GSI) number which can be calculated as the sum of the controller's
- * base GSI number and the IOSAPIC pin number to which the line connects.
+ * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
+ * controller. Multiple interrupt lines may have to share the same
+ * IOSAPIC pin (if they're level triggered and use the same polarity).
+ * Each interrupt line has a unique Global System Interrupt (GSI) number
+ * which can be calculated as the sum of the controller's base GSI number
+ * and the IOSAPIC pin number to which the line connects.
*
- * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin
- * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
+ * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
+ * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then
+ * sent to the CPU.
*
- * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
- * architecture-independent interrupt handling mechanism in Linux. As an
- * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
- * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and
- * IRQ. A platform can implement platform_irq_to_vector(irq) and
+ * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is
+ * used as architecture-independent interrupt handling mechanism in Linux.
+ * As an IRQ is a number, we have to have
+ * IA-64 interrupt vector number <-> IRQ number mapping. On smaller
+ * systems, we use one-to-one mapping between IA-64 vector and IRQ. A
+ * platform can implement platform_irq_to_vector(irq) and
* platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
* Please see also include/asm-ia64/hw_irq.h for those APIs.
*
*
* PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
*
- * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
- * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this
- * source code.
+ * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
+ * describe interrupts. Now we use "IRQ" only for Linux IRQs. ISA IRQ
+ * (isa_irq) is the only exception in this source code.
*/
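/*
 * Illustrative sketch (not part of this file): step (2) above reduces
 * to simple arithmetic. The helper name is hypothetical.
 */
static inline unsigned int pin_to_gsi(unsigned int gsi_base, unsigned int pin)
{
	/* unique GSI = controller's base GSI + IOSAPIC pin number */
	return gsi_base + pin;
}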
#include <linux/config.h>
#include <asm/ptrace.h>
#include <asm/system.h>
-
#undef DEBUG_INTERRUPT_ROUTING
#ifdef DEBUG_INTERRUPT_ROUTING
#define DBG(fmt...)
#endif
-#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define NR_PREALLOCATE_RTE_ENTRIES \
+ (PAGE_SIZE / sizeof(struct iosapic_rte_info))
#define RTE_PREALLOCATED (1)
static DEFINE_SPINLOCK(iosapic_lock);
-/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
+/*
+ * These tables map IA-64 vectors to the IOSAPIC pin that generates this
+ * vector.
+ */
struct iosapic_rte_info {
- struct list_head rte_list; /* node in list of RTEs sharing the same vector */
+ struct list_head rte_list; /* node in list of RTEs sharing the
+ * same vector */
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
char rte_index; /* IOSAPIC RTE index */
int refcnt; /* reference counter */
unsigned int flags; /* flags */
} ____cacheline_aligned;
static struct iosapic_intr_info {
- struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+ struct list_head rtes; /* RTEs using this vector (empty =>
+ * not an IOSAPIC interrupt) */
int count; /* # of RTEs that share this vector */
- u32 low32; /* current value of low word of Redirection table entry */
+ u32 low32; /* current value of low word of
+ * Redirection table entry */
unsigned int dest; /* destination CPU physical ID */
unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
- unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
+ unsigned char polarity: 1; /* interrupt polarity
+ * (see iosapic.h) */
unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
} iosapic_intr_info[IA64_NUM_VECTORS];
static struct iosapic {
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
- unsigned short num_rte; /* number of RTE in this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
+ unsigned short num_rte; /* # of RTEs on this IOSAPIC */
int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
int i;
for (i = 0; i < NR_IOSAPICS; i++) {
- if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
+ if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
+ iosapic_lists[i].num_rte)
return i;
}
struct iosapic_intr_info *info;
struct iosapic_rte_info *rte;
- for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info)
list_for_each_entry(rte, &info->rtes, rte_list)
if (rte->gsi_base + rte->rte_index == gsi)
return info - iosapic_intr_info;
unsigned long flags;
int irq;
/*
- * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
- * numbers...
+ * XXX fix me: this assumes an identity mapping between IA-64 vector
+ * and Linux irq numbers...
*/
spin_lock_irqsave(&iosapic_lock, flags);
{
return irq;
}
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
+ unsigned int vec)
{
struct iosapic_rte_info *rte;
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == vector) {
- set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+ set_irq_affinity_info(irq,
+ (int)(dest & 0xffff),
+ redir);
break;
}
}
}
static void
-nop (unsigned int vector)
+nop (unsigned int irq)
{
/* do nothing... */
}
{
/* set only the mask bit */
low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
spin_lock_irqsave(&iosapic_lock, flags);
{
low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
spin_lock_irqsave(&iosapic_lock, flags);
{
- low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+ low32 = iosapic_intr_info[vec].low32 &
+ ~(7 << IOSAPIC_DELIVERY_SHIFT);
if (redir)
/* change delivery mode to lowest priority */
- low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+ low32 |= (IOSAPIC_LOWEST_PRIORITY <<
+ IOSAPIC_DELIVERY_SHIFT);
else
/* change delivery mode to fixed */
low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
iosapic_intr_info[vec].low32 = low32;
iosapic_intr_info[vec].dest = dest;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+ iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
+ high32);
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
}
ia64_vector vec = irq_to_vector(irq);
struct iosapic_rte_info *rte;
- move_irq(irq);
+ move_native_irq(irq);
list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
iosapic_eoi(rte->addr, vec);
}
{
irq_desc_t *idesc = irq_descp(irq);
- move_irq(irq);
+ move_native_irq(irq);
/*
* Once we have recorded IRQ_PENDING already, we can mask the
* interrupt for real. This prevents IRQ storms from unhandled
* devices.
*/
- if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
+ if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
+ (IRQ_PENDING|IRQ_DISABLED))
mask_irq(irq);
}
return iosapic_read(addr, IOSAPIC_VERSION);
}
-static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+static int iosapic_find_sharable_vector (unsigned long trigger,
+ unsigned long pol)
{
int i, vector = -1, min_count = -1;
struct iosapic_intr_info *info;
for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
info = &iosapic_intr_info[i];
if (info->trigger == trigger && info->polarity == pol &&
- (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+ (info->dmode == IOSAPIC_FIXED || info->dmode ==
+ IOSAPIC_LOWEST_PRIORITY)) {
if (min_count == -1 || info->count < min_count) {
vector = i;
min_count = info->count;
new_vector = assign_irq_vector(AUTO_ASSIGN);
if (new_vector < 0)
panic("%s: out of interrupt vectors!\n", __FUNCTION__);
- printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
+ printk(KERN_INFO "Reassigning vector %d to %d\n",
+ vector, new_vector);
memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
sizeof(struct iosapic_intr_info));
INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
- list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ list_move(iosapic_intr_info[vector].rtes.next,
+ &iosapic_intr_info[new_vector].rtes);
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
int preallocated = 0;
if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
- rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+ rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
+ NR_PREALLOCATE_RTE_ENTRIES);
if (!rte)
return NULL;
for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
}
if (!list_empty(&free_rte_list)) {
- rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+ rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
+ rte_list);
list_del(&rte->rte_list);
preallocated++;
} else {
index = find_iosapic(gsi);
if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
+ printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+ __FUNCTION__, gsi);
return -ENODEV;
}
if (!rte) {
rte = iosapic_alloc_rte();
if (!rte) {
- printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: cannot allocate memory\n",
+ __FUNCTION__);
return -ENOMEM;
}
else if (vector_is_shared(vector)) {
struct iosapic_intr_info *info = &iosapic_intr_info[vector];
if (info->trigger != trigger || info->polarity != polarity) {
- printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+ printk (KERN_WARNING
+ "%s: cannot override the interrupt\n",
+ __FUNCTION__);
return -EINVAL;
}
}
idesc = irq_descp(vector);
if (idesc->handler != irq_type) {
if (idesc->handler != &no_irq_type)
- printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
- __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
+ printk(KERN_WARNING
+ "%s: changing vector %d from %s to %s\n",
+ __FUNCTION__, vector,
+ idesc->handler->typename, irq_type->typename);
idesc->handler = irq_type;
}
return 0;
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
if (!num_cpus)
goto skip_numa_setup;
- /* Use vector assigment to distribute across cpus in node */
+ /* Use vector assignment to distribute across cpus in node */
cpu_index = vector % num_cpus;
for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
} while (!cpu_online(cpu));
return cpu_physical_id(cpu);
-#else
+#else /* CONFIG_SMP */
return cpu_physical_id(smp_processor_id());
#endif
}
if (list_empty(&iosapic_intr_info[vector].rtes))
free_irq_vector(vector);
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
goto again;
}
polarity, trigger);
if (err < 0) {
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
return err;
}
*/
irq = gsi_to_irq(gsi);
if (irq < 0) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
return;
}
spin_lock(&iosapic_lock);
{
if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR
+ "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
goto out;
}
/* Mask the interrupt */
low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
- iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
+ low32);
/* Remove the rte entry from the list */
list_del(&rte->rte_list);
trigger = iosapic_intr_info[vector].trigger;
polarity = iosapic_intr_info[vector].polarity;
dest = iosapic_intr_info[vector].dest;
- printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+ printk(KERN_INFO
+ "GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d unregistered\n",
gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, vector);
idesc->handler = &no_irq_type;
/* Clear the interrupt information */
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
if (idesc->action) {
- printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+ printk(KERN_ERR
+ "interrupt handlers still exist on"
+ "IRQ %u\n", irq);
WARN_ON(1);
}
/*
* ACPI calls this when it finds an entry for a platform interrupt.
- * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
*/
int __init
iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
mask = 1;
break;
default:
- printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
+ printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+ int_type);
return -1;
}
register_intr(gsi, vector, delivery, polarity, trigger);
- printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+ printk(KERN_INFO
+ "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d\n",
int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
return vector;
}
-
/*
* ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
*/
void __init
iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */
+ /* mark as unused */
+ INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
pcat_compat = system_pcat_compat;
if (pcat_compat) {
/*
- * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
- * enabled.
+ * Disable the compatibility mode interrupts (8259 style),
+ * needs IN/OUT support enabled.
*/
- printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
+ printk(KERN_INFO
+ "%s: Disabling PC-AT compatible 8259 interrupts\n",
+ __FUNCTION__);
outb(0xff, 0xA1);
outb(0xff, 0x21);
}
base = iosapic_lists[index].gsi_base;
end = base + iosapic_lists[index].num_rte - 1;
- if (gsi_base < base && gsi_end < base)
- continue;/* OK */
-
- if (gsi_base > end && gsi_end > end)
+ if (gsi_end < base || end < gsi_base)
continue; /* OK */
return -EBUSY;
if ((gsi_base == 0) && pcat_compat) {
/*
- * Map the legacy ISA devices into the IOSAPIC data. Some of these may
- * get reprogrammed later on with data from the ACPI Interrupt Source
- * Override table.
+ * Map the legacy ISA devices into the IOSAPIC data. Some of
+ * these may get reprogrammed later on with data from the ACPI
+ * Interrupt Source Override table.
*/
for (isa_irq = 0; isa_irq < 16; ++isa_irq)
- iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
+ iosapic_override_isa_irq(isa_irq, isa_irq,
+ IOSAPIC_POL_HIGH,
+ IOSAPIC_EDGE);
}
return 0;
}
if (iosapic_lists[index].rtes_inuse) {
err = -EBUSY;
- printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+ printk(KERN_WARNING
+ "%s: IOSAPIC for GSI base %u is busy\n",
__FUNCTION__, gsi_base);
goto out;
}
ia64_srlz_i();
}
-void
+void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
static int first_time = 1;
ia64_srlz_i();
}
-static void
+static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
extern unsigned long fsyscall_table[NR_syscalls];
ia64_srlz_i();
}
-static void
+static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
extern char fsys_bubble_down[];
#define patch_brl_in_vdso() do { } while (0)
#endif
-void
+void __init
ia64_patch_gate (void)
{
# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
if (likely(ctx)) {
DPRINT(("context unlocked\n"));
UNPROTECT_CTX(ctx, flags);
+ fput(file);
}
/* copy argument back to user, if needed */
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
error_args:
- if (file)
- fput(file);
-
kfree(args_k);
DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
void
pfm_init_percpu (void)
{
+	static int first_time = 1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time = 0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tty.h>
+#include <linux/dmi.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
-#include <linux/platform.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-int num_rsvd_regions;
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
/*
* caller-specified function is called with the memory ranges that remain after filtering.
* This routine does not assume the incoming segments are sorted.
*/
-int
+int __init
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
return 0;
}
-static void
+static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
int j;
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
* see include/asm-ia64/meminit.h if you need to define more.
*/
-void
+void __init
reserve_memory (void)
{
int n = 0;
* Grab the initrd start and end from the boot parameter struct given us by
* the boot loader.
*/
-void
+void __init
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
}
#ifdef CONFIG_SMP
-static void
+static void __init
check_for_logical_procs (void)
{
pal_logical_to_physical_t info;
}
#endif
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+ nomca = 1;
+ return 0;
+}
+early_param("nomca", setup_nomca);
+
void __init
setup_arch (char **cmdline_p)
{
efi_init();
io_port_init();
+ parse_early_param();
+
#ifdef CONFIG_IA64_GENERIC
- {
- const char *mvec_name = strstr (*cmdline_p, "machvec=");
- char str[64];
-
- if (mvec_name) {
- const char *end;
- size_t len;
-
- mvec_name += 8;
- end = strchr (mvec_name, ' ');
- if (end)
- len = end - mvec_name;
- else
- len = strlen (mvec_name);
- len = min(len, sizeof (str) - 1);
- strncpy (str, mvec_name, len);
- str[len] = '\0';
- mvec_name = str;
- } else
- mvec_name = acpi_get_sysname();
- machvec_init(mvec_name);
- }
+ machvec_init(NULL);
#endif
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
- parse_early_param();
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
find_memory();
/* process SAL system table: */
- ia64_sal_init(efi.sal_systab);
+ ia64_sal_init(__va(efi.sal_systab));
ia64_setup_printk_clock();
/* enable IA-64 Machine Check Abort Handling unless disabled */
- if (!strstr(saved_command_line, "nomca"))
+ if (!nomca)
ia64_mca_init();
platform_setup(cmdline_p);
.show = show_cpuinfo
};
-void
+static void __cpuinit
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
* In addition, the minimum of the i-cache stride sizes is calculated for
* "flush_icache_range()".
*/
-static void
+static void __cpuinit
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
-void
+void __cpuinit
cpu_init (void)
{
- extern void __devinit ia64_mmu_init (void *);
+ extern void __cpuinit ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
ia64_sal_cache_flush(3);
}
-void
+void __init
check_bugs (void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
(unsigned long) __end___mckinley_e9_bundles);
}
+
+static int __init run_dmi_scan(void)
+{
+ dmi_scan_machine();
+ return 0;
+}
+core_initcall(run_dmi_scan);
--- /dev/null
+/*
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <asm/io.h>
+
+static inline void __iomem *
+__ioremap (unsigned long offset, unsigned long size)
+{
+ offset = HYPERVISOR_ioremap(offset, size);
+ if (IS_ERR_VALUE(offset))
+ return (void __iomem*)offset;
+ return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
+}
+
+void __iomem *
+ioremap (unsigned long offset, unsigned long size)
+{
+ if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
+ return phys_to_virt(offset);
+
+ if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
+ return __ioremap(offset, size);
+
+ /*
+ * Someday this should check ACPI resources so we
+ * can do the right thing for hot-plugged regions.
+ */
+ return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *
+ioremap_nocache (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap_nocache);
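The logic above chooses a mapping type from the EFI memory map: write-back (WB) ranges can reuse the cached kernel linear mapping via phys_to_virt(), while uncacheable (UC) ranges go through the hypervisor-mediated __ioremap(). A hedged caller sketch follows; MY_MMIO_PHYS and MY_MMIO_LEN are made-up example values, not part of this patch:

	/* Sketch of a typical consumer (hypothetical device region). */
	static int __init my_probe(void)
	{
		void __iomem *regs = ioremap_nocache(MY_MMIO_PHYS, MY_MMIO_LEN);

		if (!regs)
			return -ENOMEM;
		writel(1, regs + 0x04);	/* poke an (assumed) control register */
		iounmap(regs);
		return 0;
	}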
#include "linux/vmalloc.h"
#include "linux/bootmem.h"
#include "linux/module.h"
+#include "linux/pfn.h"
#include "asm/types.h"
#include "asm/pgtable.h"
#include "kern_util.h"
for(i = 0; i < total_pages; i++){
p = &map[i];
- set_page_count(p, 0);
+ memset(p, 0, sizeof(struct page));
SetPageReserved(p);
INIT_LIST_HEAD(&p->lru);
}
}
}
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
extern int __syscall_stub_start, __binary_start;
void setup_physmem(unsigned long start, unsigned long reserve_end,
*len_out = region->size;
return(region->virt);
}
+
+ region = region->next;
}
return(0);
config RWSEM_XCHGADD_ALGORITHM
bool
+config GENERIC_HWEIGHT
+ bool
+ default y
+
config GENERIC_CALIBRATE_DELAY
bool
default y
default "7" if GENERIC_CPU || MPSC
default "6" if MK8
+config X86_INTERNODE_CACHE_BYTES
+ int
+ default "4096" if X86_VSMP
+ default X86_L1_CACHE_BYTES if !X86_VSMP
+
config X86_TSC
bool
depends on !X86_64_XEN
cost of slightly increased overhead in some places. If unsure say
N here.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config NUMA
Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
instead, which also takes priority if both are compiled in.
+config NODES_SHIFT
+ int
+ default "6"
+ depends on NEED_MULTIPLE_NODES
+
# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
config X86_64_ACPI_NUMA
def_bool y
depends on NUMA
+config OUT_OF_LINE_PFN_TO_PAGE
+ def_bool y
+ depends on DISCONTIGMEM
+
config NR_CPUS
int "Maximum number of CPUs (2-256)"
- range 2 256
+ range 2 255
depends on SMP
default "16" if X86_64_XEN
default "8"
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
config HPET_TIMER
bool
select SWIOTLB
depends on PCI && !X86_64_XEN
help
- Support the IOMMU. Needed to run systems with more than 3GB of memory
- properly with 32-bit PCI devices that do not support DAC (Double Address
- Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter.
- Normally the kernel will take the right choice by itself.
- This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU
- and a software emulation used on other systems.
- If unsure, say Y.
+ Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
+ and for the bounce buffering software IOMMU.
+ Needed to run systems with more than 3GB of memory properly with
+ 32-bit PCI devices that do not support DAC (Double Address Cycle).
+ The IOMMU can be turned off at runtime with the iommu=off parameter.
+ Normally the kernel will take the right choice by itself.
+ This option includes a driver for the AMD Opteron/Athlon64 IOMMU
+ northbridge and a software emulation used on other systems without
+ hardware IOMMU. If unsure, say Y.
# need this always enabled with GART_IOMMU for the VIA workaround
config SWIOTLB
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
default "0x1000000" if CRASH_DUMP
- default "0x100000"
+ default "0x200000"
help
This gives the physical address where the kernel is loaded. Normally
- for regular kernels this value is 0x100000 (1MB). But in the case
+ for regular kernels this value is 0x200000 (2MB). But in the case
of kexec on panic the fail safe kernel needs to run at a different
address than the panic-ed kernel. This option is used to set the load
address for kernels used to capture crash dump on being kexec'ed
source kernel/Kconfig.hz
+config REORDER
+ bool "Function reordering"
+ default n
+ help
+ This option enables the toolchain to reorder functions for a more
+ optimal TLB usage. It works with pretty much any version of binutils,
+ and can increase your kernel build time by roughly one minute.
+
endmenu
#
help
Enables some debug statements within the PCI Frontend.
-config UNORDERED_IO
- bool "Unordered IO mapping access"
- depends on EXPERIMENTAL
- help
- Use unordered stores to access IO memory mappings in device drivers.
- Still very experimental. When a driver works on IA64/ppc64/pa-risc it should
- work with this option, but it makes the drivers behave differently
- from i386. Requires that the driver writer used memory barriers
- properly.
-
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig"
LDFLAGS := -m elf_x86_64
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
-
CHECKFLAGS += -D__x86_64__ -m64
+cflags-y :=
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-CFLAGS += $(cflags-y)
+cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
cppflags-$(CONFIG_XEN) += \
-D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
CPPFLAGS += $(cppflags-y)
-CFLAGS += -m64
-CFLAGS += -mno-red-zone
-CFLAGS += -mcmodel=kernel
-CFLAGS += -pipe
+cflags-y += -m64
+cflags-y += -mno-red-zone
+cflags-y += -mcmodel=kernel
+cflags-y += -pipe
+cflags-$(CONFIG_REORDER) += -ffunction-sections
# this makes reading assembly source easier, but produces worse code
# actually it makes the kernel smaller too.
-CFLAGS += -fno-reorder-blocks
-CFLAGS += -Wno-sign-compare
+cflags-y += -fno-reorder-blocks
+cflags-y += -Wno-sign-compare
ifneq ($(CONFIG_UNWIND_INFO),y)
-CFLAGS += -fno-asynchronous-unwind-tables
+cflags-y += -fno-asynchronous-unwind-tables
endif
ifneq ($(CONFIG_DEBUG_INFO),y)
# -fweb shrinks the kernel a bit, but the difference is very small
# it also messes up debugging, so don't use it for now.
-#CFLAGS += $(call cc-option,-fweb)
+#cflags-y += $(call cc-option,-fweb)
endif
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
-CFLAGS += $(call cc-option,-funit-at-a-time)
+cflags-y += $(call cc-option,-funit-at-a-time)
# prevent gcc from generating any FP code by mistake
-CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+CFLAGS += $(cflags-y)
AFLAGS += -m64
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
boot := arch/x86_64/boot
-.PHONY: bzImage bzlilo install archmrproper \
- fdimage fdimage144 fdimage288 archclean
+PHONY += bzImage bzlilo install archmrproper \
+ fdimage fdimage144 fdimage288 isoimage archclean
ifdef CONFIG_XEN
CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
-fdimage fdimage144 fdimage288: vmlinux
+fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
install:
define archhelp
echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
- echo ' install to $$(INSTALL_PATH) and run lilo'
+ echo ' (your) ~/bin/installkernel or'
+ echo ' (distribution) /sbin/installkernel or'
+ echo ' install to $$(INSTALL_PATH) and run lilo'
+ echo ' bzdisk - Create a boot floppy in /dev/fd0'
+ echo ' fdimage - Create a boot floppy image'
+ echo ' isoimage - Create a boot CD-ROM image'
endef
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
+ arch/$(ARCH)/boot/image.iso \
+ arch/$(ARCH)/boot/mtools.conf
#include <linux/linkage.h>
#define __XEN_X86_64 1
-
+
+#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
+
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
CFI_REMEMBER_STATE
jnz sysenter_tracesys
sysenter_do_call:
- cmpl $(IA32_NR_syscalls),%eax
- jae ia32_badsys
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
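The tightened bound check rejects exactly the same syscall numbers as before: for unsigned %eax, (eax > NR-1) and (eax >= NR) are the same predicate, so cmpl $(IA32_NR_syscalls-1)/ja matches what cmpl $(IA32_NR_syscalls)/jae did. A one-line C sanity check of the equivalence (illustrative only):

	#include <assert.h>

	/* Holds for any unsigned x and nr >= 1. */
	static void bound_equiv(unsigned x, unsigned nr)
	{
		assert((x > nr - 1) == (x >= nr));
	}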
CFI_REMEMBER_STATE
jnz cstar_tracesys
cstar_do_call:
- cmpl $IA32_NR_syscalls,%eax
- jae ia32_badsys
+ cmpl $IA32_NR_syscalls-1,%eax
+ ja ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
- cmpl $(IA32_NR_syscalls),%eax
- jae ia32_badsys
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
.quad sys_setdomainname
.quad sys_uname
.quad sys_modify_ldt
- .quad sys32_adjtimex
+ .quad compat_sys_adjtimex
.quad sys32_mprotect /* 125 */
.quad compat_sys_sigprocmask
.quad quiet_ni_syscall /* create_module */
.quad sys_readlinkat /* 305 */
.quad sys_fchmodat
.quad sys_faccessat
- .quad sys_ni_syscall /* pselect6 for now */
- .quad sys_ni_syscall /* ppoll for now */
+ .quad quiet_ni_syscall /* pselect6 for now */
+ .quad quiet_ni_syscall /* ppoll for now */
.quad sys_unshare /* 310 */
+ .quad compat_sys_set_robust_list
+ .quad compat_sys_get_robust_list
+ .quad sys_splice
+ .quad sys_sync_file_range
+ .quad sys_tee
+ .quad compat_sys_vmsplice
ia32_syscall_end:
- .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
- .quad ni_syscall
- .endr
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
.section .eh_frame,"a",@progbits
+.LSTARTFRAMES:
+ .long .LENDCIES-.LSTARTCIES
+.LSTARTCIES:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zRS" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIES:
+
.long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
.LSTARTFDE2:
- .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */
+ .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */
/* HACK: The dwarf2 unwind routines will subtract 1 from the
return address to get an address in the middle of the
presumed call instruction. Since we didn't get here via
.long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
.LSTARTFDE3:
- .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */
+ .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */
/* HACK: See above wrt unwind library assumptions. */
.long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- dmi_scan.o pci-dma.o pci-nommu.o
+ pci-dma.o pci-nommu.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
topology-y += ../../i386/kernel/topology.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
-intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
+intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo-xen.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
-dmi_scan-y += ../../i386/kernel/dmi_scan.o
ifdef CONFIG_XEN
time-y += ../../i386/kernel/time-xen.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += processor.o
-processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o
endif
boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o
setup_IO_APIC();
#endif
- return 0;
+ return 1;
}
*addrp = __pa_symbol(&_end);
return 1;
}
+
+ if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
+ *addrp = ebda_addr + ebda_size;
+ return 1;
+ }
+
/* XXX ramdisk image here? */
#else
if (last < (table_end<<PAGE_SHIFT)) {
}
#ifndef CONFIG_XEN
-int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int __meminit
+e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
}
#endif
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case.
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* is the region (part) in overlap with the current region ?*/
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+
+ /* if the region is at the beginning of <start,end> we move
+ * start to the end of the region since it's ok until there
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
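e820_all_mapped() exploits the sorted, non-overlapping table: it slides start forward through every region that covers it and succeeds once start reaches end. The same walk in a self-contained toy (illustrative, not kernel code):

	struct range { unsigned long addr, size; };

	/* Returns 1 iff [start,end) is fully covered by the sorted,
	 * non-overlapping table -- e820_all_mapped() in miniature. */
	static int all_covered(const struct range *map, int n,
			       unsigned long start, unsigned long end)
	{
		int i;

		for (i = 0; i < n; i++) {
			if (map[i].addr >= end ||
			    map[i].addr + map[i].size <= start)
				continue;	/* no overlap with [start,end) */
			if (map[i].addr <= start)
				start = map[i].addr + map[i].size;
			if (start >= end)
				return 1;
		}
		return 0;
	}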
/*
* Find a free area in a specific range.
*/
addr = start;
if (addr > ei->addr + ei->size)
continue;
- while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size)
+ while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
;
last = addr + size;
if (last > ei->addr + ei->size)
#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
#endif
-#define MAX_YPOS max_ypos
-#define MAX_XPOS max_xpos
-
static int max_ypos = 25, max_xpos = 80;
#ifndef CONFIG_XEN
-static int current_ypos = 1, current_xpos = 0;
+static int current_ypos = 25, current_xpos = 0;
static void early_vga_write(struct console *con, const char *str, unsigned n)
{
int i, k, j;
while ((c = *str++) != '\0' && n-- > 0) {
- if (current_ypos >= MAX_YPOS) {
+ if (current_ypos >= max_ypos) {
/* scroll 1 line up */
- for (k = 1, j = 0; k < MAX_YPOS; k++, j++) {
- for (i = 0; i < MAX_XPOS; i++) {
- writew(readw(VGABASE + 2*(MAX_XPOS*k + i)),
- VGABASE + 2*(MAX_XPOS*j + i));
+ for (k = 1, j = 0; k < max_ypos; k++, j++) {
+ for (i = 0; i < max_xpos; i++) {
+ writew(readw(VGABASE+2*(max_xpos*k+i)),
+ VGABASE + 2*(max_xpos*j + i));
}
}
- for (i = 0; i < MAX_XPOS; i++)
- writew(0x720, VGABASE + 2*(MAX_XPOS*j + i));
- current_ypos = MAX_YPOS-1;
+ for (i = 0; i < max_xpos; i++)
+ writew(0x720, VGABASE + 2*(max_xpos*j + i));
+ current_ypos = max_ypos-1;
}
if (c == '\n') {
current_xpos = 0;
current_ypos++;
} else if (c != '\r') {
writew(((0x7 << 8) | (unsigned short) c),
- VGABASE + 2*(MAX_XPOS*current_ypos +
+ VGABASE + 2*(max_xpos*current_ypos +
current_xpos++));
- if (current_xpos >= MAX_XPOS) {
+ if (current_xpos >= max_xpos) {
current_xpos = 0;
current_ypos++;
}
.index = -1,
};
-/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
+/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
static int early_serial_base = 0x3f8; /* ttyS0 */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor Latch High */
-static int early_serial_putc(unsigned char ch)
-{
- unsigned timeout = 0xffff;
- while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+static int early_serial_putc(unsigned char ch)
+{
+ unsigned timeout = 0xffff;
+ while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
return timeout ? 0 : -1;
-}
+}
static void early_serial_write(struct console *con, const char *s, unsigned n)
{
- while (*s && n-- > 0) {
- early_serial_putc(*s);
- if (*s == '\n')
- early_serial_putc('\r');
- s++;
- }
-}
+ while (*s && n-- > 0) {
+ early_serial_putc(*s);
+ if (*s == '\n')
+ early_serial_putc('\r');
+ s++;
+ }
+}
#define DEFAULT_BAUD 9600
static __init void early_serial_init(char *s)
{
- unsigned char c;
+ unsigned char c;
unsigned divisor;
unsigned baud = DEFAULT_BAUD;
char *e;
++s;
if (*s) {
- unsigned port;
+ unsigned port;
if (!strncmp(s,"0x",2)) {
early_serial_base = simple_strtoul(s, &e, 16);
} else {
outb(0x3, early_serial_base + MCR); /* DTR + RTS */
if (*s) {
- baud = simple_strtoul(s, &e, 0);
- if (baud == 0 || s == e)
+ baud = simple_strtoul(s, &e, 0);
+ if (baud == 0 || s == e)
baud = DEFAULT_BAUD;
- }
-
- divisor = 115200 / baud;
- c = inb(early_serial_base + LCR);
- outb(c | DLAB, early_serial_base + LCR);
- outb(divisor & 0xff, early_serial_base + DLL);
- outb((divisor >> 8) & 0xff, early_serial_base + DLH);
+ }
+
+ divisor = 115200 / baud;
+ c = inb(early_serial_base + LCR);
+ outb(c | DLAB, early_serial_base + LCR);
+ outb(divisor & 0xff, early_serial_base + DLL);
+ outb((divisor >> 8) & 0xff, early_serial_base + DLH);
outb(c & ~DLAB, early_serial_base + LCR);
}
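The divisor written to DLL/DLH follows the 16550 convention that divisor 1 yields 115200 baud, hence divisor = 115200 / baud. A worked example for the 9600 default (values assumed from DEFAULT_BAUD above):

	unsigned baud = 9600;
	unsigned divisor = 115200 / baud;	/* 12 */
	unsigned dll = divisor & 0xff;		/* 12 -> DLL register */
	unsigned dlh = (divisor >> 8) & 0xff;	/* 0  -> DLH register */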
static int early_console_initialized = 0;
void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
+{
+ char buf[512];
+ int n;
va_list ap;
- va_start(ap,fmt);
+ va_start(ap,fmt);
n = vscnprintf(buf,512,fmt,ap);
early_console->write(early_console,buf,n);
- va_end(ap);
-}
+ va_end(ap);
+}
static int __initdata keep_early;
-int __init setup_early_printk(char *opt)
-{
+int __init setup_early_printk(char *opt)
+{
char *space;
- char buf[256];
+ char buf[256];
if (early_console_initialized)
- return -1;
+ return 1;
- strlcpy(buf,opt,sizeof(buf));
- space = strchr(buf, ' ');
+ strlcpy(buf,opt,sizeof(buf));
+ space = strchr(buf, ' ');
if (space)
- *space = 0;
+ *space = 0;
if (strstr(buf,"keep"))
- keep_early = 1;
+ keep_early = 1;
- if (!strncmp(buf, "serial", 6)) {
+ if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
+ } else if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf);
- early_console = &early_serial_console;
+ early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)
&& SCREEN_INFO.orig_video_isVGA == 1) {
max_xpos = SCREEN_INFO.orig_video_cols;
max_ypos = SCREEN_INFO.orig_video_lines;
- early_console = &early_vga_console;
+#ifndef CONFIG_XEN
+ current_ypos = SCREEN_INFO.orig_y;
+#endif
+ early_console = &early_vga_console;
} else if (!strncmp(buf, "simnow", 6)) {
simnow_init(buf + 6);
early_console = &simnow_console;
keep_early = 1;
}
early_console_initialized = 1;
- register_console(early_console);
+ register_console(early_console);
return 0;
}
void __init disable_early_printk(void)
-{
+{
if (!early_console_initialized || !early_console)
return;
if (!keep_early) {
printk("disabling early console\n");
unregister_console(early_console);
early_console_initialized = 0;
- } else {
+ } else {
printk("keeping early console\n");
}
-}
+}
__setup("earlyprintk=", setup_early_printk);
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x30,%rsp
- movq $-9999,%rdi /* better code? */
+ movq $11,%rdi /* SIGSEGV */
jmp do_exit
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
NEXT_PAGE(level2_kernel_pgt)
.fill 512,8,0
-NEXT_PAGE(empty_zero_page)
- .skip PAGE_SIZE
-
NEXT_PAGE(hypercall_page)
.fill 512,8,0
.align 16
.globl cpu_gdt_descr
cpu_gdt_descr:
- .word gdt_end-cpu_gdt_table
+ .word gdt_end-cpu_gdt_table-1
gdt:
.quad cpu_gdt_table
#ifdef CONFIG_SMP
/* zero the remaining page */
.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+ .section .bss.page_aligned, "aw", @nobits
+ .align PAGE_SIZE
+ENTRY(empty_zero_page)
+ .skip PAGE_SIZE
+
#ifdef CONFIG_XEN_COMPAT_030002
/*
* __xen_guest information
int disable_timer_pin_1 __initdata;
#ifndef CONFIG_XEN
-int timer_over_8254 __initdata = 1;
+int timer_over_8254 __initdata = 0;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
#include <linux/pci_ids.h>
#include <linux/pci.h>
+
+#ifdef CONFIG_ACPI
+
+static int nvidia_hpet_detected __initdata;
+
+static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+{
+ nvidia_hpet_detected = 1;
+ return 0;
+}
+#endif
+
/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
off. Check for an Nvidia or VIA PCI bridge and turn it off.
Use pci direct infrastructure because this runs before the PCI subsystem.
force_iommu) &&
!iommu_aperture_allowed) {
printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n");
+ "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
iommu_aperture_disabled = 1;
}
#endif
return;
case PCI_VENDOR_ID_NVIDIA:
#ifdef CONFIG_ACPI
- /* All timer overrides on Nvidia
- seem to be wrong. Skip them. */
- acpi_skip_timer_override = 1;
- printk(KERN_INFO
- "Nvidia board detected. Ignoring ACPI timer override.\n");
+ /*
+ * All timer overrides on Nvidia are
+ * wrong unless HPET is enabled.
+ */
+ nvidia_hpet_detected = 0;
+ acpi_table_parse(ACPI_HPET,
+ nvidia_hpet_check);
+ if (nvidia_hpet_detected == 0) {
+ acpi_skip_timer_override = 1;
+ printk(KERN_INFO "Nvidia board "
+ "detected. Ignoring ACPI "
+ "timer override.\n");
+ }
#endif
/* RED-PEN skip them on mptables too? */
return;
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+int timer_uses_ioapic_pin_0;
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
+ if (pin1 == 0)
+ timer_uses_ioapic_pin_0 = 1;
+
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
*/
setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
if (timer_irq_works()) {
- printk("works.\n");
+ apic_printk(APIC_VERBOSE," works.\n");
nmi_watchdog_default();
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
*/
clear_IO_APIC_pin(apic2, pin2);
}
- printk(" failed.\n");
+ apic_printk(APIC_VERBOSE," failed.\n");
if (nmi_watchdog == NMI_IO_APIC) {
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
enable_8259A_irq(0);
if (timer_irq_works()) {
- apic_printk(APIC_QUIET, " works.\n");
+ apic_printk(APIC_VERBOSE," works.\n");
return;
}
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
}
#else
#define check_timer() ((void)0)
+int timer_uses_ioapic_pin_0 = 0;
#endif /* !CONFIG_XEN */
static int __init notimercheck(char *s)
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- kstat_cpu(j).irqs[i]);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
}
#ifndef CONFIG_XEN
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
{
int cpu;
unsigned char ver;
- static int found_bsp=0;
+ cpumask_t tmp_map;
if (!(m->mpc_cpuflag & CPU_ENABLED)) {
disabled_cpus++;
return;
}
- cpu = num_processors++;
-
+ num_processors++;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+
#if MAX_APICS < 255
if ((int)m->mpc_apicid > MAX_APICS) {
printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
* entry is BSP, and so on.
*/
cpu = 0;
-
- bios_cpu_apicid[0] = m->mpc_apicid;
- x86_cpu_to_apicid[0] = m->mpc_apicid;
- found_bsp = 1;
- } else
- cpu = num_processors - found_bsp;
+ }
bios_cpu_apicid[cpu] = m->mpc_apicid;
x86_cpu_to_apicid[cpu] = m->mpc_apicid;
}
-void __init mp_register_lapic (
+void __cpuinit mp_register_lapic (
u8 id,
u8 enabled)
{
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
- if (gsi > 15)
+ /*
+ * Retain the VIA chipset work-around (gsi > 15), but
+ * avoid a problem where the 8254 timer (IRQ0) is set up
+ * via an override (so it's not on pin 0 of the ioapic),
+ * and at the same time, the pin 0 interrupt is a PCI
+ * type. The gsi > 15 test could cause these two pins
+ * to be shared as IRQ0, and they are not shareable.
+ * So test for this condition, and if necessary, avoid
+ * the pin collision.
+ */
+ if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
-#include <linux/kprobes.h>
#include <linux/notifier.h>
+#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-static struct notifier_block *idle_notifier;
-static DEFINE_SPINLOCK(idle_notifier_lock);
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
{
- unsigned long flags;
- spin_lock_irqsave(&idle_notifier_lock, flags);
- notifier_chain_register(&idle_notifier, n);
- spin_unlock_irqrestore(&idle_notifier_lock, flags);
+ atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);
void idle_notifier_unregister(struct notifier_block *n)
{
- unsigned long flags;
- spin_lock_irqsave(&idle_notifier_lock, flags);
- notifier_chain_unregister(&idle_notifier, n);
- spin_unlock_irqrestore(&idle_notifier_lock, flags);
+ atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
void enter_idle(void)
{
__get_cpu_var(idle_state) = CPU_IDLE;
- notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
__get_cpu_var(idle_state) = CPU_NOT_IDLE;
- notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
/* Called from interrupts to signify idle end */
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(me);
-
if (me->thread.io_bitmap_ptr) {
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
* multicall to indicate FPU task switch, rather than
* synchronously trapping to Xen.
* This must be here to ensure both math_state_restore() and
- * kernel_fpu_begin() work consistently.
+ * kernel_fpu_begin() work consistently.
* The AMD workaround requires it to be after DS reload, or
* after DS has been cleared, which we do in __prepare_arch_switch.
*/
HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);
/*
- * Switch the PDA context.
+ * Switch the PDA and FPU contexts.
*/
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
}
case ARCH_GET_GS: {
unsigned long base;
+ unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
- else if (doit)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else if (doit) {
+ asm("movl %%gs,%0" : "=r" (gsindex));
+ if (gsindex)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gs;
+ }
else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
#include <linux/cpufreq.h>
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/swiotlb.h>
#include <asm/sections.h>
#include <asm/gart-mapping.h>
+#include <asm/dmi.h>
#ifdef CONFIG_XEN
#include <linux/percpu.h>
#include <xen/interface/physdev.h>
unsigned long saved_video_mode;
+/*
+ * Early DMI memory
+ */
+int dmi_alloc_index;
+char dmi_alloc_data[DMI_MAX_DATA];
+
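These arrays back DMI scanning before the normal allocators exist. A plausible bump allocator over them might look like the sketch below; the real helper lives in include/asm-x86_64/dmi.h and may differ in detail:

	static inline void *dmi_alloc(unsigned len)
	{
		int idx = dmi_alloc_index;

		if (dmi_alloc_index + len > DMI_MAX_DATA)
			return NULL;		/* static buffer exhausted */
		dmi_alloc_index += len;
		return dmi_alloc_data + idx;
	}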
/*
* Setup options
*/
}
}
+/* Check for full argument with no trailing characters */
+static int fullarg(char *p, char *arg)
+{
+ int l = strlen(arg);
+ return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
+}
+
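fullarg() matches only a whole token, so "acpi=off" no longer triggers on a longer word that merely starts with it. A standalone userspace demonstration (illustrative only):

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	static int fullarg(const char *p, const char *arg)
	{
		size_t l = strlen(arg);
		return !memcmp(p, arg, l) &&
		       (p[l] == 0 || isspace((unsigned char)p[l]));
	}

	int main(void)
	{
		printf("%d\n", fullarg("acpi=off root=/dev/sda1", "acpi=off")); /* 1 */
		printf("%d\n", fullarg("acpi=offline", "acpi=off"));            /* 0 */
		return 0;
	}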
static __init void parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = COMMAND_LINE;
#endif
#ifdef CONFIG_ACPI
/* "acpi=off" disables both ACPI table parsing and interpreter init */
- if (!memcmp(from, "acpi=off", 8))
+ if (fullarg(from,"acpi=off"))
disable_acpi();
- if (!memcmp(from, "acpi=force", 10)) {
+ if (fullarg(from, "acpi=force")) {
/* add later when we do DMI horrors: */
acpi_force = 1;
acpi_disabled = 0;
/* acpi=ht just means: do ACPI MADT parsing
at bootup, but don't enable the full ACPI interpreter */
- if (!memcmp(from, "acpi=ht", 7)) {
+ if (fullarg(from, "acpi=ht")) {
if (!acpi_force)
disable_acpi();
acpi_ht = 1;
}
- else if (!memcmp(from, "pci=noacpi", 10))
+ else if (fullarg(from, "pci=noacpi"))
acpi_disable_pci();
- else if (!memcmp(from, "acpi=noirq", 10))
+ else if (fullarg(from, "acpi=noirq"))
acpi_noirq_set();
- else if (!memcmp(from, "acpi_sci=edge", 13))
+ else if (fullarg(from, "acpi_sci=edge"))
acpi_sci_flags.trigger = 1;
- else if (!memcmp(from, "acpi_sci=level", 14))
+ else if (fullarg(from, "acpi_sci=level"))
acpi_sci_flags.trigger = 3;
- else if (!memcmp(from, "acpi_sci=high", 13))
+ else if (fullarg(from, "acpi_sci=high"))
acpi_sci_flags.polarity = 1;
- else if (!memcmp(from, "acpi_sci=low", 12))
+ else if (fullarg(from, "acpi_sci=low"))
acpi_sci_flags.polarity = 3;
/* acpi=strict disables out-of-spec workarounds */
- else if (!memcmp(from, "acpi=strict", 11)) {
+ else if (fullarg(from, "acpi=strict")) {
acpi_strict = 1;
}
#ifdef CONFIG_X86_IO_APIC
- else if (!memcmp(from, "acpi_skip_timer_override", 24))
+ else if (fullarg(from, "acpi_skip_timer_override"))
acpi_skip_timer_override = 1;
#endif
#endif
#ifndef CONFIG_XEN
- if (!memcmp(from, "nolapic", 7) ||
- !memcmp(from, "disableapic", 11))
+ if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
disable_apic = 1;
+ }
- /* Don't confuse with noapictimer */
- if (!memcmp(from, "noapic", 6) &&
- (from[6] == ' ' || from[6] == 0))
+ if (fullarg(from, "noapic"))
skip_ioapic_setup = 1;
- /* Make sure to not confuse with apic= */
- if (!memcmp(from, "apic", 4) &&
- (from[4] == ' ' || from[4] == 0)) {
+ if (fullarg(from,"apic")) {
skip_ioapic_setup = 0;
ioapic_force = 1;
}
iommu_setup(from+6);
}
- if (!memcmp(from,"oops=panic", 10))
+ if (fullarg(from,"oops=panic"))
panic_on_oops = 1;
if (!memcmp(from, "noexec=", 7))
static int __init noreplacement_setup(char *s)
{
no_replacement = 1;
- return 0;
+ return 1;
}
__setup("noreplacement", noreplacement_setup);
#ifndef CONFIG_XEN
#define EBDA_ADDR_POINTER 0x40E
-static void __init reserve_ebda_region(void)
+
+unsigned __initdata ebda_addr;
+unsigned __initdata ebda_size;
+
+static void discover_ebda(void)
{
- unsigned int addr;
- /**
+ /*
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E
*/
- addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
- addr <<= 4;
- if (addr)
- reserve_bootmem_generic(addr, PAGE_SIZE);
+ ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+ ebda_addr <<= 4;
+
+ ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+
+ /* Round EBDA up to pages */
+ if (ebda_size == 0)
+ ebda_size = 1;
+ ebda_size <<= 10;
+ ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+ if (ebda_size > 64*1024)
+ ebda_size = 64*1024;
}
#endif
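The rounding pads the BIOS-reported size (in KiB) so the reservation covers whole pages even when the EBDA is not page-aligned, then clamps to the architectural 64 KiB maximum. A worked userspace example with assumed values:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))
	#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

	int main(void)
	{
		unsigned long ebda_addr = 0x9fc00;	/* assumed BIOS pointer << 4 */
		unsigned long ebda_size = 1 << 10;	/* BIOS reported 1 KiB */

		ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK),
				     PAGE_SIZE);	/* -> 0x1000 */
		if (ebda_size > 64 * 1024)
			ebda_size = 64 * 1024;
		printf("reserve %#lx..%#lx\n", ebda_addr, ebda_addr + ebda_size);
		return 0;
	}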
#ifdef CONFIG_XEN
/* Register a call for panic conditions. */
- notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
kernel_end = 0; /* dummy */
* we are rounding upwards:
*/
end_pfn = e820_end_of_ram();
+ num_physpages = end_pfn; /* for pfn_valid */
check_efer();
+#ifndef CONFIG_XEN
+ discover_ebda();
+#endif
+
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+ /* dmi_scan_machine(); */
+
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
reserve_bootmem_generic(0, PAGE_SIZE);
/* reserve ebda region */
- reserve_ebda_region();
+ if (ebda_addr)
+ reserve_bootmem_generic(ebda_addr, ebda_size);
#endif
#ifdef CONFIG_SMP
zap_low_mappings(0);
+ /*
+ * set this early, so we don't allocate cpu0
+ * if the MADT list doesn't list the BSP first
+ * mpparse.c/MP_processor_info() allocates logical cpu numbers.
+ */
+ cpu_set(0, cpu_present_map);
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
unsigned bits;
#ifdef CONFIG_NUMA
int node = 0;
- unsigned apicid = phys_proc_id[cpu];
+ unsigned apicid = hard_smp_processor_id();
#endif
bits = 0;
/* Low order bits define the core id (index of core in socket) */
cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
/* Convert the APIC ID into the socket ID */
- phys_proc_id[cpu] >>= bits;
+ phys_proc_id[cpu] = phys_pkg_id(bits);
#ifdef CONFIG_NUMA
node = phys_proc_id[cpu];
}
numa_set_node(cpu, node);
- printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
+ printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
+ cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
#endif
#endif
}
if (c->extended_cpuid_level >= 0x80000008) {
c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- if (c->x86_max_cores & (c->x86_max_cores - 1))
- c->x86_max_cores = 1;
amd_detect_cmp(c);
}
cpuid(1, &eax, &ebx, &ecx, &edx);
- c->apicid = phys_pkg_id(0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
for now. */
node = apicid_to_node[hard_smp_processor_id()];
if (node == NUMA_NO_NODE)
- node = 0;
+ node = first_node(node_online_map);
numa_set_node(cpu, node);
if (acpi_numa > 0)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
+#ifdef CONFIG_X86_XEN_GENAPIC
+ c->apicid = phys_pkg_id(0);
+#endif
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
{
int i;
for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if ( test_bit(i, &c->x86_capability) &&
- x86_cap_flags[i] != NULL )
+ if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
seq_printf(m, " %s", x86_cap_flags[i]);
}
}
core_initcall(run_dmi_scan);
+#ifdef CONFIG_INPUT_PCSPKR
+#include <linux/platform_device.h>
+static __init int add_pcspkr(void)
+{
+ struct platform_device *pd;
+ int ret;
+
+ pd = platform_device_alloc("pcspkr", -1);
+ if (!pd)
+ return -ENOMEM;
+
+ ret = platform_device_add(pd);
+ if (ret)
+ platform_device_put(pd);
+
+ return ret;
+}
+device_initcall(add_pcspkr);
+#endif
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
#ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
#endif
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
do_not_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
- return 0;
+ return 1;
}
__setup("noexec=", nonx_setup); /* parsed early actually */
-int force_personality32 = READ_IMPLIES_EXEC;
+int force_personality32 = 0;
/* noexec32=on|off
Control non executable heap for 32bit processes.
force_personality32 &= ~READ_IMPLIES_EXEC;
else if (!strcmp(str, "off"))
force_personality32 |= READ_IMPLIES_EXEC;
- return 0;
+ return 1;
}
__setup("noexec32=", nonx32_setup);
switch (v + 1) {
#if DEBUG_STKSZ > EXCEPTION_STKSZ
case DEBUG_STACK:
- cpu_pda[cpu].debugstack = (unsigned long)estacks;
+ cpu_pda(cpu)->debugstack = (unsigned long)estacks;
estacks += DEBUG_STKSZ;
break;
#endif
* Clear all 6 debug registers:
*/
- set_debug(0UL, 0);
- set_debug(0UL, 1);
- set_debug(0UL, 2);
- set_debug(0UL, 3);
- set_debug(0UL, 6);
- set_debug(0UL, 7);
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
fpu_init();
}
{
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
- clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
+ cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignores flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* was in lazy tlb mode.
* 1a3) update cpu active_mm
* Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
#include <asm/proto.h>
#include <asm/nmi.h>
-#ifndef CONFIG_X86_NO_IDT
-extern struct gate_struct idt_table[256];
-#endif
-
asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
-struct notifier_block *die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
+ATOMIC_NOTIFIER_HEAD(die_chain);
int register_die_notifier(struct notifier_block *nb)
{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&die_chain, nb);
}
+EXPORT_SYMBOL(unregister_die_notifier);
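With the chain now atomic, a client just registers a plain notifier_block; the vmalloc_sync_all() above makes sure a notifier living in module (vmalloc) space is mapped in every pgd before the fault path can walk the chain. A minimal client sketch (names hypothetical):

	static int my_die_handler(struct notifier_block *nb, unsigned long val,
				  void *data)
	{
		/* val is a DIE_* event code; data points to a struct die_args. */
		return NOTIFY_DONE;
	}

	static struct notifier_block my_die_nb = {
		.notifier_call = my_die_handler,
	};

	static int __init my_module_init(void)
	{
		return register_die_notifier(&my_die_nb);
	}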
static inline void conditional_sti(struct pt_regs *regs)
{
{
if (regs->eflags & X86_EFLAGS_IF)
local_irq_disable();
+ /* Make sure to not schedule here because we could be running
+ on an exception stack. */
preempt_enable_no_resched();
}
if (!modname)
modname = delim = "";
return printk("<%016lx>{%s%s%s%s%+ld}",
- address,delim,modname,delim,symname,offset);
+ address, delim, modname, delim, symname, offset);
}
#else
int printk_address(unsigned long address)
show_stack(NULL, (unsigned long*)rsp);
printk("\nCode: ");
- if(regs->rip < PAGE_OFFSET)
+ if (regs->rip < PAGE_OFFSET)
goto bad;
- for(i=0;i<20;i++)
- {
+ for (i=0; i<20; i++) {
unsigned char c;
- if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
+ if (__get_user(c, &((unsigned char*)regs->rip)[i])) {
bad:
printk(" Bad RIP value.");
break;
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
+static unsigned int die_nest_count;
unsigned __kprobes long oops_begin(void)
{
else
spin_lock(&die_lock);
}
+ die_nest_count++;
die_owner = cpu;
console_verbose();
bust_spinlocks(1);
{
die_owner = -1;
bust_spinlocks(0);
- spin_unlock_irqrestore(&die_lock, flags);
+ die_nest_count--;
+ if (die_nest_count)
+ /* We still own the lock */
+ local_irq_restore(flags);
+ else
+ /* Nest count reaches zero, release the lock. */
+ spin_unlock_irqrestore(&die_lock, flags);
if (panic_on_oops)
panic("Oops");
}
panic("nmi watchdog");
printk("console shuts up ...\n");
oops_end(flags);
+ nmi_exit();
+ local_irq_enable();
do_exit(SIGSEGV);
}
#endif
{
struct task_struct *tsk = current;
- conditional_sti(regs);
-
tsk->thread.error_code = error_code;
tsk->thread.trap_no = trapnr;
printk(KERN_INFO
"%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
tsk->comm, tsk->pid, str,
- regs->rip,regs->rsp,error_code);
+ regs->rip, regs->rsp, error_code);
if (info)
force_sig_info(signr, info, tsk);
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->rip);
- if (fixup) {
+ if (fixup)
regs->rip = fixup->fixup;
- } else
+ else
die(str, regs, error_code);
return;
}
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
+ conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
+ conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
}
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR(18, SIGSEGV, "reserved", reserved)
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+
+/* Runs on IST stack */
+asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
+{
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ 12, SIGBUS) == NOTIFY_STOP)
+ return;
+ preempt_conditional_sti(regs);
+ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+}
asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
{
printk(KERN_INFO
"%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
tsk->comm, tsk->pid,
- regs->rip,regs->rsp,error_code);
+ regs->rip, regs->rsp, error_code);
force_sig(SIGSEGV, tsk);
return;
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
return;
}
+ preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- return;
+ preempt_conditional_cli(regs);
}
/* Help handler running on IST stack to switch back to user stack
static int __init oops_dummy(char *s)
{
panic_on_oops = 1;
- return -1;
+ return 1;
}
__setup("oops=", oops_dummy);
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s,NULL,0);
- return 0;
+ return 1;
}
__setup("kstack=", kstack_setup);
EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(get_wchan);
-
#ifdef CONFIG_X86_LOCAL_APIC
EXPORT_SYMBOL_GPL(set_nmi_callback);
EXPORT_SYMBOL_GPL(unset_nmi_callback);
#undef memcpy
#undef memset
#undef memmove
-#undef strlen
extern void * memset(void *,int,__kernel_size_t);
extern size_t strlen(const char *);
extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(die_chain);
-EXPORT_SYMBOL(register_die_notifier);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(smp_num_siblings);
#endif
-extern void do_softirq_thunk(void);
-EXPORT_SYMBOL(do_softirq_thunk);
-
#ifdef CONFIG_BUG
EXPORT_SYMBOL(out_of_line_bug);
#endif
return -1;
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
/* Below here mismatches are bugs because these lower tables
are shared */
if (!user_mode(regs))
error_code &= ~PF_USER; /* means kernel */
+ tsk = current;
+ mm = tsk->mm;
+ prefetchw(&mm->mmap_sem);
+
/* get the address */
address = HYPERVISOR_shared_info->vcpu_info[
smp_processor_id()].arch.cr2;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- tsk = current;
- mm = tsk->mm;
info.si_code = SEGV_MAPERR;
*/
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) < 0)
- goto bad_area_nosemaphore;
- return;
+ if (vmalloc_fault(address) >= 0)
+ return;
}
/* Can take a spurious fault if mapping changes R/O -> R/W. */
if (spurious_fault(regs, address, error_code))
return;
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
goto bad_area_nosemaphore;
}
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (likely(regs->eflags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+ printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code);
return;
}
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+void vmalloc_sync_all(void)
+{
+ /* Note that races in the updates of insync and start aren't
+ problematic:
+ insync can only get set bits added, and updates to start are only
+ improving performance (without affecting correctness if undone). */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long address;
+
+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+ for (page = pgd_list; page;
+ page = (struct page *)page->index) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+ }
+ spin_unlock(&pgd_lock);
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start)
+ start = address + PGDIR_SIZE;
+ }
+ /* Check that there is no need to do the same for the modules area. */
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ (__START_KERNEL & PGDIR_MASK)));
+}
+
static int __init enable_pagefaulttrace(char *str)
{
page_fault_trace = 1;
- return 0;
+ return 1;
}
__setup("pagefaulttrace", enable_pagefaulttrace);
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
int after_bootmem;
-static void *spp_getpage(void)
+static __init void *spp_getpage(void)
{
void *ptr;
if (after_bootmem)
return pud + pud_index(address);
}
-static void set_pte_phys(unsigned long vaddr,
+static __init void set_pte_phys(unsigned long vaddr,
unsigned long phys, pgprot_t prot, int user_mode)
{
pgd_t *pgd;
#define SET_FIXMAP_USER 1
/* NOTE: this is meant to be run only at boot */
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
return readonly;
}
+/* Must run before zap_low_mappings */
+__init void *early_ioremap(unsigned long addr, unsigned long size)
+{
+ return ioremap(addr, size);
+}
+
+/* To avoid virtual aliases later */
+__init void early_iounmap(void *addr, unsigned long size)
+{
+ iounmap(addr);
+}
+
static void __meminit
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
{
pud_t *pud;
if (after_bootmem) {
- pud = pud_offset_k(pgd, __PAGE_OFFSET);
+ pud = pud_offset_k(pgd, start & PGDIR_MASK);
make_page_readonly(pud, XENFEAT_writable_page_tables);
pud_phys = __pa(pud);
} else {
/*
* Memory hotplug specific functions
- * These are only for non-NUMA machines right now.
*/
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
}
+#ifndef CONFIG_MEMORY_HOTPLUG
+/*
+ * Memory hotadd without sparsemem. The mem_maps have been allocated in
+ * advance; just online the pages.
+ */
+int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
+{
+ int err = -EIO;
+ unsigned long pfn;
+ unsigned long total = 0, mem = 0;
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (pfn_valid(pfn)) {
+ online_page(pfn_to_page(pfn));
+ err = 0;
+ mem++;
+ }
+ total++;
+ }
+ if (!err) {
+ z->spanned_pages += total;
+ z->present_pages += mem;
+ z->zone_pgdat->node_spanned_pages += total;
+ z->zone_pgdat->node_present_pages += mem;
+ }
+ return err;
+}
+#endif
+
+/*
+ * Memory is added always to NORMAL zone. This means you will never get
+ * additional DMA/DMA32 memory.
+ */
int add_memory(u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(0);
/* XEN: init and count pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
ClearPageReserved(pfn_to_page(pfn));
- set_page_count(pfn_to_page(pfn), 1);
+ init_page_count(pfn_to_page(pfn));
totalram_pages++;
}
reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
make_page_writable(
__va(__pa(addr)), XENFEAT_writable_page_tables);
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
pte_t *pbase;
if (!base)
return NULL;
+ /*
+ * page_private is used to track the number of entries in
+ * the page table page that have non-standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
}
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
- unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
-static inline void save_page(unsigned long address, struct page *fpage)
+static inline void save_page(struct page *fpage)
{
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map(address);
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df->address = address;
- df_list = df;
- }
+ fpage->lru.next = (struct list_head *)deferred_pages;
+ deferred_pages = fpage;
}
/*
set_pte(kpte, pfn_pte(pfn, prot));
} else {
/*
- * split_large_page will take the reference for this change_page_attr
- * on the split page.
+ * split_large_page will take the reference for this
+ * change_page_attr on the split page.
*/
struct page *split;
set_pte(kpte,mk_pte(split, ref_prot2));
kpte_page = split;
}
- get_page(kpte_page);
+ page_private(kpte_page)++;
} else if ((kpte_flags & _PAGE_PSE) == 0) {
set_pte(kpte, pfn_pte(pfn, ref_prot));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
#ifndef CONFIG_XEN
BUG_ON(PageReserved(kpte_page));
#else
- if (!PageReserved(kpte_page))
+ if (PageReserved(kpte_page))
+ return 0;
#endif
- switch (page_count(kpte_page)) {
- case 1:
- save_page(address, kpte_page);
- revert_page(address, ref_prot);
- break;
- case 0:
- BUG(); /* memleak and failed 2M page regeneration */
- }
+
+ if (page_private(kpte_page) == 0) {
+ save_page(kpte_page);
+ revert_page(address, ref_prot);
+ }
return 0;
}
void global_flush_tlb(void)
{
- struct deferred_page *df, *next_df;
+ struct page *dpage;
down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
+ dpage = xchg(&deferred_pages, NULL);
up_read(&init_mm.mmap_sem);
- flush_map((df && !df->next) ? df->address : 0);
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
+
+ flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
+ while (dpage) {
+ struct page *tmp = dpage;
+ dpage = (struct page *)dpage->lru.next;
+ ClearPagePrivate(tmp);
+ __free_page(tmp);
}
}
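The rewrite threads deferred pages directly through page->lru.next instead of kmalloc'ing bookkeeping nodes, so deferral can no longer fail and fall back to an immediate flush. The push/drain pattern in isolation (toy sketch, not kernel code):

	struct node { struct node *next; };

	static struct node *deferred;		/* plays the role of deferred_pages */

	static void save(struct node *n)	/* save_page() analogue */
	{
		n->next = deferred;
		deferred = n;
	}

	static struct node *drain(void)	/* the xchg(&deferred_pages, NULL) step */
	{
		struct node *list = deferred;

		deferred = NULL;
		return list;			/* caller walks, flushes, then frees */
	}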
obj-y := i386.o
obj-$(CONFIG_PCI_DIRECT)+= direct.o
-obj-y += fixup.o
+obj-y += fixup.o init.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit special
common-y += ../../i386/pci/common.o
fixup-y += ../../i386/pci/fixup.o
i386-y += ../../i386/pci/i386.o
+init-y += ../../i386/pci/init.o
ifdef CONFIG_XEN
irq-y := ../../i386/pci/irq-xen.o
obj-$(CONFIG_FB_I810) += video/i810/
obj-$(CONFIG_FB_INTEL) += video/intelfb/
-# we also need input/serio early so serio bus is initialized by the time
-# serial drivers start registering their serio ports
-obj-$(CONFIG_SERIO) += input/serio/
obj-y += serial/
obj-$(CONFIG_PARPORT) += parport/
obj-y += base/ block/ misc/ mfd/ net/ media/
obj-$(CONFIG_USB) += usb/
obj-$(CONFIG_PCI) += usb/
obj-$(CONFIG_USB_GADGET) += usb/gadget/
+obj-$(CONFIG_SERIO) += input/serio/
obj-$(CONFIG_GAMEPORT) += input/gameport/
obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_I2O) += message/
+obj-$(CONFIG_RTC_LIB) += rtc/
obj-$(CONFIG_I2C) += i2c/
obj-$(CONFIG_W1) += w1/
obj-$(CONFIG_HWMON) += hwmon/
obj-$(CONFIG_EISA) += eisa/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
+obj-$(CONFIG_NEW_LEDS) += leds/
obj-$(CONFIG_INFINIBAND) += infiniband/
+obj-$(CONFIG_IPATH_CORE) += infiniband/
obj-$(CONFIG_SGI_SN) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
If you have an IBM ThinkPad laptop, say Y or M here.
+config ACPI_IBM_DOCK
+ bool "Legacy Docking Station Support"
+ depends on ACPI_IBM
+ default n
+ ---help---
+ Allows the ibm_acpi driver to handle docking station events.
+ This support is obsoleted by CONFIG_HOTPLUG_PCI_ACPI. It will
+ allow locking and removing the laptop from the docking station,
+ but will not properly connect PCI devices.
+
+ If you are not sure, say N here.
+
config ACPI_TOSHIBA
tristate "Toshiba Laptop Extras"
depends on X86
depends on ACPI_CUSTOM_DSDT
default ""
help
- Enter the full path name to the file wich includes the AmlCode declaration.
+ Enter the full path name to the file which includes the AmlCode
+ declaration.
config ACPI_BLACKLIST_YEAR
int "Disable ACPI for systems before Jan 1st this year" if X86_32
config ACPI_HOTPLUG_MEMORY
tristate "Memory Hotplug"
depends on ACPI
- depends on MEMORY_HOTPLUG
+ depends on MEMORY_HOTPLUG || X86_64
default n
help
This driver adds support for ACPI Memory Hotplug. This driver
+++ /dev/null
-/*
- * acpi_tables.c - ACPI Boot-Time Table Parsing
- *
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- */
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/irq.h>
-#include <linux/errno.h>
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-
-#define PREFIX "ACPI: "
-
-#define ACPI_MAX_TABLES 128
-
-static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
- [ACPI_TABLE_UNKNOWN] = "????",
- [ACPI_APIC] = "APIC",
- [ACPI_BOOT] = "BOOT",
- [ACPI_DBGP] = "DBGP",
- [ACPI_DSDT] = "DSDT",
- [ACPI_ECDT] = "ECDT",
- [ACPI_ETDT] = "ETDT",
- [ACPI_FADT] = "FACP",
- [ACPI_FACS] = "FACS",
- [ACPI_OEMX] = "OEM",
- [ACPI_PSDT] = "PSDT",
- [ACPI_SBST] = "SBST",
- [ACPI_SLIT] = "SLIT",
- [ACPI_SPCR] = "SPCR",
- [ACPI_SRAT] = "SRAT",
- [ACPI_SSDT] = "SSDT",
- [ACPI_SPMI] = "SPMI",
- [ACPI_HPET] = "HPET",
- [ACPI_MCFG] = "MCFG",
-};
-
-static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
-static char *mps_inti_flags_trigger[] = { "dfl", "edge", "res", "level" };
-
-/* System Description Table (RSDT/XSDT) */
-struct acpi_table_sdt {
- unsigned long pa;
- enum acpi_table_id id;
- unsigned long size;
-} __attribute__ ((packed));
-
-static unsigned long sdt_pa; /* Physical Address */
-static unsigned long sdt_count; /* Table count */
-
-static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES] __initdata;
-
-void acpi_table_print(struct acpi_table_header *header, unsigned long phys_addr)
-{
- char *name = NULL;
-
- if (!header)
- return;
-
- /* Some table signatures aren't good table names */
-
- if (!strncmp((char *)&header->signature,
- acpi_table_signatures[ACPI_APIC],
- sizeof(header->signature))) {
- name = "MADT";
- } else if (!strncmp((char *)&header->signature,
- acpi_table_signatures[ACPI_FADT],
- sizeof(header->signature))) {
- name = "FADT";
- } else
- name = header->signature;
-
- printk(KERN_DEBUG PREFIX
- "%.4s (v%3.3d %6.6s %8.8s 0x%08x %.4s 0x%08x) @ 0x%p\n", name,
- header->revision, header->oem_id, header->oem_table_id,
- header->oem_revision, header->asl_compiler_id,
- header->asl_compiler_revision, (void *)phys_addr);
-}
-
-void acpi_table_print_madt_entry(acpi_table_entry_header * header)
-{
- if (!header)
- return;
-
- switch (header->type) {
-
- case ACPI_MADT_LAPIC:
- {
- struct acpi_table_lapic *p =
- (struct acpi_table_lapic *)header;
- printk(KERN_INFO PREFIX
- "LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
- p->acpi_id, p->id,
- p->flags.enabled ? "enabled" : "disabled");
- }
- break;
-
- case ACPI_MADT_IOAPIC:
- {
- struct acpi_table_ioapic *p =
- (struct acpi_table_ioapic *)header;
- printk(KERN_INFO PREFIX
- "IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
- p->id, p->address, p->global_irq_base);
- }
- break;
-
- case ACPI_MADT_INT_SRC_OVR:
- {
- struct acpi_table_int_src_ovr *p =
- (struct acpi_table_int_src_ovr *)header;
- printk(KERN_INFO PREFIX
- "INT_SRC_OVR (bus %d bus_irq %d global_irq %d %s %s)\n",
- p->bus, p->bus_irq, p->global_irq,
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger]);
- if (p->flags.reserved)
- printk(KERN_INFO PREFIX
- "INT_SRC_OVR unexpected reserved flags: 0x%x\n",
- p->flags.reserved);
-
- }
- break;
-
- case ACPI_MADT_NMI_SRC:
- {
- struct acpi_table_nmi_src *p =
- (struct acpi_table_nmi_src *)header;
- printk(KERN_INFO PREFIX
- "NMI_SRC (%s %s global_irq %d)\n",
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->global_irq);
- }
- break;
-
- case ACPI_MADT_LAPIC_NMI:
- {
- struct acpi_table_lapic_nmi *p =
- (struct acpi_table_lapic_nmi *)header;
- printk(KERN_INFO PREFIX
- "LAPIC_NMI (acpi_id[0x%02x] %s %s lint[0x%x])\n",
- p->acpi_id,
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->lint);
- }
- break;
-
- case ACPI_MADT_LAPIC_ADDR_OVR:
- {
- struct acpi_table_lapic_addr_ovr *p =
- (struct acpi_table_lapic_addr_ovr *)header;
- printk(KERN_INFO PREFIX
- "LAPIC_ADDR_OVR (address[%p])\n",
- (void *)(unsigned long)p->address);
- }
- break;
-
- case ACPI_MADT_IOSAPIC:
- {
- struct acpi_table_iosapic *p =
- (struct acpi_table_iosapic *)header;
- printk(KERN_INFO PREFIX
- "IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
- p->id, (void *)(unsigned long)p->address,
- p->global_irq_base);
- }
- break;
-
- case ACPI_MADT_LSAPIC:
- {
- struct acpi_table_lsapic *p =
- (struct acpi_table_lsapic *)header;
- printk(KERN_INFO PREFIX
- "LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
- p->acpi_id, p->id, p->eid,
- p->flags.enabled ? "enabled" : "disabled");
- }
- break;
-
- case ACPI_MADT_PLAT_INT_SRC:
- {
- struct acpi_table_plat_int_src *p =
- (struct acpi_table_plat_int_src *)header;
- printk(KERN_INFO PREFIX
- "PLAT_INT_SRC (%s %s type[0x%x] id[0x%04x] eid[0x%x] iosapic_vector[0x%x] global_irq[0x%x]\n",
- mps_inti_flags_polarity[p->flags.polarity],
- mps_inti_flags_trigger[p->flags.trigger],
- p->type, p->id, p->eid, p->iosapic_vector,
- p->global_irq);
- }
- break;
-
- default:
- printk(KERN_WARNING PREFIX
- "Found unsupported MADT entry (type = 0x%x)\n",
- header->type);
- break;
- }
-}
-
-static int
-acpi_table_compute_checksum(void *table_pointer, unsigned long length)
-{
- u8 *p = (u8 *) table_pointer;
- unsigned long remains = length;
- unsigned long sum = 0;
-
- if (!p || !length)
- return -EINVAL;
-
- while (remains--)
- sum += *p++;
-
- return (sum & 0xFF);
-}
-
-/*
- * acpi_get_table_header_early()
- * for acpi_blacklisted(), acpi_table_get_sdt()
- */
-int __init
-acpi_get_table_header_early(enum acpi_table_id id,
- struct acpi_table_header **header)
-{
- unsigned int i;
- enum acpi_table_id temp_id;
-
- /* DSDT is different from the rest */
- if (id == ACPI_DSDT)
- temp_id = ACPI_FADT;
- else
- temp_id = id;
-
- /* Locate the table. */
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != temp_id)
- continue;
- *header = (void *)
- __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
- if (!*header) {
- printk(KERN_WARNING PREFIX "Unable to map %s\n",
- acpi_table_signatures[temp_id]);
- return -ENODEV;
- }
- break;
- }
-
- if (!*header) {
- printk(KERN_WARNING PREFIX "%s not present\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
-
- /* Map the DSDT header via the pointer in the FADT */
- if (id == ACPI_DSDT) {
- struct fadt_descriptor_rev2 *fadt =
- (struct fadt_descriptor_rev2 *)*header;
-
- if (fadt->revision == 3 && fadt->Xdsdt) {
- *header = (void *)__acpi_map_table(fadt->Xdsdt,
- sizeof(struct
- acpi_table_header));
- } else if (fadt->V1_dsdt) {
- *header = (void *)__acpi_map_table(fadt->V1_dsdt,
- sizeof(struct
- acpi_table_header));
- } else
- *header = NULL;
-
- if (!*header) {
- printk(KERN_WARNING PREFIX "Unable to map DSDT\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-int __init
-acpi_table_parse_madt_family(enum acpi_table_id id,
- unsigned long madt_size,
- int entry_id,
- acpi_madt_entry_handler handler,
- unsigned int max_entries)
-{
- void *madt = NULL;
- acpi_table_entry_header *entry;
- unsigned int count = 0;
- unsigned long madt_end;
- unsigned int i;
-
- if (!handler)
- return -EINVAL;
-
- /* Locate the MADT (if exists). There should only be one. */
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != id)
- continue;
- madt = (void *)
- __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
- if (!madt) {
- printk(KERN_WARNING PREFIX "Unable to map %s\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
- break;
- }
-
- if (!madt) {
- printk(KERN_WARNING PREFIX "%s not present\n",
- acpi_table_signatures[id]);
- return -ENODEV;
- }
-
- madt_end = (unsigned long)madt + sdt_entry[i].size;
-
- /* Parse all entries looking for a match. */
-
- entry = (acpi_table_entry_header *)
- ((unsigned long)madt + madt_size);
-
- while (((unsigned long)entry) + sizeof(acpi_table_entry_header) <
- madt_end) {
- if (entry->type == entry_id
- && (!max_entries || count++ < max_entries))
- if (handler(entry, madt_end))
- return -EINVAL;
-
- entry = (acpi_table_entry_header *)
- ((unsigned long)entry + entry->length);
- }
- if (max_entries && count > max_entries) {
- printk(KERN_WARNING PREFIX "[%s:0x%02x] ignored %i entries of "
- "%i found\n", acpi_table_signatures[id], entry_id,
- count - max_entries, count);
- }
-
- return count;
-}
-
-int __init
-acpi_table_parse_madt(enum acpi_madt_entry_id id,
- acpi_madt_entry_handler handler, unsigned int max_entries)
-{
- return acpi_table_parse_madt_family(ACPI_APIC,
- sizeof(struct acpi_table_madt), id,
- handler, max_entries);
-}
-
-int __init acpi_table_parse(enum acpi_table_id id, acpi_table_handler handler)
-{
- int count = 0;
- unsigned int i = 0;
-
- if (!handler)
- return -EINVAL;
-
- for (i = 0; i < sdt_count; i++) {
- if (sdt_entry[i].id != id)
- continue;
- count++;
- if (count == 1)
- handler(sdt_entry[i].pa, sdt_entry[i].size);
-
- else
- printk(KERN_WARNING PREFIX
- "%d duplicate %s table ignored.\n", count,
- acpi_table_signatures[id]);
- }
-
- return count;
-}
-
-static int __init acpi_table_get_sdt(struct acpi_table_rsdp *rsdp)
-{
- struct acpi_table_header *header = NULL;
- unsigned int i, id = 0;
-
- if (!rsdp)
- return -EINVAL;
-
- /* First check XSDT (but only on ACPI 2.0-compatible systems) */
-
- if ((rsdp->revision >= 2) &&
- (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) {
-
- struct acpi_table_xsdt *mapped_xsdt = NULL;
-
- sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address;
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
-
- if (!header) {
- printk(KERN_WARNING PREFIX
- "Unable to map XSDT header\n");
- return -ENODEV;
- }
-
- /* remap in the entire table before processing */
- mapped_xsdt = (struct acpi_table_xsdt *)
- __acpi_map_table(sdt_pa, header->length);
- if (!mapped_xsdt) {
- printk(KERN_WARNING PREFIX "Unable to map XSDT\n");
- return -ENODEV;
- }
- header = &mapped_xsdt->header;
-
- if (strncmp(header->signature, "XSDT", 4)) {
- printk(KERN_WARNING PREFIX
- "XSDT signature incorrect\n");
- return -ENODEV;
- }
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n");
- return -ENODEV;
- }
-
- sdt_count =
- (header->length - sizeof(struct acpi_table_header)) >> 3;
- if (sdt_count > ACPI_MAX_TABLES) {
- printk(KERN_WARNING PREFIX
- "Truncated %lu XSDT entries\n",
- (sdt_count - ACPI_MAX_TABLES));
- sdt_count = ACPI_MAX_TABLES;
- }
-
- for (i = 0; i < sdt_count; i++)
- sdt_entry[i].pa = (unsigned long)mapped_xsdt->entry[i];
- }
-
- /* Then check RSDT */
-
- else if (rsdp->rsdt_address) {
-
- struct acpi_table_rsdt *mapped_rsdt = NULL;
-
- sdt_pa = rsdp->rsdt_address;
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
- if (!header) {
- printk(KERN_WARNING PREFIX
- "Unable to map RSDT header\n");
- return -ENODEV;
- }
-
- /* remap in the entire table before processing */
- mapped_rsdt = (struct acpi_table_rsdt *)
- __acpi_map_table(sdt_pa, header->length);
- if (!mapped_rsdt) {
- printk(KERN_WARNING PREFIX "Unable to map RSDT\n");
- return -ENODEV;
- }
- header = &mapped_rsdt->header;
-
- if (strncmp(header->signature, "RSDT", 4)) {
- printk(KERN_WARNING PREFIX
- "RSDT signature incorrect\n");
- return -ENODEV;
- }
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n");
- return -ENODEV;
- }
-
- sdt_count =
- (header->length - sizeof(struct acpi_table_header)) >> 2;
- if (sdt_count > ACPI_MAX_TABLES) {
- printk(KERN_WARNING PREFIX
- "Truncated %lu RSDT entries\n",
- (sdt_count - ACPI_MAX_TABLES));
- sdt_count = ACPI_MAX_TABLES;
- }
-
- for (i = 0; i < sdt_count; i++)
- sdt_entry[i].pa = (unsigned long)mapped_rsdt->entry[i];
- }
-
- else {
- printk(KERN_WARNING PREFIX
- "No System Description Table (RSDT/XSDT) specified in RSDP\n");
- return -ENODEV;
- }
-
- acpi_table_print(header, sdt_pa);
-
- for (i = 0; i < sdt_count; i++) {
-
- /* map in just the header */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_entry[i].pa,
- sizeof(struct acpi_table_header));
- if (!header)
- continue;
-
- /* remap in the entire table before processing */
- header = (struct acpi_table_header *)
- __acpi_map_table(sdt_entry[i].pa, header->length);
- if (!header)
- continue;
-
- acpi_table_print(header, sdt_entry[i].pa);
-
- if (acpi_table_compute_checksum(header, header->length)) {
- printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
- continue;
- }
-
- sdt_entry[i].size = header->length;
-
- for (id = 0; id < ACPI_TABLE_COUNT; id++) {
- if (!strncmp((char *)&header->signature,
- acpi_table_signatures[id],
- sizeof(header->signature))) {
- sdt_entry[i].id = id;
- }
- }
- }
-
- /*
- * The DSDT is *not* in the RSDT (why not? no idea.) but we want
- * to print its info, because this is what people usually blacklist
- * against. Unfortunately, we don't know the phys_addr, so just
- * print 0. Maybe no one will notice.
- */
- if (!acpi_get_table_header_early(ACPI_DSDT, &header))
- acpi_table_print(header, 0);
-
- return 0;
-}
-
-/*
- * acpi_table_init()
- *
- * find RSDP, find and checksum SDT/XSDT.
- * checksum all tables, print SDT/XSDT
- *
- * result: sdt_entry[] is initialized
- */
-#if defined(CONFIG_X86_XEN) || defined(CONFIG_X86_64_XEN)
-#define acpi_rsdp_phys_to_va(rsdp_phys) isa_bus_to_virt(rsdp_phys)
-#else
-#define acpi_rsdp_phys_to_va(rsdp_phys) __va(rsdp_phys)
-#endif
-
-int __init acpi_table_init(void)
-{
- struct acpi_table_rsdp *rsdp = NULL;
- unsigned long rsdp_phys = 0;
- int result = 0;
-
- /* Locate and map the Root System Description Table (RSDP) */
-
- rsdp_phys = acpi_find_rsdp();
- if (!rsdp_phys) {
- printk(KERN_ERR PREFIX "Unable to locate RSDP\n");
- return -ENODEV;
- }
-
- rsdp = (struct acpi_table_rsdp *)acpi_rsdp_phys_to_va(rsdp_phys);
- if (!rsdp) {
- printk(KERN_WARNING PREFIX "Unable to map RSDP\n");
- return -ENODEV;
- }
-
- printk(KERN_DEBUG PREFIX
- "RSDP (v%3.3d %6.6s ) @ 0x%p\n",
- rsdp->revision, rsdp->oem_id, (void *)rsdp_phys);
-
- if (rsdp->revision < 2)
- result =
- acpi_table_compute_checksum(rsdp,
- sizeof(struct acpi_table_rsdp));
- else
- result =
- acpi_table_compute_checksum(rsdp,
- ((struct acpi20_table_rsdp *)
- rsdp)->length);
-
- if (result) {
- printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
- return -ENODEV;
- }
-
- /* Locate and map the System Description table (RSDT/XSDT) */
-
- if (acpi_table_get_sdt(rsdp))
- return -ENODEV;
-
- return 0;
-}
#include <linux/crash_dump.h>
#include <linux/backing-dev.h>
#include <linux/bootmem.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/io.h>
}
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
-static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
+static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
- unsigned long end_mem;
-
- end_mem = __pa(high_memory);
- if (addr >= end_mem)
+ if (addr + count > __pa(high_memory))
return 0;
- if (*count > end_mem - addr)
- *count = end_mem - addr;
-
return 1;
}
-static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t *size)
+static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
{
return 1;
}
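Note that count is now passed by value and the whole range must fit below __pa(high_memory); the old code silently clamped *count instead. The caller-visible difference, with end_mem standing for __pa(high_memory) (hypothetical values):

    /*
     * old: valid_phys_addr_range(end_mem - 4096, &count)
     *      -> 1, and count is clamped from 8192 to 4096 (short read)
     * new: valid_phys_addr_range(end_mem - 4096, 8192)
     *      -> 0, so read_mem()/write_mem() return -EFAULT outright
     */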
ssize_t read, sz;
char *ptr;
- if (!valid_phys_addr_range(p, &count))
+ if (!valid_phys_addr_range(p, count))
return -EFAULT;
read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
unsigned long copied;
void *ptr;
- if (!valid_phys_addr_range(p, &count))
+ if (!valid_phys_addr_range(p, count))
return -EFAULT;
written = 0;
copied = copy_from_user(ptr, buf, sz);
if (copied) {
- ssize_t ret;
-
- ret = written + (sz - copied);
- if (ret)
- return ret;
+ written += sz - copied;
+ if (written)
+ break;
return -EFAULT;
}
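/* With this fix a fault mid-copy folds the partially copied bytes into
 * 'written' and breaks out through the normal exit, so the caller gets
 * a short write count (and *ppos stays consistent); only a fault before
 * the first byte still yields -EFAULT, matching write(2) semantics. */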
buf += sz;
{
size_t size = vma->vm_end - vma->vm_start;
- if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, &size))
+ if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
return -EINVAL;
vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
copied = copy_from_user(ptr, buf, sz);
if (copied) {
- ssize_t ret;
-
- ret = written + (sz - copied);
- if (ret)
- return ret;
+ written += sz - copied;
+ if (written)
+ break;
return -EFAULT;
}
buf += sz;
if (len) {
written = copy_from_user(kbuf, buf, len);
if (written) {
- ssize_t ret;
-
+ if (wrote + virtr)
+ break;
free_page((unsigned long)kbuf);
- ret = wrote + virtr + (len - written);
- return ret ? ret : -EFAULT;
+ return -EFAULT;
}
}
len = vwrite(kbuf, (char *)p, len);
return -EFAULT;
while (count-- > 0 && i < 65536) {
char c;
- if (__get_user(c, tmp))
+ if (__get_user(c, tmp)) {
+ if (tmp > buf)
+ break;
return -EFAULT;
+ }
outb(c,i);
i++;
tmp++;
return count;
}
+static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ return sd->len;
+}
+
+static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
+}
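pipe_to_null() reports every pipe buffer as fully consumed without touching its contents, so splicing to /dev/null discards data with no copy into the driver. A self-contained userspace sketch exercising the new .splice_write path (hypothetical test program, not part of the patch):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int pfd[2];
        int null_fd = open("/dev/null", O_WRONLY);
        char buf[4096] = { 0 };

        if (null_fd < 0 || pipe(pfd) < 0)
            return 1;
        write(pfd[1], buf, sizeof(buf));    /* queue one page in the pipe */
        close(pfd[1]);
        /* the pages are dropped by pipe_to_null without a copy */
        if (splice(pfd[0], NULL, null_fd, NULL, sizeof(buf), 0) < 0)
            perror("splice");
        return 0;
    }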
+
#ifdef CONFIG_MMU
/*
* For fun, we are using the MMU for this.
.llseek = null_lseek,
.read = read_null,
.write = write_null,
+ .splice_write = splice_write_null,
};
#if defined(CONFIG_ISA) || !defined(__mc68000__)
unsigned int minor;
char *name;
umode_t mode;
- struct file_operations *fops;
+ const struct file_operations *fops;
} devlist[] = { /* list of minor devices */
{1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
{2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
-/* Semaphore to protect creating and releasing a tty. This is shared with
+/* Mutex to protect creating and releasing a tty. This is shared with
vt.c for deeply disgusting hack reasons */
-DECLARE_MUTEX(tty_sem);
+DEFINE_MUTEX(tty_mutex);
int console_use_vt = 1;
spin_unlock_irqrestore(&tty->buf.lock, flags);
return size;
}
-
EXPORT_SYMBOL_GPL(tty_buffer_request_room);
-int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t size)
+int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
+ size_t size)
{
int copied = 0;
do {
tb->used += space;
copied += space;
chars += space;
-/* printk("Flip insert %d.\n", space); */
}
/* There is a small chance that we need to split the data over
several buffers. If this is the case we must loop */
while (unlikely(size > copied));
return copied;
}
-EXPORT_SYMBOL_GPL(tty_insert_flip_string);
-
-int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, char *flags, size_t size)
+EXPORT_SYMBOL(tty_insert_flip_string);
+int tty_insert_flip_string_flags(struct tty_struct *tty,
+ const unsigned char *chars, const char *flags, size_t size)
{
int copied = 0;
do {
while (unlikely(size > copied));
return copied;
}
-EXPORT_SYMBOL_GPL(tty_insert_flip_string_flags);
-
+EXPORT_SYMBOL(tty_insert_flip_string_flags);
+void tty_schedule_flip(struct tty_struct *tty)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&tty->buf.lock, flags);
+ if (tty->buf.tail != NULL) {
+ tty->buf.tail->active = 0;
+ tty->buf.tail->commit = tty->buf.tail->used;
+ }
+ spin_unlock_irqrestore(&tty->buf.lock, flags);
+ schedule_delayed_work(&tty->buf.work, 1);
+}
+EXPORT_SYMBOL(tty_schedule_flip);
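tty_schedule_flip() commits whatever has been queued and defers delivery to the flush_to_ldisc work, so drivers no longer open-code the commit/schedule pair. A minimal sketch of the intended receive path (uart_demo_rx and its arguments are illustrative only):

    static void uart_demo_rx(struct tty_struct *tty,
                             const unsigned char *data, int len)
    {
        /* queue the received bytes into the flip buffers... */
        tty_insert_flip_string(tty, data, len);
        /* ...then commit them and let the workqueue deliver */
        tty_schedule_flip(tty);
    }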
/*
* Prepare a block of space in the buffer for data. Returns the length
struct tty_ldisc *ld;
unsigned long flags;
- if (disc < N_TTY || disc >= NR_LDISCS)
- BUG();
+ BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
spin_lock_irqsave(&tty_ldisc_lock, flags);
ld = &tty_ldiscs[disc];
- if(ld->refcount == 0)
- BUG();
- ld->refcount --;
+ BUG_ON(ld->refcount == 0);
+ ld->refcount--;
module_put(ld->owner);
spin_unlock_irqrestore(&tty_ldisc_lock, flags);
}
{
unsigned long flags;
- if(ld == NULL)
- BUG();
+ BUG_ON(ld == NULL);
spin_lock_irqsave(&tty_ldisc_lock, flags);
if(ld->refcount == 0)
p->signal->tty = NULL;
if (!p->signal->leader)
continue;
- send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p);
- send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
lock_kernel();
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
tty = current->signal->tty;
if (tty) {
tty_pgrp = tty->pgrp;
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
tty_vhangup(tty);
} else {
kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
unlock_kernel();
return;
}
}
/* Must lock changes to tty_old_pgrp */
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
current->signal->tty_old_pgrp = 0;
tty->session = 0;
tty->pgrp = -1;
p->signal->tty = NULL;
} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
unlock_kernel();
}
ssize_t ret = 0, written = 0;
unsigned int chunk;
- if (down_interruptible(&tty->atomic_write)) {
+ if (mutex_lock_interruptible(&tty->atomic_write_lock)) {
return -ERESTARTSYS;
}
if (count < chunk)
chunk = count;
- /* write_buf/write_cnt is protected by the atomic_write semaphore */
+ /* write_buf/write_cnt is protected by the atomic_write_lock mutex */
if (tty->write_cnt < chunk) {
unsigned char *buf;
buf = kmalloc(chunk, GFP_KERNEL);
if (!buf) {
- up(&tty->atomic_write);
+ mutex_unlock(&tty->atomic_write_lock);
return -ENOMEM;
}
kfree(tty->write_buf);
inode->i_mtime = current_fs_time(inode->i_sb);
ret = written;
}
- up(&tty->atomic_write);
+ mutex_unlock(&tty->atomic_write_lock);
return ret;
}
/*
* WSH 06/09/97: Rewritten to remove races and properly clean up after a
- * failed open. The new code protects the open with a semaphore, so it's
- * really quite straightforward. The semaphore locking can probably be
+ * failed open. The new code protects the open with a mutex, so it's
+ * really quite straightforward. The mutex locking can probably be
* relaxed for the (most common) case of reopening a tty.
*/
static int init_dev(struct tty_driver *driver, int idx,
success:
*ret_tty = tty;
- /* All paths come through here to release the semaphore */
+ /* All paths come through here to release the mutex */
end_init:
return retval;
{
struct tty_struct *tty, *o_tty;
int pty_master, tty_closing, o_tty_closing, do_sleep;
- int devpts_master, devpts;
+ int devpts;
int idx;
char buf[64];
unsigned long flags;
pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER);
devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
- devpts_master = pty_master && devpts;
o_tty = tty->link;
#ifdef TTY_PARANOIA_CHECK
/* Guard against races with tty->count changes elsewhere and
opens on /dev/tty */
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
tty_closing = tty->count <= 1;
o_tty_closing = o_tty &&
(o_tty->count <= (pty_master ? 1 : 0));
printk(KERN_WARNING "release_dev: %s: read/write wait queue "
"active!\n", tty_name(tty, buf));
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
schedule();
}
read_unlock(&tasklist_lock);
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
/* check whether both sides are closing ... */
if (!tty_closing || (o_tty && !o_tty_closing))
index = -1;
retval = 0;
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
if (device == MKDEV(TTYAUX_MAJOR,0)) {
if (!current->signal->tty) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENXIO;
}
driver = current->signal->tty->driver;
noctty = 1;
goto got_driver;
}
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENODEV;
}
driver = get_tty_driver(device, &index);
if (!driver) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return -ENODEV;
}
got_driver:
retval = init_dev(driver, index, &tty);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (retval)
return retval;
}
up(&allocated_ptys_lock);
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
retval = init_dev(ptm_driver, index, &tty);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
if (retval)
goto out;
return 0;
out1:
release_dev(filp);
+ return retval;
out:
down(&allocated_ptys_lock);
idr_remove(&allocated_ptys, index);
tty_hangup(tty);
#else
struct tty_struct *tty = arg;
- struct task_struct *p;
+ struct task_struct *g, *p;
int session;
int i;
struct file *filp;
tty->driver->flush_buffer(tty);
read_lock(&tasklist_lock);
+ /* Kill the entire session */
do_each_task_pid(session, PIDTYPE_SID, p) {
- if (p->signal->tty == tty || session > 0) {
+ printk(KERN_NOTICE "SAK: killed process %d"
+ " (%s): p->signal->session==tty->session\n",
+ p->pid, p->comm);
+ send_sig(SIGKILL, p, 1);
+ } while_each_task_pid(session, PIDTYPE_SID, p);
+ /* Now kill any processes that happen to have the
+ * tty open.
+ */
+ do_each_thread(g, p) {
+ if (p->signal->tty == tty) {
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): p->signal->session==tty->session\n",
p->pid, p->comm);
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): fd#%d opened to the tty\n",
p->pid, p->comm, i);
- send_sig(SIGKILL, p, 1);
+ force_sig(SIGKILL, p);
break;
}
}
spin_unlock(&p->files->file_lock);
}
task_unlock(p);
- } while_each_task_pid(session, PIDTYPE_SID, p);
+ } while_each_thread(g, p);
read_unlock(&tasklist_lock);
#endif
}
struct tty_struct *tty = (struct tty_struct *) private_;
unsigned long flags;
struct tty_ldisc *disc;
- struct tty_buffer *tbuf, *head;
+ struct tty_buffer *tbuf;
int count;
char *char_buf;
unsigned char *flag_buf;
goto out;
}
spin_lock_irqsave(&tty->buf.lock, flags);
- head = tty->buf.head;
- tty->buf.head = NULL;
- while((tbuf = head) != NULL) {
+ while((tbuf = tty->buf.head) != NULL) {
while ((count = tbuf->commit - tbuf->read) != 0) {
char_buf = tbuf->char_buf_ptr + tbuf->read;
flag_buf = tbuf->flag_buf_ptr + tbuf->read;
disc->receive_buf(tty, char_buf, flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
- if (tbuf->active) {
- tty->buf.head = head;
+ if (tbuf->active)
break;
- }
- head = tbuf->next;
- if (head == NULL)
+ tty->buf.head = tbuf->next;
+ if (tty->buf.head == NULL)
tty->buf.tail = NULL;
tty_buffer_free(tty, tbuf);
}
init_waitqueue_head(&tty->write_wait);
init_waitqueue_head(&tty->read_wait);
INIT_WORK(&tty->hangup_work, do_tty_hangup, tty);
- sema_init(&tty->atomic_read, 1);
- sema_init(&tty->atomic_write, 1);
+ mutex_init(&tty->atomic_read_lock);
+ mutex_init(&tty->atomic_write_lock);
spin_lock_init(&tty->read_lock);
INIT_LIST_HEAD(&tty->tty_files);
INIT_WORK(&tty->SAK_work, NULL, NULL);
generate an interrupt using an inbound Memory Write on its
PCI bus instead of asserting a device IRQ pin.
- If you don't know what to do here, say N.
-
-config PCI_LEGACY_PROC
- bool "Legacy /proc/pci interface"
- depends on PCI
- ---help---
- This feature enables a procfs file -- /proc/pci -- that provides a
- summary of PCI devices in the system.
-
- This feature has been deprecated as of v2.5.53, in favor of using the
- tool lspci(8). This feature may be removed at a future date.
+ Use of PCI MSI interrupts can be disabled at kernel boot time
+ by using the 'pci=nomsi' option. This disables MSI for the
+ entire system.
- lspci can provide the same data, as well as much more. lspci is a part of
- the pci-utils package, which should be installed by your distribution.
- See <file:Documentation/Changes> for information on where to get the latest
- version.
-
- When in doubt, say N.
+ If you don't know what to do here, say N.
config PCI_DEBUG
bool "PCI Debugging"
If unsure, say N.
+config SERIAL_8250_GSC
+ tristate
+ depends on SERIAL_8250 && GSC
+ default SERIAL_8250
+
+config SERIAL_8250_PCI
+ tristate "8250/16550 PCI device support" if EMBEDDED
+ depends on SERIAL_8250 && PCI
+ default SERIAL_8250
+ help
+ This builds standard PCI serial support. You may be able to
+ disable this feature if you only need legacy serial support.
+ Saves about 9K.
+
+config SERIAL_8250_PNP
+ tristate "8250/16550 PNP device support" if EMBEDDED
+ depends on SERIAL_8250 && PNP
+ default SERIAL_8250
+ help
+ This builds standard PNP serial support. You may be able to
+ disable this feature if you only need legacy serial support.
+
+config SERIAL_8250_HP300
+ tristate
+ depends on SERIAL_8250 && HP300
+ default SERIAL_8250
+
config SERIAL_8250_CS
tristate "8250/16550 PCMCIA device support"
depends on PCMCIA && SERIAL_8250
If unsure, say N.
-config SERIAL_8250_ACPI
- bool "8250/16550 device discovery via ACPI namespace"
- default y if IA64
- depends on ACPI && SERIAL_8250
- ---help---
- If you wish to enable serial port discovery via the ACPI
- namespace, say Y here. If unsure, say N.
-
config SERIAL_8250_NR_UARTS
int "Maximum number of 8250/16550 serial ports"
depends on SERIAL_8250
on your Sparc system as the console, you can do so by answering
Y to this option.
+config SERIAL_SUNHV
+ bool "Sun4v Hypervisor Console support"
+ depends on SPARC64
+ help
+ This driver supports the console device found on SUN4V Sparc
+ systems. Say Y if you want to be able to use this device.
+
config SERIAL_IP22_ZILOG
tristate "IP22 Zilog8530 serial support"
depends on SGI_IP22
depends on SERIAL_SH_SCI=y
select SERIAL_CORE_CONSOLE
-config SERIAL_AU1X00
- bool "Enable Au1x00 UART Support"
- depends on MIPS && SOC_AU1X00
- select SERIAL_CORE
- help
- If you have an Alchemy AU1X00 processor (MIPS based) and you want
- to use serial ports, say Y. Otherwise, say N.
-
-config SERIAL_AU1X00_CONSOLE
- bool "Enable Au1x00 serial console"
- depends on SERIAL_AU1X00
- select SERIAL_CORE_CONSOLE
- help
- If you have an Alchemy AU1X00 processor (MIPS based) and you want
- to use a console on a serial port, say Y. Otherwise, say N.
-
config SERIAL_CORE
tristate
config SERIAL_TXX9
bool "TMPTX39XX/49XX SIO support"
- depends HAS_TXX9_SERIAL && BROKEN
+ depends HAS_TXX9_SERIAL
select SERIAL_CORE
default y
depends on FB
default n
+config FB_FIRMWARE_EDID
+ bool "Enable firmware EDID"
+ depends on FB
+ default y
+ ---help---
+ This enables access to the EDID transferred from the firmware.
+ On the i386, this is from the Video BIOS. Enable this if DDC/I2C
+ transfers do not work for your driver and if you are using
+ nvidiafb, i810fb or savagefb.
+
+ In general, choosing Y for this option is safe. If you
+ experience extremely long delays while booting before you get
+ something on your display, try setting this to N. Matrox cards in
+ combination with certain motherboards and monitors are known to
+ suffer from this problem.
+
config FB_MODE_HELPERS
bool "Enable Video Mode Handling Helpers"
depends on FB
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Asiliant 69030 chipset.
config FB_IMSTT
bool "IMS Twin Turbo display support"
There is no need for enabling 'Matrox multihead support' if you have
only one Matrox card in the box.
-config FB_RADEON_OLD
- tristate "ATI Radeon display support (Old driver)"
- depends on FB && PCI
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- select FB_MACMODES if PPC
- help
- Choose this option if you want to use an ATI Radeon graphics card as
- a framebuffer device. There are both PCI and AGP versions. You
- don't need to choose this to run the Radeon in plain VGA mode.
- There is a product page at
- <http://www.ati.com/na/pages/products/pc/radeon32/index.html>.
-
config FB_RADEON
tristate "ATI Radeon display support"
depends on FB && PCI
config FB_ATY
tristate "ATI Mach64 display support" if PCI || ATARI
- depends on FB
+ depends on FB && !SPARC32
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
bool "Au1100 LCD Driver"
depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y
+config FB_AU1200
+ bool "Au1200 LCD Driver"
+ depends on FB && MIPS && SOC_AU1200
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the framebuffer driver for the AMD Au1200 SOC. It can drive
+ various panels and CRTs by passing in kernel cmd line option
+ au1200fb:panel=<name>.
+
source "drivers/video/geode/Kconfig"
config FB_FFB
/* Relinquish the page back to the allocator. */
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
}
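For reference, init_page_count() is the dedicated helper for re-arming the refcount of a page the caller owns; on this kernel it is expected to reduce to (assumption based on 2.6.17-era include/linux/mm.h):

    static inline void init_page_count(struct page *page)
    {
        atomic_set(&page->_count, 1);
    }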
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
- down(&bd->bd_sem);
+ mutex_lock(&bd->bd_mutex);
if (info->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
blkfront_closing(dev);
- up(&bd->bd_sem);
+ mutex_unlock(&bd->bd_mutex);
bdput(bd);
break;
}
if (DUMMY_TTY(tty))
return;
- down(&tty_sem);
+ mutex_lock(&tty_mutex);
if (tty->count != 1) {
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
return;
}
/* Prevent other threads from re-opening this tty. */
set_bit(TTY_CLOSING, &tty->flags);
- up(&tty_sem);
+ mutex_unlock(&tty_mutex);
tty->closing = 1;
tty_wait_until_sent(tty, 0);
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>
-#ifdef CONFIG_SMP_ALTERNATIVES
-#include <asm/smp_alt.h>
-#endif
-
extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
{
int i, rc;
- if (!cpus_empty(cpu_possible_map))
+ for_each_possible_cpu(i)
+ if (i != smp_processor_id())
return;
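/* The boot CPU is now always marked in cpu_possible_map, so the old
 * cpus_empty() test can no longer mean "not yet initialised"; any
 * possible CPU other than the running one signals that setup has
 * already been done. */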
for (i = 0; i < NR_CPUS; i++) {
void __devinit smp_prepare_boot_cpu(void)
{
- prefill_possible_map();
cpu_present_map = cpumask_of_cpu(0);
cpu_online_map = cpumask_of_cpu(0);
}
xen_smp_intr_exit(cpu);
-#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
- unprepare_for_smp();
-#endif
+ alternatives_smp_switch(0);
}
#else /* !CONFIG_HOTPLUG_CPU */
if (rc)
return rc;
-#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
- prepare_for_smp();
-#endif
+ alternatives_smp_switch(1);
/* This must be done before setting cpu_online_map */
set_cpu_sibling_map(cpu);
static void netif_page_release(struct page *page)
{
/* Ready for next use. */
- set_page_count(page, 1);
+ init_page_count(page);
netif_idx_release(page->index);
}
struct xen_pci_op *active_op = &pdev->sh_info->op;
unsigned long irq_flags;
evtchn_port_t port = pdev->evtchn;
- nsec_t ns, ns_timeout;
+ s64 ns, ns_timeout;
struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
* timeout in the past). 1s difference gives plenty of slack for error.
*/
do_gettimeofday(&tv);
- ns_timeout = timeval_to_ns(&tv) + 2 * (nsec_t)NSEC_PER_SEC;
+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
clear_evtchn(port);
extern struct mutex xenwatch_mutex;
-static struct notifier_block *xenstore_chain;
+static ATOMIC_NOTIFIER_HEAD(xenstore_chain);
static void wait_for_devices(struct xenbus_driver *xendrv);
if (xenstored_ready > 0)
ret = nb->notifier_call(nb, 0, NULL);
else
- notifier_chain_register(&xenstore_chain, nb);
+ atomic_notifier_chain_register(&xenstore_chain, nb);
return ret;
}
void unregister_xenstore_notifier(struct notifier_block *nb)
{
- notifier_chain_unregister(&xenstore_chain, nb);
+ atomic_notifier_chain_unregister(&xenstore_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
- notifier_call_chain(&xenstore_chain, 0, NULL);
+ atomic_notifier_call_chain(&xenstore_chain, 0, NULL);
}
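Because the chain is now atomic, callbacks run under rcu_read_lock() and must not sleep. A minimal client sketch (the demo_* names are hypothetical):

    static int demo_xenstore_ready(struct notifier_block *nb,
                                   unsigned long event, void *data)
    {
        /* atomic context: no sleeping allocations or xenbus waits here */
        return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = {
        .notifier_call = demo_xenstore_ready,
    };

    /* in driver init: register_xenstore_notifier(&demo_nb); */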
config PROC_VMCORE
bool "/proc/vmcore support (EXPERIMENTAL)"
depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
+ default y
help
Exports the dump image of crashed kernel in ELF format.
bool "HugeTLB file system support"
depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
depends !XEN
+ help
+ hugetlbfs is a filesystem backing for HugeTLB pages, based on
+ ramfs. For architectures that support it, say Y here and read
+ <file:Documentation/vm/hugetlbpage.txt> for details.
+
+ If unsure, say N.
config HUGETLB_PAGE
def_bool HUGETLBFS
To compile this as a module, choose M here: the module will be called
ramfs.
-config RELAYFS_FS
- tristate "Relayfs file system support"
- ---help---
- Relayfs is a high-speed data relay filesystem designed to provide
- an efficient mechanism for tools and facilities to relay large
- amounts of data from kernel space to user space.
-
- To compile this code as a module, choose M here: the module will be
- called relayfs.
-
- If unsure, say N.
-
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on SYSFS && EXPERIMENTAL
help
configfs is a ram-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
select CRYPTO
select CRYPTO_MD5
select CRYPTO_DES
+ select CRYPTO_CAST5
help
Provides for secure RPC calls by means of a gss-api
mechanism based on the SPKM3 public-key mechanism.
extern int timer_over_8254;
+extern int modern_apic(void);
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
+++ /dev/null
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/processor.h>
-#include <asm/smp_alt.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-#define atomic_read(v) ((v)->counter)
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "addl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub - subtract the atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "subl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "subl %2,%0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-static __inline__ void atomic_dec(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "decl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "decl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "incl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "addl %2,%0; sets %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_add_return - add and return
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static __inline__ int atomic_add_return(int i, atomic_t *v)
-{
- int __i;
-#ifdef CONFIG_M386
- if(unlikely(boot_cpu_data.x86==3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
- __i = i;
- __asm__ __volatile__(
- LOCK "xaddl %0, %1;"
- :"=r"(i)
- :"m"(v->counter), "0"(i));
- return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- local_irq_disable();
- __i = atomic_read(v);
- atomic_set(v, i + __i);
- local_irq_enable();
- return i + __i;
-#endif
-}
-
-static __inline__ int atomic_sub_return(int i, atomic_t *v)
-{
- return atomic_add_return(-i,v);
-}
-
-#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-#define atomic_add_unless(v, a, u) \
-({ \
- int c, old; \
- c = atomic_read(v); \
- while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
- c = old; \
- c != (u); \
-})
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_inc_return(v) (atomic_add_return(1,v))
-#define atomic_dec_return(v) (atomic_sub_return(1,v))
-
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK "andl %0,%1" \
-: : "r" (~(mask)),"m" (*addr) : "memory")
-
-#define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK "orl %0,%1" \
-: : "r" (mask),"m" (*(addr)) : "memory")
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#include <asm-generic/atomic.h>
-#endif
+++ /dev/null
-#ifndef _I386_BITOPS_H
-#define _I386_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/smp_alt.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writting portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long * addr)
-{
- __asm__(
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__(
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile unsigned long * addr)
-{
- __asm__ __volatile__( LOCK
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It may be reordered on other architectures than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__(
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It can be reorderdered on other architectures other than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__(
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile unsigned long* addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void * addr);
-#endif
-
-static __always_inline int constant_test_bit(int nr, const volatile unsigned long *addr)
-{
- return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
-}
-
-static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"Ir" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-#undef ADDR
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
-{
- int d0, d1, d2;
- int res;
-
- if (!size)
- return 0;
- /* This looks at memory. Mark it volatile to tell gcc not to move it around */
- __asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
- "je 1f\n\t"
- "xorl -4(%%edi),%%eax\n\t"
- "subl $4,%%edi\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsubl %%ebx,%%edi\n\t"
- "shll $3,%%edi\n\t"
- "addl %%edi,%%edx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
- return res;
-}
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset);
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- __asm__("bsfl %1,%0"
- :"=r" (word)
- :"rm" (word));
- return word;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
-{
- unsigned x = 0;
-
- while (x < size) {
- unsigned long val = *addr++;
- if (val)
- return __ffs(val) + x;
- x += (sizeof(*addr)<<3);
- }
- return x;
-}
-
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset);
-
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
- __asm__("bsfl %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
-
-#define fls64(x) generic_fls64(x)
-
-#ifdef __KERNEL__
-
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is cleared.
- */
-static inline int sched_find_first_bit(const unsigned long *b)
-{
- if (unlikely(b[0]))
- return __ffs(b[0]);
- if (unlikely(b[1]))
- return __ffs(b[1]) + 32;
- if (unlikely(b[2]))
- return __ffs(b[2]) + 64;
- if (b[3])
- return __ffs(b[3]) + 96;
- return __ffs(b[4]) + 128;
-}
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
- int r;
-
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
- return r+1;
-}
-
-/**
- * fls - find last bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- */
-static inline int fls(int x)
-{
- int r;
-
- __asm__("bsrl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "rm" (x));
- return r+1;
-}
-
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
-
-#endif /* __KERNEL__ */
-
-#ifdef __KERNEL__
-
-#define ext2_set_bit(nr,addr) \
- __test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_set_bit_atomic(lock,nr,addr) \
- test_and_set_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit(nr, addr) \
- __test_and_clear_bit((nr),(unsigned long*)addr)
-#define ext2_clear_bit_atomic(lock,nr, addr) \
- test_and_clear_bit((nr),(unsigned long*)addr)
-#define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr)
-#define ext2_find_first_zero_bit(addr, size) \
- find_first_zero_bit((unsigned long*)addr, size)
-#define ext2_find_next_zero_bit(addr, size, off) \
- find_next_zero_bit((unsigned long*)addr, size, off)
-
-/* Bitmap functions for the minix filesystem. */
-#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
-#define minix_set_bit(nr,addr) __set_bit(nr,(void*)addr)
-#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,(void*)addr)
-#define minix_test_bit(nr,addr) test_bit(nr,(void*)addr)
-#define minix_find_first_zero_bit(addr,size) \
- find_first_zero_bit((void*)addr,size)
-
-#endif /* __KERNEL__ */
-
-#endif /* _I386_BITOPS_H */
+++ /dev/null
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
-
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: mov %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
- : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
-"2: " LOCK "cmpxchgl %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: mov %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .long 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
- int oldval = 0, ret, tem;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
- inc_preempt_count();
-
- if (op == FUTEX_OP_SET)
- __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
- else {
-#ifndef CONFIG_X86_BSWAP
- if (boot_cpu_data.x86 == 3)
- ret = -ENOSYS;
- else
-#endif
- switch (op) {
- case FUTEX_OP_ADD:
- __futex_atomic_op1(LOCK "xaddl %0, %2", ret,
- oldval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr,
- ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr,
- oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
-
- dec_preempt_count();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-#endif
-#endif
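For reference, the field layout that futex_atomic_op_inuser() above extracts
from encoded_op can be exercised stand-alone. A minimal sketch using the same
shifts as the removed code (function name is illustrative, not kernel API):

	#include <stdio.h>

	static void decode_futex_op(int encoded_op)
	{
		int op     = (encoded_op >> 28) & 7;	/* FUTEX_OP_SET/ADD/OR/... */
		int cmp    = (encoded_op >> 24) & 15;	/* FUTEX_OP_CMP_EQ/NE/... */
		int oparg  = (encoded_op << 8) >> 20;	/* 12-bit, sign-extended */
		int cmparg = (encoded_op << 20) >> 20;	/* 12-bit, sign-extended */

		printf("op=%d cmp=%d oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
	}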
*/
#define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
/*
* Again, i386 does not require mem IO specific function.
*/
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(__ISA_IO_base + (b)),(c),(d))
/**
* check_signature - find BIOS signatures
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _I386_PAGE_H */
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+void vmalloc_sync_all(void);
+
#endif /* _I386_PGTABLE_2LEVEL_H */
#define __pmd_free_tlb(tlb, x) do { } while (0)
+#define vmalloc_sync_all() ((void)0)
+
#endif /* _I386_PGTABLE_3LEVEL_H */
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; }
/*
* The following only works if pte_present() is not true.
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#include <linux/config.h>
#include <linux/threads.h>
#include <asm/percpu.h>
+#include <linux/cpumask.h>
#include <xen/interface/physdev.h>
/* flag for disabling the tsc */
char pad0;
int x86_power;
unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+ cpumask_t llc_shared_map; /* cpus sharing the last level cache */
+#endif
unsigned char x86_max_cores; /* cpuid returned max cores value */
unsigned char booted_cores; /* number of cores as seen by OS */
unsigned char apicid;
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
+extern int cpu_llc_id[NR_CPUS];
extern char ignore_fpu_irq;
extern void identify_cpu(struct cpuinfo_x86 *);
unsigned int reserved[3];
struct extended_signature sigs[0];
};
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static inline void rep_nop(void)
#ifndef _i386_SETUP_H
#define _i386_SETUP_H
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x) ((unsigned long long)(x) << PAGE_SHIFT)
+#include <linux/pfn.h>
/*
* Reserved space for vmalloc and iomap - defined in asm/page.h
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
+extern void prefill_possible_map(void);
#endif /* !__ASSEMBLY__ */
#else /* CONFIG_SMP */
#include <asm/page.h>
#include <linux/config.h>
#include <linux/compiler.h>
-#include <asm/smp_alt.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
(*(volatile signed char *)(&(x)->slock) <= 0)
#define __raw_spin_lock_string \
- "\n1:\n" \
- LOCK \
- "decb %0\n\t" \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
"jns 3f\n" \
"2:\t" \
"rep;nop\n\t" \
"3:\n\t"
#define __raw_spin_lock_string_flags \
- "\n1:\n" \
- LOCK \
- "decb %0\n\t" \
- "jns 4f\n\t" \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 5f\n" \
"2:\t" \
"testl $0x200, %1\n\t" \
- "jz 3f\n\t" \
- "#sti\n\t" \
+ "jz 4f\n\t" \
+ "#sti\n" \
"3:\t" \
"rep;nop\n\t" \
"cmpb $0, %0\n\t" \
"jle 3b\n\t" \
"#cli\n\t" \
"jmp 1b\n" \
- "4:\n\t"
+ "4:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0, %0\n\t" \
+ "jg 1b\n\t" \
+ "jmp 4b\n" \
+ "5:\n\t"
+
+#define __raw_spin_lock_string_up \
+ "\n\tdecb %0"
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
- __asm__ __volatile__(
- __raw_spin_lock_string
- :"=m" (lock->slock) : : "memory");
+ alternative_smp(
+ __raw_spin_lock_string,
+ __raw_spin_lock_string_up,
+ "=m" (lock->slock) : : "memory");
}
static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
{
- __asm__ __volatile__(
- __raw_spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
+ alternative_smp(
+ __raw_spin_lock_string_flags,
+ __raw_spin_lock_string_up,
+ "=m" (lock->slock) : "r" (flags) : "memory");
}
static inline int __raw_spin_trylock(raw_spinlock_t *lock)
{
char oldval;
-#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
- "1:movb %1,%b0\n"
- "movb $0,%1\n"
- "2:"
- ".section __smp_alternatives,\"a\"\n"
- ".long 1b\n"
- ".long 3f\n"
- ".previous\n"
- ".section __smp_replacements,\"a\"\n"
- "3: .byte 2b - 1b\n"
- ".byte 5f-4f\n"
- ".byte 0\n"
- ".byte 6f-5f\n"
- ".byte -1\n"
- "4: xchgb %b0,%1\n"
- "5: movb %1,%b0\n"
- "movb $0,%1\n"
- "6:\n"
- ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#else
__asm__ __volatile__(
"xchgb %b0,%1"
:"=q" (oldval), "=m" (lock->slock)
:"0" (0) : "memory");
-#endif
return oldval > 0;
}
static inline void __raw_read_unlock(raw_rwlock_t *rw)
{
- asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
+ asm volatile(LOCK_PREFIX "incl %0" :"=m" (rw->lock) : : "memory");
}
static inline void __raw_write_unlock(raw_rwlock_t *rw)
{
- asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
+ asm volatile(LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ", %0"
: "=m" (rw->lock) : : "memory");
}
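The decb-based protocol being patched above is compact enough to model in C.
A sketch of the semantics only (the real code must use the atomic
"lock; decb" sequence, and the UP alternative simply drops the lock prefix):
slock is 1 when free, the owner is whoever drives it to 0, unlock stores 1.

	typedef struct { volatile signed char slock; } model_lock_t;	/* 1 = free */

	static void model_spin_lock(model_lock_t *l)
	{
		/* "lock; decb %0; jns out": a non-negative result means we own it */
		while (__sync_sub_and_fetch(&l->slock, 1) < 0)
			while (l->slock <= 0)
				;	/* "rep;nop; cmpb $0,%0; jle" spin */
	}

	static void model_spin_unlock(model_lock_t *l)
	{
		l->slock = 1;		/* "movb $1,%0" */
	}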
#include <linux/config.h>
#include <linux/kernel.h>
-#include <linux/bitops.h>
-#include <asm/synch_bitops.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
+#include <linux/bitops.h> /* for LOCK_PREFIX */
+#include <asm/synch_bitops.h>
#include <asm/hypervisor.h>
-#include <asm/smp_alt.h>
#ifdef __KERNEL__
unsigned long prev;
switch (size) {
case 1:
- __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 2:
- __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
- __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
unsigned long long new)
{
unsigned long long prev;
- __asm__ __volatile__(LOCK "cmpxchg8b %3"
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
: "=A"(prev)
: "b"((unsigned long)new),
"c"((unsigned long)(new >> 32)),
#endif
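A usage sketch for the cmpxchg() whose lock prefix is being renamed above:
the canonical compare-and-swap retry loop (hypothetical helper, not part of
this patch):

	static void atomic_inc_sketch(volatile unsigned long *ctr)
	{
		unsigned long old;

		do {
			old = *ctr;				/* snapshot */
		} while (cmpxchg(ctr, old, old + 1) != old);	/* retry on race */
	}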
-#ifdef __KERNEL__
-struct alt_instr {
- __u8 *instr; /* original instruction */
- __u8 *replacement;
- __u8 cpuid; /* cpuid bit set for replacement */
- __u8 instrlen; /* length of original instruction */
- __u8 replacementlen; /* length of new instruction, <= instrlen */
- __u8 pad;
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows the use of optimized instructions even on generic binary
- * kernels.
- *
- * The length of oldinstr must be greater than or equal to that of newinstr;
- * it can be padded with nops as needed.
- *
- * For non-barrier-like inlines, please define new variants
- * without volatile and the memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement, make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
#endif
#ifdef CONFIG_SMP
-#define smp_wmb() wmb()
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define smp_alt_mb(instr) \
-__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673:.byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 6671f-6670f\n" \
- ".byte 0\n" \
- ".byte %c0\n" \
- "6669:lock;addl $0,0(%%esp)\n" \
- "6670:" instr "\n" \
- "6671:\n" \
- ".previous\n" \
- : \
- : "i" (X86_FEATURE_XMM2) \
- : "memory")
-#define smp_rmb() smp_alt_mb("lfence")
-#define smp_mb() smp_alt_mb("mfence")
-#define set_mb(var, value) do { \
-unsigned long __set_mb_temp; \
-__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673: .byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 0\n" \
- ".byte 6671f-6670f\n" \
- ".byte -1\n" \
- "6669: xchg %1, %0\n" \
- "6670:movl %1, %0\n" \
- "6671:\n" \
- ".previous\n" \
- : "=m" (var), "=r" (__set_mb_temp) \
- : "1" (value) \
- : "memory"); } while (0)
-#else
-#define smp_rmb() rmb()
#define smp_mb() mb()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#endif
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
#else
#define smp_mb() barrier()
#define smp_rmb() barrier()
}
extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+
+void default_idle(void);
#endif
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _I386_PAGE_H */
+++ /dev/null
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-i386/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe readers) waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _I386_RWSEM_H
-#define _I386_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/smp_alt.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
-extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem));
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-#define RWSEM_ACTIVE_BIAS 0x00000001
-#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS (-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#if RWSEM_DEBUG
- int debug;
-#endif
-};
-
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT , 0
-#else
-#define __RWSEM_DEBUG_INIT /* */
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEBUG_INIT }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
- sem->debug = 0;
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning down_read\n\t"
-LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
- " js 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_down_read_failed\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending down_read\n\t"
- : "=m"(sem->count)
- : "a"(sem), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- __s32 result, tmp;
- __asm__ __volatile__(
- "# beginning __down_read_trylock\n\t"
- " movl %0,%1\n\t"
- "1:\n\t"
- " movl %1,%2\n\t"
- " addl %3,%2\n\t"
- " jle 2f\n\t"
-LOCK " cmpxchgl %2,%0\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : "+m"(sem->count), "=&a"(result), "=&r"(tmp)
- : "i"(RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result>=0 ? 1 : 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = RWSEM_ACTIVE_WRITE_BIAS;
- __asm__ __volatile__(
- "# beginning down_write\n\t"
-LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
- " testl %%edx,%%edx\n\t" /* was the count 0 before? */
- " jnz 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_down_write_failed\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending down_write"
- : "=m"(sem->count), "=d"(tmp)
- : "a"(sem), "1"(tmp), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- signed long ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (ret == RWSEM_UNLOCKED_VALUE)
- return 1;
- return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
- __asm__ __volatile__(
- "# beginning __up_read\n\t"
-LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* do nothing if still outstanding active readers */
- " jnz 1b\n\t"
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __up_read\n"
- : "=m"(sem->count), "=d"(tmp)
- : "a"(sem), "1"(tmp), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning __up_write\n\t"
- " movl %2,%%edx\n\t"
-LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
- " jnz 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " decw %%dx\n\t" /* did the active count reduce to 0? */
- " jnz 1b\n\t" /* jump back if not */
- " pushl %%ecx\n\t"
- " call rwsem_wake\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __up_write\n"
- : "=m"(sem->count)
- : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS), "m"(sem->count)
- : "memory", "cc", "edx");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
- "# beginning __downgrade_write\n\t"
-LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
- "2:\n\t"
- " pushl %%ecx\n\t"
- " pushl %%edx\n\t"
- " call rwsem_downgrade_wake\n\t"
- " popl %%edx\n\t"
- " popl %%ecx\n\t"
- " jmp 1b\n"
- LOCK_SECTION_END
- "# ending __downgrade_write\n"
- : "=m"(sem->count)
- : "a"(sem), "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
- : "memory", "cc");
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
- __asm__ __volatile__(
-LOCK "addl %1,%0"
- : "=m"(sem->count)
- : "ir"(delta), "m"(sem->count));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
- int tmp = delta;
-
- __asm__ __volatile__(
-LOCK "xadd %0,(%2)"
- : "+r"(tmp), "=m"(sem->count)
- : "r"(sem), "m"(sem->count)
- : "memory");
-
- return tmp+delta;
-}
-
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _I386_RWSEM_H */
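The bias arithmetic documented at the top of the removed header is worth
seeing with numbers. A stand-alone worked example using the constants copied
from above (32-bit view, as on i386):

	#include <stdio.h>

	#define RWSEM_UNLOCKED_VALUE	0x00000000
	#define RWSEM_ACTIVE_BIAS	0x00000001
	#define RWSEM_WAITING_BIAS	(-0x00010000)
	#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

	int main(void)
	{
		int count = RWSEM_UNLOCKED_VALUE;

		count += RWSEM_ACTIVE_WRITE_BIAS;	/* uncontended writer */
		printf("writer: %#x\n", (unsigned)count);	/* 0xffff0001 */

		count -= RWSEM_ACTIVE_WRITE_BIAS;	/* writer releases */
		count += RWSEM_ACTIVE_BIAS;		/* uncontended reader */
		printf("reader: %d\n", count);		/* 1: positive => granted */
		return 0;
	}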
+++ /dev/null
-#ifndef __ASM_SMP_ALT_H__
-#define __ASM_SMP_ALT_H__
-
-#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define LOCK \
- "6677: nop\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6677b\n" \
- ".long 6678f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6678: .byte 1\n" \
- ".byte 1\n" \
- ".byte 0\n" \
- ".byte 1\n" \
- ".byte -1\n" \
- "lock\n" \
- "nop\n" \
- ".previous\n"
-void prepare_for_smp(void);
-void unprepare_for_smp(void);
-#else
-#define LOCK "lock ; "
-#endif
-#else
-#define LOCK ""
-#endif
-
-#endif /* __ASM_SMP_ALT_H__ */
+++ /dev/null
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/smp_alt.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-#define __raw_spin_is_locked(x) \
- (*(volatile signed char *)(&(x)->slock) <= 0)
-
-#define __raw_spin_lock_string \
- "\n1:\t" \
- LOCK \
- "decb %0\n\t" \
- "jns 3f\n" \
- "2:\t" \
- "rep;nop\n\t" \
- "cmpb $0,%0\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- "3:\n\t"
-
-#define __raw_spin_lock_string_flags \
- "\n1:\t" \
- LOCK \
- "decb %0\n\t" \
- "jns 4f\n\t" \
- "2:\t" \
- "testl $0x200, %1\n\t" \
- "jz 3f\n\t" \
- "sti\n\t" \
- "3:\t" \
- "rep;nop\n\t" \
- "cmpb $0, %0\n\t" \
- "jle 3b\n\t" \
- "cli\n\t" \
- "jmp 1b\n" \
- "4:\n\t"
-
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
-{
- __asm__ __volatile__(
- __raw_spin_lock_string
- :"=m" (lock->slock) : : "memory");
-}
-
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
-{
- __asm__ __volatile__(
- __raw_spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
-{
- char oldval;
-#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
- "1:movb %1,%b0\n"
- "movb $0,%1\n"
- "2:"
- ".section __smp_alternatives,\"a\"\n"
- ".long 1b\n"
- ".long 3f\n"
- ".previous\n"
- ".section __smp_replacements,\"a\"\n"
- "3: .byte 2b - 1b\n"
- ".byte 5f-4f\n"
- ".byte 0\n"
- ".byte 6f-5f\n"
- ".byte -1\n"
- "4: xchgb %b0,%1\n"
- "5: movb %1,%b0\n"
- "movb $0,%1\n"
- "6:\n"
- ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#else
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-#endif
- return oldval > 0;
-}
-
-/*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works, despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define __raw_spin_unlock_string \
- "movb $1,%0" \
- :"=m" (lock->slock) : : "memory"
-
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- __asm__ __volatile__(
- __raw_spin_unlock_string
- );
-}
-
-#else
-
-#define __raw_spin_unlock_string \
- "xchgb %b0, %1" \
- :"=q" (oldval), "=m" (lock->slock) \
- :"0" (oldval) : "memory"
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
- char oldval = 1;
-
- __asm__ __volatile__(
- __raw_spin_unlock_string
- );
-}
-
-#endif
-
-#define __raw_spin_unlock_wait(lock) \
- do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- *
- * the helpers are in arch/i386/kernel/semaphore.c
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
-
-static inline void __raw_read_lock(raw_rwlock_t *rw)
-{
- __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void __raw_write_lock(raw_rwlock_t *rw)
-{
- __build_write_lock(rw, "__write_lock_failed");
-}
-
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
- return 1;
- atomic_inc(count);
- return 0;
-}
-
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
-}
-
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
-{
- asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
- : "=m" (rw->lock) : : "memory");
-}
-
-#endif /* __ASM_SPINLOCK_H */
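The read/write trylock paths above hinge on the RW_LOCK_BIAS counter trick.
A C model of the arithmetic (RW_LOCK_BIAS is 0x01000000 in asm-i386/rwlock.h;
non-atomic, for illustration only):

	#define RW_LOCK_BIAS	0x01000000

	static int model_read_trylock(int *count)
	{
		if (--*count >= 0)		/* each reader takes one unit */
			return 1;
		++*count;			/* a writer holds it: back off */
		return 0;
	}

	static int model_write_trylock(int *count)
	{
		if ((*count -= RW_LOCK_BIAS) == 0)	/* needs the whole bias */
			return 1;
		*count += RW_LOCK_BIAS;			/* readers or writer present */
		return 0;
	}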
+++ /dev/null
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/smp_alt.h>
-
-#ifdef __KERNEL__
-
-struct task_struct; /* one of the stranger aspects of C forward declarations.. */
-extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
-
-#define switch_to(prev,next,last) do { \
- unsigned long esi,edi; \
- asm volatile("pushl %%ebp\n\t" \
- "movl %%esp,%0\n\t" /* save ESP */ \
- "movl %5,%%esp\n\t" /* restore ESP */ \
- "movl $1f,%1\n\t" /* save EIP */ \
- "pushl %6\n\t" /* restore EIP */ \
- "jmp __switch_to\n" \
- "1:\t" \
- "popl %%ebp\n\t" \
- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
- "=a" (last),"=S" (esi),"=D" (edi) \
- :"m" (next->thread.esp),"m" (next->thread.eip), \
- "2" (prev), "d" (next)); \
-} while (0)
-
-#define _set_base(addr,base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %%dl,%2\n\t" \
- "movb %%dh,%3" \
- :"=&d" (__pr) \
- :"m" (*((addr)+2)), \
- "m" (*((addr)+4)), \
- "m" (*((addr)+7)), \
- "0" (base) \
- ); } while(0)
-
-#define _set_limit(addr,limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %2,%%dh\n\t" \
- "andb $0xf0,%%dh\n\t" \
- "orb %%dh,%%dl\n\t" \
- "movb %%dl,%2" \
- :"=&d" (__lr) \
- :"m" (*(addr)), \
- "m" (*((addr)+6)), \
- "0" (limit) \
- ); } while(0)
-
-#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
-#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1) )
-
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value) \
- asm volatile("\n" \
- "1:\t" \
- "mov %0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "pushl $0\n\t" \
- "popl %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".long 1b,3b\n" \
- ".previous" \
- : :"rm" (value))
-
-/*
- * Save a segment register away
- */
-#define savesegment(seg, value) \
- asm volatile("mov %%" #seg ",%0":"=rm" (value))
-
-/*
- * Clear and set 'TS' bit respectively
- */
-#define clts() __asm__ __volatile__ ("clts")
-#define read_cr0() ({ \
- unsigned int __dummy; \
- __asm__ __volatile__( \
- "movl %%cr0,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr0(x) \
- __asm__ __volatile__("movl %0,%%cr0": :"r" (x));
-
-#define read_cr2() ({ \
- unsigned int __dummy; \
- __asm__ __volatile__( \
- "movl %%cr2,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr2(x) \
- __asm__ __volatile__("movl %0,%%cr2": :"r" (x));
-
-#define read_cr3() ({ \
- unsigned int __dummy; \
- __asm__ ( \
- "movl %%cr3,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-#define write_cr3(x) \
- __asm__ __volatile__("movl %0,%%cr3": :"r" (x));
-
-#define read_cr4() ({ \
- unsigned int __dummy; \
- __asm__( \
- "movl %%cr4,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-
-#define read_cr4_safe() ({ \
- unsigned int __dummy; \
- /* This could fault if %cr4 does not exist */ \
- __asm__("1: movl %%cr4, %0 \n" \
- "2: \n" \
- ".section __ex_table,\"a\" \n" \
- ".long 1b,2b \n" \
- ".previous \n" \
- : "=r" (__dummy): "0" (0)); \
- __dummy; \
-})
-
-#define write_cr4(x) \
- __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
-#define stts() write_cr0(8 | read_cr0())
-
-#endif /* __KERNEL__ */
-
-#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory");
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- __asm__("lsll %1,%0"
- :"=r" (__limit):"r" (segment));
- return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-
-#define tas(ptr) (xchg((ptr),1))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-/*
- * The semantics of CMPXCHG8B are a bit strange; this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
- *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
- */
-static inline void __set_64bit (unsigned long long * ptr,
- unsigned int low, unsigned int high)
-{
- __asm__ __volatile__ (
- "\n1:\t"
- "movl (%0), %%eax\n\t"
- "movl 4(%0), %%edx\n\t"
- "lock cmpxchg8b (%0)\n\t"
- "jnz 1b"
- : /* no outputs */
- : "D"(ptr),
- "b"(low),
- "c"(high)
- : "ax","dx","memory");
-}
-
-static inline void __set_64bit_constant (unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
-}
-#define ll_low(x) *(((unsigned int*)&(x))+0)
-#define ll_high(x) *(((unsigned int*)&(x))+1)
-
-static inline void __set_64bit_var (unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr,ll_low(value), ll_high(value));
-}
-
-#define set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit_constant(ptr, value) : \
- __set_64bit_var(ptr, value) )
-
-#define _set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
- __set_64bit(ptr, ll_low(value), ll_high(value)) )
-
-#endif
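Usage sketch for the set_64bit() machinery above: publishing a 64-bit
descriptor word so a concurrent reader never observes a torn value
(hypothetical caller):

	static void publish_desc(unsigned long long *slot, unsigned long long desc)
	{
		set_64bit(slot, desc);	/* the cmpxchg8b loop makes the store atomic */
	}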
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 4:
- __asm__ __volatile__("xchgl %0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- }
- return x;
-}
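And a sketch of __xchg() by way of the tas() wrapper above, used as an atomic
test-and-set flag (hypothetical helper):

	static int try_grab(volatile unsigned long *flag)
	{
		return xchg(flag, 1UL) == 0;	/* 1 on first grab, 0 if already taken */
	}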
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#ifdef CONFIG_X86_CMPXCHG
-#define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
-#endif
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * Building a kernel capable of running on an 80386. It may be necessary to
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
- * a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return cmpxchg_386_u8(ptr, old, new);
- case 2:
- return cmpxchg_386_u16(ptr, old, new);
- case 4:
- return cmpxchg_386_u32(ptr, old, new);
- }
- return old;
-}
-
-#define cmpxchg(ptr,o,n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 3)) \
- __ret = __cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- else \
- __ret = cmpxchg_386((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- __ret; \
-})
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
- unsigned long long new)
-{
- unsigned long long prev;
- __asm__ __volatile__(LOCK "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-}
-
-#define cmpxchg64(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
- (unsigned long long)(n)))
-
-#endif
-
-#ifdef __KERNEL__
-struct alt_instr {
- __u8 *instr; /* original instruction */
- __u8 *replacement;
- __u8 cpuid; /* cpuid bit set for replacement */
- __u8 instrlen; /* length of original instruction */
- __u8 replacementlen; /* length of new instruction, <= instrlen */
- __u8 pad;
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows the use of optimized instructions even on generic binary
- * kernels.
- *
- * The length of oldinstr must be greater than or equal to that of newinstr;
- * it can be padded with nops as needed.
- *
- * For non-barrier-like inlines, please define new variants
- * without volatile and the memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement, make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile ("661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .align 4\n" \
- " .long 661b\n" /* label */ \
- " .long 663f\n" /* new instruction */ \
- " .byte %c0\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .altinstr_replacement,\"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous" :: "i" (feature), ##input)
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPU's follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPU's to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- *
- * Some non intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-
-
-/*
- * Actually only lfence would be needed for mb() because all stores done
- * by the kernel should be already ordered. But keep a full barrier for now.
- */
-
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequent reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data returned by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this, where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while(0)
-
-#ifdef CONFIG_X86_OOSTORE
-/* Actually there are no OOO-store-capable CPUs that do SSE for now,
-   but allow for the possibility already. */
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define wmb() __asm__ __volatile__ ("": : :"memory")
-#endif
-
-#ifdef CONFIG_SMP
-#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-#define smp_alt_mb(instr) \
-__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673:.byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 6671f-6670f\n" \
- ".byte 0\n" \
- ".byte %c0\n" \
- "6669:lock;addl $0,0(%%esp)\n" \
- "6670:" instr "\n" \
- "6671:\n" \
- ".previous\n" \
- : \
- : "i" (X86_FEATURE_XMM2) \
- : "memory")
-#define smp_mb() smp_alt_mb("mfence")
-#define smp_rmb() smp_alt_mb("lfence")
-#define set_mb(var, value) do { \
-unsigned long __set_mb_temp; \
-__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
- ".section __smp_alternatives,\"a\"\n" \
- ".long 6667b\n" \
- ".long 6673f\n" \
- ".previous\n" \
- ".section __smp_replacements,\"a\"\n" \
- "6673: .byte 6668b-6667b\n" \
- ".byte 6670f-6669f\n" \
- ".byte 0\n" \
- ".byte 6671f-6670f\n" \
- ".byte -1\n" \
- "6669: xchg %1, %0\n" \
- "6670:movl %1, %0\n" \
- "6671:\n" \
- ".previous\n" \
- : "=m" (var), "=r" (__set_mb_temp) \
- : "1" (value) \
- : "memory"); } while (0)
-#else
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#endif
-#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-/* interrupt control.. */
-#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt() __asm__ __volatile__("hlt": : :"memory")
-
-#define irqs_disabled() \
-({ \
- unsigned long flags; \
- local_save_flags(flags); \
- !(flags & (1<<9)); \
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-extern int es7000_plat;
-void cpu_idle_wait(void);
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible:
- */
-static inline void sched_cacheflush(void)
-{
- wbinvd();
-}
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-#endif
*
* Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
* Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
*
*/
-#include <asm/types.h>
-
-void __lfetch(int lfhint, void *y);
-void __lfetch_excl(int lfhint, void *y);
-void __lfetch_fault(int lfhint, void *y);
-void __lfetch_fault_excl(int lfhint, void *y);
-
-/* In the following, whichFloatReg should be an integer from 0-127 */
-void __ldfs(const int whichFloatReg, void *src);
-void __ldfd(const int whichFloatReg, void *src);
-void __ldfe(const int whichFloatReg, void *src);
-void __ldf8(const int whichFloatReg, void *src);
-void __ldf_fill(const int whichFloatReg, void *src);
-void __stfs(void *dst, const int whichFloatReg);
-void __stfd(void *dst, const int whichFloatReg);
-void __stfe(void *dst, const int whichFloatReg);
-void __stf8(void *dst, const int whichFloatReg);
-void __stf_spill(void *dst, const int whichFloatReg);
-
-void __st1_rel(void *dst, const __s8 value);
-void __st2_rel(void *dst, const __s16 value);
-void __st4_rel(void *dst, const __s32 value);
-void __st8_rel(void *dst, const __s64 value);
-__u8 __ld1_acq(void *src);
-__u16 __ld2_acq(void *src);
-__u32 __ld4_acq(void *src);
-__u64 __ld8_acq(void *src);
-
-__u64 __fetchadd4_acq(__u32 *addend, const int increment);
-__u64 __fetchadd4_rel(__u32 *addend, const int increment);
-__u64 __fetchadd8_acq(__u64 *addend, const int increment);
-__u64 __fetchadd8_rel(__u64 *addend, const int increment);
-
-__u64 __getf_exp(double d);
-
-/* OS Related Itanium(R) Intrinsics */
-
-/* The names to use for whichReg and whichIndReg below come from
- the include file asm/ia64regs.h */
-
-__u64 __getIndReg(const int whichIndReg, __s64 index);
-__u64 __getReg(const int whichReg);
-
-void __setIndReg(const int whichIndReg, __s64 index, __u64 value);
-void __setReg(const int whichReg, __u64 value);
-
-void __mf(void);
-void __mfa(void);
-void __synci(void);
-void __itcd(__s64 pa);
-void __itci(__s64 pa);
-void __itrd(__s64 whichTransReg, __s64 pa);
-void __itri(__s64 whichTransReg, __s64 pa);
-void __ptce(__s64 va);
-void __ptcl(__s64 va, __s64 pagesz);
-void __ptcg(__s64 va, __s64 pagesz);
-void __ptcga(__s64 va, __s64 pagesz);
-void __ptri(__s64 va, __s64 pagesz);
-void __ptrd(__s64 va, __s64 pagesz);
-void __invala (void);
-void __invala_gr(const int whichGeneralReg /* 0-127 */ );
-void __invala_fr(const int whichFloatReg /* 0-127 */ );
-void __nop(const int);
-void __fc(__u64 *addr);
-void __sum(int mask);
-void __rum(int mask);
-void __ssm(int mask);
-void __rsm(int mask);
-__u64 __thash(__s64);
-__u64 __ttag(__s64);
-__s64 __tpa(__s64);
-
-/* Intrinsics for implementing get/put_user macros */
-void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val);
-void __ld_user(const char *tableName, __u64 addr, char size, char relocType);
-
-/* This intrinsic does not generate code, it creates a barrier across which
- * the compiler will not schedule data access instructions.
- */
-void __memory_barrier(void);
-
-void __isrlz(void);
-void __dsrlz(void);
-
-__u64 _m64_mux1(__u64 a, const int n);
-__u64 __thash(__u64);
-
-/* Lock and Atomic Operation Related Intrinsics */
-__u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value);
-__u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value);
-__s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value);
-__s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value);
-
-__u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp);
-
-__s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len);
-__s64 _m64_shrp(__s64 a, __s64 b, const int count);
-__s64 _m64_popcnt(__s64 a);
+#include <ia64intrin.h>
#define ia64_barrier() __memory_barrier()
#define __ia64_getreg __getReg
#define __ia64_setreg __setReg
-#define __ia64_hint(x)
+#define ia64_hint __hint
+#define ia64_hint_pause __hint_pause
-#define ia64_mux1_brcst 0
-#define ia64_mux1_mix 8
-#define ia64_mux1_shuf 9
-#define ia64_mux1_alt 10
-#define ia64_mux1_rev 11
+#define ia64_mux1_brcst _m64_mux1_brcst
+#define ia64_mux1_mix _m64_mux1_mix
+#define ia64_mux1_shuf _m64_mux1_shuf
+#define ia64_mux1_alt _m64_mux1_alt
+#define ia64_mux1_rev _m64_mux1_rev
-#define ia64_mux1 _m64_mux1
+#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
#define ia64_popcnt _m64_popcnt
#define ia64_getf_exp __getf_exp
#define ia64_shrp _m64_shrp
#define ia64_stf8 __stf8
#define ia64_stf_spill __stf_spill
-#define ia64_mf __mf
+#define ia64_mf __mf
#define ia64_mfa __mfa
#define ia64_fetchadd4_acq __fetchadd4_acq
/* Values for lfhint in __lfetch and __lfetch_fault */
-#define ia64_lfhint_none 0
-#define ia64_lfhint_nt1 1
-#define ia64_lfhint_nt2 2
-#define ia64_lfhint_nta 3
+#define ia64_lfhint_none __lfhint_none
+#define ia64_lfhint_nt1 __lfhint_nt1
+#define ia64_lfhint_nt2 __lfhint_nt2
+#define ia64_lfhint_nta __lfhint_nta
#define ia64_lfetch __lfetch
#define ia64_lfetch_excl __lfetch_excl
#define __ia64_get_psr_i() (__ia64_getreg(_IA64_REG_PSR) & 0x4000UL)
+#define __builtin_trap() __break(0);
+
#endif /* _ASM_IA64_INTEL_INTRIN_H */
}
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */
-extern int valid_mmap_phys_addr_range (unsigned long addr, size_t *count);
+extern int valid_phys_addr_range (unsigned long addr, size_t count); /* efi.c */
+extern int valid_mmap_phys_addr_range (unsigned long addr, size_t count);
/*
* The following two macros are deprecated and scheduled for removal.
# define outl_p outl
#endif
-/*
- * An "address" in IO memory space is not clearly either an integer or a pointer. We will
- * accept both, thus the casts.
- *
- * On ia-64, we access the physical I/O memory space through the uncached kernel region.
- */
-static inline void __iomem *
-ioremap (unsigned long offset, unsigned long size)
-{
- offset = HYPERVISOR_ioremap(offset, size);
- if (IS_ERR_VALUE(offset))
- return (void __iomem*)offset;
- return (void __iomem *) (__IA64_UNCACHED_OFFSET | (offset));
-}
+extern void __iomem * ioremap(unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
static inline void
iounmap (volatile void __iomem *addr)
{
}
-#define ioremap_nocache(o,s) ioremap(o,s)
+/* Use normal IO mappings for DMI */
+#define dmi_ioremap ioremap
+#define dmi_iounmap(x,l) iounmap(x)
+#define dmi_alloc(l) kmalloc(l, GFP_ATOMIC)
# ifdef __KERNEL__
struct page;
struct mm_struct;
struct pci_bus;
+struct task_struct;
typedef void ia64_mv_setup_t (char **);
typedef void ia64_mv_cpu_init_t (void);
u8 size);
typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
/* DMA-mapping interface: */
typedef void ia64_mv_dma_init (void);
{
}
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
# define platform_readw_relaxed ia64_mv.readw_relaxed
# define platform_readl_relaxed ia64_mv.readl_relaxed
# define platform_readq_relaxed ia64_mv.readq_relaxed
+# define platform_migrate ia64_mv.migrate
# endif
/* __attribute__((__aligned__(16))) is required to make size of the
ia64_mv_readw_relaxed_t *readw_relaxed;
ia64_mv_readl_relaxed_t *readl_relaxed;
ia64_mv_readq_relaxed_t *readq_relaxed;
+ ia64_mv_migrate_t *migrate;
} __attribute__((__aligned__(16))); /* align attrib? see above comment */
#define MACHVEC_INIT(name) \
platform_readw_relaxed, \
platform_readl_relaxed, \
platform_readq_relaxed, \
+ platform_migrate, \
}
extern struct ia64_machine_vector ia64_mv;
#endif
#ifndef platform_pci_legacy_read
# define platform_pci_legacy_read ia64_pci_legacy_read
+extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size);
#endif
#ifndef platform_pci_legacy_write
# define platform_pci_legacy_write ia64_pci_legacy_write
+extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size);
#endif
#ifndef platform_inb
# define platform_inb __ia64_inb
#ifndef platform_readq_relaxed
# define platform_readq_relaxed __ia64_readq_relaxed
#endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
#endif /* _ASM_IA64_MACHVEC_H */
#define _ASM_IA64_MACHVEC_DIG_h
extern ia64_mv_setup_t dig_setup;
-extern ia64_mv_irq_init_t dig_irq_init;
/*
* This stuff has dual use!
*/
#define platform_name "dig"
#define platform_setup dig_setup
-#define platform_irq_init dig_irq_init
#ifdef CONFIG_XEN
# define platform_dma_map_sg dma_map_sg
# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
# define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+# define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+# define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __ASSEMBLY__
# define ia64_pfn_valid(pfn) 1
#endif
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern struct page *vmem_map;
+#ifdef CONFIG_DISCONTIGMEM
+# define page_to_pfn(page) ((unsigned long) (page - vmem_map))
+# define pfn_to_page(pfn) (vmem_map + (pfn))
+#endif
+#endif
+
+#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM)
+/* FLATMEM always configures mem_map (mem_map = vmem_map if necessary) */
+#include <asm-generic/memory_model.h>
+#endif
+
#ifdef CONFIG_FLATMEM
# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page) ((unsigned long) (page - mem_map))
-# define pfn_to_page(pfn) (mem_map + (pfn))
#elif defined(CONFIG_DISCONTIGMEM)
-extern struct page *vmem_map;
extern unsigned long min_low_pfn;
extern unsigned long max_low_pfn;
# define pfn_valid(pfn) (((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
-# define page_to_pfn(page) ((unsigned long) (page - vmem_map))
-# define pfn_to_page(pfn) (vmem_map + (pfn))
#endif
#ifndef CONFIG_XEN
| (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
# define is_hugepage_only_range(mm, addr, len) \
- (REGION_NUMBER(addr) == RGN_HPAGE && \
+ (REGION_NUMBER(addr) == RGN_HPAGE || \
REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE)
extern unsigned int hpage_shift;
#endif
#define PAL_SHUTDOWN 40 /* enter processor shutdown state */
#define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */
#define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */
+#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */
#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */
#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */
#define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */
typedef struct pal_freq_ratio {
- u64 den : 32, num : 32; /* numerator & denominator */
+ u32 den, num; /* numerator & denominator */
} itc_ratio, proc_ratio;
typedef union pal_cache_config_info_1_s {
typedef union pal_cache_config_info_2_s {
struct {
- u64 cache_size : 32, /*cache size in bytes*/
+ u32 cache_size; /*cache size in bytes*/
- alias_boundary : 8, /* 39-32 aliased addr
+ u32 alias_boundary : 8, /* 39-32 aliased addr
* separation for max
* performance.
*/
if (iprv.status == PAL_STATUS_SUCCESS)
{
- if (proc_number == 0)
- mapping->overview.overview_data = iprv.v0;
+ mapping->overview.overview_data = iprv.v0;
mapping->ppli1.ppli1_data = iprv.v1;
mapping->ppli2.ppli2_data = iprv.v2;
}
return iprv.status;
}
+
+typedef struct pal_cache_shared_info_s
+{
+ u64 num_shared;
+ pal_proc_n_log_info1_t ppli1;
+ pal_proc_n_log_info2_t ppli2;
+} pal_cache_shared_info_t;
+
+/* Get information on caches shared by a logical processor. */
+static inline s64
+ia64_pal_cache_shared_info(u64 level,
+ u64 type,
+ u64 proc_number,
+ pal_cache_shared_info_t *info)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number);
+
+ if (iprv.status == PAL_STATUS_SUCCESS) {
+ info->num_shared = iprv.v0;
+ info->ppli1.ppli1_data = iprv.v1;
+ info->ppli2.ppli2_data = iprv.v2;
+ }
+
+ return iprv.status;
+}
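/*
 * Editor's sketch, not part of the patch: one plausible caller of the
 * new PAL call, re-issuing it for successive proc_number values until
 * the firmware reports an error.  The function name and the cache
 * level/type arguments below are hypothetical example values.
 */
static void example_dump_shared_cache(void)
{
	pal_cache_shared_info_t info;
	u64 proc = 0;

	while (ia64_pal_cache_shared_info(2 /* level: example */,
					  1 /* type: example */,
					  proc, &info) == PAL_STATUS_SUCCESS) {
		printk(KERN_DEBUG "cache shared among %lu logical cpus\n",
		       (unsigned long) info.num_shared);
		proc++;
	}
}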
#endif /* __ASSEMBLY__ */
#endif /* _ASM_IA64_PAL_H */
#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
- /* bit 5 is currently unused */
+#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
+ sync at ctx sw */
#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */
#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */
#define local_cpu_data (&__ia64_per_cpu_var(cpu_info))
#define cpu_data(cpu) (&per_cpu(cpu_info, cpu))
-extern void identify_cpu (struct cpuinfo_ia64 *);
extern void print_cpu_info (struct cpuinfo_ia64 *);
typedef struct {
__ia64_save_fpu((prev)->thread.fph); \
} \
__switch_to(prev, next, last); \
+ /* "next" in old context is "current" in new context */ \
+ if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \
+ (task_cpu(current) != \
+ task_thread_info(current)->last_cpu))) { \
+ platform_migrate(current); \
+ task_thread_info(current)->last_cpu = task_cpu(current); \
+ } \
} while (0)
#else
# define switch_to(prev,next,last) __switch_to(prev, next, last)
#define arch_align_stack(x) (x)
+void default_idle(void);
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#define __pa(virt) to_phys((void *) (unsigned long) (virt))
#define __va(phys) to_virt((unsigned long) (phys))
-#define page_to_pfn(page) ((page) - mem_map)
-#define pfn_to_page(pfn) (mem_map + (pfn))
-
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
extern int arch_free_page(struct page *page, int order);
#define HAVE_ARCH_FREE_PAGE
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
extern void e820_setup_gap(struct e820entry *e820, int nr_map);
extern void __init parse_memmapopt(char *p, char **end);
extern struct e820map e820;
+
+extern unsigned ebda_addr, ebda_size;
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
* Do not use vmalloc/vfree: floppy_release_irq_and_dma() gets called from
* softirq context via motor_off_callback. A generic bug we happen to trigger.
*/
-#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL, get_order(size))
+#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL|__GFP_NORETRY, get_order(size))
#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
#define fd_dma_setup(addr, size, mode, io) vdma_dma_setup(addr, size, mode, io)
return __ioremap(offset, size, 0);
}
+extern void *early_ioremap(unsigned long addr, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
+
/*
* This one maps high address device memory and turns off caching for that area.
* it's useful if some control registers are in such an area and write combining
extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
-/* Use normal IO mappings for DMI */
-#define dmi_ioremap ioremap
-#define dmi_iounmap(x,l) iounmap(x)
-#define dmi_alloc(l) kmalloc(l, GFP_ATOMIC)
-
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
*/
{
return *(__force volatile __u16 *)addr;
}
-static inline __u32 __readl(const volatile void __iomem *addr)
+static __always_inline __u32 __readl(const volatile void __iomem *addr)
{
return *(__force volatile __u32 *)addr;
}
#define mmiowb()
-#ifdef CONFIG_UNORDERED_IO
-static inline void __writel(__u32 val, volatile void __iomem *addr)
-{
- volatile __u32 __iomem *target = addr;
- asm volatile("movnti %1,%0"
- : "=m" (*target)
- : "r" (val) : "memory");
-}
-
-static inline void __writeq(__u64 val, volatile void __iomem *addr)
-{
- volatile __u64 __iomem *target = addr;
- asm volatile("movnti %1,%0"
- : "=m" (*target)
- : "r" (val) : "memory");
-}
-#else
static inline void __writel(__u32 b, volatile void __iomem *addr)
{
*(__force volatile __u32 *)addr = b;
{
*(__force volatile __u64 *)addr = b;
}
-#endif
static inline void __writeb(__u8 b, volatile void __iomem *addr)
{
*(__force volatile __u8 *)addr = b;
*/
#define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
/*
* Again, x86-64 does not require mem IO specific function.
*/
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(__ISA_IO_base + (b)),(c),(d))
/**
* check_signature - find BIOS signatures
BUG_ON(!next->context.pinned);
/* stop flush ipis for the previous mm */
- clear_bit(cpu, &prev->cpu_vm_mask);
+ cpu_clear(cpu, prev->cpu_vm_mask);
#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
write_pda(mmu_state, TLBSTATE_OK);
write_pda(active_mm, next);
#endif
- set_bit(cpu, &next->cpu_vm_mask);
+ cpu_set(cpu, next->cpu_vm_mask);
/* load_cr3(next->pgd) */
op->cmd = MMUEXT_NEW_BASEPTR;
write_pda(mmu_state, TLBSTATE_OK);
if (read_pda(active_mm) != next)
out_of_line_bug();
- if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn) (mem_map + (pfn))
-#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < end_pfn)
#endif
#endif /* __KERNEL__ */
+#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* _X86_64_PAGE_H */
free_page((unsigned long)pud);
}
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+ page->index = (pgoff_t)pgd_list;
+ if (pgd_list)
+ pgd_list->private = (unsigned long)&page->index;
+ pgd_list = page;
+ page->private = (unsigned long)&pgd_list;
+ spin_unlock(&pgd_lock);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ struct page *next, **pprev, *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+ next = (struct page *)page->index;
+ pprev = (struct page **)page->private;
+ *pprev = next;
+ if (next)
+ next->private = (unsigned long)pprev;
+ spin_unlock(&pgd_lock);
+}
+
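/*
 * Editor's note, not part of the patch: pgd_list_add/pgd_list_del above
 * are the classic hlist-style "pprev" idiom, with page->index reused as
 * the next pointer and page->private as a pointer back to whatever field
 * points at this page.  A minimal self-contained sketch of the idiom:
 */
struct node { struct node *next, **pprev; };
static struct node *node_head;

static void node_add(struct node *n)
{
	n->next = node_head;
	if (node_head)
		node_head->pprev = &n->next;
	node_head = n;
	n->pprev = &node_head;
}

static void node_del(struct node *n)
{
	*n->pprev = n->next;		/* unlinks without knowing the head */
	if (n->next)
		n->next->pprev = n->pprev;
}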
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
/*
*/
unsigned boundary;
pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
-
if (!pgd)
return NULL;
+ pgd_list_add(pgd);
/*
* Copy kernel pointers in from init.
* Could keep a freelist or slab cache of those because the kernel
0));
}
+ pgd_list_del(pgd);
free_pages((unsigned long)pgd, 1);
}
static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte) { return (__pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
struct vm_area_struct;
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+void vmalloc_sync_all(void);
+
#endif /* !__ASSEMBLY__ */
extern int kern_addr_valid(unsigned long addr);
#include <asm/mmsegment.h>
#include <asm/percpu.h>
#include <linux/personality.h>
+#include <linux/cpumask.h>
#define TF_MASK 0x00000100
#define IF_MASK 0x00000200
__u32 x86_power;
__u32 extended_cpuid_level; /* Max extended CPUID function supported */
unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+ cpumask_t llc_shared_map; /* cpus sharing the last level cache */
+#endif
__u8 apicid;
__u8 booted_cores; /* number of cores as seen by OS */
} ____cacheline_aligned;
struct extended_signature sigs[0];
};
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
-
#define ASM_NOP1 K8_NOP1
#define ASM_NOP2 K8_NOP2
extern cpumask_t cpu_core_map[NR_CPUS];
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
+extern u8 cpu_llc_id[NR_CPUS];
#define SMP_TRAMPOLINE_BASE 0x6000
[pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
: "memory", "cc" __EXTRA_CLOBBER)
-
-extern void load_gs_index(unsigned);
+extern void load_gs_index(unsigned);
/*
* Load a segment. Fall back on loading the zero
".previous" \
: :"r" (value), "r" (0))
-#define set_debug(value,register) \
- __asm__("movq %0,%%db" #register \
- : /* no output */ \
- :"r" ((unsigned long) value))
-
-
#ifdef __KERNEL__
struct alt_instr {
__u8 *instr; /* original instruction */
static inline unsigned long mach_get_cmos_time(void)
{
unsigned int year, mon, day, hour, min, sec;
- int i;
- /* The Linux interpretation of the CMOS clock register contents:
- * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
- * RTC registers show the second which has precisely just started.
- * Let's hope other operating systems interpret the RTC the same way.
- */
- /* read RTC exactly on falling edge of update flag */
- for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
- break;
- for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
- if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
- break;
- do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ do {
sec = CMOS_READ(RTC_SECONDS);
min = CMOS_READ(RTC_MINUTES);
hour = CMOS_READ(RTC_HOURS);
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
} while (sec != CMOS_READ(RTC_SECONDS));
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
- }
- if ((year += 1900) < 1970)
+
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+
+ year += 1900;
+ if (year < 1970)
year += 100;
return mktime(year, mon, day, hour, min, sec);
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
__GFP_NOMEMALLOC|__GFP_HARDWALL)
+/* This equals 0, but use constants in case they ever change */
+#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
#include <asm/cacheflush.h>
+#ifndef ARCH_HAS_FLUSH_ANON_PAGE
+static inline void flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+}
+#endif
+
+#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+static inline void flush_kernel_dcache_page(struct page *page)
+{
+}
+#endif
+
#ifdef CONFIG_HIGHMEM
#include <asm/highmem.h>
*
* Also, many kernel routines increase the page count before a critical
* routine so they can be sure the page doesn't go away from under them.
- *
- * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we
- * can use atomic_add_negative(-1, page->_count) to detect when the page
- * becomes free and so that we can also use atomic_inc_and_test to atomically
- * detect when we just tried to grab a ref on a page which some other CPU has
- * already deemed to be freeable.
- *
- * NO code should make assumptions about this internal detail! Use the provided
- * macros which retain the old rules: page_count(page) == 0 is a free page.
*/
/*
* Drop a ref, return true if the logical refcount fell to zero (the page has
* no users)
*/
-#define put_page_testzero(p) \
- ({ \
- BUG_ON(atomic_read(&(p)->_count) == -1);\
- atomic_add_negative(-1, &(p)->_count); \
- })
+static inline int put_page_testzero(struct page *page)
+{
+ BUG_ON(atomic_read(&page->_count) == 0);
+ return atomic_dec_and_test(&page->_count);
+}
/*
- * Grab a ref, return true if the page previously had a logical refcount of
- * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page
+ * Try to grab a ref unless the page has a refcount of zero, return false if
+ * that is the case.
*/
-#define get_page_testone(p) atomic_inc_and_test(&(p)->_count)
-
-#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1)
-#define __put_page(p) atomic_dec(&(p)->_count)
+static inline int get_page_unless_zero(struct page *page)
+{
+ return atomic_inc_not_zero(&page->_count);
+}
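/*
 * Editor's note, not part of the patch: with this change a free page
 * simply has _count == 0, so the speculative-lookup pattern becomes:
 *
 *	if (get_page_unless_zero(page)) {
 *		... page cannot be freed while we hold the reference ...
 *		put_page(page);
 *	}
 *
 * atomic_inc_not_zero() fails exactly when the page was already free,
 * replacing the old "grab and test for -1" get_page_testone() dance.
 */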
extern void FASTCALL(__page_cache_release(struct page *));
static inline int page_count(struct page *page)
{
- if (PageCompound(page))
+ if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
- return atomic_read(&page->_count) + 1;
+ return atomic_read(&page->_count);
}
static inline void get_page(struct page *page)
atomic_inc(&page->_count);
}
+/*
+ * Setup the page count before being freed into the page allocator for
+ * the first time (boot or memory hotplug)
+ */
+static inline void init_page_count(struct page *page)
+{
+ atomic_set(&page->_count, 1);
+}
+
void put_page(struct page *page);
+void split_page(struct page *page, unsigned int order);
+
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
-int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
void drop_pagecache(void);
void drop_slab(void);
--- /dev/null
+#ifndef _LINUX_PFN_H_
+#define _LINUX_PFN_H_
+
+#define PFN_ALIGN(x) (((unsigned long long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((unsigned long long)(x) << PAGE_SHIFT)
+
+#endif
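/*
 * Editor's worked example, not part of the patch (assumes 4 KiB pages,
 * i.e. PAGE_SHIFT == 12, PAGE_MASK == ~0xfff):
 *	PFN_DOWN(0x1234)  == 1      - frame containing the byte address
 *	PFN_UP(0x1234)    == 2      - first frame at or above the address
 *	PFN_PHYS(2)       == 0x2000 - frame number back to a byte address
 *	PFN_ALIGN(0x1234) == 0x2000 - byte address rounded up to a page
 */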
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
- __u32 nfmark;
struct nf_conntrack *nfct;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct sk_buff *nfct_reasm;
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
+ __u32 nfmark;
#endif /* CONFIG_NETFILTER */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#include <asm/system.h>
+extern void kfree_skb(struct sk_buff *skb);
extern void __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *__alloc_skb(unsigned int size,
gfp_t priority, int fclone);
void *here);
extern void skb_under_panic(struct sk_buff *skb, int len,
void *here);
+extern void skb_truesize_bug(struct sk_buff *skb);
+
+static inline void skb_truesize_check(struct sk_buff *skb)
+{
+ if (unlikely((int)skb->truesize < sizeof(struct sk_buff) + skb->len))
+ skb_truesize_bug(skb);
+}
extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
int getfrag(void *from, char *to, int offset,
* atomic change.
*/
-/**
- * kfree_skb - free an sk_buff
- * @skb: buffer to free
- *
- * Drop a reference to the buffer and free it if the usage count has
- * hit zero.
- */
-static inline void kfree_skb(struct sk_buff *skb)
-{
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
- return;
- __kfree_skb(skb);
-}
-
/**
* skb_cloned - is the buffer a clone
* @skb: buffer to check
#define NET_IP_ALIGN 2
#endif
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom, you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+
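/*
 * Editor's sketch, not part of the patch: an architecture whose DMA
 * engines favour 32-byte alignment could override the default from one
 * of its own headers (which header is the arch's choice) with
 *
 *	#define NET_SKB_PAD	32
 *
 * and __dev_alloc_skb()/skb_cow() below would then reserve and round to
 * 32 bytes of headroom instead of 16, keeping the DMA target aligned
 * while still satisfying the 16 bytes the stack expects.
 */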
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
{
- if (unlikely(skb->data_len)) {
- WARN_ON(1);
- return;
- }
- skb->len = len;
- skb->tail = skb->data + len;
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data + len;
+ } else
+ ___pskb_trim(skb, len, 0);
}
/**
*
* Cut the length of a buffer down by removing data from the tail. If
* the buffer is already under the length specified it is not modified.
- * The skb must be linear.
*/
static inline void skb_trim(struct sk_buff *skb, unsigned int len)
{
static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
{
- if (skb->data_len)
- return ___pskb_trim(skb, len);
- __skb_trim(skb, len);
- return 0;
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ }
+ return ___pskb_trim(skb, len, 1);
}
static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
gfp_t gfp_mask)
{
- struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+ struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
if (likely(skb))
- skb_reserve(skb, 16);
+ skb_reserve(skb, NET_SKB_PAD);
return skb;
}
#else
*/
static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
{
- int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+ int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
+ skb_headroom(skb);
if (delta < 0)
delta = 0;
if (delta || skb_cloned(skb))
- return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
+ ~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
return 0;
}
*/
static inline void skb_postpull_rcsum(struct sk_buff *skb,
- const void *start, int len)
+ const void *start, unsigned int len)
{
if (skb->ip_summed == CHECKSUM_HW)
skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
}
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
kfree_skb(skb);
}
#endif
-static inline void nf_reset(struct sk_buff *skb)
-{
- nf_conntrack_put(skb->nfct);
- skb->nfct = NULL;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- nf_conntrack_put_reasm(skb->nfct_reasm);
- skb->nfct_reasm = NULL;
-#endif
-}
-
#ifdef CONFIG_BRIDGE_NETFILTER
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
atomic_inc(&nf_bridge->use);
}
#endif /* CONFIG_BRIDGE_NETFILTER */
+static inline void nf_reset(struct sk_buff *skb)
+{
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(skb->nfct_reasm);
+ skb->nfct_reasm = NULL;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(skb->nf_bridge);
+ skb->nf_bridge = NULL;
+#endif
+}
+
#else /* CONFIG_NETFILTER */
static inline void nf_reset(struct sk_buff *skb) {}
#endif /* CONFIG_NETFILTER */
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
+static kmem_cache_t *signal_cachep;
/* SLAB cache for sighand_struct structures (tsk->sighand) */
kmem_cache_t *sighand_cachep;
}
EXPORT_SYMBOL(free_task);
-void __put_task_struct_cb(struct rcu_head *rhp)
+void __put_task_struct(struct task_struct *tsk)
{
- struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
-
WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
- if (unlikely(tsk->audit_context))
- audit_free(tsk);
security_task_free(tsk);
free_uid(tsk->user);
put_group_info(tsk->group_info);
/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
atomic_set(&tsk->fs_excl, 0);
+ tsk->btrace_seq = 0;
+ tsk->splice_pipe = NULL;
return tsk;
}
atomic_set(&newf->count, 1);
spin_lock_init(&newf->file_lock);
+ newf->next_fd = 0;
fdt = &newf->fdtab;
- fdt->next_fd = 0;
fdt->max_fds = NR_OPEN_DEFAULT;
- fdt->max_fdset = __FD_SETSIZE;
- fdt->close_on_exec = &newf->close_on_exec_init;
- fdt->open_fds = &newf->open_fds_init;
+ fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
+ fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+ fdt->open_fds = (fd_set *)&newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
INIT_RCU_HEAD(&fdt->rcu);
fdt->free_files = NULL;
struct files_struct *files = current->files;
int rc;
- if(!files)
- BUG();
+ BUG_ON(!files);
/* This can race but the race causes us to copy when we don't
need to and drop the copy */
EXPORT_SYMBOL(unshare_files);
-void sighand_free_cb(struct rcu_head *rhp)
-{
- struct sighand_struct *sp;
-
- sp = container_of(rhp, struct sighand_struct, rcu);
- kmem_cache_free(sighand_cachep, sp);
-}
-
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
struct sighand_struct *sig;
rcu_assign_pointer(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
- spin_lock_init(&sig->siglock);
atomic_set(&sig->count, 1);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
return 0;
}
+void __cleanup_sighand(struct sighand_struct *sighand)
+{
+ if (atomic_dec_and_test(&sighand->count))
+ kmem_cache_free(sighand_cachep, sighand);
+}
+
static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
struct signal_struct *sig;
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
sig->it_real_incr.tv64 = 0;
sig->real_timer.function = it_real_fn;
- sig->real_timer.data = tsk;
+ sig->tsk = tsk;
sig->it_virt_expires = cputime_zero;
sig->it_virt_incr = cputime_zero;
return 0;
}
+void __cleanup_signal(struct signal_struct *sig)
+{
+ exit_thread_group_keys(sig);
+ kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void cleanup_signal(struct task_struct *tsk)
+{
+ struct signal_struct *sig = tsk->signal;
+
+ atomic_dec(&sig->live);
+
+ if (atomic_dec_and_test(&sig->count))
+ __cleanup_signal(sig);
+}
+
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
p->mempolicy = NULL;
goto bad_fork_cleanup_cpuset;
}
+ mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_DEBUG_MUTEXES
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
-
+ p->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+ p->compat_robust_list = NULL;
+#endif
/*
* sigaltstack should be cleared when sharing the same VM
*/
* We dont wake it up yet.
*/
p->group_leader = p;
+ INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
!cpu_online(task_cpu(p))))
set_task_cpu(p, smp_processor_id());
- /*
- * Check for pending SIGKILL! The new thread should not be allowed
- * to slip out of an OOM kill. (or normal SIGKILL.)
- */
- if (sigismember(&current->pending.signal, SIGKILL)) {
- write_unlock_irq(&tasklist_lock);
- retval = -EINTR;
- goto bad_fork_cleanup_namespace;
- }
-
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
p->parent = p->real_parent;
spin_lock(&current->sighand->siglock);
+
+ /*
+ * Process group and session signals need to be delivered to just the
+ * parent before the fork or both the parent and the child after the
+ * fork. Restart if a signal comes in before we add the new process to
+ * its process group.
+ * A fatal signal pending means that current will exit, so the new
+ * thread can't slip out of an OOM kill (or normal SIGKILL).
+ */
+ recalc_sigpending();
+ if (signal_pending(current)) {
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -ERESTARTNOINTR;
+ goto bad_fork_cleanup_namespace;
+ }
+
if (clone_flags & CLONE_THREAD) {
/*
* Important: if an exit-all has been started then
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
- p->group_leader = current->group_leader;
- if (current->signal->group_stop_count > 0) {
- /*
- * There is an all-stop in progress for the group.
- * We ourselves will stop as soon as we check signals.
- * Make the new thread part of that group stop too.
- */
- current->signal->group_stop_count++;
- set_tsk_thread_flag(p, TIF_SIGPENDING);
- }
+ p->group_leader = current->group_leader;
+ list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
if (!cputime_eq(current->signal->it_virt_expires,
cputime_zero) ||
*/
p->ioprio = current->ioprio;
- SET_LINKS(p);
- if (unlikely(p->ptrace & PT_PTRACED))
- __ptrace_link(p, current->parent);
-
- if (thread_group_leader(p)) {
- p->signal->tty = current->signal->tty;
- p->signal->pgrp = process_group(current);
- p->signal->session = current->signal->session;
- attach_pid(p, PIDTYPE_PGID, process_group(p));
- attach_pid(p, PIDTYPE_SID, p->signal->session);
- if (p->pid)
+ if (likely(p->pid)) {
+ add_parent(p);
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
+ if (thread_group_leader(p)) {
+ p->signal->tty = current->signal->tty;
+ p->signal->pgrp = process_group(current);
+ p->signal->session = current->signal->session;
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+ list_add_tail_rcu(&p->tasks, &init_task.tasks);
__get_cpu_var(process_counts)++;
+ }
+ attach_pid(p, PIDTYPE_PID, p->pid);
+ nr_threads++;
}
- attach_pid(p, PIDTYPE_TGID, p->tgid);
- attach_pid(p, PIDTYPE_PID, p->pid);
- nr_threads++;
total_forks++;
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
- exit_signal(p);
+ cleanup_signal(p);
bad_fork_cleanup_sighand:
- exit_sighand(p);
+ __cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
if (!task)
return ERR_PTR(-ENOMEM);
init_idle(task, cpu);
- unhash_process(task);
+
return task;
}
{
struct task_struct *p;
int trace = 0;
- long pid = alloc_pidmap();
+ struct pid *pid = alloc_pid();
+ long nr;
- if (pid < 0)
+ if (!pid)
return -EAGAIN;
+ nr = pid->nr;
if (unlikely(current->ptrace)) {
trace = fork_traceflag (clone_flags);
if (trace)
clone_flags |= CLONE_PTRACE;
}
- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr);
/*
* Do this prior to waking up the new thread - the thread pointer
* might become invalid after that point, if the thread exits quickly.
p->state = TASK_STOPPED;
if (unlikely (trace)) {
- current->ptrace_message = pid;
+ current->ptrace_message = nr;
ptrace_notify ((trace << 8) | SIGTRAP);
}
ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
}
} else {
- free_pidmap(pid);
- pid = PTR_ERR(p);
+ free_pid(pid);
+ nr = PTR_ERR(p);
}
- return pid;
+ return nr;
}
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
+static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+ struct sighand_struct *sighand = data;
+
+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ spin_lock_init(&sighand->siglock);
+}
+
void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+ sighand_ctor, NULL);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
check_unshare_flags(&unshare_flags);
+ /* Return -EINVAL for all unsupported flags */
+ err = -EINVAL;
+ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+ goto bad_unshare_out;
+
if ((err = unshare_thread(unshare_flags)))
goto bad_unshare_out;
if ((err = unshare_fs(unshare_flags, &new_fs)))
idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
sha1.o
+lib-$(CONFIG_SMP) += cpumask.o
+
lib-y += kobject.o kref.o kobject_uevent.o klist.o
obj-y += sort.o parser.o halfmd4.o iomap_copy.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
# support for page migration
#
config MIGRATION
- def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
- depends on SWAP
+ bool "Page migration"
+ def_bool y if NUMA
+ depends on SWAP && NUMA
+ help
+ Allows the migration of the physical location of pages of processes
+ while the virtual addresses are not changed. This is useful for
+ example on NUMA systems to put pages nearer to the processors accessing
+ the page.
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
+#include <linux/blktrace_api.h>
#include <asm/tlbflush.h>
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
+static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
{
- return alloc_page(gfp_mask | GFP_DMA);
-}
-
-static void page_pool_free(void *page, void *data)
-{
- __free_page(page);
+ return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
}
/*
*/
#ifdef CONFIG_HIGHMEM
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
-{
- return alloc_page(gfp_mask);
-}
-
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
pkmap_count[i] = 0;
/* sanity check */
- if (pte_none(pkmap_page_table[i]))
- BUG();
+ BUG_ON(pte_none(pkmap_page_table[i]));
/*
* Don't need an atomic fetch-and-clear op here;
if (!vaddr)
vaddr = map_new_virtual(page);
pkmap_count[PKMAP_NR(vaddr)]++;
- if (pkmap_count[PKMAP_NR(vaddr)] < 2)
- BUG();
+ BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
spin_unlock(&kmap_lock);
return (void*) vaddr;
}
spin_lock(&kmap_lock);
vaddr = (unsigned long)page_address(page);
- if (!vaddr)
- BUG();
+ BUG_ON(!vaddr);
nr = PKMAP_NR(vaddr);
/*
if (!i.totalhigh)
return 0;
- page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
- if (!page_pool)
- BUG();
+ page_pool = mempool_create_page_pool(POOL_SIZE, 0);
+ BUG_ON(!page_pool);
printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
return 0;
if (isa_page_pool)
return 0;
- isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
- if (!isa_page_pool)
- BUG();
+ isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
+ mempool_free_pages, (void *) 0);
+ BUG_ON(!isa_page_pool);
printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
bio_put(bio);
}
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
{
if (bio->bi_size)
return 1;
}
static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
- mempool_t *pool)
+ mempool_t *pool)
{
struct page *page;
struct bio *bio = NULL;
pool = isa_page_pool;
}
+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
/*
* slow path
*/
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
- return 0;
+ return 1;
}
__setup("norandmaps", disable_randmaps);
anon_vma_unlink(vma);
unlink_file_vma(vma);
- if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ if (is_vm_hugetlb_page(vma)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
} else {
* Optimization: gather nearby vmas into one call down
*/
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
- && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
- HPAGE_SIZE)) {
+ && !is_vm_hugetlb_page(next)) {
vma = next;
next = vma->vm_next;
anon_vma_unlink(vma);
{
unsigned long pfn = pte_pfn(pte);
- if (vma->vm_flags & VM_PFNMAP) {
+ if (unlikely(vma->vm_flags & VM_PFNMAP)) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
* we should just do "return pfn_to_page(pfn)", but
* in the meantime we check that we get a valid pfn,
* and that the resulting page looks ok.
- *
- * Remove this test eventually!
*/
if (unlikely(!pfn_valid(pfn))) {
if (!(vma->vm_flags & VM_RESERVED))
}
if (pages) {
pages[i] = page;
+
+ flush_anon_page(page, start);
flush_dcache_page(page);
}
if (vmas)
* The page has to be a nice clean _individual_ kernel allocation.
* If you allocate a compound page, you need to have marked it as
* such (__GFP_COMP), or manually just split the page up yourself
- * (which is mainly an issue of doing "set_page_count(page, 1)" for
- * each sub-page, and then freeing them one by one when you free
- * them rather than freeing it as a compound page).
+ * (see split_page()).
*
* NOTE! Traditionally this was done with "remap_pfn_range()" which
* took an arbitrary page protection parameter. This doesn't allow
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
memset(kaddr, 0, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(dst);
return;
}
if (!vma)
return -1;
write = (vma->vm_flags & VM_WRITE) != 0;
- if (addr >= end)
- BUG();
- if (end > vma->vm_end)
- BUG();
+ BUG_ON(addr >= end);
+ BUG_ON(end > vma->vm_end);
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
#include <asm/cacheflush.h>
#include <asm/tlb.h>
-#ifndef arch_mmap_check
-#define arch_mmap_check(addr, len, flags) (0)
-#endif
-
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+ * Leave out the reserved pages; they are not available for anonymous use.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = (totalram_pages - hugetlb_total_pages())
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
if (brk < mm->end_code)
goto out;
+
+ /*
+ * Check against rlimit here. If this check is done later after the test
+ * of oldbrk with newbrk then it can escape the test and let the data
+ * segment grow beyond its set limit in the case where the limit is
+ * not page aligned -Ram Gupta
+ */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
goto out;
}
- /* Check against rlimit.. */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
- goto out;
-
/* Check against existing mmap mappings. */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count)
printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
- if (bug)
- BUG();
+ BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
struct rb_node ** rb_link, * rb_parent;
__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
- if (__vma && __vma->vm_start < vma->vm_end)
- BUG();
+ BUG_ON(__vma && __vma->vm_start < vma->vm_end);
__vma_link(mm, vma, prev, rb_link, rb_parent);
mm->map_count++;
}
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
* (e.g. stash info in next's anon_vma_node when assigning
* an anon_vma, or when trying vma_merge). Another time.
*/
- if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
- BUG();
+ BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
if (!near)
goto none;
const unsigned long stack_flags
= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
-#ifdef CONFIG_HUGETLB
- if (flags & VM_HUGETLB) {
- if (!(flags & VM_DONTCOPY))
- mm->shared_vm += pages;
- return;
- }
-#endif /* CONFIG_HUGETLB */
-
if (file) {
mm->shared_vm += pages;
if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
if (!len)
return -EINVAL;
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
unsigned long flags;
struct rb_node ** rb_link, * rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
- int error;
len = PAGE_ALIGN(len);
if (!len)
if ((addr + len) > TASK_SIZE || (addr + len) < addr)
return -EINVAL;
- flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/*
* mlock MCL_FUTURE?
*/
if (security_vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
/* Can we just expand an old private anonymous mapping? */
if (vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL))
/*
* create a vma struct for an anonymous mapping
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
- memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_start = addr;
#include <linux/mempolicy.h>
#include <asm/tlbflush.h>
+#include <asm/div64.h>
#include "internal.h"
/*
EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
-static void fastcall free_hot_cold_page(struct page *page, int cold);
static void __free_pages_ok(struct page *page, unsigned int order);
/*
for (i = 0; i < nr_pages; i++) {
struct page *p = page + i;
- SetPageCompound(p);
+ __SetPageCompound(p);
set_page_private(p, (unsigned long)page);
}
}
if (unlikely(!PageCompound(p) |
(page_private(p) != (unsigned long)page)))
bad_page(page);
- ClearPageCompound(p);
+ __ClearPageCompound(p);
}
}
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+ /*
+ * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+ * and __GFP_HIGHMEM from hard or soft interrupt context.
+ */
+ BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+ for (i = 0; i < (1 << order); i++)
+ clear_highpage(page + i);
+}
+
/*
* function for dealing with page's order in buddy system.
* zone->lock is already acquired when we use these.
* So, we don't need atomic page->flags operations here.
*/
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
return page_private(page);
}
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
set_page_private(page, order);
__SetPageBuddy(page);
}
if (PageBuddy(page) && page_order(page) == order) {
BUG_ON(page_count(page) != 0);
- return 1;
+ return 1;
}
- return 0;
+ return 0;
}
/*
mutex_debug_check_no_locks_freed(page_address(page),
PAGE_SIZE<<order);
-#ifndef CONFIG_MMU
- for (i = 1 ; i < (1 << order) ; ++i)
- __put_page(page + i);
-#endif
-
for (i = 0 ; i < (1 << order) ; ++i)
reserved += free_pages_check(page + i);
if (reserved)
if (order == 0) {
__ClearPageReserved(page);
set_page_count(page, 0);
-
- free_hot_cold_page(page, 0);
+ set_page_refcounted(page);
+ __free_page(page);
} else {
- LIST_HEAD(list);
int loop;
+ prefetchw(page);
for (loop = 0; loop < BITS_PER_LONG; loop++) {
struct page *p = &page[loop];
- if (loop + 16 < BITS_PER_LONG)
- prefetchw(p + 16);
+ if (loop + 1 < BITS_PER_LONG)
+ prefetchw(p + 1);
__ClearPageReserved(p);
set_page_count(p, 0);
}
- arch_free_page(page, order);
-
- mod_page_state(pgfree, 1 << order);
-
- list_add(&page->lru, &list);
- kernel_map_pages(page, 1 << order, 0);
- free_pages_bulk(page_zone(page), 1, &list, order);
+ set_page_refcounted(page);
+ __free_pages(page, order);
}
}
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
set_page_private(page, 0);
- set_page_refs(page, order);
+ set_page_refcounted(page);
kernel_map_pages(page, 1 << order, 1);
+
+ if (gfp_flags & __GFP_ZERO)
+ prep_zero_page(page, order, gfp_flags);
+
+ if (order && (gfp_flags & __GFP_COMP))
+ prep_compound_page(page, order);
+
return 0;
}
/*
* Called from the slab reaper to drain pagesets on a particular node that
* belong to the currently executing processor.
+ * Note that this function must be called with the thread pinned to
+ * a single processor.
*/
void drain_node_pages(int nodeid)
{
int i, z;
unsigned long flags;
- local_irq_save(flags);
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- free_pages_bulk(zone, pcp->count, &pcp->list, 0);
- pcp->count = 0;
+ if (pcp->count) {
+ local_irq_save(flags);
+ free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+ pcp->count = 0;
+ local_irq_restore(flags);
+ }
}
}
- local_irq_restore(flags);
}
#endif
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0..n-1]
+ * Each sub-page must be freed individually.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+void split_page(struct page *page, unsigned int order)
{
int i;
- BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
- for(i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
+ BUG_ON(PageCompound(page));
+ BUG_ON(!page_count(page));
+ for (i = 1; i < (1 << order); i++)
+ set_page_refcounted(page + i);
}
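/*
 * Editor's sketch, not part of the patch: typical driver-side use of
 * split_page() -- allocate a high-order block, split it, and later free
 * each sub-page on its own.  Function name and error handling are
 * illustrative only.
 */
static struct page *example_alloc_split(unsigned int order)
{
	struct page *page = alloc_pages(GFP_KERNEL, order);

	if (page)
		split_page(page, order);
	/* caller now frees page[0] .. page[(1 << order) - 1] individually
	 * with __free_page() when done */
	return page;
}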
/*
put_cpu();
BUG_ON(bad_range(zone, page));
- if (prep_new_page(page, order))
+ if (prep_new_page(page, order, gfp_flags))
goto again;
-
- if (gfp_flags & __GFP_ZERO)
- prep_zero_page(page, order, gfp_flags);
-
- if (order && (gfp_flags & __GFP_COMP))
- prep_compound_page(page, order);
return page;
failed:
goto got_pg;
do {
- wakeup_kswapd(*z, order);
+ if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
+ wakeup_kswapd(*z, order);
} while (*(++z));
/*
pg_data_t *pgdat;
unsigned int pages = 0;
- for_each_pgdat(pgdat)
+ for_each_online_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
return pages;
static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
- int cpu = 0;
+ unsigned cpu;
memset(ret, 0, nr * sizeof(unsigned long));
cpus_and(*cpumask, *cpumask, cpu_online_map);
- cpu = first_cpu(*cpumask);
- while (cpu < NR_CPUS) {
- unsigned long *in, *out, off;
-
- if (!cpu_isset(cpu, *cpumask))
- continue;
+ for_each_cpu_mask(cpu, *cpumask) {
+ unsigned long *in;
+ unsigned long *out;
+ unsigned off;
+ unsigned next_cpu;
in = (unsigned long *)&per_cpu(page_states, cpu);
- cpu = next_cpu(cpu, *cpumask);
-
- if (likely(cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, cpu));
+ next_cpu = next_cpu(cpu, *cpumask);
+ if (likely(next_cpu < NR_CPUS))
+ prefetch(&per_cpu(page_states, next_cpu));
out = (unsigned long *)ret;
for (off = 0; off < nr; off++)
*active = 0;
*inactive = 0;
*free = 0;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
unsigned long l, m, n;
__get_zone_counts(&l, &m, &n, pgdat);
*active += l;
continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
- set_page_count(page, 1);
+ init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
}
}
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
- printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
- zone->name, zone->present_pages, batch);
+ if (zone->present_pages)
+ printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
+ zone->name, zone->present_pages, batch);
}
static __meminit void init_currently_empty_zone(struct zone *zone,
zone_wait_table_init(zone, size);
pgdat->nr_zones = zone_idx(zone) + 1;
- zone->zone_mem_map = pfn_to_page(zone_start_pfn);
zone->zone_start_pfn = zone_start_pfn;
memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
- unsigned long size;
+ unsigned long size, start, end;
struct page *map;
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+ /*
+ * The zone's endpoints aren't required to be MAX_ORDER
+ * aligned, but the node_mem_map endpoints must be, in order
+ * for the buddy allocator to function correctly.
+ */
+ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+ end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+ end = ALIGN(end, MAX_ORDER_NR_PAGES);
+ size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
- pgdat->node_mem_map = map;
+ pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
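/*
 * Editor's worked example, not part of the patch: with
 * MAX_ORDER_NR_PAGES == 1024, a node spanning pfns 1500..2999 gets
 * start = 1024 and end = ALIGN(3000, 1024) = 3072, so 2048 struct pages
 * are allocated and node_mem_map points at map + (1500 - 1024); the
 * buddy allocator can then index whole MAX_ORDER blocks safely.
 */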
#ifdef CONFIG_FLATMEM
/*
{
pg_data_t *pgdat;
loff_t node = *pos;
-
- for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+ for (pgdat = first_online_pgdat();
+ pgdat && node;
+ pgdat = next_online_pgdat(pgdat))
--node;
return pgdat;
pg_data_t *pgdat = (pg_data_t *)arg;
(*pos)++;
- return pgdat->pgdat_next;
+ return next_online_pgdat(pgdat);
}
static void frag_stop(struct seq_file *m, void *arg)
hotcpu_notifier(page_alloc_cpu_notify, 0);
}
+/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+ struct pglist_data *pgdat;
+ unsigned long reserve_pages = 0;
+ int i, j;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+ unsigned long max = 0;
+
+ /* Find valid and maximum lowmem_reserve in the zone */
+ for (j = i; j < MAX_NR_ZONES; j++) {
+ if (zone->lowmem_reserve[j] > max)
+ max = zone->lowmem_reserve[j];
+ }
+
+ /* we treat pages_high as reserved pages. */
+ max += zone->pages_high;
+
+ if (max > zone->present_pages)
+ max = zone->present_pages;
+ reserve_pages += max;
+ }
+ }
+ totalreserve_pages = reserve_pages;
+}
+
/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
struct pglist_data *pgdat;
int j, idx;
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long present_pages = zone->present_pages;
}
}
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
}
for_each_zone(zone) {
- unsigned long tmp;
+ u64 tmp;
+
spin_lock_irqsave(&zone->lru_lock, flags);
- tmp = (pages_min * zone->present_pages) / lowmem_pages;
+ tmp = (u64)pages_min * zone->present_pages;
+ do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
zone->pages_min = tmp;
}
- zone->pages_low = zone->pages_min + tmp / 4;
- zone->pages_high = zone->pages_min + tmp / 2;
+ zone->pages_low = zone->pages_min + (tmp >> 2);
+ zone->pages_high = zone->pages_min + (tmp >> 1);
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
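/*
 * Editor's worked example, not part of the patch: with pages_min = 1024
 * and a lowmem zone holding half of all lowmem, tmp = 512, giving that
 * zone pages_min = 512, pages_low = 512 + 128 = 640 and pages_high =
 * 512 + 256 = 768.  The u64/do_div() form matters because pages_min *
 * present_pages can overflow 32 bits on large-memory 32-bit machines.
 */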
/*
else
numentries <<= (PAGE_SHIFT - scale);
}
- /* rounded up to nearest power of 2 in size */
- numentries = 1UL << (long_log2(numentries) + 1);
+ numentries = roundup_pow_of_two(numentries);
/* limit allocation size to 1/16 total memory by default */
if (max == 0) {
return table;
}
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ int nid = arch_pfn_to_nid(pfn);
+ return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+ return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+ long section_id = page_to_section(page);
+ return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
-#ifdef CONFIG_NET_RADIO
-#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
+#include <linux/wireless.h>
#include <net/iw_handler.h>
-#endif /* CONFIG_NET_RADIO */
#include <asm/current.h>
#include <linux/err.h>
+#include <linux/audit.h>
#ifdef CONFIG_XEN
#include <net/ip.h>
* sure which should go first, but I bet it won't make much
* difference if we are running VLANs. The good news is that
* this protocol won't be in the list unless compiled in, so
- * the average user (w/out VLANs) will not be adversly affected.
+ * the average user (w/out VLANs) will not be adversely affected.
* --BLG
*
* 0800 IP
static struct list_head ptype_all; /* Taps */
/*
- * The @dev_base list is protected by @dev_base_lock and the rtln
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
* semaphore.
*
* Pure readers hold dev_base_lock for reading.
* Our notifier list
*/
-static struct notifier_block *netdev_chain;
+static RAW_NOTIFIER_HEAD(netdev_chain);
/*
* Device drivers call our routines to queue packets here. We empty the
* @name: name format string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
- * id. Not efficient for many devices, not called a lot. The caller
- * must hold the dev_base or rtnl lock while allocating the name and
- * adding the device in order to avoid duplicates. Returns the number
- * of the unit assigned or a negative errno code.
+ * id. It scans the list of devices to build up a free map, then chooses
+ * the first empty slot. The caller must hold the dev_base or rtnl lock
+ * while allocating the name and adding the device in order to avoid
+ * duplicates.
+ * Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
+ * Returns the number of the unit assigned or a negative errno code.
*/
int dev_alloc_name(struct net_device *dev, const char *name)
if (!err) {
hlist_del(&dev->name_hlist);
hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
- notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGENAME, dev);
}
return err;
}
/**
- * netdev_features_change - device changes fatures
+ * netdev_features_change - device changes features
* @dev: device to cause notification
*
* Called to indicate a device has changed features.
*/
void netdev_features_change(struct net_device *dev)
{
- notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
/*
* ... and announce new interface.
*/
- notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
}
return ret;
}
* Tell people we are going down, so that they can
* prepare for death while the device is still operating.
*/
- notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
dev_deactivate(dev);
/*
* Tell people we are down
*/
- notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
return 0;
}
int err;
rtnl_lock();
- err = notifier_chain_register(&netdev_chain, nb);
+ err = raw_notifier_chain_register(&netdev_chain, nb);
if (!err) {
for (dev = dev_base; dev; dev = dev->next) {
nb->notifier_call(nb, NETDEV_REGISTER, dev);
int unregister_netdevice_notifier(struct notifier_block *nb)
{
- return notifier_chain_unregister(&netdev_chain, nb);
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(&netdev_chain, nb);
+ rtnl_unlock();
+ return err;
}
/**
* @v: pointer passed unmodified to notifier function
*
* Call all network notifier blocks. Parameters and return value
- * are as for notifier_call_chain().
+ * are as for raw_notifier_call_chain().
*/
int call_netdevice_notifiers(unsigned long val, void *v)
{
- return notifier_call_chain(&netdev_chain, val, v);
+ return raw_notifier_call_chain(&netdev_chain, val, v);
}
/* When > 0 there are consumers of rx skb time stamps */
rcu_read_unlock();
}
+
+void __netif_schedule(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+ unsigned long flags;
+ struct softnet_data *sd;
+
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ dev->next_sched = sd->output_queue;
+ sd->output_queue = dev;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ dev_hold(dev);
+ list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ if (dev->quota < 0)
+ dev->quota += dev->weight;
+ else
+ dev->quota = dev->weight;
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
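As a usage sketch (the mydrv_* names are hypothetical), the classic RX interrupt pattern that ends up here goes through the netif_rx_schedule_prep() wrapper:

	static irqreturn_t mydrv_interrupt(int irq, void *dev_id,
					   struct pt_regs *regs)
	{
		struct net_device *dev = dev_id;

		if (netif_rx_schedule_prep(dev)) {	/* claims __LINK_STATE_RX_SCHED */
			mydrv_disable_rx_irq(dev);	/* hypothetical helper */
			__netif_rx_schedule(dev);	/* queue dev->poll() as above */
		}
		return IRQ_HANDLED;
	}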
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+ if (in_irq() || irqs_disabled())
+ dev_kfree_skb_irq(skb);
+ else
+ dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
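The three freeing variants differ only in which contexts they tolerate; roughly:

	dev_kfree_skb(skb);	/* process or softirq context only           */
	dev_kfree_skb_irq(skb);	/* safe in hardirq; defers work to softirq   */
	dev_kfree_skb_any(skb);	/* picks one of the above at runtime, as here */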
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_stop_queue(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_wake_queue(dev);
+ __netdev_watchdog_up(dev);
+ }
+}
+EXPORT_SYMBOL(netif_device_attach);
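These two are meant to pair up in a driver's suspend/resume path; a minimal sketch, with all mydrv_* names hypothetical:

	static int mydrv_suspend(struct pci_dev *pdev, pm_message_t state)
	{
		struct net_device *dev = pci_get_drvdata(pdev);

		netif_device_detach(dev);	/* stops the queue if running */
		/* ... save hardware state, power down ... */
		return 0;
	}

	static int mydrv_resume(struct pci_dev *pdev)
	{
		struct net_device *dev = pci_get_drvdata(pdev);

		/* ... power up, reprogram hardware ... */
		netif_device_attach(dev);	/* wakes queue, restarts watchdog */
		return 0;
	}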
+
+
/*
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
{
struct net_device *dev = skb->dev;
- if (dev->master)
+ if (dev->master) {
+ /*
+ * On bonding slaves other than the currently active
+ * slave, suppress duplicates except for 802.3ad
+ * ETH_P_SLOW and alb non-mcast/bcast.
+ */
+ if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+ if (dev->master->priv_flags & IFF_MASTER_ALB) {
+ if (skb->pkt_type != PACKET_BROADCAST &&
+ skb->pkt_type != PACKET_MULTICAST)
+ goto keep;
+ }
+
+ if (dev->master->priv_flags & IFF_MASTER_8023AD &&
+ skb->protocol == __constant_htons(ETH_P_SLOW))
+ goto keep;
+
+ kfree_skb(skb);
+ return NULL;
+ }
+keep:
skb->dev = dev->master;
+ }
return dev;
}
orig_dev = skb_bond(skb);
+ if (!orig_dev)
+ return NET_RX_DROP;
+
__get_cpu_var(netdev_rx_stat).total++;
skb->h.raw = skb->nh.raw = skb->data;
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
netpoll_poll_unlock(have);
local_irq_disable();
- list_del(&dev->poll_list);
- list_add_tail(&dev->poll_list, &queue->poll_list);
+ list_move_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
.release = seq_release,
};
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
extern int wireless_proc_init(void);
#else
#define wireless_proc_init() 0
* @dev: device
* @inc: modifier
*
- * Add or remove promsicuity from a device. While the count in the device
+ * Add or remove promiscuity from a device. While the count in the device
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts back to normal filtering operation. A negative inc
* value is used to drop promiscuity on the device.
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
"left");
+ audit_log(current->audit_context, GFP_ATOMIC,
+ AUDIT_ANOM_PROMISCUOUS,
+ "dev=%s prom=%d old_prom=%d auid=%u",
+ dev->name, (dev->flags & IFF_PROMISC),
+ (old_flags & IFF_PROMISC),
+ audit_get_loginuid(current->audit_context));
}
}
flags = (dev->flags & ~(IFF_PROMISC |
IFF_ALLMULTI |
- IFF_RUNNING)) |
+ IFF_RUNNING |
+ IFF_LOWER_UP |
+ IFF_DORMANT)) |
(dev->gflags & (IFF_PROMISC |
IFF_ALLMULTI));
- if (netif_running(dev) && netif_carrier_ok(dev))
- flags |= IFF_RUNNING;
+ if (netif_running(dev)) {
+ if (netif_oper_up(dev))
+ flags |= IFF_RUNNING;
+ if (netif_carrier_ok(dev))
+ flags |= IFF_LOWER_UP;
+ if (netif_dormant(dev))
+ flags |= IFF_DORMANT;
+ }
return flags;
}
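To summarize the new mapping (a gloss, not code from the patch):

	/*
	 * IFF_RUNNING  <- netif_oper_up(dev):    RFC 2863 operstate is
	 *                                        UP (or UNKNOWN)
	 * IFF_LOWER_UP <- netif_carrier_ok(dev): lower layer has carrier
	 * IFF_DORMANT  <- netif_dormant(dev):    carrier present, but e.g.
	 *                                        802.1X auth still pending
	 *
	 * The two new bits do not fit in the 16-bit ifr_flags returned by
	 * SIOCGIFFLAGS; userspace reads them via rtnetlink (RTM_NEWLINK
	 * ifi_flags and IFLA_OPERSTATE).
	 */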
if (dev->flags & IFF_UP &&
((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
IFF_VOLATILE)))
- notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGE, dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? +1 : -1;
else
dev->mtu = new_mtu;
if (!err && dev->flags & IFF_UP)
- notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEMTU, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGEMTU, dev);
return err;
}
return -ENODEV;
err = dev->set_mac_address(dev, sa);
if (!err)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ raw_notifier_call_chain(&netdev_chain,
+ NETDEV_CHANGEADDR, dev);
return err;
}
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
- notifier_call_chain(&netdev_chain,
+ raw_notifier_call_chain(&netdev_chain,
NETDEV_CHANGEADDR, dev);
return 0;
*/
if (cmd == SIOCGIFCONF) {
- rtnl_shlock();
+ rtnl_lock();
ret = dev_ifconf((char __user *) arg);
- rtnl_shunlock();
+ rtnl_unlock();
return ret;
}
if (cmd == SIOCGIFNAME)
ret = -EFAULT;
return ret;
}
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
/* If command is `set a parameter', or
* `get the encoding parameters', check if
* the user has the right to do it */
- if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+ if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
+ || cmd == SIOCGIWENCODEEXT) {
if (!capable(CAP_NET_ADMIN))
return -EPERM;
}
ret = -EFAULT;
return ret;
}
-#endif /* WIRELESS_EXT */
+#endif /* CONFIG_WIRELESS_EXT */
return -EINVAL;
}
}
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
+ might_sleep();
+
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
if (!dev->rebuild_header)
dev->rebuild_header = default_rebuild_header;
+ ret = netdev_register_sysfs(dev);
+ if (ret)
+ goto out_err;
+ dev->reg_state = NETREG_REGISTERED;
+
/*
* Default initial state at registry is that the
* device is present.
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
- dev->reg_state = NETREG_REGISTERING;
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
- notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
- /* Finish registration after unlock */
- net_set_todo(dev);
ret = 0;
out:
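The registration flow changes in one important way here: sysfs registration, which can sleep, now happens directly inside register_netdevice (hence the new might_sleep()), and the device goes straight to NETREG_REGISTERED instead of passing through an intermediate NETREG_REGISTERING state that netdev_run_todo() had to finish later. The todo list below is left handling only unregistration.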
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
- rtnl_shlock();
+ rtnl_lock();
/* Rebroadcast unregister notification */
- notifier_call_chain(&netdev_chain,
+ raw_notifier_call_chain(&netdev_chain,
NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
linkwatch_run_queue();
}
- rtnl_shunlock();
+ __rtnl_unlock();
rebroadcast_time = jiffies;
}
*
* We are invoked by rtnl_unlock() after it drops the semaphore.
* This allows us to deal with problems:
- * 1) We can create/delete sysfs objects which invoke hotplug
+ * 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
*/
-static DECLARE_MUTEX(net_todo_run_mutex);
+static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
struct list_head list = LIST_HEAD_INIT(list);
- int err;
-
/* Need to guard against multiple cpu's getting out of order. */
- down(&net_todo_run_mutex);
+ mutex_lock(&net_todo_run_mutex);
/* Not safe to do outside the semaphore. We must not return
* until all unregister events invoked by the local processor
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
- switch(dev->reg_state) {
- case NETREG_REGISTERING:
- dev->reg_state = NETREG_REGISTERED;
- err = netdev_register_sysfs(dev);
- if (err)
- printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
- dev->name, err);
- break;
-
- case NETREG_UNREGISTERING:
- netdev_unregister_sysfs(dev);
- dev->reg_state = NETREG_UNREGISTERED;
+ if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
+ printk(KERN_ERR "network todo '%s' but state %d\n",
+ dev->name, dev->reg_state);
+ dump_stack();
+ continue;
+ }
- netdev_wait_allrefs(dev);
+ netdev_unregister_sysfs(dev);
+ dev->reg_state = NETREG_UNREGISTERED;
- /* paranoia */
- BUG_ON(atomic_read(&dev->refcnt));
- BUG_TRAP(!dev->ip_ptr);
- BUG_TRAP(!dev->ip6_ptr);
- BUG_TRAP(!dev->dn_ptr);
+ netdev_wait_allrefs(dev);
+ /* paranoia */
+ BUG_ON(atomic_read(&dev->refcnt));
+ BUG_TRAP(!dev->ip_ptr);
+ BUG_TRAP(!dev->ip6_ptr);
+ BUG_TRAP(!dev->dn_ptr);
- /* It must be the very last action,
- * after this 'dev' may point to freed up memory.
- */
- if (dev->destructor)
- dev->destructor(dev);
- break;
-
- default:
- printk(KERN_ERR "network todo '%s' but state %d\n",
- dev->name, dev->reg_state);
- break;
- }
+ /* It must be the very last action,
+ * after this 'dev' may point to freed up memory.
+ */
+ if (dev->destructor)
+ dev->destructor(dev);
}
out:
- up(&net_todo_run_mutex);
+ mutex_unlock(&net_todo_run_mutex);
}
/**
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
- p = kmalloc(alloc_size, GFP_KERNEL);
+ p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
return NULL;
}
- memset(p, 0, alloc_size);
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
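The allocation change is the stock kmalloc-plus-memset to kzalloc conversion; the two forms are equivalent:

	p = kmalloc(alloc_size, GFP_KERNEL);	/* old: allocate ...          */
	memset(p, 0, alloc_size);		/* ... then zero by hand      */

	p = kzalloc(alloc_size, GFP_KERNEL);	/* new: returns zeroed memory */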
void free_netdev(struct net_device *dev)
{
#ifdef CONFIG_SYSFS
- /* Compatiablity with error handling in drivers */
+ /* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
return;
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
- notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
/*
* Flush the multicast chain
* Initialise the packet receive queues.
*/
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
BUG();
}
+void skb_truesize_bug(struct sk_buff *skb)
+{
+ printk(KERN_ERR "SKB BUG: Invalid truesize (%u) "
+ "len=%u, sizeof(sk_buff)=%Zd\n",
+ skb->truesize, skb->len, sizeof(struct sk_buff));
+}
+EXPORT_SYMBOL(skb_truesize_bug);
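skb_truesize_bug() is only the reporting half; the check that calls it is an inline in <linux/skbuff.h>. A reconstruction from context (the exact body may differ):

	static inline void skb_truesize_check(struct sk_buff *skb)
	{
		/* truesize must cover at least the header plus current length */
		if (unlikely((int)skb->truesize <
			     (int)(sizeof(struct sk_buff) + skb->len)))
			skb_truesize_bug(skb);
	}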
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (!data)
goto nodata;
}
-static void skb_drop_list(struct sk_buff **listp)
+static void skb_drop_fraglist(struct sk_buff *skb)
{
- struct sk_buff *list = *listp;
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
- *listp = NULL;
+ skb_shinfo(skb)->frag_list = NULL;
do {
struct sk_buff *this = list;
} while (list);
}
-static inline void skb_drop_fraglist(struct sk_buff *skb)
-{
- skb_drop_list(&skb_shinfo(skb)->frag_list);
-}
-
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
kfree_skbmem(skb);
}
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+void kfree_skb(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ __kfree_skb(skb);
+}
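The point of the new wrapper is the uncontended fast path: when we hold the only reference (users == 1) the atomic decrement is skipped altogether, and the smp_rmb() stands in for the ordering that atomic_dec_and_test() would otherwise provide before __kfree_skb() reads the rest of the skb.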
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
- n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
return nskb;
}
-/* Trims skb to length len. It can change skb pointers.
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without changing the data,
+ * we BUG().
*/
-int ___pskb_trim(struct sk_buff *skb, unsigned int len)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
- struct sk_buff **fragp;
- struct sk_buff *frag;
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
- int err;
- if (skb_cloned(skb) &&
- unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
- return err;
-
- i = 0;
- if (offset >= len)
- goto drop_pages;
-
- for (; i < nfrags; i++) {
+ for (i = 0; i < nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
-
- if (end < len) {
- offset = end;
- continue;
- }
-
- skb_shinfo(skb)->frags[i++].size = len - offset;
-
-drop_pages:
- skb_shinfo(skb)->nr_frags = i;
-
- for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
-
- if (skb_shinfo(skb)->frag_list)
- skb_drop_fraglist(skb);
- goto done;
- }
-
- for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
- fragp = &frag->next) {
- int end = offset + frag->len;
-
- if (skb_shared(frag)) {
- struct sk_buff *nfrag;
-
- nfrag = skb_clone(frag, GFP_ATOMIC);
- if (unlikely(!nfrag))
- return -ENOMEM;
-
- nfrag->next = frag->next;
- kfree_skb(frag);
- frag = nfrag;
- *fragp = frag;
- }
-
- if (end < len) {
- offset = end;
- continue;
+ if (end > len) {
+ if (skb_cloned(skb)) {
+ BUG_ON(!realloc);
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return -ENOMEM;
+ }
+ if (len <= offset) {
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->nr_frags--;
+ } else {
+ skb_shinfo(skb)->frags[i].size = len - offset;
+ }
}
-
- if (end > len &&
- unlikely((err = pskb_trim(frag, len - offset))))
- return err;
-
- if (frag->next)
- skb_drop_list(&frag->next);
- break;
+ offset = end;
}
-done:
- if (len > skb_headlen(skb)) {
+ if (offset < len) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
- skb->len = len;
- skb->data_len = 0;
- skb->tail = skb->data + len;
+ if (len <= skb_headlen(skb)) {
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
+ if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+ skb_drop_fraglist(skb);
+ } else {
+ skb->data_len -= skb->len - len;
+ skb->len = len;
+ }
}
return 0;
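Callers normally reach this through the pskb_trim() wrapper; a hedged usage sketch for the common "strip trailing padding" case (real_len is a hypothetical local):

	/* Hypothetical: hardware delivered a padded frame, real length known. */
	if (pskb_trim(skb, real_len)) {
		kfree_skb(skb);		/* -ENOMEM from the realloc path */
		return NET_RX_DROP;
	}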
EXPORT_SYMBOL_GPL(skb_segment);
+/**
+ * skb_pull_rcsum - pull skb and update receive checksum
+ * @skb: buffer to update
+ * @start: start of data before pull
+ * @len: length of data pulled
+ *
+ * This function performs an skb_pull on the packet and updates
+ * the CHECKSUM_HW checksum. It should be used on receive
+ * path processing instead of skb_pull unless you know that the
+ * checksum difference is zero (e.g., a valid IP header) or you
+ * are setting ip_summed to CHECKSUM_NONE.
+ */
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+{
+ BUG_ON(len > skb->len);
+ skb->len -= len;
+ BUG_ON(skb->len < skb->data_len);
+ skb_postpull_rcsum(skb, skb->data, len);
+ return skb->data += len;
+}
+
+EXPORT_SYMBOL_GPL(skb_pull_rcsum);
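A hedged usage sketch: receive-path code that strips a header while skb->ip_summed may be CHECKSUM_HW should use this instead of a bare skb_pull, e.g. for a hypothetical 4-byte tag:

	if (!pskb_may_pull(skb, 4))
		goto drop;
	skb_pull_rcsum(skb, 4);	/* advances skb->data and fixes skb->csum */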
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/fs/aio.c ./fs/aio.c
---- ../orig-linux-2.6.16.29/fs/aio.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./fs/aio.c 2006-09-19 13:58:49.000000000 +0100
-@@ -34,6 +34,11 @@
- #include <asm/uaccess.h>
- #include <asm/mmu_context.h>
-
-+#ifdef CONFIG_EPOLL
-+#include <linux/poll.h>
-+#include <linux/eventpoll.h>
-+#endif
-+
- #if DEBUG > 1
- #define dprintk printk
- #else
-@@ -1016,6 +1021,10 @@ put_rq:
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
-
-+#ifdef CONFIG_EPOLL
-+ if (ctx->file && waitqueue_active(&ctx->poll_wait))
-+ wake_up(&ctx->poll_wait);
-+#endif
- if (ret)
- put_ioctx(ctx);
-
-@@ -1025,6 +1034,8 @@ put_rq:
- /* aio_read_evt
- * Pull an event off of the ioctx's event ring. Returns the number of
- * events fetched (0 or 1 ;-)
-+ * If ent parameter is 0, just returns the number of events that would
-+ * be fetched.
- * FIXME: make this use cmpxchg.
- * TODO: make the ringbuffer user mmap()able (requires FIXME).
- */
-@@ -1047,13 +1058,18 @@ static int aio_read_evt(struct kioctx *i
-
- head = ring->head % info->nr;
- if (head != ring->tail) {
-- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
-- *ent = *evp;
-- head = (head + 1) % info->nr;
-- smp_mb(); /* finish reading the event before updatng the head */
-- ring->head = head;
-- ret = 1;
-- put_aio_ring_event(evp, KM_USER1);
-+ if (ent) { /* event requested */
-+ struct io_event *evp =
-+ aio_ring_event(info, head, KM_USER1);
-+ *ent = *evp;
-+ head = (head + 1) % info->nr;
-+ /* finish reading the event before updating the head */
-+ smp_mb();
-+ ring->head = head;
-+ ret = 1;
-+ put_aio_ring_event(evp, KM_USER1);
-+ } else /* only need to know availability */
-+ ret = 1;
- }
- spin_unlock(&info->ring_lock);
-
-@@ -1236,9 +1252,78 @@ static void io_destroy(struct kioctx *io
-
- aio_cancel_all(ioctx);
- wait_for_all_aios(ioctx);
-+#ifdef CONFIG_EPOLL
-+ /* forget the poll file, but it's up to the user to close it */
-+ if (ioctx->file) {
-+ ioctx->file->private_data = 0;
-+ ioctx->file = 0;
-+ }
-+#endif
- put_ioctx(ioctx); /* once for the lookup */
- }
-
-+#ifdef CONFIG_EPOLL
-+
-+static int aio_queue_fd_close(struct inode *inode, struct file *file)
-+{
-+ struct kioctx *ioctx = file->private_data;
-+ if (ioctx) {
-+ file->private_data = 0;
-+ spin_lock_irq(&ioctx->ctx_lock);
-+ ioctx->file = 0;
-+ spin_unlock_irq(&ioctx->ctx_lock);
-+ }
-+ return 0;
-+}
-+
-+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
-+{ unsigned int pollflags = 0;
-+ struct kioctx *ioctx = file->private_data;
-+
-+ if (ioctx) {
-+
-+ spin_lock_irq(&ioctx->ctx_lock);
-+ /* Insert inside our poll wait queue */
-+ poll_wait(file, &ioctx->poll_wait, wait);
-+
-+ /* Check our condition */
-+ if (aio_read_evt(ioctx, 0))
-+ pollflags = POLLIN | POLLRDNORM;
-+ spin_unlock_irq(&ioctx->ctx_lock);
-+ }
-+
-+ return pollflags;
-+}
-+
-+static struct file_operations aioq_fops = {
-+ .release = aio_queue_fd_close,
-+ .poll = aio_queue_fd_poll
-+};
-+
-+/* make_aio_fd:
-+ * Create a file descriptor that can be used to poll the event queue.
-+ * Based and piggybacked on the excellent epoll code.
-+ */
-+
-+static int make_aio_fd(struct kioctx *ioctx)
-+{
-+ int error, fd;
-+ struct inode *inode;
-+ struct file *file;
-+
-+ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
-+ if (error)
-+ return error;
-+
-+ /* associate the file with the IO context */
-+ file->private_data = ioctx;
-+ ioctx->file = file;
-+ init_waitqueue_head(&ioctx->poll_wait);
-+ return fd;
-+}
-+#endif
-+
-+
- /* sys_io_setup:
- * Create an aio_context capable of receiving at least nr_events.
- * ctxp must not point to an aio_context that already exists, and
-@@ -1251,18 +1336,30 @@ static void io_destroy(struct kioctx *io
- * resources are available. May fail with -EFAULT if an invalid
- * pointer is passed for ctxp. Will fail with -ENOSYS if not
- * implemented.
-+ *
-+ * To request a selectable fd, the user context has to be initialized
-+ * to 1, instead of 0, and the return value is the fd.
-+ * This keeps the system call compatible, since a non-zero value
-+ * was not allowed so far.
- */
- asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
- {
- struct kioctx *ioctx = NULL;
- unsigned long ctx;
- long ret;
-+ int make_fd = 0;
-
- ret = get_user(ctx, ctxp);
- if (unlikely(ret))
- goto out;
-
- ret = -EINVAL;
-+#ifdef CONFIG_EPOLL
-+ if (ctx == 1) {
-+ make_fd = 1;
-+ ctx = 0;
-+ }
-+#endif
- if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
- ctx, nr_events);
-@@ -1273,8 +1370,12 @@ asmlinkage long sys_io_setup(unsigned nr
- ret = PTR_ERR(ioctx);
- if (!IS_ERR(ioctx)) {
- ret = put_user(ioctx->user_id, ctxp);
-- if (!ret)
-- return 0;
-+#ifdef CONFIG_EPOLL
-+ if (make_fd && ret >= 0)
-+ ret = make_aio_fd(ioctx);
-+#endif
-+ if (ret >= 0)
-+ return ret;
-
- get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
- io_destroy(ioctx);
-diff -pruN ../orig-linux-2.6.16.29/fs/eventpoll.c ./fs/eventpoll.c
---- ../orig-linux-2.6.16.29/fs/eventpoll.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./fs/eventpoll.c 2006-09-19 13:58:49.000000000 +0100
-@@ -235,8 +235,6 @@ struct ep_pqueue {
-
- static void ep_poll_safewake_init(struct poll_safewake *psw);
- static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
--static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-- struct eventpoll *ep);
- static int ep_alloc(struct eventpoll **pep);
- static void ep_free(struct eventpoll *ep);
- static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
-@@ -266,7 +264,7 @@ static int ep_events_transfer(struct eve
- static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
- int maxevents, long timeout);
- static int eventpollfs_delete_dentry(struct dentry *dentry);
--static struct inode *ep_eventpoll_inode(void);
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops);
- static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data);
-@@ -525,7 +523,7 @@ asmlinkage long sys_epoll_create(int siz
- * Creates all the items needed to setup an eventpoll file. That is,
- * a file structure, and inode and a free file descriptor.
- */
-- error = ep_getfd(&fd, &inode, &file, ep);
-+ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops);
- if (error)
- goto eexit_2;
-
-@@ -710,8 +708,8 @@ eexit_1:
- /*
- * Creates the file descriptor to be used by the epoll interface.
- */
--static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-- struct eventpoll *ep)
-+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, struct file_operations *fops)
- {
- struct qstr this;
- char name[32];
-@@ -727,7 +725,7 @@ static int ep_getfd(int *efd, struct ino
- goto eexit_1;
-
- /* Allocates an inode from the eventpoll file system */
-- inode = ep_eventpoll_inode();
-+ inode = ep_eventpoll_inode(fops);
- error = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto eexit_2;
-@@ -758,7 +756,7 @@ static int ep_getfd(int *efd, struct ino
-
- file->f_pos = 0;
- file->f_flags = O_RDONLY;
-- file->f_op = &eventpoll_fops;
-+ file->f_op = fops;
- file->f_mode = FMODE_READ;
- file->f_version = 0;
- file->private_data = ep;
-@@ -1574,7 +1572,7 @@ static int eventpollfs_delete_dentry(str
- }
-
-
--static struct inode *ep_eventpoll_inode(void)
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops)
- {
- int error = -ENOMEM;
- struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
-@@ -1582,7 +1580,7 @@ static struct inode *ep_eventpoll_inode(
- if (!inode)
- goto eexit_1;
-
-- inode->i_fop = &eventpoll_fops;
-+ inode->i_fop = fops;
-
- /*
- * Mark the inode dirty from the very beginning,
-diff -pruN ../orig-linux-2.6.16.29/include/linux/aio.h ./include/linux/aio.h
---- ../orig-linux-2.6.16.29/include/linux/aio.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/aio.h 2006-09-19 13:58:49.000000000 +0100
-@@ -191,6 +191,11 @@ struct kioctx {
- struct aio_ring_info ring_info;
-
- struct work_struct wq;
-+#ifdef CONFIG_EPOLL
-+ // poll integration
-+ wait_queue_head_t poll_wait;
-+ struct file *file;
-+#endif
- };
-
- /* prototypes */
-diff -pruN ../orig-linux-2.6.16.29/include/linux/eventpoll.h ./include/linux/eventpoll.h
---- ../orig-linux-2.6.16.29/include/linux/eventpoll.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/eventpoll.h 2006-09-19 13:58:49.000000000 +0100
-@@ -86,6 +86,12 @@ static inline void eventpoll_release(str
- }
-
-
-+/*
-+ * called by aio code to create an fd that can poll the aio event queue
-+ */
-+struct eventpoll;
-+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, struct file_operations *fops);
- #else
-
- static inline void eventpoll_init_file(struct file *file) {}
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/drivers/base/bus.c ./drivers/base/bus.c
---- ../orig-linux-2.6.16.29/drivers/base/bus.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/base/bus.c 2006-09-19 13:58:54.000000000 +0100
-@@ -188,6 +188,11 @@ static ssize_t driver_bind(struct device
- up(&dev->sem);
- if (dev->parent)
- up(&dev->parent->sem);
-+
-+ if (err > 0) /* success */
-+ err = count;
-+ else if (err == 0) /* driver didn't accept device */
-+ err = -ENODEV;
- }
- put_device(dev);
- put_bus(bus);
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/kernel/timer.c ./kernel/timer.c
---- ../orig-linux-2.6.16.29/kernel/timer.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./kernel/timer.c 2006-09-19 13:58:58.000000000 +0100
-@@ -555,6 +555,22 @@ found:
- }
- spin_unlock(&base->t_base.lock);
-
-+ /*
-+ * It can happen that other CPUs service timer IRQs and increment
-+ * jiffies, but we have not yet got a local timer tick to process
-+ * the timer wheels. In that case, the expiry time can be before
-+ * jiffies, but since the high-resolution timer here is relative to
-+ * jiffies, the default expression when high-resolution timers are
-+ * not active,
-+ *
-+ * time_before(MAX_JIFFY_OFFSET + jiffies, expires)
-+ *
-+ * would falsely evaluate to true. If that is the case, just
-+ * return jiffies so that we can immediately fire the local timer
-+ */
-+ if (time_before(expires, jiffies))
-+ return jiffies;
-+
- if (time_before(hr_expires, expires))
- return hr_expires;
-
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
---- ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/ide/ide-lib.c 2006-09-19 13:59:03.000000000 +0100
-@@ -410,10 +410,10 @@ void ide_toggle_bounce(ide_drive_t *driv
- {
- u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
-
-- if (!PCI_DMA_BUS_IS_PHYS) {
-- addr = BLK_BOUNCE_ANY;
-- } else if (on && drive->media == ide_disk) {
-- if (HWIF(drive)->pci_dev)
-+ if (on && drive->media == ide_disk) {
-+ if (!PCI_DMA_BUS_IS_PHYS)
-+ addr = BLK_BOUNCE_ANY;
-+ else if (HWIF(drive)->pci_dev)
- addr = HWIF(drive)->pci_dev->dma_mask;
- }
-
+++ /dev/null
-From: Eric W. Biederman <ebiederm@xmission.com>
-Date: Sun, 30 Jul 2006 10:03:20 +0000 (-0700)
-Subject: [PATCH] machine_kexec.c: Fix the description of segment handling
-X-Git-Tag: v2.6.18-rc4
-X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2a8a3d5b65e86ec1dfef7d268c64a909eab94af7
-
-[PATCH] machine_kexec.c: Fix the description of segment handling
-
-One of my original comments in machine_kexec was unclear
-and this should fix it.
-
-Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-Cc: Andi Kleen <ak@muc.de>
-Acked-by: Horms <horms@verge.net.au>
-Signed-off-by: Andrew Morton <akpm@osdl.org>
-Signed-off-by: Linus Torvalds <torvalds@osdl.org>
----
-
---- a/arch/i386/kernel/machine_kexec.c
-+++ b/arch/i386/kernel/machine_kexec.c
-@@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kim
- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
- relocate_new_kernel_size);
-
-- /* The segment registers are funny things, they are
-- * automatically loaded from a table, in memory wherever you
-- * set them to a specific selector, but this table is never
-- * accessed again you set the segment to a different selector.
-- *
-- * The more common model is are caches where the behide
-- * the scenes work is done, but is also dropped at arbitrary
-- * times.
-+ /* The segment registers are funny things, they have both a
-+ * visible and an invisible part. Whenever the visible part is
-+ * set to a specific selector, the invisible part is loaded
-+ * with from a table in memory. At no other time is the
-+ * descriptor table in memory accessed.
- *
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
---- a/arch/x86_64/kernel/machine_kexec.c
-+++ b/arch/x86_64/kernel/machine_kexec.c
-@@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kim
- __flush_tlb();
-
-
-- /* The segment registers are funny things, they are
-- * automatically loaded from a table, in memory wherever you
-- * set them to a specific selector, but this table is never
-- * accessed again unless you set the segment to a different selector.
-- *
-- * The more common model are caches where the behide
-- * the scenes work is done, but is also dropped at arbitrary
-- * times.
-+ /* The segment registers are funny things, they have both a
-+ * visible and an invisible part. Whenever the visible part is
-+ * set to a specific selector, the invisible part is loaded
-+ * with from a table in memory. At no other time is the
-+ * descriptor table in memory accessed.
- *
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
+++ /dev/null
-From: Tobias Klauser <tklauser@nuerscht.ch>
-Date: Mon, 26 Jun 2006 16:57:34 +0000 (+0200)
-Subject: Storage class should be first
-X-Git-Tag: v2.6.18-rc1
-X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2efe55a9cec8418f0e0cde3dc3787a42fddc4411
-
-Storage class should be first
-
-Storage class should be before const
-
-Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch>
-Signed-off-by: Adrian Bunk <bunk@stusta.de>
----
-
---- a/arch/i386/kernel/machine_kexec.c
-+++ b/arch/i386/kernel/machine_kexec.c
-@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*rel
- unsigned long start_address,
- unsigned int has_pae) ATTRIB_NORET;
-
--const extern unsigned char relocate_new_kernel[];
-+extern const unsigned char relocate_new_kernel[];
- extern void relocate_new_kernel_end(void);
--const extern unsigned int relocate_new_kernel_size;
-+extern const unsigned int relocate_new_kernel_size;
-
- /*
- * A architecture hook called to validate the
---- a/arch/powerpc/kernel/machine_kexec_32.c
-+++ b/arch/powerpc/kernel/machine_kexec_32.c
-@@ -30,8 +30,8 @@ typedef NORET_TYPE void (*relocate_new_k
- */
- void default_machine_kexec(struct kimage *image)
- {
-- const extern unsigned char relocate_new_kernel[];
-- const extern unsigned int relocate_new_kernel_size;
-+ extern const unsigned char relocate_new_kernel[];
-+ extern const unsigned int relocate_new_kernel_size;
- unsigned long page_list;
- unsigned long reboot_code_buffer, reboot_code_buffer_phys;
- relocate_new_kernel_t rnk;
---- a/arch/ppc/kernel/machine_kexec.c
-+++ b/arch/ppc/kernel/machine_kexec.c
-@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
- unsigned long reboot_code_buffer,
- unsigned long start_address) ATTRIB_NORET;
-
--const extern unsigned char relocate_new_kernel[];
--const extern unsigned int relocate_new_kernel_size;
-+extern const unsigned char relocate_new_kernel[];
-+extern const unsigned int relocate_new_kernel_size;
-
- void machine_shutdown(void)
- {
---- a/arch/s390/kernel/machine_kexec.c
-+++ b/arch/s390/kernel/machine_kexec.c
-@@ -27,8 +27,8 @@ static void kexec_halt_all_cpus(void *);
-
- typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long);
-
--const extern unsigned char relocate_kernel[];
--const extern unsigned long long relocate_kernel_len;
-+extern const unsigned char relocate_kernel[];
-+extern const unsigned long long relocate_kernel_len;
-
- int
- machine_kexec_prepare(struct kimage *image)
---- a/arch/sh/kernel/machine_kexec.c
-+++ b/arch/sh/kernel/machine_kexec.c
-@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
- unsigned long start_address,
- unsigned long vbr_reg) ATTRIB_NORET;
-
--const extern unsigned char relocate_new_kernel[];
--const extern unsigned int relocate_new_kernel_size;
-+extern const unsigned char relocate_new_kernel[];
-+extern const unsigned int relocate_new_kernel_size;
- extern void *gdb_vbr_vector;
-
- /*
---- a/arch/x86_64/kernel/machine_kexec.c
-+++ b/arch/x86_64/kernel/machine_kexec.c
-@@ -149,8 +149,8 @@ typedef NORET_TYPE void (*relocate_new_k
- unsigned long start_address,
- unsigned long pgtable) ATTRIB_NORET;
-
--const extern unsigned char relocate_new_kernel[];
--const extern unsigned long relocate_new_kernel_size;
-+extern const unsigned char relocate_new_kernel[];
-+extern const unsigned long relocate_new_kernel_size;
-
- int machine_kexec_prepare(struct kimage *image)
- {
+++ /dev/null
-From: Magnus Damm <magnus@valinux.co.jp>
-Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
-Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386)
-X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9
-
-[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
-
-kexec: Avoid overwriting the current pgd (V4, i386)
-
-This patch upgrades the i386-specific kexec code to avoid overwriting the
-current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
-to start a secondary kernel that dumps the memory of the previous kernel.
-
-The code introduces a new set of page tables. These tables are used to provide
-an executable identity mapping without overwriting the current pgd.
-
-Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
-Signed-off-by: Andi Kleen <ak@suse.de>
----
-
---- a/arch/i386/kernel/machine_kexec.c
-+++ b/arch/i386/kernel/machine_kexec.c
-@@ -21,70 +21,13 @@
- #include <asm/system.h>
-
- #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
--
--#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
--#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
--#define L2_ATTR (_PAGE_PRESENT)
--
--#define LEVEL0_SIZE (1UL << 12UL)
--
--#ifndef CONFIG_X86_PAE
--#define LEVEL1_SIZE (1UL << 22UL)
--static u32 pgtable_level1[1024] PAGE_ALIGNED;
--
--static void identity_map_page(unsigned long address)
--{
-- unsigned long level1_index, level2_index;
-- u32 *pgtable_level2;
--
-- /* Find the current page table */
-- pgtable_level2 = __va(read_cr3());
--
-- /* Find the indexes of the physical address to identity map */
-- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-- level2_index = address / LEVEL1_SIZE;
--
-- /* Identity map the page table entry */
-- pgtable_level1[level1_index] = address | L0_ATTR;
-- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
--
-- /* Flush the tlb so the new mapping takes effect.
-- * Global tlb entries are not flushed but that is not an issue.
-- */
-- load_cr3(pgtable_level2);
--}
--
--#else
--#define LEVEL1_SIZE (1UL << 21UL)
--#define LEVEL2_SIZE (1UL << 30UL)
--static u64 pgtable_level1[512] PAGE_ALIGNED;
--static u64 pgtable_level2[512] PAGE_ALIGNED;
--
--static void identity_map_page(unsigned long address)
--{
-- unsigned long level1_index, level2_index, level3_index;
-- u64 *pgtable_level3;
--
-- /* Find the current page table */
-- pgtable_level3 = __va(read_cr3());
--
-- /* Find the indexes of the physical address to identity map */
-- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-- level3_index = address / LEVEL2_SIZE;
--
-- /* Identity map the page table entry */
-- pgtable_level1[level1_index] = address | L0_ATTR;
-- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-- set_64bit(&pgtable_level3[level3_index],
-- __pa(pgtable_level2) | L2_ATTR);
--
-- /* Flush the tlb so the new mapping takes effect.
-- * Global tlb entries are not flushed but that is not an issue.
-- */
-- load_cr3(pgtable_level3);
--}
-+static u32 kexec_pgd[1024] PAGE_ALIGNED;
-+#ifdef CONFIG_X86_PAE
-+static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-+static u32 kexec_pmd1[1024] PAGE_ALIGNED;
- #endif
-+static u32 kexec_pte0[1024] PAGE_ALIGNED;
-+static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
- static void set_idt(void *newidt, __u16 limit)
- {
-@@ -128,16 +71,6 @@ static void load_segments(void)
- #undef __STR
- }
-
--typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
-- unsigned long indirection_page,
-- unsigned long reboot_code_buffer,
-- unsigned long start_address,
-- unsigned int has_pae) ATTRIB_NORET;
--
--extern const unsigned char relocate_new_kernel[];
--extern void relocate_new_kernel_end(void);
--extern const unsigned int relocate_new_kernel_size;
--
- /*
- * A architecture hook called to validate the
- * proposed image and prepare the control pages
-@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage
- */
- NORET_TYPE void machine_kexec(struct kimage *image)
- {
-- unsigned long page_list;
-- unsigned long reboot_code_buffer;
--
-- relocate_new_kernel_t rnk;
-+ unsigned long page_list[PAGES_NR];
-+ void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
-- /* Compute some offsets */
-- reboot_code_buffer = page_to_pfn(image->control_code_page)
-- << PAGE_SHIFT;
-- page_list = image->head;
--
-- /* Set up an identity mapping for the reboot_code_buffer */
-- identity_map_page(reboot_code_buffer);
--
-- /* copy it out */
-- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-- relocate_new_kernel_size);
-+ control_page = page_address(image->control_code_page);
-+ memcpy(control_page, relocate_kernel, PAGE_SIZE);
-+
-+ page_list[PA_CONTROL_PAGE] = __pa(control_page);
-+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-+ page_list[PA_PGD] = __pa(kexec_pgd);
-+ page_list[VA_PGD] = (unsigned long)kexec_pgd;
-+#ifdef CONFIG_X86_PAE
-+ page_list[PA_PMD_0] = __pa(kexec_pmd0);
-+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-+ page_list[PA_PMD_1] = __pa(kexec_pmd1);
-+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-+#endif
-+ page_list[PA_PTE_0] = __pa(kexec_pte0);
-+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-+ page_list[PA_PTE_1] = __pa(kexec_pte1);
-+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
- /* The segment registers are funny things, they have both a
- * visible and an invisible part. Whenever the visible part is
-@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim
- set_idt(phys_to_virt(0),0);
-
- /* now call it */
-- rnk = (relocate_new_kernel_t) reboot_code_buffer;
-- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
-+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-+ image->start, cpu_has_pae);
- }
-
- /* crashkernel=size@addr specifies the location to reserve for
---- a/arch/i386/kernel/relocate_kernel.S
-+++ b/arch/i386/kernel/relocate_kernel.S
-@@ -7,16 +7,138 @@
- */
-
- #include <linux/linkage.h>
-+#include <asm/page.h>
-+#include <asm/kexec.h>
-+
-+/*
-+ * Must be relocatable PIC code callable as a C function
-+ */
-+
-+#define PTR(x) (x << 2)
-+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-+#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
-+
-+ .text
-+ .align PAGE_ALIGNED
-+ .globl relocate_kernel
-+relocate_kernel:
-+ movl 8(%esp), %ebp /* list of pages */
-+
-+#ifdef CONFIG_X86_PAE
-+ /* map the control page at its virtual address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xc0000000, %eax
-+ shrl $27, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PMD_0)(%ebp), %edx
-+ orl $PAE_PGD_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PMD_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x3fe00000, %eax
-+ shrl $18, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_0)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x001ff000, %eax
-+ shrl $9, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xc0000000, %eax
-+ shrl $27, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PMD_1)(%ebp), %edx
-+ orl $PAE_PGD_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PMD_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x3fe00000, %eax
-+ shrl $18, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_1)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x001ff000, %eax
-+ shrl $9, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+#else
-+ /* map the control page at its virtual address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xffc00000, %eax
-+ shrl $20, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_0)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_0)(%ebp), %edi
-+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x003ff000, %eax
-+ shrl $10, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movl PTR(VA_PGD)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0xffc00000, %eax
-+ shrl $20, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_PTE_1)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+
-+ movl PTR(VA_PTE_1)(%ebp), %edi
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
-+ andl $0x003ff000, %eax
-+ shrl $10, %eax
-+ addl %edi, %eax
-+
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
-+ orl $PAGE_ATTR, %edx
-+ movl %edx, (%eax)
-+#endif
-
-- /*
-- * Must be relocatable PIC code callable as a C function, that once
-- * it starts can not use the previous processes stack.
-- */
-- .globl relocate_new_kernel
- relocate_new_kernel:
- /* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
-- movl 8(%esp), %ebp /* reboot_code_buffer */
-+ movl 8(%esp), %ebp /* list of pages */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
-
-@@ -24,11 +146,26 @@ relocate_new_kernel:
- pushl $0
- popfl
-
-- /* set a new stack at the bottom of our page... */
-- lea 4096(%ebp), %esp
-+ /* get physical address of control page now */
-+ /* this is impossible after page table switch */
-+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
-+
-+ /* switch to new set of page tables */
-+ movl PTR(PA_PGD)(%ebp), %eax
-+ movl %eax, %cr3
-+
-+ /* setup a new stack at the end of the physical control page */
-+ lea 4096(%edi), %esp
-
-- /* store the parameters back on the stack */
-- pushl %edx /* store the start address */
-+ /* jump to identity mapped page */
-+ movl %edi, %eax
-+ addl $(identity_mapped - relocate_kernel), %eax
-+ pushl %eax
-+ ret
-+
-+identity_mapped:
-+ /* store the start address on the stack */
-+ pushl %edx
-
- /* Set cr0 to a known state:
- * 31 0 == Paging disabled
-@@ -113,8 +250,3 @@ relocate_new_kernel:
- xorl %edi, %edi
- xorl %ebp, %ebp
- ret
--relocate_new_kernel_end:
--
-- .globl relocate_new_kernel_size
--relocate_new_kernel_size:
-- .long relocate_new_kernel_end - relocate_new_kernel
---- a/include/asm-i386/kexec.h
-+++ b/include/asm-i386/kexec.h
-@@ -1,6 +1,26 @@
- #ifndef _I386_KEXEC_H
- #define _I386_KEXEC_H
-
-+#define PA_CONTROL_PAGE 0
-+#define VA_CONTROL_PAGE 1
-+#define PA_PGD 2
-+#define VA_PGD 3
-+#define PA_PTE_0 4
-+#define VA_PTE_0 5
-+#define PA_PTE_1 6
-+#define VA_PTE_1 7
-+#ifdef CONFIG_X86_PAE
-+#define PA_PMD_0 8
-+#define VA_PMD_0 9
-+#define PA_PMD_1 10
-+#define VA_PMD_1 11
-+#define PAGES_NR 12
-+#else
-+#define PAGES_NR 8
-+#endif
-+
-+#ifndef __ASSEMBLY__
-+
- #include <asm/fixmap.h>
- #include <asm/ptrace.h>
- #include <asm/string.h>
-@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
- newregs->eip = (unsigned long)current_text_addr();
- }
- }
-+asmlinkage NORET_TYPE void
-+relocate_kernel(unsigned long indirection_page,
-+ unsigned long control_page,
-+ unsigned long start_address,
-+ unsigned int has_pae) ATTRIB_NORET;
-+
-+#endif /* __ASSEMBLY__ */
-
- #endif /* _I386_KEXEC_H */
+++ /dev/null
-From: Magnus Damm <magnus@valinux.co.jp>
-Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
-Subject: [PATCH] Avoid overwriting the current pgd (V4, x86_64)
-X-Git-Tag: v2.6.19-rc1
-X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f
-
-[PATCH] Avoid overwriting the current pgd (V4, x86_64)
-
-kexec: Avoid overwriting the current pgd (V4, x86_64)
-
-This patch upgrades the x86_64-specific kexec code to avoid overwriting the
-current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
-to start a secondary kernel that dumps the memory of the previous kernel.
-
-The code introduces a new set of page tables. These tables are used to provide
-an executable identity mapping without overwriting the current pgd.
-
-Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
-Signed-off-by: Andi Kleen <ak@suse.de>
----
-
---- a/arch/x86_64/kernel/machine_kexec.c
-+++ b/arch/x86_64/kernel/machine_kexec.c
-@@ -15,6 +15,15 @@
- #include <asm/mmu_context.h>
- #include <asm/io.h>
-
-+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-+static u64 kexec_pgd[512] PAGE_ALIGNED;
-+static u64 kexec_pud0[512] PAGE_ALIGNED;
-+static u64 kexec_pmd0[512] PAGE_ALIGNED;
-+static u64 kexec_pte0[512] PAGE_ALIGNED;
-+static u64 kexec_pud1[512] PAGE_ALIGNED;
-+static u64 kexec_pmd1[512] PAGE_ALIGNED;
-+static u64 kexec_pte1[512] PAGE_ALIGNED;
-+
- static void init_level2_page(pmd_t *level2p, unsigned long addr)
- {
- unsigned long end_addr;
-@@ -144,32 +153,19 @@ static void load_segments(void)
- );
- }
-
--typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
-- unsigned long control_code_buffer,
-- unsigned long start_address,
-- unsigned long pgtable) ATTRIB_NORET;
--
--extern const unsigned char relocate_new_kernel[];
--extern const unsigned long relocate_new_kernel_size;
--
- int machine_kexec_prepare(struct kimage *image)
- {
-- unsigned long start_pgtable, control_code_buffer;
-+ unsigned long start_pgtable;
- int result;
-
- /* Calculate the offsets */
- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-- control_code_buffer = start_pgtable + PAGE_SIZE;
-
- /* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, start_pgtable);
- if (result)
- return result;
-
-- /* Place the code in the reboot code buffer */
-- memcpy(__va(control_code_buffer), relocate_new_kernel,
-- relocate_new_kernel_size);
--
- return 0;
- }
-
-@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
- */
- NORET_TYPE void machine_kexec(struct kimage *image)
- {
-- unsigned long page_list;
-- unsigned long control_code_buffer;
-- unsigned long start_pgtable;
-- relocate_new_kernel_t rnk;
-+ unsigned long page_list[PAGES_NR];
-+ void *control_page;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
-- /* Calculate the offsets */
-- page_list = image->head;
-- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-- control_code_buffer = start_pgtable + PAGE_SIZE;
-+ control_page = page_address(image->control_code_page) + PAGE_SIZE;
-+ memcpy(control_page, relocate_kernel, PAGE_SIZE);
-
-- /* Set the low half of the page table to my identity mapped
-- * page table for kexec. Leave the high half pointing at the
-- * kernel pages. Don't bother to flush the global pages
-- * as that will happen when I fully switch to my identity mapped
-- * page table anyway.
-- */
-- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
-- __flush_tlb();
-+ page_list[PA_CONTROL_PAGE] = __pa(control_page);
-+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-+ page_list[PA_PGD] = __pa(kexec_pgd);
-+ page_list[VA_PGD] = (unsigned long)kexec_pgd;
-+ page_list[PA_PUD_0] = __pa(kexec_pud0);
-+ page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
-+ page_list[PA_PMD_0] = __pa(kexec_pmd0);
-+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-+ page_list[PA_PTE_0] = __pa(kexec_pte0);
-+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-+ page_list[PA_PUD_1] = __pa(kexec_pud1);
-+ page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
-+ page_list[PA_PMD_1] = __pa(kexec_pmd1);
-+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-+ page_list[PA_PTE_1] = __pa(kexec_pte1);
-+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
-+ page_list[PA_TABLE_PAGE] =
-+ (unsigned long)__pa(page_address(image->control_code_page));
-
- /* The segment registers are funny things, they have both a
- * visible and an invisible part. Whenever the visible part is
-@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kim
- */
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
-+
- /* now call it */
-- rnk = (relocate_new_kernel_t) control_code_buffer;
-- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
-+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-+ image->start);
- }
-
- /* crashkernel=size@addr specifies the location to reserve for
---- a/arch/x86_64/kernel/relocate_kernel.S
-+++ b/arch/x86_64/kernel/relocate_kernel.S
-@@ -7,31 +7,169 @@
- */
-
- #include <linux/linkage.h>
-+#include <asm/page.h>
-+#include <asm/kexec.h>
-
-- /*
-- * Must be relocatable PIC code callable as a C function, that once
-- * it starts can not use the previous processes stack.
-- */
-- .globl relocate_new_kernel
-+/*
-+ * Must be relocatable PIC code callable as a C function
-+ */
-+
-+#define PTR(x) (x << 3)
-+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-+
-+ .text
-+ .align PAGE_ALIGNED
- .code64
-+ .globl relocate_kernel
-+relocate_kernel:
-+ /* %rdi indirection_page
-+ * %rsi page_list
-+ * %rdx start address
-+ */
-+
-+ /* map the control page at its virtual address */
-+
-+ movq $0x0000ff8000000000, %r10 /* mask */
-+ mov $(39 - 3), %cl /* bits to shift */
-+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PGD)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PUD_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PUD_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PMD_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PMD_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PTE_0)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PTE_0)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ /* identity map the control page at its physical address */
-+
-+ movq $0x0000ff8000000000, %r10 /* mask */
-+ mov $(39 - 3), %cl /* bits to shift */
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PGD)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PUD_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PUD_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PMD_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PMD_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_PTE_1)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
-+ shrq $9, %r10
-+ sub $9, %cl
-+
-+ movq %r11, %r9
-+ andq %r10, %r9
-+ shrq %cl, %r9
-+
-+ movq PTR(VA_PTE_1)(%rsi), %r8
-+ addq %r8, %r9
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+ orq $PAGE_ATTR, %r8
-+ movq %r8, (%r9)
-+
- relocate_new_kernel:
-- /* %rdi page_list
-- * %rsi reboot_code_buffer
-+ /* %rdi indirection_page
-+ * %rsi page_list
- * %rdx start address
-- * %rcx page_table
-- * %r8 arg5
-- * %r9 arg6
- */
-
- /* zero out flags, and disable interrupts */
- pushq $0
- popfq
-
-- /* set a new stack at the bottom of our page... */
-- lea 4096(%rsi), %rsp
-+ /* get physical address of control page now */
-+ /* this is impossible after page table switch */
-+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
-+
-+ /* get physical address of page table now too */
-+ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
-+
-+ /* switch to new set of page tables */
-+ movq PTR(PA_PGD)(%rsi), %r9
-+ movq %r9, %cr3
-+
-+ /* setup a new stack at the end of the physical control page */
-+ lea 4096(%r8), %rsp
-+
-+ /* jump to identity mapped page */
-+ addq $(identity_mapped - relocate_kernel), %r8
-+ pushq %r8
-+ ret
-
-- /* store the parameters back on the stack */
-- pushq %rdx /* store the start address */
-+identity_mapped:
-+ /* store the start address on the stack */
-+ pushq %rdx
-
- /* Set cr0 to a known state:
- * 31 1 == Paging enabled
-@@ -136,8 +274,3 @@ relocate_new_kernel:
- xorq %r15, %r15
-
- ret
--relocate_new_kernel_end:
--
-- .globl relocate_new_kernel_size
--relocate_new_kernel_size:
-- .quad relocate_new_kernel_end - relocate_new_kernel
---- a/include/asm-x86_64/kexec.h
-+++ b/include/asm-x86_64/kexec.h
-@@ -1,6 +1,27 @@
- #ifndef _X86_64_KEXEC_H
- #define _X86_64_KEXEC_H
-
-+#define PA_CONTROL_PAGE 0
-+#define VA_CONTROL_PAGE 1
-+#define PA_PGD 2
-+#define VA_PGD 3
-+#define PA_PUD_0 4
-+#define VA_PUD_0 5
-+#define PA_PMD_0 6
-+#define VA_PMD_0 7
-+#define PA_PTE_0 8
-+#define VA_PTE_0 9
-+#define PA_PUD_1 10
-+#define VA_PUD_1 11
-+#define PA_PMD_1 12
-+#define VA_PMD_1 13
-+#define PA_PTE_1 14
-+#define VA_PTE_1 15
-+#define PA_TABLE_PAGE 16
-+#define PAGES_NR 17
-+
-+#ifndef __ASSEMBLY__
-+
- #include <linux/string.h>
-
- #include <asm/page.h>
-@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
- newregs->rip = (unsigned long)current_text_addr();
- }
- }
-+
-+NORET_TYPE void
-+relocate_kernel(unsigned long indirection_page,
-+ unsigned long page_list,
-+ unsigned long start_address) ATTRIB_NORET;
-+
-+#endif /* __ASSEMBLY__ */
-+
- #endif /* _X86_64_KEXEC_H */
+++ /dev/null
-commit dbaab49f92ff6ae6255762a948375e4036cbdbd2
-Author: Vivek Goyal <vgoyal@in.ibm.com>
-Date: Sat Oct 21 18:37:03 2006 +0200
-
- [PATCH] x86-64: Overlapping program headers in physical addr space fix
-
- o A recent change to the vmlinux.lds.S file broke kexec: the resulting
- vmlinux program headers now overlap in physical address space.
-
- o Now all the vsyscall-related sections are placed after data, and after
- that mostly init-data sections are placed. To avoid physical overlap
- among phdrs, there are three possible solutions:
- - place vsyscall sections in the data phdr instead of user,
- - move vsyscall sections after the init data in bss, or
- - create another phdr, say data.init, and move all the sections
- after vsyscall into this new phdr.
-
- o This patch implements the third solution.
-
- Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
- Signed-off-by: Andi Kleen <ak@suse.de>
- Cc: Magnus Damm <magnus@valinux.co.jp>
- Cc: Andi Kleen <ak@suse.de>
- Cc: "Eric W. Biederman" <ebiederm@xmission.com>
- Signed-off-by: Andrew Morton <akpm@osdl.org>
-
-diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
-index b9df2ab..1283614 100644
---- a/arch/x86_64/kernel/vmlinux.lds.S
-+++ b/arch/x86_64/kernel/vmlinux.lds.S
-@@ -17,6 +17,7 @@ PHDRS {
- text PT_LOAD FLAGS(5); /* R_E */
- data PT_LOAD FLAGS(7); /* RWE */
- user PT_LOAD FLAGS(7); /* RWE */
-+ data.init PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
- }
- SECTIONS
-@@ -131,7 +132,7 @@ SECTIONS
- . = ALIGN(8192); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
-- } :data
-+ }:data.init
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c
---- ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/traps.c 2006-09-19 13:59:06.000000000 +0100
-@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch
-
- static void io_check_error(unsigned char reason, struct pt_regs * regs)
- {
-- unsigned long i;
--
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
-- reason = (reason & 0xf) | 8;
-- outb(reason, 0x61);
-- i = 2000;
-- while (--i) udelay(1000);
-- reason &= ~8;
-- outb(reason, 0x61);
-+ clear_io_check_error(reason);
- }
-
- static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h
---- ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/mach-default/mach_traps.h 2006-09-19 13:59:06.000000000 +0100
-@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
- outb(reason, 0x61);
- }
-
-+static inline void clear_io_check_error(unsigned char reason)
-+{
-+ unsigned long i;
-+
-+ reason = (reason & 0xf) | 8;
-+ outb(reason, 0x61);
-+ i = 2000;
-+ while (--i) udelay(1000);
-+ reason &= ~8;
-+ outb(reason, 0x61);
-+}
-+
- static inline unsigned char get_nmi_reason(void)
- {
- return inb(0x61);
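For orientation: port 0x61 is the legacy PC NMI status-and-control port. A hedged summary of the bits the helpers above touch (the macro names are mine, not from the patch; writing a 1 to a gate bit both masks and clears the latched NMI source, which is why clear_io_check_error() sets bit 3, waits, then clears it again):

    #define NMI_REASON_PORT   0x61
    #define NMI_PARITY_GATE   0x04  /* write: mask + clear memory-parity NMI */
    #define NMI_IOCHK_GATE    0x08  /* write: mask + clear IOCHK (IOCK) NMI  */
    #define NMI_IOCHK_LATCH   0x40  /* read:  IOCHK NMI pending              */
    #define NMI_PARITY_LATCH  0x80  /* read:  memory-parity NMI pending      */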
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/net/ipv6/addrconf.c ./net/ipv6/addrconf.c
---- ../orig-linux-2.6.16.29/net/ipv6/addrconf.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./net/ipv6/addrconf.c 2006-09-19 13:59:11.000000000 +0100
-@@ -2471,6 +2471,7 @@ static void addrconf_dad_start(struct in
- spin_lock_bh(&ifp->lock);
-
- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-+ !(dev->flags&IFF_MULTICAST) ||
- !(ifp->flags&IFA_F_TENTATIVE)) {
- ifp->flags &= ~IFA_F_TENTATIVE;
- spin_unlock_bh(&ifp->lock);
-@@ -2555,6 +2556,7 @@ static void addrconf_dad_completed(struc
- if (ifp->idev->cnf.forwarding == 0 &&
- ifp->idev->cnf.rtr_solicits > 0 &&
- (dev->flags&IFF_LOOPBACK) == 0 &&
-+ (dev->flags & IFF_MULTICAST) &&
- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
- struct in6_addr all_routers;
-
+++ /dev/null
-commit e905914f96e11862b130dd229f73045dad9a34e8
-Author: Jeremy Fitzhardinge <jeremy@xensource.com>
-Date: Sun Jun 25 05:49:17 2006 -0700
-
- [PATCH] Implement kasprintf
-
- Implement kasprintf, a kernel version of asprintf. This allocates the
- memory required for the formatted string, including the trailing '\0'.
- Returns NULL on allocation failure.
-
- Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
- Signed-off-by: Chris Wright <chrisw@sous-sol.org>
- Signed-off-by: Andrew Morton <akpm@osdl.org>
- Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-
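To make the contract concrete, a typical call site (base and index are illustrative):

    /* kasprintf() sizes the buffer with a first vsnprintf() pass, so the
     * caller never guesses a length; the result is kfree()d as usual. */
    char *name = kasprintf(GFP_KERNEL, "%s.%d", base, index);
    if (!name)
            return -ENOMEM;          /* NULL only on allocation failure */
    /* ... use name as an ordinary NUL-terminated string ... */
    kfree(name);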
-diff --git a/include/linux/kernel.h b/include/linux/kernel.h
-index 8c21aaa..3c5e4c2 100644
---- a/include/linux/kernel.h
-+++ b/include/linux/kernel.h
-@@ -117,6 +117,8 @@ extern int scnprintf(char * buf, size_t
- __attribute__ ((format (printf, 3, 4)));
- extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
- __attribute__ ((format (printf, 3, 0)));
-+extern char *kasprintf(gfp_t gfp, const char *fmt, ...)
-+ __attribute__ ((format (printf, 2, 3)));
-
- extern int sscanf(const char *, const char *, ...)
- __attribute__ ((format (scanf, 2, 3)));
-diff --git a/lib/vsprintf.c b/lib/vsprintf.c
-index f595947..797428a 100644
---- a/lib/vsprintf.c
-+++ b/lib/vsprintf.c
-@@ -849,3 +849,26 @@ int sscanf(const char * buf, const char
- }
-
- EXPORT_SYMBOL(sscanf);
-+
-+
-+/* Simplified asprintf. */
-+char *kasprintf(gfp_t gfp, const char *fmt, ...)
-+{
-+ va_list ap;
-+ unsigned int len;
-+ char *p;
-+
-+ va_start(ap, fmt);
-+ len = vsnprintf(NULL, 0, fmt, ap);
-+ va_end(ap);
-+
-+ p = kmalloc(len+1, gfp);
-+ if (!p)
-+ return NULL;
-+ va_start(ap, fmt);
-+ vsnprintf(p, len+1, fmt, ap);
-+ va_end(ap);
-+ return p;
-+}
-+
-+EXPORT_SYMBOL(kasprintf);
+++ /dev/null
-kexec: Move asm segment handling code to the assembly file (i386)
-
-This patch moves the idt, gdt, and segment handling code from machine_kexec.c
-to relocate_kernel.S. The main reason behind this move is to avoid code
-duplication in the Xen hypervisor. With this patch all code required to kexec
-is put on the control page.
-
-On top of that this patch also counts as a cleanup - I think it is much
-nicer to write assembly directly in assembly files than wrap inline assembly
-in C functions for no apparent reason.
-
-Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
----
-
- Applies to 2.6.19-rc1.
-
- machine_kexec.c | 59 -----------------------------------------------------
- relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----
- 2 files changed, 53 insertions(+), 64 deletions(-)
-
---- 0002/arch/i386/kernel/machine_kexec.c
-+++ work/arch/i386/kernel/machine_kexec.c 2006-10-05 15:49:08.000000000 +0900
-@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
- static u32 kexec_pte0[1024] PAGE_ALIGNED;
- static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
--static void set_idt(void *newidt, __u16 limit)
--{
-- struct Xgt_desc_struct curidt;
--
-- /* ia32 supports unaligned loads & stores */
-- curidt.size = limit;
-- curidt.address = (unsigned long)newidt;
--
-- load_idt(&curidt);
--};
--
--
--static void set_gdt(void *newgdt, __u16 limit)
--{
-- struct Xgt_desc_struct curgdt;
--
-- /* ia32 supports unaligned loads & stores */
-- curgdt.size = limit;
-- curgdt.address = (unsigned long)newgdt;
--
-- load_gdt(&curgdt);
--};
--
--static void load_segments(void)
--{
--#define __STR(X) #X
--#define STR(X) __STR(X)
--
-- __asm__ __volatile__ (
-- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
-- "\t1:\n"
-- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
-- "\tmovl %%eax,%%ds\n"
-- "\tmovl %%eax,%%es\n"
-- "\tmovl %%eax,%%fs\n"
-- "\tmovl %%eax,%%gs\n"
-- "\tmovl %%eax,%%ss\n"
-- ::: "eax", "memory");
--#undef STR
--#undef __STR
--}
--
- /*
- * An architecture hook called to validate the
- * proposed image and prepare the control pages
-@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim
- page_list[PA_PTE_1] = __pa(kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
-- /* The segment registers are funny things, they have both a
-- * visible and an invisible part. Whenever the visible part is
-- * set to a specific selector, the invisible part is loaded
-- * with from a table in memory. At no other time is the
-- * descriptor table in memory accessed.
-- *
-- * I take advantage of this here by force loading the
-- * segments, before I zap the gdt with an invalid value.
-- */
-- load_segments();
-- /* The gdt & idt are now invalid.
-- * If you want to load them you must set up your own idt & gdt.
-- */
-- set_gdt(phys_to_virt(0),0);
-- set_idt(phys_to_virt(0),0);
--
-- /* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start, cpu_has_pae);
- }
---- 0002/arch/i386/kernel/relocate_kernel.S
-+++ work/arch/i386/kernel/relocate_kernel.S 2006-10-05 16:03:21.000000000 +0900
-@@ -154,14 +154,45 @@ relocate_new_kernel:
- movl PTR(PA_PGD)(%ebp), %eax
- movl %eax, %cr3
-
-+ /* setup idt */
-+ movl %edi, %eax
-+ addl $(idt_48 - relocate_kernel), %eax
-+ lidtl (%eax)
-+
-+ /* setup gdt */
-+ movl %edi, %eax
-+ addl $(gdt - relocate_kernel), %eax
-+ movl %edi, %esi
-+ addl $((gdt_48 - relocate_kernel) + 2), %esi
-+ movl %eax, (%esi)
-+
-+ movl %edi, %eax
-+ addl $(gdt_48 - relocate_kernel), %eax
-+ lgdtl (%eax)
-+
-+ /* setup data segment registers */
-+ mov $(gdt_ds - gdt), %eax
-+ mov %eax, %ds
-+ mov %eax, %es
-+ mov %eax, %fs
-+ mov %eax, %gs
-+ mov %eax, %ss
-+
- /* setup a new stack at the end of the physical control page */
- lea 4096(%edi), %esp
-
-- /* jump to identity mapped page */
-- movl %edi, %eax
-- addl $(identity_mapped - relocate_kernel), %eax
-- pushl %eax
-- ret
-+ /* load new code segment and jump to identity mapped page */
-+ movl %edi, %esi
-+ xorl %eax, %eax
-+ pushl %eax
-+ pushl %esi
-+ pushl %eax
-+ movl $(gdt_cs - gdt), %eax
-+ pushl %eax
-+ movl %edi, %eax
-+ addl $(identity_mapped - relocate_kernel),%eax
-+ pushl %eax
-+ iretl
-
- identity_mapped:
- /* store the start address on the stack */
-@@ -250,3 +281,20 @@ identity_mapped:
- xorl %edi, %edi
- xorl %ebp, %ebp
- ret
-+
-+ .align 16
-+gdt:
-+ .quad 0x0000000000000000 /* NULL descriptor */
-+gdt_cs:
-+ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
-+gdt_ds:
-+ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
-+gdt_end:
-+
-+gdt_48:
-+ .word gdt_end - gdt - 1 /* limit */
-+ .long 0 /* base - filled in by code above */
-+
-+idt_48:
-+ .word 0 /* limit */
-+ .long 0 /* base */
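The hand-coded .quad descriptors above pack base, limit and flags into 64 bits. A stand-alone C sketch for decoding them (decode_desc() is my own helper, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Decode an x86 segment descriptor laid out as in the gdt above. */
    static void decode_desc(uint64_t d)
    {
            uint32_t limit = (d & 0xffff) | ((d >> 32) & 0xf0000);
            uint32_t base  = ((d >> 16) & 0xffffff) |
                             (((d >> 56) & 0xff) << 24);
            unsigned type  = (d >> 40) & 0xff;      /* P/DPL/S/type byte */
            unsigned flags = (d >> 52) & 0xf;       /* G/D/L/AVL nibble  */

            if (flags & 0x8)                        /* G: 4 KiB granularity */
                    limit = (limit << 12) | 0xfff;
            printf("base=%#x limit=%#x type=%#x flags=%#x\n",
                   base, limit, type, flags);
    }

    int main(void)
    {
            decode_desc(0x00cf9a000000ffffULL); /* gdt_cs: 4 GB ring-0 code */
            decode_desc(0x00cf92000000ffffULL); /* gdt_ds: 4 GB ring-0 data */
            return 0;
    }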
+++ /dev/null
-kexec: Move asm segment handling code to the assembly file (x86_64)
-
-This patch moves the idt, gdt, and segment handling code from machine_kexec.c
-to relocate_kernel.S. The main reason behind this move is to avoid code
-duplication in the Xen hypervisor. With this patch all code required to kexec
-is put on the control page.
-
-On top of that this patch also counts as a cleanup - I think it is much
-nicer to write assembly directly in assembly files than wrap inline assembly
-in C functions for no apparent reason.
-
-Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
----
-
- Applies to 2.6.19-rc1.
-
- machine_kexec.c | 58 -----------------------------------------------------
- relocate_kernel.S | 50 +++++++++++++++++++++++++++++++++++++++++----
- 2 files changed, 45 insertions(+), 63 deletions(-)
-
---- 0002/arch/x86_64/kernel/machine_kexec.c
-+++ work/arch/x86_64/kernel/machine_kexec.c 2006-10-05 16:15:49.000000000 +0900
-@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
- return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
- }
-
--static void set_idt(void *newidt, u16 limit)
--{
-- struct desc_ptr curidt;
--
-- /* x86-64 supports unaligned loads & stores */
-- curidt.size = limit;
-- curidt.address = (unsigned long)newidt;
--
-- __asm__ __volatile__ (
-- "lidtq %0\n"
-- : : "m" (curidt)
-- );
--};
--
--
--static void set_gdt(void *newgdt, u16 limit)
--{
-- struct desc_ptr curgdt;
--
-- /* x86-64 supports unaligned loads & stores */
-- curgdt.size = limit;
-- curgdt.address = (unsigned long)newgdt;
--
-- __asm__ __volatile__ (
-- "lgdtq %0\n"
-- : : "m" (curgdt)
-- );
--};
--
--static void load_segments(void)
--{
-- __asm__ __volatile__ (
-- "\tmovl %0,%%ds\n"
-- "\tmovl %0,%%es\n"
-- "\tmovl %0,%%ss\n"
-- "\tmovl %0,%%fs\n"
-- "\tmovl %0,%%gs\n"
-- : : "a" (__KERNEL_DS) : "memory"
-- );
--}
--
- int machine_kexec_prepare(struct kimage *image)
- {
- unsigned long start_pgtable;
-@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
- page_list[PA_TABLE_PAGE] =
- (unsigned long)__pa(page_address(image->control_code_page));
-
-- /* The segment registers are funny things, they have both a
-- * visible and an invisible part. Whenever the visible part is
-- * set to a specific selector, the invisible part is loaded
-- * from a table in memory. At no other time is the
-- * descriptor table in memory accessed.
-- *
-- * I take advantage of this here by force loading the
-- * segments, before I zap the gdt with an invalid value.
-- */
-- load_segments();
-- /* The gdt & idt are now invalid.
-- * If you want to load them you must set up your own idt & gdt.
-- */
-- set_gdt(phys_to_virt(0),0);
-- set_idt(phys_to_virt(0),0);
--
-- /* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
- }
---- 0002/arch/x86_64/kernel/relocate_kernel.S
-+++ work/arch/x86_64/kernel/relocate_kernel.S 2006-10-05 16:18:07.000000000 +0900
-@@ -159,13 +159,39 @@ relocate_new_kernel:
- movq PTR(PA_PGD)(%rsi), %r9
- movq %r9, %cr3
-
-+ /* setup idt */
-+ movq %r8, %rax
-+ addq $(idt_80 - relocate_kernel), %rax
-+ lidtq (%rax)
-+
-+ /* setup gdt */
-+ movq %r8, %rax
-+ addq $(gdt - relocate_kernel), %rax
-+ movq %r8, %r9
-+ addq $((gdt_80 - relocate_kernel) + 2), %r9
-+ movq %rax, (%r9)
-+
-+ movq %r8, %rax
-+ addq $(gdt_80 - relocate_kernel), %rax
-+ lgdtq (%rax)
-+
-+ /* setup data segment registers */
-+ xorl %eax, %eax
-+ movl %eax, %ds
-+ movl %eax, %es
-+ movl %eax, %fs
-+ movl %eax, %gs
-+ movl %eax, %ss
-+
- /* setup a new stack at the end of the physical control page */
- lea 4096(%r8), %rsp
-
-- /* jump to identity mapped page */
-- addq $(identity_mapped - relocate_kernel), %r8
-- pushq %r8
-- ret
-+ /* load new code segment and jump to identity mapped page */
-+ movq %r8, %rax
-+ addq $(identity_mapped - relocate_kernel), %rax
-+ pushq $(gdt_cs - gdt)
-+ pushq %rax
-+ lretq
-
- identity_mapped:
- /* store the start address on the stack */
-@@ -272,5 +298,19 @@ identity_mapped:
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
--
- ret
-+
-+ .align 16
-+gdt:
-+ .quad 0x0000000000000000 /* NULL descriptor */
-+gdt_cs:
-+ .quad 0x00af9a000000ffff
-+gdt_end:
-+
-+gdt_80:
-+ .word gdt_end - gdt - 1 /* limit */
-+ .quad 0 /* base - filled in by code above */
-+
-+idt_80:
-+ .word 0 /* limit */
-+ .quad 0 /* base */
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c
---- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-19 13:59:15.000000000 +0100
-@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb,
- if (hdrsize < sizeof(*hdr))
- return 1;
-
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+#ifdef CONFIG_XEN
-+ if ((*pskb)->proto_csum_blank)
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-+ else
-+#endif
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c
---- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-19 13:59:15.000000000 +0100
-@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb,
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-- if (hdr->check) /* 0 is a special case meaning no checksum */
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+ if (hdr->check) { /* 0 is a special case meaning no checksum */
-+#ifdef CONFIG_XEN
-+ if ((*pskb)->proto_csum_blank)
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-+ else
-+#endif
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
-+ }
- *portptr = newport;
- return 1;
- }
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c
---- ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./net/ipv4/xfrm4_output.c 2006-09-19 13:59:15.000000000 +0100
-@@ -17,6 +17,8 @@
- #include <net/xfrm.h>
- #include <net/icmp.h>
-
-+extern int skb_checksum_setup(struct sk_buff *skb);
-+
- /* Add encapsulation header.
- *
- * In transport mode, the IP header will be moved forward to make space
-@@ -103,6 +105,10 @@ static int xfrm4_output_one(struct sk_bu
- struct xfrm_state *x = dst->xfrm;
- int err;
-
-+ err = skb_checksum_setup(skb);
-+ if (err)
-+ goto error_nolock;
-+
- if (skb->ip_summed == CHECKSUM_HW) {
- err = skb_checksum_help(skb, 0);
- if (err)
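The ip_nat_cheat_check() calls above do an incremental checksum fixup instead of recomputing the whole sum. A stand-alone sketch of the arithmetic (RFC 1624, eqn. 3: HC' = ~(~HC + ~m + m'); csum_fixup16() is my own 16-bit illustration, while the kernel helper folds 32-bit quantities, which is why callers pass ~oldip/newip pairs):

    #include <stdint.h>

    /* Update an Internet checksum when one 16-bit field changes
     * from 'old' to 'new', without touching the rest of the packet. */
    static uint16_t csum_fixup16(uint16_t check, uint16_t old, uint16_t new)
    {
            uint32_t sum = (uint16_t)~check;

            sum += (uint16_t)~old;
            sum += new;
            sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries */
            sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }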
+++ /dev/null
-Index: tmp-xxx/Documentation/networking/netdevices.txt
-===================================================================
---- tmp-xxx.orig/Documentation/networking/netdevices.txt 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/Documentation/networking/netdevices.txt 2006-11-27 10:52:42.000000000 +0000
-@@ -42,9 +42,9 @@
- Context: nominally process, but don't sleep inside an rwlock
-
- dev->hard_start_xmit:
-- Synchronization: dev->xmit_lock spinlock.
-+ Synchronization: netif_tx_lock spinlock.
- When the driver sets NETIF_F_LLTX in dev->features this will be
-- called without holding xmit_lock. In this case the driver
-+ called without holding netif_tx_lock. In this case the driver
- has to lock by itself when needed. It is recommended to use a try lock
- for this and return -1 when the spin lock fails.
- The locking there should also properly protect against
-@@ -62,12 +62,12 @@
- Only valid when NETIF_F_LLTX is set.
-
- dev->tx_timeout:
-- Synchronization: dev->xmit_lock spinlock.
-+ Synchronization: netif_tx_lock spinlock.
- Context: BHs disabled
- Notes: netif_queue_stopped() is guaranteed true
-
- dev->set_multicast_list:
-- Synchronization: dev->xmit_lock spinlock.
-+ Synchronization: netif_tx_lock spinlock.
- Context: BHs disabled
-
- dev->poll:
-Index: tmp-xxx/drivers/block/aoe/aoenet.c
-===================================================================
---- tmp-xxx.orig/drivers/block/aoe/aoenet.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/block/aoe/aoenet.c 2006-11-27 10:52:42.000000000 +0000
-@@ -95,9 +95,8 @@
- static struct sk_buff *
- skb_check(struct sk_buff *skb)
- {
-- if (skb_is_nonlinear(skb))
- if ((skb = skb_share_check(skb, GFP_ATOMIC)))
-- if (skb_linearize(skb, GFP_ATOMIC) < 0) {
-+ if (skb_linearize(skb)) {
- dev_kfree_skb(skb);
- return NULL;
- }
-Index: tmp-xxx/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
-===================================================================
---- tmp-xxx.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-11-27 10:52:42.000000000 +0000
-@@ -821,7 +821,8 @@
-
- ipoib_mcast_stop_thread(dev, 0);
-
-- spin_lock_irqsave(&dev->xmit_lock, flags);
-+ local_irq_save(flags);
-+ netif_tx_lock(dev);
- spin_lock(&priv->lock);
-
- /*
-@@ -896,7 +897,8 @@
- }
-
- spin_unlock(&priv->lock);
-- spin_unlock_irqrestore(&dev->xmit_lock, flags);
-+ netif_tx_unlock(dev);
-+ local_irq_restore(flags);
-
- /* We have to cancel outside of the spinlock */
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-Index: tmp-xxx/drivers/media/dvb/dvb-core/dvb_net.c
-===================================================================
---- tmp-xxx.orig/drivers/media/dvb/dvb-core/dvb_net.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/media/dvb/dvb-core/dvb_net.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1053,7 +1053,7 @@
-
- dvb_net_feed_stop(dev);
- priv->rx_mode = RX_MODE_UNI;
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
-
- if (dev->flags & IFF_PROMISC) {
- dprintk("%s: promiscuous mode\n", dev->name);
-@@ -1078,7 +1078,7 @@
- }
- }
-
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- dvb_net_feed_start(dev);
- }
-
-Index: tmp-xxx/drivers/net/8139cp.c
-===================================================================
---- tmp-xxx.orig/drivers/net/8139cp.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/8139cp.c 2006-11-27 10:52:42.000000000 +0000
-@@ -794,7 +794,7 @@
- entry = cp->tx_head;
- eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
- if (dev->features & NETIF_F_TSO)
-- mss = skb_shinfo(skb)->tso_size;
-+ mss = skb_shinfo(skb)->gso_size;
-
- if (skb_shinfo(skb)->nr_frags == 0) {
- struct cp_desc *txd = &cp->tx_ring[entry];
-Index: tmp-xxx/drivers/net/bnx2.c
-===================================================================
---- tmp-xxx.orig/drivers/net/bnx2.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/bnx2.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1593,7 +1593,7 @@
- skb = tx_buf->skb;
- #ifdef BCM_TSO
- /* partial BD completions possible with TSO packets */
-- if (skb_shinfo(skb)->tso_size) {
-+ if (skb_shinfo(skb)->gso_size) {
- u16 last_idx, last_ring_idx;
-
- last_idx = sw_cons +
-@@ -1948,7 +1948,7 @@
- return 1;
- }
-
--/* Called with rtnl_lock from vlan functions and also dev->xmit_lock
-+/* Called with rtnl_lock from vlan functions and also netif_tx_lock
- * from set_multicast.
- */
- static void
-@@ -4403,7 +4403,7 @@
- }
- #endif
-
--/* Called with dev->xmit_lock.
-+/* Called with netif_tx_lock.
- * hard_start_xmit is pseudo-lockless - a lock is only required when
- * the tx queue is full. This way, we get the benefit of lockless
- * operations most of the time without the complexities to handle
-@@ -4441,7 +4441,7 @@
- (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16));
- }
- #ifdef BCM_TSO
-- if ((mss = skb_shinfo(skb)->tso_size) &&
-+ if ((mss = skb_shinfo(skb)->gso_size) &&
- (skb->len > (bp->dev->mtu + ETH_HLEN))) {
- u32 tcp_opt_len, ip_tcp_len;
-
-Index: tmp-xxx/drivers/net/bonding/bond_main.c
-===================================================================
---- tmp-xxx.orig/drivers/net/bonding/bond_main.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/bonding/bond_main.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1145,8 +1145,7 @@
- }
-
- #define BOND_INTERSECT_FEATURES \
-- (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\
-- NETIF_F_TSO|NETIF_F_UFO)
-+ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO)
-
- /*
- * Compute the common dev->feature set available to all slaves. Some
-@@ -1164,9 +1163,7 @@
- features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
-
- if ((features & NETIF_F_SG) &&
-- !(features & (NETIF_F_IP_CSUM |
-- NETIF_F_NO_CSUM |
-- NETIF_F_HW_CSUM)))
-+ !(features & NETIF_F_ALL_CSUM))
- features &= ~NETIF_F_SG;
-
- /*
-@@ -4147,7 +4144,7 @@
- */
- bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
-
-- /* don't acquire bond device's xmit_lock when
-+ /* don't acquire bond device's netif_tx_lock when
- * transmitting */
- bond_dev->features |= NETIF_F_LLTX;
-
-Index: tmp-xxx/drivers/net/chelsio/sge.c
-===================================================================
---- tmp-xxx.orig/drivers/net/chelsio/sge.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/chelsio/sge.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1419,7 +1419,7 @@
- struct cpl_tx_pkt *cpl;
-
- #ifdef NETIF_F_TSO
-- if (skb_shinfo(skb)->tso_size) {
-+ if (skb_shinfo(skb)->gso_size) {
- int eth_type;
- struct cpl_tx_pkt_lso *hdr;
-
-@@ -1434,7 +1434,7 @@
- hdr->ip_hdr_words = skb->nh.iph->ihl;
- hdr->tcp_hdr_words = skb->h.th->doff;
- hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
-- skb_shinfo(skb)->tso_size));
-+ skb_shinfo(skb)->gso_size));
- hdr->len = htonl(skb->len - sizeof(*hdr));
- cpl = (struct cpl_tx_pkt *)hdr;
- sge->stats.tx_lso_pkts++;
-Index: tmp-xxx/drivers/net/e1000/e1000_main.c
-===================================================================
---- tmp-xxx.orig/drivers/net/e1000/e1000_main.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/e1000/e1000_main.c 2006-11-27 10:52:42.000000000 +0000
-@@ -2526,7 +2526,7 @@
- uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
- int err;
-
-- if (skb_shinfo(skb)->tso_size) {
-+ if (skb_shinfo(skb)->gso_size) {
- if (skb_header_cloned(skb)) {
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (err)
-@@ -2534,7 +2534,7 @@
- }
-
- hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
-- mss = skb_shinfo(skb)->tso_size;
-+ mss = skb_shinfo(skb)->gso_size;
- if (skb->protocol == ntohs(ETH_P_IP)) {
- skb->nh.iph->tot_len = 0;
- skb->nh.iph->check = 0;
-@@ -2651,7 +2651,7 @@
- * tso gets written back prematurely before the data is fully
- * DMAd to the controller */
- if (!skb->data_len && tx_ring->last_tx_tso &&
-- !skb_shinfo(skb)->tso_size) {
-+ !skb_shinfo(skb)->gso_size) {
- tx_ring->last_tx_tso = 0;
- size -= 4;
- }
-@@ -2893,7 +2893,7 @@
- }
-
- #ifdef NETIF_F_TSO
-- mss = skb_shinfo(skb)->tso_size;
-+ mss = skb_shinfo(skb)->gso_size;
- /* The controller does a simple calculation to
- * make sure there is enough room in the FIFO before
- * initiating the DMA for each buffer. The calc is:
-@@ -2935,7 +2935,7 @@
- #ifdef NETIF_F_TSO
- /* Controller Erratum workaround */
- if (!skb->data_len && tx_ring->last_tx_tso &&
-- !skb_shinfo(skb)->tso_size)
-+ !skb_shinfo(skb)->gso_size)
- count++;
- #endif
-
-Index: tmp-xxx/drivers/net/forcedeth.c
-===================================================================
---- tmp-xxx.orig/drivers/net/forcedeth.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/forcedeth.c 2006-11-27 10:52:42.000000000 +0000
-@@ -482,9 +482,9 @@
- * critical parts:
- * - rx is (pseudo-) lockless: it relies on the single-threading provided
- * by the arch code for interrupts.
-- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission
-+ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
- * needs dev->priv->lock :-(
-- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock.
-+ * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
- */
-
- /* in dev: base, irq */
-@@ -1016,7 +1016,7 @@
-
- /*
- * nv_start_xmit: dev->hard_start_xmit function
-- * Called with dev->xmit_lock held.
-+ * Called with netif_tx_lock held.
- */
- static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
-@@ -1105,8 +1105,8 @@
- np->tx_skbuff[nr] = skb;
-
- #ifdef NETIF_F_TSO
-- if (skb_shinfo(skb)->tso_size)
-- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
-+ if (skb_shinfo(skb)->gso_size)
-+ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
- else
- #endif
- tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
-@@ -1203,7 +1203,7 @@
-
- /*
- * nv_tx_timeout: dev->tx_timeout function
-- * Called with dev->xmit_lock held.
-+ * Called with netif_tx_lock held.
- */
- static void nv_tx_timeout(struct net_device *dev)
- {
-@@ -1524,7 +1524,7 @@
- * Changing the MTU is a rare event, it shouldn't matter.
- */
- disable_irq(dev->irq);
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- spin_lock(&np->lock);
- /* stop engines */
- nv_stop_rx(dev);
-@@ -1559,7 +1559,7 @@
- nv_start_rx(dev);
- nv_start_tx(dev);
- spin_unlock(&np->lock);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- enable_irq(dev->irq);
- }
- return 0;
-@@ -1594,7 +1594,7 @@
- memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN);
-
- if (netif_running(dev)) {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- spin_lock_irq(&np->lock);
-
- /* stop rx engine */
-@@ -1606,7 +1606,7 @@
- /* restart rx engine */
- nv_start_rx(dev);
- spin_unlock_irq(&np->lock);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- } else {
- nv_copy_mac_to_hw(dev);
- }
-@@ -1615,7 +1615,7 @@
-
- /*
- * nv_set_multicast: dev->set_multicast function
-- * Called with dev->xmit_lock held.
-+ * Called with netif_tx_lock held.
- */
- static void nv_set_multicast(struct net_device *dev)
- {
-Index: tmp-xxx/drivers/net/hamradio/6pack.c
-===================================================================
---- tmp-xxx.orig/drivers/net/hamradio/6pack.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/hamradio/6pack.c 2006-11-27 10:52:42.000000000 +0000
-@@ -308,9 +308,9 @@
- {
- struct sockaddr_ax25 *sa = addr;
-
-- spin_lock_irq(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
-- spin_unlock_irq(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
-
- return 0;
- }
-@@ -767,9 +767,9 @@
- break;
- }
-
-- spin_lock_irq(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN);
-- spin_unlock_irq(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
-
- err = 0;
- break;
-Index: tmp-xxx/drivers/net/hamradio/mkiss.c
-===================================================================
---- tmp-xxx.orig/drivers/net/hamradio/mkiss.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/hamradio/mkiss.c 2006-11-27 10:52:42.000000000 +0000
-@@ -357,9 +357,9 @@
- {
- struct sockaddr_ax25 *sa = addr;
-
-- spin_lock_irq(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
-- spin_unlock_irq(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
-
- return 0;
- }
-@@ -886,9 +886,9 @@
- break;
- }
-
-- spin_lock_irq(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- memcpy(dev->dev_addr, addr, AX25_ADDR_LEN);
-- spin_unlock_irq(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
-
- err = 0;
- break;
-Index: tmp-xxx/drivers/net/ifb.c
-===================================================================
---- tmp-xxx.orig/drivers/net/ifb.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/ifb.c 2006-11-27 10:52:42.000000000 +0000
-@@ -76,13 +76,13 @@
- dp->st_task_enter++;
- if ((skb = skb_peek(&dp->tq)) == NULL) {
- dp->st_txq_refl_try++;
-- if (spin_trylock(&_dev->xmit_lock)) {
-+ if (netif_tx_trylock(_dev)) {
- dp->st_rxq_enter++;
- while ((skb = skb_dequeue(&dp->rq)) != NULL) {
- skb_queue_tail(&dp->tq, skb);
- dp->st_rx2tx_tran++;
- }
-- spin_unlock(&_dev->xmit_lock);
-+ netif_tx_unlock(_dev);
- } else {
- /* reschedule */
- dp->st_rxq_notenter++;
-@@ -110,7 +110,7 @@
- }
- }
-
-- if (spin_trylock(&_dev->xmit_lock)) {
-+ if (netif_tx_trylock(_dev)) {
- dp->st_rxq_check++;
- if ((skb = skb_peek(&dp->rq)) == NULL) {
- dp->tasklet_pending = 0;
-@@ -118,10 +118,10 @@
- netif_wake_queue(_dev);
- } else {
- dp->st_rxq_rsch++;
-- spin_unlock(&_dev->xmit_lock);
-+ netif_tx_unlock(_dev);
- goto resched;
- }
-- spin_unlock(&_dev->xmit_lock);
-+ netif_tx_unlock(_dev);
- } else {
- resched:
- dp->tasklet_pending = 1;
-Index: tmp-xxx/drivers/net/irda/vlsi_ir.c
-===================================================================
---- tmp-xxx.orig/drivers/net/irda/vlsi_ir.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/irda/vlsi_ir.c 2006-11-27 10:52:42.000000000 +0000
-@@ -959,7 +959,7 @@
- || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec))
- break;
- udelay(100);
-- /* must not sleep here - we are called under xmit_lock! */
-+ /* must not sleep here - called under netif_tx_lock! */
- }
- }
-
-Index: tmp-xxx/drivers/net/ixgb/ixgb_main.c
-===================================================================
---- tmp-xxx.orig/drivers/net/ixgb/ixgb_main.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/ixgb/ixgb_main.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1163,7 +1163,7 @@
- uint16_t ipcse, tucse, mss;
- int err;
-
-- if(likely(skb_shinfo(skb)->tso_size)) {
-+ if(likely(skb_shinfo(skb)->gso_size)) {
- if (skb_header_cloned(skb)) {
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (err)
-@@ -1171,7 +1171,7 @@
- }
-
- hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
-- mss = skb_shinfo(skb)->tso_size;
-+ mss = skb_shinfo(skb)->gso_size;
- skb->nh.iph->tot_len = 0;
- skb->nh.iph->check = 0;
- skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
-Index: tmp-xxx/drivers/net/loopback.c
-===================================================================
---- tmp-xxx.orig/drivers/net/loopback.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/loopback.c 2006-11-27 10:52:42.000000000 +0000
-@@ -74,7 +74,7 @@
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
- unsigned int doffset = (iph->ihl + th->doff) * 4;
-- unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
-+ unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
- unsigned int offset = 0;
- u32 seq = ntohl(th->seq);
- u16 id = ntohs(iph->id);
-@@ -139,7 +139,7 @@
- #endif
-
- #ifdef LOOPBACK_TSO
-- if (skb_shinfo(skb)->tso_size) {
-+ if (skb_shinfo(skb)->gso_size) {
- BUG_ON(skb->protocol != htons(ETH_P_IP));
- BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
-
-Index: tmp-xxx/drivers/net/mv643xx_eth.c
-===================================================================
---- tmp-xxx.orig/drivers/net/mv643xx_eth.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/mv643xx_eth.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1107,7 +1107,7 @@
-
- #ifdef MV643XX_CHECKSUM_OFFLOAD_TX
- if (has_tiny_unaligned_frags(skb)) {
-- if ((skb_linearize(skb, GFP_ATOMIC) != 0)) {
-+ if (__skb_linearize(skb)) {
- stats->tx_dropped++;
- printk(KERN_DEBUG "%s: failed to linearize tiny "
- "unaligned fragment\n", dev->name);
-Index: tmp-xxx/drivers/net/natsemi.c
-===================================================================
---- tmp-xxx.orig/drivers/net/natsemi.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/natsemi.c 2006-11-27 10:52:42.000000000 +0000
-@@ -323,12 +323,12 @@
- The rx process only runs in the interrupt handler. Access from outside
- the interrupt handler is only permitted after disable_irq().
-
--The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap
-+The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap
- is set, then access is permitted under spin_lock_irq(&np->lock).
-
- Thus configuration functions that want to access everything must call
- disable_irq(dev->irq);
-- spin_lock_bh(dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- spin_lock_irq(&np->lock);
-
- IV. Notes
-Index: tmp-xxx/drivers/net/r8169.c
-===================================================================
---- tmp-xxx.orig/drivers/net/r8169.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/r8169.c 2006-11-27 10:52:42.000000000 +0000
-@@ -2171,7 +2171,7 @@
- static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
- {
- if (dev->features & NETIF_F_TSO) {
-- u32 mss = skb_shinfo(skb)->tso_size;
-+ u32 mss = skb_shinfo(skb)->gso_size;
-
- if (mss)
- return LargeSend | ((mss & MSSMask) << MSSShift);
-Index: tmp-xxx/drivers/net/s2io.c
-===================================================================
---- tmp-xxx.orig/drivers/net/s2io.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/s2io.c 2006-11-27 10:52:42.000000000 +0000
-@@ -3522,8 +3522,8 @@
- txdp->Control_1 = 0;
- txdp->Control_2 = 0;
- #ifdef NETIF_F_TSO
-- mss = skb_shinfo(skb)->tso_size;
-- if (mss) {
-+ mss = skb_shinfo(skb)->gso_size;
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
- txdp->Control_1 |= TXD_TCP_LSO_EN;
- txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
- }
-@@ -3543,10 +3543,10 @@
- }
-
- frg_len = skb->len - skb->data_len;
-- if (skb_shinfo(skb)->ufo_size) {
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
- int ufo_size;
-
-- ufo_size = skb_shinfo(skb)->ufo_size;
-+ ufo_size = skb_shinfo(skb)->gso_size;
- ufo_size &= ~7;
- txdp->Control_1 |= TXD_UFO_EN;
- txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
-@@ -3572,7 +3572,7 @@
- txdp->Host_Control = (unsigned long) skb;
- txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
-
-- if (skb_shinfo(skb)->ufo_size)
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- txdp->Control_1 |= TXD_UFO_EN;
-
- frg_cnt = skb_shinfo(skb)->nr_frags;
-@@ -3587,12 +3587,12 @@
- (sp->pdev, frag->page, frag->page_offset,
- frag->size, PCI_DMA_TODEVICE);
- txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
-- if (skb_shinfo(skb)->ufo_size)
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- txdp->Control_1 |= TXD_UFO_EN;
- }
- txdp->Control_1 |= TXD_GATHER_CODE_LAST;
-
-- if (skb_shinfo(skb)->ufo_size)
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- frg_cnt++; /* as Txd0 was used for inband header */
-
- tx_fifo = mac_control->tx_FIFO_start[queue];
-@@ -3606,7 +3606,7 @@
- if (mss)
- val64 |= TX_FIFO_SPECIAL_FUNC;
- #endif
-- if (skb_shinfo(skb)->ufo_size)
-+ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- val64 |= TX_FIFO_SPECIAL_FUNC;
- writeq(val64, &tx_fifo->List_Control);
-
-Index: tmp-xxx/drivers/net/sky2.c
-===================================================================
---- tmp-xxx.orig/drivers/net/sky2.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/sky2.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1141,7 +1141,7 @@
- count = sizeof(dma_addr_t) / sizeof(u32);
- count += skb_shinfo(skb)->nr_frags * count;
-
-- if (skb_shinfo(skb)->tso_size)
-+ if (skb_shinfo(skb)->gso_size)
- ++count;
-
- if (skb->ip_summed == CHECKSUM_HW)
-@@ -1213,7 +1213,7 @@
- }
-
- /* Check for TCP Segmentation Offload */
-- mss = skb_shinfo(skb)->tso_size;
-+ mss = skb_shinfo(skb)->gso_size;
- if (mss != 0) {
- /* just drop the packet if non-linear expansion fails */
- if (skb_header_cloned(skb) &&
-Index: tmp-xxx/drivers/net/tg3.c
-===================================================================
---- tmp-xxx.orig/drivers/net/tg3.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/tg3.c 2006-11-27 10:52:42.000000000 +0000
-@@ -3664,7 +3664,7 @@
- #if TG3_TSO_SUPPORT != 0
- mss = 0;
- if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
-- (mss = skb_shinfo(skb)->tso_size) != 0) {
-+ (mss = skb_shinfo(skb)->gso_size) != 0) {
- int tcp_opt_len, ip_tcp_len;
-
- if (skb_header_cloned(skb) &&
-Index: tmp-xxx/drivers/net/tulip/winbond-840.c
-===================================================================
---- tmp-xxx.orig/drivers/net/tulip/winbond-840.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/tulip/winbond-840.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1605,11 +1605,11 @@
- * - get_stats:
- * spin_lock_irq(np->lock), doesn't touch hw if not present
- * - hard_start_xmit:
-- * netif_stop_queue + spin_unlock_wait(&dev->xmit_lock);
-+ * synchronize_irq + netif_tx_disable;
- * - tx_timeout:
-- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
-+ * netif_device_detach + netif_tx_disable;
- * - set_multicast_list
-- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
-+ * netif_device_detach + netif_tx_disable;
- * - interrupt handler
- * doesn't touch hw if not present, synchronize_irq waits for
- * running instances of the interrupt handler.
-@@ -1635,11 +1635,10 @@
- netif_device_detach(dev);
- update_csr6(dev, 0);
- iowrite32(0, ioaddr + IntrEnable);
-- netif_stop_queue(dev);
- spin_unlock_irq(&np->lock);
-
-- spin_unlock_wait(&dev->xmit_lock);
- synchronize_irq(dev->irq);
-+ netif_tx_disable(dev);
-
- np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff;
-
-Index: tmp-xxx/drivers/net/typhoon.c
-===================================================================
---- tmp-xxx.orig/drivers/net/typhoon.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/typhoon.c 2006-11-27 10:52:42.000000000 +0000
-@@ -340,7 +340,7 @@
- #endif
-
- #if defined(NETIF_F_TSO)
--#define skb_tso_size(x) (skb_shinfo(x)->tso_size)
-+#define skb_tso_size(x) (skb_shinfo(x)->gso_size)
- #define TSO_NUM_DESCRIPTORS 2
- #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT
- #else
-Index: tmp-xxx/drivers/net/via-velocity.c
-===================================================================
---- tmp-xxx.orig/drivers/net/via-velocity.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/via-velocity.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1905,6 +1905,13 @@
-
- int pktlen = skb->len;
-
-+#ifdef VELOCITY_ZERO_COPY_SUPPORT
-+ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
-+#endif
-+
- spin_lock_irqsave(&vptr->lock, flags);
-
- index = vptr->td_curr[qnum];
-@@ -1920,8 +1927,6 @@
- */
- if (pktlen < ETH_ZLEN) {
- /* Cannot occur until ZC support */
-- if(skb_linearize(skb, GFP_ATOMIC))
-- return 0;
- pktlen = ETH_ZLEN;
- memcpy(tdinfo->buf, skb->data, skb->len);
- memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
-@@ -1939,7 +1944,6 @@
- int nfrags = skb_shinfo(skb)->nr_frags;
- tdinfo->skb = skb;
- if (nfrags > 6) {
-- skb_linearize(skb, GFP_ATOMIC);
- memcpy(tdinfo->buf, skb->data, skb->len);
- tdinfo->skb_dma[0] = tdinfo->buf_dma;
- td_ptr->tdesc0.pktsize =
-Index: tmp-xxx/drivers/net/wireless/orinoco.c
-===================================================================
---- tmp-xxx.orig/drivers/net/wireless/orinoco.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/net/wireless/orinoco.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1835,7 +1835,9 @@
- /* Set promiscuity / multicast*/
- priv->promiscuous = 0;
- priv->mc_count = 0;
-- __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */
-+
-+ /* FIXME: what about netif_tx_lock */
-+ __orinoco_set_multicast_list(dev);
-
- return 0;
- }
-Index: tmp-xxx/drivers/s390/net/qeth_eddp.c
-===================================================================
---- tmp-xxx.orig/drivers/s390/net/qeth_eddp.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/s390/net/qeth_eddp.c 2006-11-27 10:52:42.000000000 +0000
-@@ -421,7 +421,7 @@
- }
- tcph = eddp->skb->h.th;
- while (eddp->skb_offset < eddp->skb->len) {
-- data_len = min((int)skb_shinfo(eddp->skb)->tso_size,
-+ data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
- (int)(eddp->skb->len - eddp->skb_offset));
- /* prepare qdio hdr */
- if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){
-@@ -516,20 +516,20 @@
-
- QETH_DBF_TEXT(trace, 5, "eddpcanp");
- /* can we put multiple skbs in one page? */
-- skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len);
-+ skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len);
- if (skbs_per_page > 1){
-- ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) /
-+ ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) /
- skbs_per_page + 1;
- ctx->elements_per_skb = 1;
- } else {
- /* no -> how many elements per skb? */
-- ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len +
-+ ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len +
- PAGE_SIZE) >> PAGE_SHIFT;
- ctx->num_pages = ctx->elements_per_skb *
-- (skb_shinfo(skb)->tso_segs + 1);
-+ (skb_shinfo(skb)->gso_segs + 1);
- }
- ctx->num_elements = ctx->elements_per_skb *
-- (skb_shinfo(skb)->tso_segs + 1);
-+ (skb_shinfo(skb)->gso_segs + 1);
- }
-
- static inline struct qeth_eddp_context *
-Index: tmp-xxx/drivers/s390/net/qeth_main.c
-===================================================================
---- tmp-xxx.orig/drivers/s390/net/qeth_main.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/s390/net/qeth_main.c 2006-11-27 10:52:42.000000000 +0000
-@@ -4454,7 +4454,7 @@
- queue = card->qdio.out_qs
- [qeth_get_priority_queue(card, skb, ipv, cast_type)];
-
-- if (skb_shinfo(skb)->tso_size)
-+ if (skb_shinfo(skb)->gso_size)
- large_send = card->options.large_send;
-
- /*are we able to do TSO ? If so ,prepare and send it from here */
-@@ -4501,7 +4501,7 @@
- card->stats.tx_packets++;
- card->stats.tx_bytes += skb->len;
- #ifdef CONFIG_QETH_PERF_STATS
-- if (skb_shinfo(skb)->tso_size &&
-+ if (skb_shinfo(skb)->gso_size &&
- !(large_send == QETH_LARGE_SEND_NO)) {
- card->perf_stats.large_send_bytes += skb->len;
- card->perf_stats.large_send_cnt++;
-Index: tmp-xxx/drivers/s390/net/qeth_tso.h
-===================================================================
---- tmp-xxx.orig/drivers/s390/net/qeth_tso.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/drivers/s390/net/qeth_tso.h 2006-11-27 10:52:42.000000000 +0000
-@@ -51,7 +51,7 @@
- hdr->ext.hdr_version = 1;
- hdr->ext.hdr_len = 28;
- /*insert non-fix values */
-- hdr->ext.mss = skb_shinfo(skb)->tso_size;
-+ hdr->ext.mss = skb_shinfo(skb)->gso_size;
- hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
- hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
- sizeof(struct qeth_hdr_tso));
-Index: tmp-xxx/include/linux/ethtool.h
-===================================================================
---- tmp-xxx.orig/include/linux/ethtool.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/linux/ethtool.h 2006-11-27 10:52:42.000000000 +0000
-@@ -408,6 +408,8 @@
- #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
- #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
- #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
-+#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */
-+#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */
-
- /* compatibility with older code */
- #define SPARC_ETH_GSET ETHTOOL_GSET
-Index: tmp-xxx/include/linux/netdevice.h
-===================================================================
---- tmp-xxx.orig/include/linux/netdevice.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/linux/netdevice.h 2006-11-27 10:52:42.000000000 +0000
-@@ -230,7 +230,8 @@
- __LINK_STATE_SCHED,
- __LINK_STATE_NOCARRIER,
- __LINK_STATE_RX_SCHED,
-- __LINK_STATE_LINKWATCH_PENDING
-+ __LINK_STATE_LINKWATCH_PENDING,
-+ __LINK_STATE_QDISC_RUNNING,
- };
-
-
-@@ -306,9 +307,17 @@
- #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
- #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
- #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
--#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
-+#define NETIF_F_GSO 2048 /* Enable software GSO. */
- #define NETIF_F_LLTX 4096 /* LockLess TX */
--#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
-+
-+ /* Segmentation offload features */
-+#define NETIF_F_GSO_SHIFT 16
-+#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-+#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
-+#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-+
-+#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-+#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
-
- struct net_device *next_sched;
-
-@@ -394,6 +403,9 @@
- struct list_head qdisc_list;
- unsigned long tx_queue_len; /* Max frames per queue allowed */
-
-+ /* Partially transmitted GSO packet. */
-+ struct sk_buff *gso_skb;
-+
- /* ingress path synchronizer */
- spinlock_t ingress_lock;
- struct Qdisc *qdisc_ingress;
-@@ -402,7 +414,7 @@
- * One part is mostly used on xmit path (device)
- */
- /* hard_start_xmit synchronizer */
-- spinlock_t xmit_lock ____cacheline_aligned_in_smp;
-+ spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
- /* cpu id of processor entered to hard_start_xmit or -1,
- if nobody entered there.
- */
-@@ -527,6 +539,8 @@
- struct net_device *,
- struct packet_type *,
- struct net_device *);
-+ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
-+ int features);
- void *af_packet_priv;
- struct list_head list;
- };
-@@ -693,7 +707,8 @@
- extern int dev_set_mtu(struct net_device *, int);
- extern int dev_set_mac_address(struct net_device *,
- struct sockaddr *);
--extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
-+extern int dev_hard_start_xmit(struct sk_buff *skb,
-+ struct net_device *dev);
-
- extern void dev_init(void);
-
-@@ -900,11 +915,43 @@
- clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
- }
-
-+static inline void netif_tx_lock(struct net_device *dev)
-+{
-+ spin_lock(&dev->_xmit_lock);
-+ dev->xmit_lock_owner = smp_processor_id();
-+}
-+
-+static inline void netif_tx_lock_bh(struct net_device *dev)
-+{
-+ spin_lock_bh(&dev->_xmit_lock);
-+ dev->xmit_lock_owner = smp_processor_id();
-+}
-+
-+static inline int netif_tx_trylock(struct net_device *dev)
-+{
-+ int err = spin_trylock(&dev->_xmit_lock);
-+ if (!err)
-+ dev->xmit_lock_owner = smp_processor_id();
-+ return err;
-+}
-+
-+static inline void netif_tx_unlock(struct net_device *dev)
-+{
-+ dev->xmit_lock_owner = -1;
-+ spin_unlock(&dev->_xmit_lock);
-+}
-+
-+static inline void netif_tx_unlock_bh(struct net_device *dev)
-+{
-+ dev->xmit_lock_owner = -1;
-+ spin_unlock_bh(&dev->_xmit_lock);
-+}
-+
- static inline void netif_tx_disable(struct net_device *dev)
- {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- netif_stop_queue(dev);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- }
-
- /* These functions live elsewhere (drivers/net/net_init.c, but related) */
-@@ -932,6 +979,7 @@
- extern int weight_p;
- extern int netdev_set_master(struct net_device *dev, struct net_device *master);
- extern int skb_checksum_help(struct sk_buff *skb, int inward);
-+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
- #ifdef CONFIG_BUG
- extern void netdev_rx_csum_fault(struct net_device *dev);
- #else
-@@ -951,6 +999,18 @@
-
- extern void linkwatch_run_queue(void);
-
-+static inline int skb_gso_ok(struct sk_buff *skb, int features)
-+{
-+ int feature = skb_shinfo(skb)->gso_size ?
-+ skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0;
-+ return (features & feature) == feature;
-+}
-+
-+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
-+{
-+ return !skb_gso_ok(skb, dev->features);
-+}
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_DEV_H */
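With the helpers above in place, drivers stop touching dev->xmit_lock directly and go through netif_tx_lock*(), which additionally records the owning CPU in xmit_lock_owner. A sketch of the resulting call-site shape, modelled on the hamradio hunks earlier in this patch (example_set_mac_address() is illustrative):

    static int example_set_mac_address(struct net_device *dev, void *p)
    {
            struct sockaddr *sa = p;

            netif_tx_lock_bh(dev);          /* keeps hard_start_xmit() out */
            memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
            netif_tx_unlock_bh(dev);
            return 0;
    }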
-Index: tmp-xxx/include/linux/skbuff.h
-===================================================================
---- tmp-xxx.orig/include/linux/skbuff.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/linux/skbuff.h 2006-11-27 10:52:42.000000000 +0000
-@@ -134,9 +134,10 @@
- struct skb_shared_info {
- atomic_t dataref;
- unsigned short nr_frags;
-- unsigned short tso_size;
-- unsigned short tso_segs;
-- unsigned short ufo_size;
-+ unsigned short gso_size;
-+ /* Warning: this field is not always filled in (UFO)! */
-+ unsigned short gso_segs;
-+ unsigned short gso_type;
- unsigned int ip6_frag_id;
- struct sk_buff *frag_list;
- skb_frag_t frags[MAX_SKB_FRAGS];
-@@ -168,6 +169,14 @@
- SKB_FCLONE_CLONE,
- };
-
-+enum {
-+ SKB_GSO_TCPV4 = 1 << 0,
-+ SKB_GSO_UDPV4 = 1 << 1,
-+
-+ /* This indicates the skb is from an untrusted source. */
-+ SKB_GSO_DODGY = 1 << 2,
-+};
-+
- /**
- * struct sk_buff - socket buffer
- * @next: Next buffer in list
-@@ -1148,18 +1157,34 @@
- return 0;
- }
-
-+static inline int __skb_linearize(struct sk_buff *skb)
-+{
-+ return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
-+}
-+
- /**
- * skb_linearize - convert paged skb to linear one
-- * @skb: buffer to linearize
-- * @gfp: allocation mode
- *
- * If there is no free memory, -ENOMEM is returned; otherwise zero
- * is returned and the old skb data released.
- */
--extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp);
--static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp)
-+static inline int skb_linearize(struct sk_buff *skb)
-+{
-+ return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
-+}
-+
-+/**
-+ * skb_linearize_cow - make sure skb is linear and writable
-+ * @skb: buffer to process
-+ *
-+ * If there is no free memory, -ENOMEM is returned; otherwise zero
-+ * is returned and the old skb data released.
-+ */
-+static inline int skb_linearize_cow(struct sk_buff *skb)
- {
-- return __skb_linearize(skb, gfp);
-+ return skb_is_nonlinear(skb) || skb_cloned(skb) ?
-+ __skb_linearize(skb) : 0;
- }
-
- /**
-@@ -1254,6 +1279,7 @@
- struct sk_buff *skb1, const u32 len);
-
- extern void skb_release_data(struct sk_buff *skb);
-+extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
-
- static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
- int len, void *buffer)
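Call sites drop the gfp argument with the new signature and just test the return value; mirroring the aoenet hunk earlier in this patch (sketch, the drop path is illustrative):

    /* skb_linearize() no longer takes a gfp argument: it pulls the paged
     * data into the existing head via __pskb_pull_tail() instead of
     * reallocating, and returns 0 or -ENOMEM. */
    if (skb_linearize(skb)) {
            dev_kfree_skb(skb);
            return NULL;
    }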
-Index: tmp-xxx/include/net/pkt_sched.h
-===================================================================
---- tmp-xxx.orig/include/net/pkt_sched.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/net/pkt_sched.h 2006-11-27 10:52:42.000000000 +0000
-@@ -218,12 +218,13 @@
- struct rtattr *tab);
- extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
-
--extern int qdisc_restart(struct net_device *dev);
-+extern void __qdisc_run(struct net_device *dev);
-
- static inline void qdisc_run(struct net_device *dev)
- {
-- while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
-- /* NOTHING */;
-+ if (!netif_queue_stopped(dev) &&
-+ !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-+ __qdisc_run(dev);
- }
-
- extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
-Index: tmp-xxx/include/net/protocol.h
-===================================================================
---- tmp-xxx.orig/include/net/protocol.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/net/protocol.h 2006-11-27 10:52:42.000000000 +0000
-@@ -37,6 +37,8 @@
- struct net_protocol {
- int (*handler)(struct sk_buff *skb);
- void (*err_handler)(struct sk_buff *skb, u32 info);
-+ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
-+ int features);
- int no_policy;
- };
-
-Index: tmp-xxx/include/net/sock.h
-===================================================================
---- tmp-xxx.orig/include/net/sock.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/net/sock.h 2006-11-27 10:52:42.000000000 +0000
-@@ -1064,9 +1064,13 @@
- {
- __sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
-+ if (sk->sk_route_caps & NETIF_F_GSO)
-+ sk->sk_route_caps |= NETIF_F_TSO;
- if (sk->sk_route_caps & NETIF_F_TSO) {
- if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
- sk->sk_route_caps &= ~NETIF_F_TSO;
-+ else
-+ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- }
- }
-
-Index: tmp-xxx/include/net/tcp.h
-===================================================================
---- tmp-xxx.orig/include/net/tcp.h 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/include/net/tcp.h 2006-11-27 10:52:42.000000000 +0000
-@@ -552,13 +552,13 @@
- */
- static inline int tcp_skb_pcount(const struct sk_buff *skb)
- {
-- return skb_shinfo(skb)->tso_segs;
-+ return skb_shinfo(skb)->gso_segs;
- }
-
- /* This is valid iff tcp_skb_pcount() > 1. */
- static inline int tcp_skb_mss(const struct sk_buff *skb)
- {
-- return skb_shinfo(skb)->tso_size;
-+ return skb_shinfo(skb)->gso_size;
- }
-
- static inline void tcp_dec_pcount_approx(__u32 *count,
-@@ -1063,6 +1063,8 @@
-
- extern int tcp_v4_destroy_sock(struct sock *sk);
-
-+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
-+
- #ifdef CONFIG_PROC_FS
- extern int tcp4_proc_init(void);
- extern void tcp4_proc_exit(void);
-Index: tmp-xxx/net/atm/clip.c
-===================================================================
---- tmp-xxx.orig/net/atm/clip.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/atm/clip.c 2006-11-27 10:52:42.000000000 +0000
-@@ -101,7 +101,7 @@
- printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc);
- return;
- }
-- spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */
-+ netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
- entry->neigh->used = jiffies;
- for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
- if (*walk == clip_vcc) {
-@@ -125,7 +125,7 @@
- printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
- "0x%p)\n",entry,clip_vcc);
- out:
-- spin_unlock_bh(&entry->neigh->dev->xmit_lock);
-+ netif_tx_unlock_bh(entry->neigh->dev);
- }
-
- /* The neighbour entry n->lock is held. */
-Index: tmp-xxx/net/bridge/br_device.c
-===================================================================
---- tmp-xxx.orig/net/bridge/br_device.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/bridge/br_device.c 2006-11-27 10:52:42.000000000 +0000
-@@ -146,9 +146,9 @@
- struct net_bridge *br = netdev_priv(dev);
-
- if (data)
-- br->feature_mask |= NETIF_F_IP_CSUM;
-+ br->feature_mask |= NETIF_F_NO_CSUM;
- else
-- br->feature_mask &= ~NETIF_F_IP_CSUM;
-+ br->feature_mask &= ~NETIF_F_ALL_CSUM;
-
- br_features_recompute(br);
- return 0;
-@@ -185,6 +185,6 @@
- dev->set_mac_address = br_set_mac_address;
- dev->priv_flags = IFF_EBRIDGE;
-
-- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
-- | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
-+ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-+ NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
- }
-Index: tmp-xxx/net/bridge/br_forward.c
-===================================================================
---- tmp-xxx.orig/net/bridge/br_forward.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/bridge/br_forward.c 2006-11-27 10:52:42.000000000 +0000
-@@ -32,7 +32,7 @@
- int br_dev_queue_push_xmit(struct sk_buff *skb)
- {
- /* drop mtu oversized packets except tso */
-- if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
-+ if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
- kfree_skb(skb);
- else {
- #ifdef CONFIG_BRIDGE_NETFILTER
-Index: tmp-xxx/net/bridge/br_if.c
-===================================================================
---- tmp-xxx.orig/net/bridge/br_if.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/bridge/br_if.c 2006-11-27 10:52:42.000000000 +0000
-@@ -385,17 +385,28 @@
- struct net_bridge_port *p;
- unsigned long features, checksum;
-
-- features = br->feature_mask &~ NETIF_F_IP_CSUM;
-- checksum = br->feature_mask & NETIF_F_IP_CSUM;
-+ checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0;
-+ features = br->feature_mask & ~NETIF_F_ALL_CSUM;
-
- list_for_each_entry(p, &br->port_list, list) {
-- if (!(p->dev->features
-- & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
-+ unsigned long feature = p->dev->features;
-+
-+ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
-+ checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
-+ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
-+ checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
-+ if (!(feature & NETIF_F_IP_CSUM))
- checksum = 0;
-- features &= p->dev->features;
-+
-+ if (feature & NETIF_F_GSO)
-+ feature |= NETIF_F_TSO;
-+ feature |= NETIF_F_GSO;
-+
-+ features &= feature;
- }
-
-- br->dev->features = features | checksum | NETIF_F_LLTX;
-+ br->dev->features = features | checksum | NETIF_F_LLTX |
-+ NETIF_F_GSO_ROBUST;
- }
-
- /* called with RTNL */
-Index: tmp-xxx/net/bridge/br_netfilter.c
-===================================================================
---- tmp-xxx.orig/net/bridge/br_netfilter.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/bridge/br_netfilter.c 2006-11-27 10:52:42.000000000 +0000
-@@ -743,7 +743,7 @@
- {
- if (skb->protocol == htons(ETH_P_IP) &&
- skb->len > skb->dev->mtu &&
-- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-+ !skb_shinfo(skb)->gso_size)
- return ip_fragment(skb, br_dev_queue_push_xmit);
- else
- return br_dev_queue_push_xmit(skb);
-Index: tmp-xxx/net/core/dev.c
-===================================================================
---- tmp-xxx.orig/net/core/dev.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/dev.c 2006-11-27 10:57:31.000000000 +0000
-@@ -115,6 +115,7 @@
- #include <net/iw_handler.h>
- #endif /* CONFIG_NET_RADIO */
- #include <asm/current.h>
-+#include <linux/err.h>
-
- /*
- * The list of packet types we will receive (as opposed to discard)
-@@ -1032,7 +1033,7 @@
- * taps currently in use.
- */
-
--void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
-+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
- {
- struct packet_type *ptype;
-
-@@ -1106,6 +1107,45 @@
- return ret;
- }
-
-+/**
-+ * skb_gso_segment - Perform segmentation on skb.
-+ * @skb: buffer to segment
-+ * @features: features for the output path (see dev->features)
-+ *
-+ * This function segments the given skb and returns a list of segments.
-+ *
-+ * It may return NULL if the skb requires no segmentation. This is
-+ * only possible when GSO is used for verifying header integrity.
-+ */
-+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
-+{
-+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-+ struct packet_type *ptype;
-+ int type = skb->protocol;
-+
-+ BUG_ON(skb_shinfo(skb)->frag_list);
-+ BUG_ON(skb->ip_summed != CHECKSUM_HW);
-+
-+ skb->mac.raw = skb->data;
-+ skb->mac_len = skb->nh.raw - skb->data;
-+ __skb_pull(skb, skb->mac_len);
-+
-+ rcu_read_lock();
-+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
-+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-+ segs = ptype->gso_segment(skb, features);
-+ break;
-+ }
-+ }
-+ rcu_read_unlock();
-+
-+ __skb_push(skb, skb->data - skb->mac.raw);
-+
-+ return segs;
-+}
-+
-+EXPORT_SYMBOL(skb_gso_segment);
-+
- /* Take action when hardware reception checksum errors are detected. */
- #ifdef CONFIG_BUG
- void netdev_rx_csum_fault(struct net_device *dev)
-@@ -1142,76 +1182,107 @@
- #define illegal_highdma(dev, skb) (0)
- #endif
-
--/* Keep head the same: replace data */
--int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
-+struct dev_gso_cb {
-+ void (*destructor)(struct sk_buff *skb);
-+};
-+
-+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-+
-+static void dev_gso_skb_destructor(struct sk_buff *skb)
- {
-- unsigned int size;
-- u8 *data;
-- long offset;
-- struct skb_shared_info *ninfo;
-- int headerlen = skb->data - skb->head;
-- int expand = (skb->tail + skb->data_len) - skb->end;
--
-- if (skb_shared(skb))
-- BUG();
--
-- if (expand <= 0)
-- expand = 0;
--
-- size = skb->end - skb->head + expand;
-- size = SKB_DATA_ALIGN(size);
-- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-- if (!data)
-- return -ENOMEM;
--
-- /* Copy entire thing */
-- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
-- BUG();
--
-- /* Set up shinfo */
-- ninfo = (struct skb_shared_info*)(data + size);
-- atomic_set(&ninfo->dataref, 1);
-- ninfo->tso_size = skb_shinfo(skb)->tso_size;
-- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
-- ninfo->ufo_size = skb_shinfo(skb)->ufo_size;
-- ninfo->nr_frags = 0;
-- ninfo->frag_list = NULL;
--
-- /* Offset between the two in bytes */
-- offset = data - skb->head;
--
-- /* Free old data. */
-- skb_release_data(skb);
--
-- skb->head = data;
-- skb->end = data + size;
--
-- /* Set up new pointers */
-- skb->h.raw += offset;
-- skb->nh.raw += offset;
-- skb->mac.raw += offset;
-- skb->tail += offset;
-- skb->data += offset;
-+ struct dev_gso_cb *cb;
-
-- /* We are no longer a clone, even if we were. */
-- skb->cloned = 0;
-+ do {
-+ struct sk_buff *nskb = skb->next;
-
-- skb->tail += skb->data_len;
-- skb->data_len = 0;
-+ skb->next = nskb->next;
-+ nskb->next = NULL;
-+ kfree_skb(nskb);
-+ } while (skb->next);
-+
-+ cb = DEV_GSO_CB(skb);
-+ if (cb->destructor)
-+ cb->destructor(skb);
-+}
-+
-+/**
-+ * dev_gso_segment - Perform emulated hardware segmentation on skb.
-+ * @skb: buffer to segment
-+ *
-+ * This function segments the given skb and stores the list of segments
-+ * in skb->next.
-+ */
-+static int dev_gso_segment(struct sk_buff *skb)
-+{
-+ struct net_device *dev = skb->dev;
-+ struct sk_buff *segs;
-+ int features = dev->features & ~(illegal_highdma(dev, skb) ?
-+ NETIF_F_SG : 0);
-+
-+ segs = skb_gso_segment(skb, features);
-+
-+ /* Verifying header integrity only. */
-+ if (!segs)
-+ return 0;
-+
-+ if (unlikely(IS_ERR(segs)))
-+ return PTR_ERR(segs);
-+
-+ skb->next = segs;
-+ DEV_GSO_CB(skb)->destructor = skb->destructor;
-+ skb->destructor = dev_gso_skb_destructor;
-+ return 0;
-+}
-+
-+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+ if (likely(!skb->next)) {
-+ if (netdev_nit)
-+ dev_queue_xmit_nit(skb, dev);
-+
-+ if (netif_needs_gso(dev, skb)) {
-+ if (unlikely(dev_gso_segment(skb)))
-+ goto out_kfree_skb;
-+ if (skb->next)
-+ goto gso;
-+ }
-+
-+ return dev->hard_start_xmit(skb, dev);
-+ }
-+
-+gso:
-+ do {
-+ struct sk_buff *nskb = skb->next;
-+ int rc;
-+
-+ skb->next = nskb->next;
-+ nskb->next = NULL;
-+ rc = dev->hard_start_xmit(nskb, dev);
-+ if (unlikely(rc)) {
-+ nskb->next = skb->next;
-+ skb->next = nskb;
-+ return rc;
-+ }
-+ if (unlikely(netif_queue_stopped(dev) && skb->next))
-+ return NETDEV_TX_BUSY;
-+ } while (skb->next);
-+
-+ skb->destructor = DEV_GSO_CB(skb)->destructor;
-+
-+out_kfree_skb:
-+ kfree_skb(skb);
- return 0;
- }
-
- #define HARD_TX_LOCK(dev, cpu) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
-- spin_lock(&dev->xmit_lock); \
-- dev->xmit_lock_owner = cpu; \
-+ netif_tx_lock(dev); \
- } \
- }
-
- #define HARD_TX_UNLOCK(dev) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
-- dev->xmit_lock_owner = -1; \
-- spin_unlock(&dev->xmit_lock); \
-+ netif_tx_unlock(dev); \
- } \
- }
-
-@@ -1247,9 +1318,13 @@
- struct Qdisc *q;
- int rc = -ENOMEM;
-
-+ /* GSO will handle the following emulations directly. */
-+ if (netif_needs_gso(dev, skb))
-+ goto gso;
-+
- if (skb_shinfo(skb)->frag_list &&
- !(dev->features & NETIF_F_FRAGLIST) &&
-- __skb_linearize(skb, GFP_ATOMIC))
-+ __skb_linearize(skb))
- goto out_kfree_skb;
-
- /* Fragmented skb is linearized if device does not support SG,
-@@ -1258,25 +1333,26 @@
- */
- if (skb_shinfo(skb)->nr_frags &&
- (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-- __skb_linearize(skb, GFP_ATOMIC))
-+ __skb_linearize(skb))
- goto out_kfree_skb;
-
- /* If packet is not checksummed and device does not support
- * checksumming for this protocol, complete checksumming here.
- */
- if (skb->ip_summed == CHECKSUM_HW &&
-- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
-+ (!(dev->features & NETIF_F_GEN_CSUM) &&
- (!(dev->features & NETIF_F_IP_CSUM) ||
- skb->protocol != htons(ETH_P_IP))))
- if (skb_checksum_help(skb, 0))
- goto out_kfree_skb;
-
-+gso:
- spin_lock_prefetch(&dev->queue_lock);
-
- /* Disable soft irqs for various locks below. Also
- * stops preemption for RCU.
- */
-- local_bh_disable();
-+ rcu_read_lock_bh();
-
- /* Updates of qdisc are serialized by queue_lock.
- * The struct Qdisc which is pointed to by qdisc is now a
-@@ -1310,8 +1386,8 @@
- /* The device has no queue. Common case for software devices:
- loopback, all the sorts of tunnels...
-
-- Really, it is unlikely that xmit_lock protection is necessary here.
-- (f.e. loopback and IP tunnels are clean ignoring statistics
-+ Really, it is unlikely that netif_tx_lock protection is necessary
-+ here. (f.e. loopback and IP tunnels are clean ignoring statistics
- counters.)
- However, it is possible, that they rely on protection
- made by us here.
-@@ -1327,11 +1403,8 @@
- HARD_TX_LOCK(dev, cpu);
-
- if (!netif_queue_stopped(dev)) {
-- if (netdev_nit)
-- dev_queue_xmit_nit(skb, dev);
--
- rc = 0;
-- if (!dev->hard_start_xmit(skb, dev)) {
-+ if (!dev_hard_start_xmit(skb, dev)) {
- HARD_TX_UNLOCK(dev);
- goto out;
- }
-@@ -1350,13 +1423,13 @@
- }
-
- rc = -ENETDOWN;
-- local_bh_enable();
-+ rcu_read_unlock_bh();
-
- out_kfree_skb:
- kfree_skb(skb);
- return rc;
- out:
-- local_bh_enable();
-+ rcu_read_unlock_bh();
- return rc;
- }
-
-@@ -2671,7 +2744,7 @@
- BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-
- spin_lock_init(&dev->queue_lock);
-- spin_lock_init(&dev->xmit_lock);
-+ spin_lock_init(&dev->_xmit_lock);
- dev->xmit_lock_owner = -1;
- #ifdef CONFIG_NET_CLS_ACT
- spin_lock_init(&dev->ingress_lock);
-@@ -2715,9 +2788,7 @@
-
- /* Fix illegal SG+CSUM combinations. */
- if ((dev->features & NETIF_F_SG) &&
-- !(dev->features & (NETIF_F_IP_CSUM |
-- NETIF_F_NO_CSUM |
-- NETIF_F_HW_CSUM))) {
-+ !(dev->features & NETIF_F_ALL_CSUM)) {
- printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
- dev->name);
- dev->features &= ~NETIF_F_SG;
-@@ -3269,7 +3340,6 @@
- EXPORT_SYMBOL(__dev_get_by_index);
- EXPORT_SYMBOL(__dev_get_by_name);
- EXPORT_SYMBOL(__dev_remove_pack);
--EXPORT_SYMBOL(__skb_linearize);
- EXPORT_SYMBOL(dev_valid_name);
- EXPORT_SYMBOL(dev_add_pack);
- EXPORT_SYMBOL(dev_alloc_name);
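Note: the new dev_hard_start_xmit() above drives a whole GSO segment list
through the driver, and the failure path is the subtle part: the segment that
could not be sent is spliced back onto the head of the list so the remainder
can be requeued in order (see the dev->gso_skb handling in
net/sched/sch_generic.c further down). A userspace sketch of that list walk,
with a toy struct standing in for sk_buff:

    #include <stdio.h>

    struct seg { int id; struct seg *next; };

    static int fake_xmit(struct seg *s)
    {
        return s->id == 3 ? -1 : 0;          /* pretend the driver rejects #3 */
    }

    static int xmit_list(struct seg *skb)    /* skb->next holds the segments */
    {
        while (skb->next) {
            struct seg *nskb = skb->next;

            skb->next = nskb->next;          /* detach the first segment */
            nskb->next = NULL;
            if (fake_xmit(nskb)) {
                nskb->next = skb->next;      /* splice it back at the head */
                skb->next = nskb;            /* so the rest requeues in order */
                return -1;
            }
            printf("sent segment %d\n", nskb->id);
        }
        return 0;                            /* the kernel would now run the
                                                saved destructor and free skb */
    }

    int main(void)
    {
        struct seg s4 = {4, NULL}, s3 = {3, &s4}, s2 = {2, &s3}, s1 = {1, &s2};
        struct seg skb = {0, &s1};           /* the original GSO skb */

        if (xmit_list(&skb))
            printf("busy: segment %d still queued\n", skb.next->id);
        return 0;
    }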
-Index: tmp-xxx/net/core/dev_mcast.c
-===================================================================
---- tmp-xxx.orig/net/core/dev_mcast.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/dev_mcast.c 2006-11-27 10:52:42.000000000 +0000
-@@ -62,7 +62,7 @@
- * Device mc lists are changed by bh at least if IPv6 is enabled,
- * so that it must be bh protected.
- *
-- * We block accesses to device mc filters with dev->xmit_lock.
-+ * We block accesses to device mc filters with netif_tx_lock.
- */
-
- /*
-@@ -93,9 +93,9 @@
-
- void dev_mc_upload(struct net_device *dev)
- {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- __dev_mc_upload(dev);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- }
-
- /*
-@@ -107,7 +107,7 @@
- int err = 0;
- struct dev_mc_list *dmi, **dmip;
-
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
-
- for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
- /*
-@@ -139,13 +139,13 @@
- */
- __dev_mc_upload(dev);
-
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- return 0;
- }
- }
- err = -ENOENT;
- done:
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- return err;
- }
-
-@@ -160,7 +160,7 @@
-
- dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
-
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
- if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
- dmi->dmi_addrlen == alen) {
-@@ -176,7 +176,7 @@
- }
-
- if ((dmi = dmi1) == NULL) {
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- return -ENOMEM;
- }
- memcpy(dmi->dmi_addr, addr, alen);
-@@ -189,11 +189,11 @@
-
- __dev_mc_upload(dev);
-
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- return 0;
-
- done:
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- kfree(dmi1);
- return err;
- }
-@@ -204,7 +204,7 @@
-
- void dev_mc_discard(struct net_device *dev)
- {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
-
- while (dev->mc_list != NULL) {
- struct dev_mc_list *tmp = dev->mc_list;
-@@ -215,7 +215,7 @@
- }
- dev->mc_count = 0;
-
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- }
-
- #ifdef CONFIG_PROC_FS
-@@ -250,7 +250,7 @@
- struct dev_mc_list *m;
- struct net_device *dev = v;
-
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- for (m = dev->mc_list; m; m = m->next) {
- int i;
-
-@@ -262,7 +262,7 @@
-
- seq_putc(seq, '\n');
- }
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- return 0;
- }
-
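Note: every spin_lock_bh(&dev->xmit_lock) conversion in this file relies on
the netif_tx_lock*() helpers the patch adds to include/linux/netdevice.h (that
hunk is not part of this excerpt); they bundle taking dev->_xmit_lock with
tracking the lock owner, which each call site previously open-coded. A
runnable userspace analogue of what the pair encapsulates, with pthreads
standing in for spinlocks and a thread id for the CPU id:

    #include <pthread.h>
    #include <stdio.h>

    struct dev_model {
        pthread_mutex_t xmit_lock;        /* the kernel field becomes
                                             dev->_xmit_lock in this patch */
        long            xmit_lock_owner;  /* -1 while unlocked */
    };

    static void model_tx_lock(struct dev_model *dev)
    {
        pthread_mutex_lock(&dev->xmit_lock);
        dev->xmit_lock_owner = (long)pthread_self(); /* kernel: the CPU id */
    }

    static void model_tx_unlock(struct dev_model *dev)
    {
        dev->xmit_lock_owner = -1;
        pthread_mutex_unlock(&dev->xmit_lock);
    }

    int main(void)
    {
        struct dev_model dev = { PTHREAD_MUTEX_INITIALIZER, -1 };

        model_tx_lock(&dev);              /* ~ netif_tx_lock_bh(dev) */
        printf("mc list update under owner %ld\n", dev.xmit_lock_owner);
        model_tx_unlock(&dev);            /* ~ netif_tx_unlock_bh(dev) */
        return 0;
    }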
-Index: tmp-xxx/net/core/ethtool.c
-===================================================================
---- tmp-xxx.orig/net/core/ethtool.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/ethtool.c 2006-11-27 10:52:42.000000000 +0000
-@@ -30,7 +30,7 @@
-
- u32 ethtool_op_get_tx_csum(struct net_device *dev)
- {
-- return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
-+ return (dev->features & NETIF_F_ALL_CSUM) != 0;
- }
-
- int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-@@ -551,9 +551,7 @@
- return -EFAULT;
-
- if (edata.data &&
-- !(dev->features & (NETIF_F_IP_CSUM |
-- NETIF_F_NO_CSUM |
-- NETIF_F_HW_CSUM)))
-+ !(dev->features & NETIF_F_ALL_CSUM))
- return -EINVAL;
-
- return __ethtool_set_sg(dev, edata.data);
-@@ -561,7 +559,7 @@
-
- static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
- {
-- struct ethtool_value edata = { ETHTOOL_GTSO };
-+ struct ethtool_value edata = { ETHTOOL_GUFO };
-
- if (!dev->ethtool_ops->get_tso)
- return -EOPNOTSUPP;
-@@ -616,6 +614,29 @@
- return dev->ethtool_ops->set_ufo(dev, edata.data);
- }
-
-+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
-+{
-+ struct ethtool_value edata = { ETHTOOL_GGSO };
-+
-+ edata.data = dev->features & NETIF_F_GSO;
-+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
-+ return -EFAULT;
-+ return 0;
-+}
-+
-+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
-+{
-+ struct ethtool_value edata;
-+
-+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
-+ return -EFAULT;
-+ if (edata.data)
-+ dev->features |= NETIF_F_GSO;
-+ else
-+ dev->features &= ~NETIF_F_GSO;
-+ return 0;
-+}
-+
- static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
- {
- struct ethtool_test test;
-@@ -907,6 +928,12 @@
- case ETHTOOL_SUFO:
- rc = ethtool_set_ufo(dev, useraddr);
- break;
-+ case ETHTOOL_GGSO:
-+ rc = ethtool_get_gso(dev, useraddr);
-+ break;
-+ case ETHTOOL_SGSO:
-+ rc = ethtool_set_gso(dev, useraddr);
-+ break;
- default:
- rc = -EOPNOTSUPP;
- }
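Note: ETHTOOL_GGSO/ETHTOOL_SGSO are ordinary ethtool_value get/set commands,
so the new GSO knob is reachable from userspace with the stock SIOCETHTOOL
pattern. A minimal query, assuming a kernel that carries this patch (mainline
gained these commands together with GSO in 2.6.18) and an interface named
eth0:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    #ifndef ETHTOOL_GGSO
    #define ETHTOOL_GGSO 0x00000023          /* value from the patched header */
    #endif

    int main(void)
    {
        struct ethtool_value eval = { .cmd = ETHTOOL_GGSO };
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
            return 1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);  /* interface assumed */
        ifr.ifr_data = (void *)&eval;

        if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
            perror("ETHTOOL_GGSO");          /* EOPNOTSUPP on older kernels */
        else
            printf("eth0 GSO: %s\n", eval.data ? "on" : "off");
        close(fd);
        return 0;
    }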
-Index: tmp-xxx/net/core/netpoll.c
-===================================================================
---- tmp-xxx.orig/net/core/netpoll.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/netpoll.c 2006-11-27 10:52:42.000000000 +0000
-@@ -273,24 +273,21 @@
-
- do {
- npinfo->tries--;
-- spin_lock(&np->dev->xmit_lock);
-- np->dev->xmit_lock_owner = smp_processor_id();
-+ netif_tx_lock(np->dev);
-
- /*
- * network drivers do not expect to be called if the queue is
- * stopped.
- */
- if (netif_queue_stopped(np->dev)) {
-- np->dev->xmit_lock_owner = -1;
-- spin_unlock(&np->dev->xmit_lock);
-+ netif_tx_unlock(np->dev);
- netpoll_poll(np);
- udelay(50);
- continue;
- }
-
- status = np->dev->hard_start_xmit(skb, np->dev);
-- np->dev->xmit_lock_owner = -1;
-- spin_unlock(&np->dev->xmit_lock);
-+ netif_tx_unlock(np->dev);
-
- /* success */
- if(!status) {
-Index: tmp-xxx/net/core/pktgen.c
-===================================================================
---- tmp-xxx.orig/net/core/pktgen.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/pktgen.c 2006-11-27 10:52:42.000000000 +0000
-@@ -2586,7 +2586,7 @@
- }
- }
-
-- spin_lock_bh(&odev->xmit_lock);
-+ netif_tx_lock_bh(odev);
- if (!netif_queue_stopped(odev)) {
-
- atomic_inc(&(pkt_dev->skb->users));
-@@ -2631,7 +2631,7 @@
- pkt_dev->next_tx_ns = 0;
- }
-
-- spin_unlock_bh(&odev->xmit_lock);
-+ netif_tx_unlock_bh(odev);
-
- /* If pkt_dev->count is zero, then run forever */
- if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
-Index: tmp-xxx/net/core/skbuff.c
-===================================================================
---- tmp-xxx.orig/net/core/skbuff.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/core/skbuff.c 2006-11-27 10:58:31.000000000 +0000
-@@ -164,9 +164,9 @@
- shinfo = skb_shinfo(skb);
- atomic_set(&shinfo->dataref, 1);
- shinfo->nr_frags = 0;
-- shinfo->tso_size = 0;
-- shinfo->tso_segs = 0;
-- shinfo->ufo_size = 0;
-+ shinfo->gso_size = 0;
-+ shinfo->gso_segs = 0;
-+ shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->frag_list = NULL;
-
-@@ -230,9 +230,9 @@
-
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
-- skb_shinfo(skb)->tso_size = 0;
-- skb_shinfo(skb)->tso_segs = 0;
-- skb_shinfo(skb)->ufo_size = 0;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 0;
-+ skb_shinfo(skb)->gso_type = 0;
- skb_shinfo(skb)->frag_list = NULL;
- out:
- return skb;
-@@ -507,9 +507,9 @@
- new->tc_index = old->tc_index;
- #endif
- atomic_set(&new->users, 1);
-- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
-- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
-- skb_shinfo(new)->ufo_size = skb_shinfo(old)->ufo_size;
-+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
-+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
-+ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
- }
-
- /**
-@@ -1822,6 +1822,133 @@
- return 0;
- }
-
-+/**
-+ * skb_segment - Perform protocol segmentation on skb.
-+ * @skb: buffer to segment
-+ * @features: features for the output path (see dev->features)
-+ *
-+ * This function performs segmentation on the given skb. It returns
-+ * the segment at the given position. It returns NULL if there are
-+ * no more segments to generate, or when an error is encountered.
-+ */
-+struct sk_buff *skb_segment(struct sk_buff *skb, int features)
-+{
-+ struct sk_buff *segs = NULL;
-+ struct sk_buff *tail = NULL;
-+ unsigned int mss = skb_shinfo(skb)->gso_size;
-+ unsigned int doffset = skb->data - skb->mac.raw;
-+ unsigned int offset = doffset;
-+ unsigned int headroom;
-+ unsigned int len;
-+ int sg = features & NETIF_F_SG;
-+ int nfrags = skb_shinfo(skb)->nr_frags;
-+ int err = -ENOMEM;
-+ int i = 0;
-+ int pos;
-+
-+ __skb_push(skb, doffset);
-+ headroom = skb_headroom(skb);
-+ pos = skb_headlen(skb);
-+
-+ do {
-+ struct sk_buff *nskb;
-+ skb_frag_t *frag;
-+ int hsize, nsize;
-+ int k;
-+ int size;
-+
-+ len = skb->len - offset;
-+ if (len > mss)
-+ len = mss;
-+
-+ hsize = skb_headlen(skb) - offset;
-+ if (hsize < 0)
-+ hsize = 0;
-+ nsize = hsize + doffset;
-+ if (nsize > len + doffset || !sg)
-+ nsize = len + doffset;
-+
-+ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
-+ if (unlikely(!nskb))
-+ goto err;
-+
-+ if (segs)
-+ tail->next = nskb;
-+ else
-+ segs = nskb;
-+ tail = nskb;
-+
-+ nskb->dev = skb->dev;
-+ nskb->priority = skb->priority;
-+ nskb->protocol = skb->protocol;
-+ nskb->dst = dst_clone(skb->dst);
-+ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
-+ nskb->pkt_type = skb->pkt_type;
-+ nskb->mac_len = skb->mac_len;
-+
-+ skb_reserve(nskb, headroom);
-+ nskb->mac.raw = nskb->data;
-+ nskb->nh.raw = nskb->data + skb->mac_len;
-+ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
-+ memcpy(skb_put(nskb, doffset), skb->data, doffset);
-+
-+ if (!sg) {
-+ nskb->csum = skb_copy_and_csum_bits(skb, offset,
-+ skb_put(nskb, len),
-+ len, 0);
-+ continue;
-+ }
-+
-+ frag = skb_shinfo(nskb)->frags;
-+ k = 0;
-+
-+ nskb->ip_summed = CHECKSUM_HW;
-+ nskb->csum = skb->csum;
-+ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
-+
-+ while (pos < offset + len) {
-+ BUG_ON(i >= nfrags);
-+
-+ *frag = skb_shinfo(skb)->frags[i];
-+ get_page(frag->page);
-+ size = frag->size;
-+
-+ if (pos < offset) {
-+ frag->page_offset += offset - pos;
-+ frag->size -= offset - pos;
-+ }
-+
-+ k++;
-+
-+ if (pos + size <= offset + len) {
-+ i++;
-+ pos += size;
-+ } else {
-+ frag->size -= pos + size - (offset + len);
-+ break;
-+ }
-+
-+ frag++;
-+ }
-+
-+ skb_shinfo(nskb)->nr_frags = k;
-+ nskb->data_len = len - hsize;
-+ nskb->len += nskb->data_len;
-+ nskb->truesize += nskb->data_len;
-+ } while ((offset += len) < skb->len);
-+
-+ return segs;
-+
-+err:
-+ while ((skb = segs)) {
-+ segs = skb->next;
-+ kfree(skb);
-+ }
-+ return ERR_PTR(err);
-+}
-+
-+EXPORT_SYMBOL_GPL(skb_segment);
-+
- void __init skb_init(void)
- {
- skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
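Note: skb_segment() above walks one packet (linear head plus page fragments)
and cuts it into mss-sized outputs. 'offset' tracks how much payload has been
emitted, 'pos' tracks how far into the fragment array the walk has advanced,
and a fragment that straddles a segment boundary is deliberately not consumed,
so the next segment re-slices it at an adjusted page offset. A standalone
model of just that bookkeeping, with made-up sizes:

    #include <stdio.h>

    int main(void)
    {
        int frags[] = { 1000, 2000, 500 };   /* page-fragment sizes */
        int nfrags = 3, headlen = 300, mss = 1448;
        int total = headlen, offset = 0, pos = headlen, i = 0, len;

        for (int f = 0; f < nfrags; f++)
            total += frags[f];

        do {
            len = total - offset;
            if (len > mss)
                len = mss;                   /* each segment carries <= mss */

            int hsize = headlen - offset;    /* linear bytes for this one */
            if (hsize < 0)
                hsize = 0;
            printf("segment @%-4d: %4d bytes (%d from linear head)\n",
                   offset, len, hsize);

            while (pos < offset + len && i < nfrags) {
                int size = frags[i];
                int start = pos < offset ? offset - pos : 0; /* page_offset */
                int end = size;

                if (pos + size > offset + len)
                    end = offset + len - pos; /* straddles: trim this slice */
                printf("  frag %d bytes [%d,%d)\n", i, start, end);
                if (end < size)
                    break;                    /* same frag reused next time */
                pos += size;
                i++;
            }
        } while ((offset += len) < total);
        return 0;
    }

Running it shows the middle 2000-byte fragment contributing slices to three
consecutive segments, which is the case the pos/offset dance exists for.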
-Index: tmp-xxx/net/decnet/dn_nsp_in.c
-===================================================================
---- tmp-xxx.orig/net/decnet/dn_nsp_in.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/decnet/dn_nsp_in.c 2006-11-27 10:52:42.000000000 +0000
-@@ -801,8 +801,7 @@
- * We linearize everything except data segments here.
- */
- if (cb->nsp_flags & ~0x60) {
-- if (unlikely(skb_is_nonlinear(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0)
-+ if (unlikely(skb_linearize(skb)))
- goto free_out;
- }
-
-Index: tmp-xxx/net/decnet/dn_route.c
-===================================================================
---- tmp-xxx.orig/net/decnet/dn_route.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/decnet/dn_route.c 2006-11-27 10:52:42.000000000 +0000
-@@ -629,8 +629,7 @@
- padlen);
-
- if (flags & DN_RT_PKT_CNTL) {
-- if (unlikely(skb_is_nonlinear(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0)
-+ if (unlikely(skb_linearize(skb)))
- goto dump_it;
-
- switch(flags & DN_RT_CNTL_MSK) {
-Index: tmp-xxx/net/ipv4/af_inet.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/af_inet.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/af_inet.c 2006-11-27 10:52:42.000000000 +0000
-@@ -68,6 +68,7 @@
- */
-
- #include <linux/config.h>
-+#include <linux/err.h>
- #include <linux/errno.h>
- #include <linux/types.h>
- #include <linux/socket.h>
-@@ -1084,6 +1085,54 @@
-
- EXPORT_SYMBOL(inet_sk_rebuild_header);
-
-+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
-+{
-+ struct sk_buff *segs = ERR_PTR(-EINVAL);
-+ struct iphdr *iph;
-+ struct net_protocol *ops;
-+ int proto;
-+ int ihl;
-+ int id;
-+
-+ if (!pskb_may_pull(skb, sizeof(*iph)))
-+ goto out;
-+
-+ iph = skb->nh.iph;
-+ ihl = iph->ihl * 4;
-+ if (ihl < sizeof(*iph))
-+ goto out;
-+
-+ if (!pskb_may_pull(skb, ihl))
-+ goto out;
-+
-+ skb->h.raw = __skb_pull(skb, ihl);
-+ iph = skb->nh.iph;
-+ id = ntohs(iph->id);
-+ proto = iph->protocol & (MAX_INET_PROTOS - 1);
-+ segs = ERR_PTR(-EPROTONOSUPPORT);
-+
-+ rcu_read_lock();
-+ ops = rcu_dereference(inet_protos[proto]);
-+ if (ops && ops->gso_segment)
-+ segs = ops->gso_segment(skb, features);
-+ rcu_read_unlock();
-+
-+ if (!segs || unlikely(IS_ERR(segs)))
-+ goto out;
-+
-+ skb = segs;
-+ do {
-+ iph = skb->nh.iph;
-+ iph->id = htons(id++);
-+ iph->tot_len = htons(skb->len - skb->mac_len);
-+ iph->check = 0;
-+ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
-+ } while ((skb = skb->next));
-+
-+out:
-+ return segs;
-+}
-+
- #ifdef CONFIG_IP_MULTICAST
- static struct net_protocol igmp_protocol = {
- .handler = igmp_rcv,
-@@ -1093,6 +1142,7 @@
- static struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
-+ .gso_segment = tcp_tso_segment,
- .no_policy = 1,
- };
-
-@@ -1138,6 +1188,7 @@
- static struct packet_type ip_packet_type = {
- .type = __constant_htons(ETH_P_IP),
- .func = ip_rcv,
-+ .gso_segment = inet_gso_segment,
- };
-
- static int __init inet_init(void)
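Note: after the inner protocol has produced the segment list,
inet_gso_segment() fixes each output's IP header: consecutive IP IDs, a
per-segment tot_len, and a fresh header checksum (ip_fast_csum is the usual
RFC 1071 one's-complement sum). A runnable model of that per-segment loop
over a bare 20-byte header, sizes and id chosen for illustration:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    static uint16_t ip_checksum(const uint16_t *w, int words)
    {
        uint32_t sum = 0;

        while (words--)
            sum += *w++;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);  /* end-around carry */
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint16_t iph[10] = { htons(0x4500) };    /* version 4, ihl 5 */
        uint16_t id = 1000;
        int seg_len[] = { 1500, 1500, 720 };

        for (int s = 0; s < 3; s++) {            /* inet_gso_segment's loop */
            iph[1] = htons(seg_len[s]);          /* iph->tot_len, per segment */
            iph[2] = htons(id++);                /* iph->id, consecutive */
            iph[5] = 0;                          /* iph->check = 0 */
            iph[5] = ip_checksum(iph, 10);       /* ip_fast_csum equivalent */
            printf("seg %d: id=%u tot_len=%d check=0x%04x\n",
                   s, id - 1, seg_len[s], ntohs(iph[5]));
        }
        return 0;
    }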
-Index: tmp-xxx/net/ipv4/ip_output.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/ip_output.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/ip_output.c 2006-11-27 10:52:42.000000000 +0000
-@@ -210,8 +210,7 @@
- return dst_output(skb);
- }
- #endif
-- if (skb->len > dst_mtu(skb->dst) &&
-- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-+ if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
- return ip_fragment(skb, ip_finish_output2);
- else
- return ip_finish_output2(skb);
-@@ -362,7 +361,7 @@
- }
-
- ip_select_ident_more(iph, &rt->u.dst, sk,
-- (skb_shinfo(skb)->tso_segs ?: 1) - 1);
-+ (skb_shinfo(skb)->gso_segs ?: 1) - 1);
-
- /* Add an IP checksum. */
- ip_send_check(iph);
-@@ -743,7 +742,8 @@
- (length - transhdrlen));
- if (!err) {
- /* specify the length of each IP datagram fragment*/
-- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
-+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
-+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
- __skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
-@@ -839,7 +839,7 @@
- */
- if (transhdrlen &&
- length + fragheaderlen <= mtu &&
-- rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
-+ rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
- !exthdrlen)
- csummode = CHECKSUM_HW;
-
-@@ -1086,14 +1086,16 @@
-
- inet->cork.length += size;
- if ((sk->sk_protocol == IPPROTO_UDP) &&
-- (rt->u.dst.dev->features & NETIF_F_UFO))
-- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
-+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
-+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
-+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
-+ }
-
-
- while (size > 0) {
- int i;
-
-- if (skb_shinfo(skb)->ufo_size)
-+ if (skb_shinfo(skb)->gso_size)
- len = size;
- else {
-
-Index: tmp-xxx/net/ipv4/ipcomp.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/ipcomp.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/ipcomp.c 2006-11-27 10:52:42.000000000 +0000
-@@ -84,7 +84,7 @@
- struct xfrm_decap_state *decap, struct sk_buff *skb)
- {
- u8 nexthdr;
-- int err = 0;
-+ int err = -ENOMEM;
- struct iphdr *iph;
- union {
- struct iphdr iph;
-@@ -92,11 +92,8 @@
- } tmp_iph;
-
-
-- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0) {
-- err = -ENOMEM;
-+ if (skb_linearize_cow(skb))
- goto out;
-- }
-
- skb->ip_summed = CHECKSUM_NONE;
-
-@@ -171,10 +168,8 @@
- goto out_ok;
- }
-
-- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0) {
-+ if (skb_linearize_cow(skb))
- goto out_ok;
-- }
-
- err = ipcomp_compress(x, skb);
- iph = skb->nh.iph;
-Index: tmp-xxx/net/ipv4/tcp.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/tcp.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/tcp.c 2006-11-27 10:52:42.000000000 +0000
-@@ -257,6 +257,7 @@
- #include <linux/fs.h>
- #include <linux/random.h>
- #include <linux/bootmem.h>
-+#include <linux/err.h>
-
- #include <net/icmp.h>
- #include <net/tcp.h>
-@@ -570,7 +571,7 @@
- skb->ip_summed = CHECKSUM_HW;
- tp->write_seq += copy;
- TCP_SKB_CB(skb)->end_seq += copy;
-- skb_shinfo(skb)->tso_segs = 0;
-+ skb_shinfo(skb)->gso_segs = 0;
-
- if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
-@@ -621,14 +622,10 @@
- ssize_t res;
- struct sock *sk = sock->sk;
-
--#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
--
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
-- !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
-+ !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
- return sock_no_sendpage(sock, page, offset, size, flags);
-
--#undef TCP_ZC_CSUM_FLAGS
--
- lock_sock(sk);
- TCP_CHECK_TIMER(sk);
- res = do_tcp_sendpages(sk, &page, offset, size, flags);
-@@ -725,9 +722,7 @@
- /*
- * Check whether we can use HW checksum.
- */
-- if (sk->sk_route_caps &
-- (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-- NETIF_F_HW_CSUM))
-+ if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
- skb->ip_summed = CHECKSUM_HW;
-
- skb_entail(sk, tp, skb);
-@@ -823,7 +818,7 @@
-
- tp->write_seq += copy;
- TCP_SKB_CB(skb)->end_seq += copy;
-- skb_shinfo(skb)->tso_segs = 0;
-+ skb_shinfo(skb)->gso_segs = 0;
-
- from += copy;
- copied += copy;
-@@ -2026,6 +2021,71 @@
- }
-
-
-+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
-+{
-+ struct sk_buff *segs = ERR_PTR(-EINVAL);
-+ struct tcphdr *th;
-+ unsigned thlen;
-+ unsigned int seq;
-+ unsigned int delta;
-+ unsigned int oldlen;
-+ unsigned int len;
-+
-+ if (!pskb_may_pull(skb, sizeof(*th)))
-+ goto out;
-+
-+ th = skb->h.th;
-+ thlen = th->doff * 4;
-+ if (thlen < sizeof(*th))
-+ goto out;
-+
-+ if (!pskb_may_pull(skb, thlen))
-+ goto out;
-+
-+ segs = NULL;
-+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
-+ goto out;
-+
-+ oldlen = (u16)~skb->len;
-+ __skb_pull(skb, thlen);
-+
-+ segs = skb_segment(skb, features);
-+ if (IS_ERR(segs))
-+ goto out;
-+
-+ len = skb_shinfo(skb)->gso_size;
-+ delta = htonl(oldlen + (thlen + len));
-+
-+ skb = segs;
-+ th = skb->h.th;
-+ seq = ntohl(th->seq);
-+
-+ do {
-+ th->fin = th->psh = 0;
-+
-+ th->check = ~csum_fold(th->check + delta);
-+ if (skb->ip_summed != CHECKSUM_HW)
-+ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-+ skb->csum));
-+
-+ seq += len;
-+ skb = skb->next;
-+ th = skb->h.th;
-+
-+ th->seq = htonl(seq);
-+ th->cwr = 0;
-+ } while (skb->next);
-+
-+ delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
-+ th->check = ~csum_fold(th->check + delta);
-+ if (skb->ip_summed != CHECKSUM_HW)
-+ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-+ skb->csum));
-+
-+out:
-+ return segs;
-+}
-+
- extern void __skb_cb_too_small_for_tcp(int, int);
- extern struct tcp_congestion_ops tcp_reno;
-
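Note: the checksum arithmetic in tcp_tso_segment() above is RFC 1624
incremental update. oldlen = (u16)~skb->len stashes the one's-complement
negation of the old length, so folding htonl(oldlen + (thlen + len)) into the
stored sum swaps the old pseudo-header length for the per-segment one without
re-summing any payload. A standalone check that the incremental form agrees
with a full recompute (field values are arbitrary stand-ins):

    #include <stdio.h>
    #include <stdint.h>

    static uint16_t fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);  /* end-around carry */
        return (uint16_t)sum;
    }

    int main(void)
    {
        uint16_t data[] = { 0x1234, 0xabcd };    /* other pseudo-hdr fields */
        uint16_t old_len = 64000;                /* the big GSO superpacket */
        uint16_t new_len = 1448 + 20;            /* per-segment: len + thlen */

        uint16_t full_old = ~fold((uint32_t)data[0] + data[1] + old_len);
        uint16_t full_new = ~fold((uint32_t)data[0] + data[1] + new_len);

        /* incremental: add (~old_len + new_len), as the kernel code does */
        uint16_t delta = fold((uint32_t)(uint16_t)~old_len + new_len);
        uint16_t incr  = ~fold((uint32_t)(uint16_t)~full_old + delta);

        printf("full=0x%04x incremental=0x%04x\n", full_new, incr);
        return 0;   /* with these inputs both print 0x3c42 */
    }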
-Index: tmp-xxx/net/ipv4/tcp_input.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/tcp_input.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/tcp_input.c 2006-11-27 10:52:42.000000000 +0000
-@@ -1072,7 +1072,7 @@
- else
- pkt_len = (end_seq -
- TCP_SKB_CB(skb)->seq);
-- if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
-+ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
- break;
- pcount = tcp_skb_pcount(skb);
- }
-Index: tmp-xxx/net/ipv4/tcp_output.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/tcp_output.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv4/tcp_output.c 2006-11-27 10:52:42.000000000 +0000
-@@ -497,15 +497,17 @@
- /* Avoid the costly divide in the normal
- * non-TSO case.
- */
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
- } else {
- unsigned int factor;
-
- factor = skb->len + (mss_now - 1);
- factor /= mss_now;
-- skb_shinfo(skb)->tso_segs = factor;
-- skb_shinfo(skb)->tso_size = mss_now;
-+ skb_shinfo(skb)->gso_segs = factor;
-+ skb_shinfo(skb)->gso_size = mss_now;
-+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- }
- }
-
-@@ -850,7 +852,7 @@
-
- if (!tso_segs ||
- (tso_segs > 1 &&
-- skb_shinfo(skb)->tso_size != mss_now)) {
-+ tcp_skb_mss(skb) != mss_now)) {
- tcp_set_skb_tso_segs(sk, skb, mss_now);
- tso_segs = tcp_skb_pcount(skb);
- }
-@@ -1510,8 +1512,9 @@
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
- if (!pskb_trim(skb, 0)) {
- TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
- skb->ip_summed = CHECKSUM_NONE;
- skb->csum = 0;
- }
-@@ -1716,8 +1719,9 @@
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
- TCP_SKB_CB(skb)->sacked = 0;
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
-
- /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
- TCP_SKB_CB(skb)->seq = tp->write_seq;
-@@ -1749,8 +1753,9 @@
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
- TCP_SKB_CB(skb)->sacked = 0;
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
-
- /* Send it off. */
- TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
-@@ -1833,8 +1838,9 @@
- TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
- TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
- TCP_SKB_CB(skb)->sacked = 0;
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
- th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
- if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-@@ -1937,8 +1943,9 @@
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
- TCP_ECN_send_syn(sk, tp, buff);
- TCP_SKB_CB(buff)->sacked = 0;
-- skb_shinfo(buff)->tso_segs = 1;
-- skb_shinfo(buff)->tso_size = 0;
-+ skb_shinfo(buff)->gso_segs = 1;
-+ skb_shinfo(buff)->gso_size = 0;
-+ skb_shinfo(buff)->gso_type = 0;
- buff->csum = 0;
- TCP_SKB_CB(buff)->seq = tp->write_seq++;
- TCP_SKB_CB(buff)->end_seq = tp->write_seq;
-@@ -2042,8 +2049,9 @@
- buff->csum = 0;
- TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(buff)->sacked = 0;
-- skb_shinfo(buff)->tso_segs = 1;
-- skb_shinfo(buff)->tso_size = 0;
-+ skb_shinfo(buff)->gso_segs = 1;
-+ skb_shinfo(buff)->gso_size = 0;
-+ skb_shinfo(buff)->gso_type = 0;
-
- /* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
-@@ -2078,8 +2086,9 @@
- skb->csum = 0;
- TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
- TCP_SKB_CB(skb)->sacked = urgent;
-- skb_shinfo(skb)->tso_segs = 1;
-- skb_shinfo(skb)->tso_size = 0;
-+ skb_shinfo(skb)->gso_segs = 1;
-+ skb_shinfo(skb)->gso_size = 0;
-+ skb_shinfo(skb)->gso_type = 0;
-
- /* Use a previous sequence. This should cause the other
- * end to send an ack. Don't queue or clone SKB, just
-Index: tmp-xxx/net/ipv4/xfrm4_output.c
-===================================================================
---- tmp-xxx.orig/net/ipv4/xfrm4_output.c 2006-11-27 10:52:32.000000000 +0000
-+++ tmp-xxx/net/ipv4/xfrm4_output.c 2006-11-27 10:52:42.000000000 +0000
-@@ -9,6 +9,8 @@
- */
-
- #include <linux/compiler.h>
-+#include <linux/if_ether.h>
-+#include <linux/kernel.h>
- #include <linux/skbuff.h>
- #include <linux/spinlock.h>
- #include <linux/netfilter_ipv4.h>
-@@ -158,16 +160,10 @@
- goto out_exit;
- }
-
--static int xfrm4_output_finish(struct sk_buff *skb)
-+static int xfrm4_output_finish2(struct sk_buff *skb)
- {
- int err;
-
--#ifdef CONFIG_NETFILTER
-- if (!skb->dst->xfrm) {
-- IPCB(skb)->flags |= IPSKB_REROUTED;
-- return dst_output(skb);
-- }
--#endif
- while (likely((err = xfrm4_output_one(skb)) == 0)) {
- nf_reset(skb);
-
-@@ -180,7 +176,7 @@
- return dst_output(skb);
-
- err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
-- skb->dst->dev, xfrm4_output_finish);
-+ skb->dst->dev, xfrm4_output_finish2);
- if (unlikely(err != 1))
- break;
- }
-@@ -188,6 +184,48 @@
- return err;
- }
-
-+static int xfrm4_output_finish(struct sk_buff *skb)
-+{
-+ struct sk_buff *segs;
-+
-+#ifdef CONFIG_NETFILTER
-+ if (!skb->dst->xfrm) {
-+ IPCB(skb)->flags |= IPSKB_REROUTED;
-+ return dst_output(skb);
-+ }
-+#endif
-+
-+ if (!skb_shinfo(skb)->gso_size)
-+ return xfrm4_output_finish2(skb);
-+
-+ skb->protocol = htons(ETH_P_IP);
-+ segs = skb_gso_segment(skb, 0);
-+ kfree_skb(skb);
-+ if (unlikely(IS_ERR(segs)))
-+ return PTR_ERR(segs);
-+
-+ do {
-+ struct sk_buff *nskb = segs->next;
-+ int err;
-+
-+ segs->next = NULL;
-+ err = xfrm4_output_finish2(segs);
-+
-+ if (unlikely(err)) {
-+ while ((segs = nskb)) {
-+ nskb = segs->next;
-+ segs->next = NULL;
-+ kfree_skb(segs);
-+ }
-+ return err;
-+ }
-+
-+ segs = nskb;
-+ } while (segs);
-+
-+ return 0;
-+}
-+
- int xfrm4_output(struct sk_buff *skb)
- {
- return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
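Note: the xfrm4_output_finish() GSO path above has a fixed shape: segment the
oversized skb (skb_gso_segment never hands back the original, which is freed
unconditionally), push each segment through the real output routine, and free
the unsent remainder if one fails; the IPv6 twin further down is line-for-line
the same. A userspace sketch of that consume-or-cleanup loop, with
hypothetical types and the error case contrived:

    #include <stdio.h>
    #include <stdlib.h>

    struct seg { int id; struct seg *next; };

    static int output_one(struct seg *s)     /* models xfrm4_output_finish2(),
                                                which always consumes its skb */
    {
        int err = (s->id == 2) ? -1 : 0;     /* pretend segment 2 fails */

        if (!err)
            printf("sent %d\n", s->id);
        free(s);
        return err;
    }

    static int output_gso(struct seg *segs)
    {
        do {
            struct seg *nskb = segs->next;

            segs->next = NULL;               /* hand over one segment */
            if (output_one(segs)) {
                while ((segs = nskb)) {      /* free the unsent remainder */
                    nskb = segs->next;
                    free(segs);
                }
                return -1;
            }
            segs = nskb;
        } while (segs);
        return 0;
    }

    int main(void)
    {
        struct seg *list = NULL;

        for (int id = 3; id >= 1; id--) {    /* build segments 1 -> 2 -> 3 */
            struct seg *s = malloc(sizeof(*s));
            if (!s)
                return 1;
            s->id = id;
            s->next = list;
            list = s;
        }
        return output_gso(list) ? 1 : 0;     /* sends 1, fails at 2, frees 3 */
    }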
-Index: tmp-xxx/net/ipv6/ip6_output.c
-===================================================================
---- tmp-xxx.orig/net/ipv6/ip6_output.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv6/ip6_output.c 2006-11-27 10:52:42.000000000 +0000
-@@ -147,7 +147,7 @@
-
- int ip6_output(struct sk_buff *skb)
- {
-- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
-+ if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
- dst_allfrag(skb->dst))
- return ip6_fragment(skb, ip6_output2);
- else
-@@ -829,8 +829,9 @@
- struct frag_hdr fhdr;
-
- /* specify the length of each IP datagram fragment*/
-- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
-- sizeof(struct frag_hdr);
-+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
-+ sizeof(struct frag_hdr);
-+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
- ipv6_select_ident(skb, &fhdr);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
- __skb_queue_tail(&sk->sk_write_queue, skb);
-Index: tmp-xxx/net/ipv6/ipcomp6.c
-===================================================================
---- tmp-xxx.orig/net/ipv6/ipcomp6.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv6/ipcomp6.c 2006-11-27 10:52:42.000000000 +0000
-@@ -64,7 +64,7 @@
-
- static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
- {
-- int err = 0;
-+ int err = -ENOMEM;
- u8 nexthdr = 0;
- int hdr_len = skb->h.raw - skb->nh.raw;
- unsigned char *tmp_hdr = NULL;
-@@ -75,11 +75,8 @@
- struct crypto_tfm *tfm;
- int cpu;
-
-- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0) {
-- err = -ENOMEM;
-+ if (skb_linearize_cow(skb))
- goto out;
-- }
-
- skb->ip_summed = CHECKSUM_NONE;
-
-@@ -158,10 +155,8 @@
- goto out_ok;
- }
-
-- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-- skb_linearize(skb, GFP_ATOMIC) != 0) {
-+ if (skb_linearize_cow(skb))
- goto out_ok;
-- }
-
- /* compression */
- plen = skb->len - hdr_len;
-Index: tmp-xxx/net/ipv6/xfrm6_output.c
-===================================================================
---- tmp-xxx.orig/net/ipv6/xfrm6_output.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/ipv6/xfrm6_output.c 2006-11-27 10:52:42.000000000 +0000
-@@ -151,7 +151,7 @@
- goto out_exit;
- }
-
--static int xfrm6_output_finish(struct sk_buff *skb)
-+static int xfrm6_output_finish2(struct sk_buff *skb)
- {
- int err;
-
-@@ -167,7 +167,7 @@
- return dst_output(skb);
-
- err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
-- skb->dst->dev, xfrm6_output_finish);
-+ skb->dst->dev, xfrm6_output_finish2);
- if (unlikely(err != 1))
- break;
- }
-@@ -175,6 +175,41 @@
- return err;
- }
-
-+static int xfrm6_output_finish(struct sk_buff *skb)
-+{
-+ struct sk_buff *segs;
-+
-+ if (!skb_shinfo(skb)->gso_size)
-+ return xfrm6_output_finish2(skb);
-+
-+ skb->protocol = htons(ETH_P_IP);
-+ segs = skb_gso_segment(skb, 0);
-+ kfree_skb(skb);
-+ if (unlikely(IS_ERR(segs)))
-+ return PTR_ERR(segs);
-+
-+ do {
-+ struct sk_buff *nskb = segs->next;
-+ int err;
-+
-+ segs->next = NULL;
-+ err = xfrm6_output_finish2(segs);
-+
-+ if (unlikely(err)) {
-+ while ((segs = nskb)) {
-+ nskb = segs->next;
-+ segs->next = NULL;
-+ kfree_skb(segs);
-+ }
-+ return err;
-+ }
-+
-+ segs = nskb;
-+ } while (segs);
-+
-+ return 0;
-+}
-+
- int xfrm6_output(struct sk_buff *skb)
- {
- return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
-Index: tmp-xxx/net/sched/sch_generic.c
-===================================================================
---- tmp-xxx.orig/net/sched/sch_generic.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/sched/sch_generic.c 2006-11-27 10:52:42.000000000 +0000
-@@ -72,9 +72,9 @@
- dev->queue_lock serializes queue accesses for this device
- AND dev->qdisc pointer itself.
-
-- dev->xmit_lock serializes accesses to device driver.
-+ netif_tx_lock serializes accesses to device driver.
-
-- dev->queue_lock and dev->xmit_lock are mutually exclusive,
-+ dev->queue_lock and netif_tx_lock are mutually exclusive,
- if one is grabbed, another must be free.
- */
-
-@@ -90,14 +90,17 @@
- NOTE: Called under dev->queue_lock with locally disabled BH.
- */
-
--int qdisc_restart(struct net_device *dev)
-+static inline int qdisc_restart(struct net_device *dev)
- {
- struct Qdisc *q = dev->qdisc;
- struct sk_buff *skb;
-
- /* Dequeue packet */
-- if ((skb = q->dequeue(q)) != NULL) {
-+ if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
- unsigned nolock = (dev->features & NETIF_F_LLTX);
-+
-+ dev->gso_skb = NULL;
-+
- /*
- * When the driver has LLTX set it does its own locking
- * in start_xmit. No need to add additional overhead by
-@@ -108,7 +111,7 @@
- * will be requeued.
- */
- if (!nolock) {
-- if (!spin_trylock(&dev->xmit_lock)) {
-+ if (!netif_tx_trylock(dev)) {
- collision:
- /* So, someone grabbed the driver. */
-
-@@ -126,8 +129,6 @@
- __get_cpu_var(netdev_rx_stat).cpu_collision++;
- goto requeue;
- }
-- /* Remember that the driver is grabbed by us. */
-- dev->xmit_lock_owner = smp_processor_id();
- }
-
- {
-@@ -136,14 +137,11 @@
-
- if (!netif_queue_stopped(dev)) {
- int ret;
-- if (netdev_nit)
-- dev_queue_xmit_nit(skb, dev);
-
-- ret = dev->hard_start_xmit(skb, dev);
-+ ret = dev_hard_start_xmit(skb, dev);
- if (ret == NETDEV_TX_OK) {
- if (!nolock) {
-- dev->xmit_lock_owner = -1;
-- spin_unlock(&dev->xmit_lock);
-+ netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- return -1;
-@@ -157,8 +155,7 @@
- /* NETDEV_TX_BUSY - we need to requeue */
- /* Release the driver */
- if (!nolock) {
-- dev->xmit_lock_owner = -1;
-- spin_unlock(&dev->xmit_lock);
-+ netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
-@@ -175,7 +172,10 @@
- */
-
- requeue:
-- q->ops->requeue(skb, q);
-+ if (skb->next)
-+ dev->gso_skb = skb;
-+ else
-+ q->ops->requeue(skb, q);
- netif_schedule(dev);
- return 1;
- }
-@@ -183,11 +183,23 @@
- return q->q.qlen;
- }
-
-+void __qdisc_run(struct net_device *dev)
-+{
-+ if (unlikely(dev->qdisc == &noop_qdisc))
-+ goto out;
-+
-+ while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
-+ /* NOTHING */;
-+
-+out:
-+ clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
-+}
-+
- static void dev_watchdog(unsigned long arg)
- {
- struct net_device *dev = (struct net_device *)arg;
-
-- spin_lock(&dev->xmit_lock);
-+ netif_tx_lock(dev);
- if (dev->qdisc != &noop_qdisc) {
- if (netif_device_present(dev) &&
- netif_running(dev) &&
-@@ -201,7 +213,7 @@
- dev_hold(dev);
- }
- }
-- spin_unlock(&dev->xmit_lock);
-+ netif_tx_unlock(dev);
-
- dev_put(dev);
- }
-@@ -225,17 +237,17 @@
-
- static void dev_watchdog_up(struct net_device *dev)
- {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- __netdev_watchdog_up(dev);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- }
-
- static void dev_watchdog_down(struct net_device *dev)
- {
-- spin_lock_bh(&dev->xmit_lock);
-+ netif_tx_lock_bh(dev);
- if (del_timer(&dev->watchdog_timer))
- __dev_put(dev);
-- spin_unlock_bh(&dev->xmit_lock);
-+ netif_tx_unlock_bh(dev);
- }
-
- void netif_carrier_on(struct net_device *dev)
-@@ -577,10 +589,17 @@
-
- dev_watchdog_down(dev);
-
-- while (test_bit(__LINK_STATE_SCHED, &dev->state))
-+ /* Wait for outstanding dev_queue_xmit calls. */
-+ synchronize_rcu();
-+
-+ /* Wait for outstanding qdisc_run calls. */
-+ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
- yield();
-
-- spin_unlock_wait(&dev->xmit_lock);
-+ if (dev->gso_skb) {
-+ kfree_skb(dev->gso_skb);
-+ dev->gso_skb = NULL;
-+ }
- }
-
- void dev_init_scheduler(struct net_device *dev)
-@@ -622,6 +641,5 @@
- EXPORT_SYMBOL(qdisc_alloc);
- EXPORT_SYMBOL(qdisc_destroy);
- EXPORT_SYMBOL(qdisc_reset);
--EXPORT_SYMBOL(qdisc_restart);
- EXPORT_SYMBOL(qdisc_lock_tree);
- EXPORT_SYMBOL(qdisc_unlock_tree);
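Note: the qdisc changes above exist because a half-sent GSO chain cannot go
back into a qdisc (->requeue expects a single skb), so it is parked in the new
dev->gso_skb and always retried ahead of fresh traffic, preserving segment
order. A toy model of that dequeue preference (a one-slot "qdisc" for
brevity; as with sk_buff, ->next doubles as the chain link):

    #include <stdio.h>

    struct pkt { int id; struct pkt *next; };

    static struct pkt *gso_skb;              /* models dev->gso_skb */
    static struct pkt *queue;                /* models q->dequeue(), crudely */

    static struct pkt *dequeue(void)
    {
        struct pkt *skb = gso_skb;

        if (skb)
            gso_skb = NULL;                  /* parked chain goes first */
        else if ((skb = queue))
            queue = NULL;
        return skb;
    }

    static void requeue(struct pkt *skb)
    {
        if (skb->next)
            gso_skb = skb;                   /* chains park in dev->gso_skb */
        else
            queue = skb;                     /* plain skbs go back as before */
    }

    int main(void)
    {
        struct pkt seg2 = {12, NULL}, chain = {11, &seg2}; /* half-sent list */
        struct pkt fresh = {9, NULL};

        queue = &fresh;
        requeue(&chain);                     /* driver returned busy mid-chain */
        printf("next tx: %d\n", dequeue()->id); /* 11: the chain, not 9 */
        return 0;
    }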
-Index: tmp-xxx/net/sched/sch_teql.c
-===================================================================
---- tmp-xxx.orig/net/sched/sch_teql.c 2006-11-15 10:38:39.000000000 +0000
-+++ tmp-xxx/net/sched/sch_teql.c 2006-11-27 10:52:42.000000000 +0000
-@@ -302,20 +302,17 @@
-
- switch (teql_resolve(skb, skb_res, slave)) {
- case 0:
-- if (spin_trylock(&slave->xmit_lock)) {
-- slave->xmit_lock_owner = smp_processor_id();
-+ if (netif_tx_trylock(slave)) {
- if (!netif_queue_stopped(slave) &&
- slave->hard_start_xmit(skb, slave) == 0) {
-- slave->xmit_lock_owner = -1;
-- spin_unlock(&slave->xmit_lock);
-+ netif_tx_unlock(slave);
- master->slaves = NEXT_SLAVE(q);
- netif_wake_queue(dev);
- master->stats.tx_packets++;
- master->stats.tx_bytes += len;
- return 0;
- }
-- slave->xmit_lock_owner = -1;
-- spin_unlock(&slave->xmit_lock);
-+ netif_tx_unlock(slave);
- }
- if (netif_queue_stopped(dev))
- busy = 1;
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/tcp.c ./net/ipv4/tcp.c
---- ../orig-linux-2.6.16.29/net/ipv4/tcp.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv4/tcp.c 2006-09-19 13:59:42.000000000 +0100
-@@ -2042,13 +2042,19 @@ struct sk_buff *tcp_tso_segment(struct s
- if (!pskb_may_pull(skb, thlen))
- goto out;
-
-- segs = NULL;
-- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
-- goto out;
--
- oldlen = (u16)~skb->len;
- __skb_pull(skb, thlen);
-
-+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-+ /* Packet is from an untrusted source, reset gso_segs. */
-+ int mss = skb_shinfo(skb)->gso_size;
-+
-+ skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
-+
-+ segs = NULL;
-+ goto out;
-+ }
-+
- segs = skb_segment(skb, features);
- if (IS_ERR(segs))
- goto out;
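Note: this follow-up matters for Xen because gso_segs can arrive from an
untrusted source (here typically a guest frontend) and is only advisory under
NETIF_F_GSO_ROBUST, so it is recomputed rather than believed. With the TCP
header already pulled, skb->len is pure payload and the reset is a plain
ceiling division:

    #include <stdio.h>

    int main(void)
    {
        unsigned int payload = 4000, mss = 1448;     /* illustrative sizes */
        unsigned int gso_segs = (payload + mss - 1) / mss;  /* ceil division */

        printf("gso_segs = %u\n", gso_segs);         /* 3 */
        return 0;
    }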
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/bnx2.c ./drivers/net/bnx2.c
---- ../orig-linux-2.6.16.29/drivers/net/bnx2.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/bnx2.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1593,7 +1593,7 @@ bnx2_tx_int(struct bnx2 *bp)
- skb = tx_buf->skb;
- #ifdef BCM_TSO
- /* partial BD completions possible with TSO packets */
-- if (skb_shinfo(skb)->gso_size) {
-+ if (skb_is_gso(skb)) {
- u16 last_idx, last_ring_idx;
-
- last_idx = sw_cons +
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c ./drivers/net/chelsio/sge.c
---- ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/chelsio/sge.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1419,7 +1419,7 @@ int t1_start_xmit(struct sk_buff *skb, s
- struct cpl_tx_pkt *cpl;
-
- #ifdef NETIF_F_TSO
-- if (skb_shinfo(skb)->gso_size) {
-+ if (skb_is_gso(skb)) {
- int eth_type;
- struct cpl_tx_pkt_lso *hdr;
-
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c ./drivers/net/e1000/e1000_main.c
---- ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/e1000/e1000_main.c 2006-09-19 13:59:46.000000000 +0100
-@@ -2526,7 +2526,7 @@ e1000_tso(struct e1000_adapter *adapter,
- uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
- int err;
-
-- if (skb_shinfo(skb)->gso_size) {
-+ if (skb_is_gso(skb)) {
- if (skb_header_cloned(skb)) {
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (err)
-@@ -2651,7 +2651,7 @@ e1000_tx_map(struct e1000_adapter *adapt
- * tso gets written back prematurely before the data is fully
- * DMAd to the controller */
- if (!skb->data_len && tx_ring->last_tx_tso &&
-- !skb_shinfo(skb)->gso_size) {
-+ !skb_is_gso(skb)) {
- tx_ring->last_tx_tso = 0;
- size -= 4;
- }
-@@ -2934,8 +2934,7 @@ e1000_xmit_frame(struct sk_buff *skb, st
-
- #ifdef NETIF_F_TSO
- /* Controller Erratum workaround */
-- if (!skb->data_len && tx_ring->last_tx_tso &&
-- !skb_shinfo(skb)->gso_size)
-+ if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
- count++;
- #endif
-
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/forcedeth.c ./drivers/net/forcedeth.c
---- ../orig-linux-2.6.16.29/drivers/net/forcedeth.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/forcedeth.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1105,7 +1105,7 @@ static int nv_start_xmit(struct sk_buff
- np->tx_skbuff[nr] = skb;
-
- #ifdef NETIF_F_TSO
-- if (skb_shinfo(skb)->gso_size)
-+ if (skb_is_gso(skb))
- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
- else
- #endif
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c ./drivers/net/ixgb/ixgb_main.c
---- ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/ixgb/ixgb_main.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1163,7 +1163,7 @@ ixgb_tso(struct ixgb_adapter *adapter, s
- uint16_t ipcse, tucse, mss;
- int err;
-
-- if(likely(skb_shinfo(skb)->gso_size)) {
-+ if (likely(skb_is_gso(skb))) {
- if (skb_header_cloned(skb)) {
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (err)
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/loopback.c ./drivers/net/loopback.c
---- ../orig-linux-2.6.16.29/drivers/net/loopback.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/loopback.c 2006-09-19 13:59:46.000000000 +0100
-@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff
- #endif
-
- #ifdef LOOPBACK_TSO
-- if (skb_shinfo(skb)->gso_size) {
-+ if (skb_is_gso(skb)) {
- BUG_ON(skb->protocol != htons(ETH_P_IP));
- BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
-
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/sky2.c ./drivers/net/sky2.c
---- ../orig-linux-2.6.16.29/drivers/net/sky2.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/sky2.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1125,7 +1125,7 @@ static unsigned tx_le_req(const struct s
- count = sizeof(dma_addr_t) / sizeof(u32);
- count += skb_shinfo(skb)->nr_frags * count;
-
-- if (skb_shinfo(skb)->gso_size)
-+ if (skb_is_gso(skb))
- ++count;
-
- if (skb->ip_summed == CHECKSUM_HW)
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/typhoon.c ./drivers/net/typhoon.c
---- ../orig-linux-2.6.16.29/drivers/net/typhoon.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/net/typhoon.c 2006-09-19 13:59:46.000000000 +0100
-@@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, st
- * If problems develop with TSO, check this first.
- */
- numDesc = skb_shinfo(skb)->nr_frags + 1;
-- if(skb_tso_size(skb))
-+ if (skb_is_gso(skb))
- numDesc++;
-
- /* When checking for free space in the ring, we need to also
-@@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, st
- TYPHOON_TX_PF_VLAN_TAG_SHIFT);
- }
-
-- if(skb_tso_size(skb)) {
-+ if (skb_is_gso(skb)) {
- first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT;
- first_txd->numDesc++;
-
-diff -pruN ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c ./drivers/s390/net/qeth_main.c
---- ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./drivers/s390/net/qeth_main.c 2006-09-19 13:59:46.000000000 +0100
-@@ -4454,7 +4454,7 @@ qeth_send_packet(struct qeth_card *card,
- queue = card->qdio.out_qs
- [qeth_get_priority_queue(card, skb, ipv, cast_type)];
-
-- if (skb_shinfo(skb)->gso_size)
-+ if (skb_is_gso(skb))
- large_send = card->options.large_send;
-
- /*are we able to do TSO ? If so ,prepare and send it from here */
-@@ -4501,8 +4501,7 @@ qeth_send_packet(struct qeth_card *card,
- card->stats.tx_packets++;
- card->stats.tx_bytes += skb->len;
- #ifdef CONFIG_QETH_PERF_STATS
-- if (skb_shinfo(skb)->gso_size &&
-- !(large_send == QETH_LARGE_SEND_NO)) {
-+ if (skb_is_gso(skb) && !(large_send == QETH_LARGE_SEND_NO)) {
- card->perf_stats.large_send_bytes += skb->len;
- card->perf_stats.large_send_cnt++;
- }
-diff -pruN ../orig-linux-2.6.16.29/include/linux/netdevice.h ./include/linux/netdevice.h
---- ../orig-linux-2.6.16.29/include/linux/netdevice.h 2006-09-19 13:59:20.000000000 +0100
-+++ ./include/linux/netdevice.h 2006-09-19 13:59:46.000000000 +0100
-@@ -541,6 +541,7 @@ struct packet_type {
- struct net_device *);
- struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
-+ int (*gso_send_check)(struct sk_buff *skb);
- void *af_packet_priv;
- struct list_head list;
- };
-@@ -1001,14 +1002,15 @@ extern void linkwatch_run_queue(void);
-
- static inline int skb_gso_ok(struct sk_buff *skb, int features)
- {
-- int feature = skb_shinfo(skb)->gso_size ?
-- skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0;
-+ int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT;
- return (features & feature) == feature;
- }
-
- static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
- {
-- return !skb_gso_ok(skb, dev->features);
-+ return skb_is_gso(skb) &&
-+ (!skb_gso_ok(skb, dev->features) ||
-+ unlikely(skb->ip_summed != CHECKSUM_HW));
- }
-
- #endif /* __KERNEL__ */
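Note: skb_gso_ok() works because each SKB_GSO_* type value is chosen so that
shifting it by NETIF_F_GSO_SHIFT lands on the matching NETIF_F_* feature bit,
turning "can the device do this in hardware?" into one mask test; the reworked
netif_needs_gso() additionally forces the software path whenever the checksum
was not left at CHECKSUM_HW. A self-contained model with illustrative bit
values, not the kernel's:

    #include <stdio.h>

    #define GSO_SHIFT   16
    #define F_TSO       (1 << (GSO_SHIFT + 0))  /* SKB_GSO_TCPV4 << shift */
    #define CHECKSUM_HW 1

    struct skbm { int gso_size, gso_type, ip_summed; };

    static int skb_is_gso(const struct skbm *s) { return s->gso_size != 0; }

    static int skb_gso_ok(const struct skbm *s, int features)
    {
        int feature = s->gso_type << GSO_SHIFT;
        return (features & feature) == feature;
    }

    static int netif_needs_gso(int dev_features, const struct skbm *s)
    {
        return skb_is_gso(s) &&
               (!skb_gso_ok(s, dev_features) || s->ip_summed != CHECKSUM_HW);
    }

    int main(void)
    {
        struct skbm tcp = { 1448, 1 /* SKB_GSO_TCPV4 */, CHECKSUM_HW };

        printf("TSO-capable dev: %s\n",
               netif_needs_gso(F_TSO, &tcp) ? "software GSO" : "hardware");
        printf("plain dev:       %s\n",
               netif_needs_gso(0, &tcp) ? "software GSO" : "hardware");
        return 0;
    }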
-diff -pruN ../orig-linux-2.6.16.29/include/linux/skbuff.h ./include/linux/skbuff.h
---- ../orig-linux-2.6.16.29/include/linux/skbuff.h 2006-09-19 13:59:20.000000000 +0100
-+++ ./include/linux/skbuff.h 2006-09-19 13:59:46.000000000 +0100
-@@ -1403,5 +1403,10 @@ static inline void nf_bridge_get(struct
- static inline void nf_reset(struct sk_buff *skb) {}
- #endif /* CONFIG_NETFILTER */
-
-+static inline int skb_is_gso(const struct sk_buff *skb)
-+{
-+ return skb_shinfo(skb)->gso_size;
-+}
-+
- #endif /* __KERNEL__ */
- #endif /* _LINUX_SKBUFF_H */
-diff -pruN ../orig-linux-2.6.16.29/include/net/protocol.h ./include/net/protocol.h
---- ../orig-linux-2.6.16.29/include/net/protocol.h 2006-09-19 13:59:20.000000000 +0100
-+++ ./include/net/protocol.h 2006-09-19 13:59:46.000000000 +0100
-@@ -37,6 +37,7 @@
- struct net_protocol {
- int (*handler)(struct sk_buff *skb);
- void (*err_handler)(struct sk_buff *skb, u32 info);
-+ int (*gso_send_check)(struct sk_buff *skb);
- struct sk_buff *(*gso_segment)(struct sk_buff *skb,
- int features);
- int no_policy;
-diff -pruN ../orig-linux-2.6.16.29/include/net/tcp.h ./include/net/tcp.h
---- ../orig-linux-2.6.16.29/include/net/tcp.h 2006-09-19 13:59:20.000000000 +0100
-+++ ./include/net/tcp.h 2006-09-19 13:59:46.000000000 +0100
-@@ -1063,6 +1063,7 @@ extern struct request_sock_ops tcp_reque
-
- extern int tcp_v4_destroy_sock(struct sock *sk);
-
-+extern int tcp_v4_gso_send_check(struct sk_buff *skb);
- extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
-
- #ifdef CONFIG_PROC_FS
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_forward.c ./net/bridge/br_forward.c
---- ../orig-linux-2.6.16.29/net/bridge/br_forward.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/bridge/br_forward.c 2006-09-19 13:59:46.000000000 +0100
-@@ -32,7 +32,7 @@ static inline int should_deliver(const s
- int br_dev_queue_push_xmit(struct sk_buff *skb)
- {
- /* drop mtu oversized packets except tso */
-- if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
-+ if (skb->len > skb->dev->mtu && !skb_is_gso(skb))
- kfree_skb(skb);
- else {
- #ifdef CONFIG_BRIDGE_NETFILTER
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c ./net/bridge/br_netfilter.c
---- ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/bridge/br_netfilter.c 2006-09-19 13:59:46.000000000 +0100
-@@ -743,7 +743,7 @@ static int br_nf_dev_queue_xmit(struct s
- {
- if (skb->protocol == htons(ETH_P_IP) &&
- skb->len > skb->dev->mtu &&
-- !skb_shinfo(skb)->gso_size)
-+ !skb_is_gso(skb))
- return ip_fragment(skb, br_dev_queue_push_xmit);
- else
- return br_dev_queue_push_xmit(skb);
-diff -pruN ../orig-linux-2.6.16.29/net/core/dev.c ./net/core/dev.c
---- ../orig-linux-2.6.16.29/net/core/dev.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/core/dev.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1083,9 +1083,17 @@ int skb_checksum_help(struct sk_buff *sk
- unsigned int csum;
- int ret = 0, offset = skb->h.raw - skb->data;
-
-- if (inward) {
-- skb->ip_summed = CHECKSUM_NONE;
-- goto out;
-+ if (inward)
-+ goto out_set_summed;
-+
-+ if (unlikely(skb_shinfo(skb)->gso_size)) {
-+ static int warned;
-+
-+ WARN_ON(!warned);
-+ warned = 1;
-+
-+ /* Let GSO fix up the checksum. */
-+ goto out_set_summed;
- }
-
- if (skb_cloned(skb)) {
-@@ -1102,6 +1110,8 @@ int skb_checksum_help(struct sk_buff *sk
- BUG_ON(skb->csum + 2 > offset);
-
- *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
-+
-+out_set_summed:
- skb->ip_summed = CHECKSUM_NONE;
- out:
- return ret;
-@@ -1122,17 +1132,35 @@ struct sk_buff *skb_gso_segment(struct s
- struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_type *ptype;
- int type = skb->protocol;
-+ int err;
-
- BUG_ON(skb_shinfo(skb)->frag_list);
-- BUG_ON(skb->ip_summed != CHECKSUM_HW);
-
- skb->mac.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->data;
- __skb_pull(skb, skb->mac_len);
-
-+ if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
-+ static int warned;
-+
-+ WARN_ON(!warned);
-+ warned = 1;
-+
-+ if (skb_header_cloned(skb) &&
-+ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-+ return ERR_PTR(err);
-+ }
-+
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
- if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-+ if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
-+ err = ptype->gso_send_check(skb);
-+ segs = ERR_PTR(err);
-+ if (err || skb_gso_ok(skb, features))
-+ break;
-+ __skb_push(skb, skb->data - skb->nh.raw);
-+ }
- segs = ptype->gso_segment(skb, features);
- break;
- }
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/af_inet.c ./net/ipv4/af_inet.c
---- ../orig-linux-2.6.16.29/net/ipv4/af_inet.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv4/af_inet.c 2006-09-19 13:59:46.000000000 +0100
-@@ -1085,6 +1085,40 @@ int inet_sk_rebuild_header(struct sock *
-
- EXPORT_SYMBOL(inet_sk_rebuild_header);
-
-+static int inet_gso_send_check(struct sk_buff *skb)
-+{
-+ struct iphdr *iph;
-+ struct net_protocol *ops;
-+ int proto;
-+ int ihl;
-+ int err = -EINVAL;
-+
-+ if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
-+ goto out;
-+
-+ iph = skb->nh.iph;
-+ ihl = iph->ihl * 4;
-+ if (ihl < sizeof(*iph))
-+ goto out;
-+
-+ if (unlikely(!pskb_may_pull(skb, ihl)))
-+ goto out;
-+
-+ skb->h.raw = __skb_pull(skb, ihl);
-+ iph = skb->nh.iph;
-+ proto = iph->protocol & (MAX_INET_PROTOS - 1);
-+ err = -EPROTONOSUPPORT;
-+
-+ rcu_read_lock();
-+ ops = rcu_dereference(inet_protos[proto]);
-+ if (likely(ops && ops->gso_send_check))
-+ err = ops->gso_send_check(skb);
-+ rcu_read_unlock();
-+
-+out:
-+ return err;
-+}
-+
- static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
- {
- struct sk_buff *segs = ERR_PTR(-EINVAL);
-@@ -1142,6 +1176,7 @@ static struct net_protocol igmp_protocol
- static struct net_protocol tcp_protocol = {
- .handler = tcp_v4_rcv,
- .err_handler = tcp_v4_err,
-+ .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .no_policy = 1,
- };
-@@ -1188,6 +1223,7 @@ static int ipv4_proc_init(void);
- static struct packet_type ip_packet_type = {
- .type = __constant_htons(ETH_P_IP),
- .func = ip_rcv,
-+ .gso_send_check = inet_gso_send_check,
- .gso_segment = inet_gso_segment,
- };
-
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/ip_output.c ./net/ipv4/ip_output.c
---- ../orig-linux-2.6.16.29/net/ipv4/ip_output.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv4/ip_output.c 2006-09-19 13:59:46.000000000 +0100
-@@ -210,7 +210,7 @@ static inline int ip_finish_output(struc
- return dst_output(skb);
- }
- #endif
-- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
-+ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
- return ip_fragment(skb, ip_finish_output2);
- else
- return ip_finish_output2(skb);
-@@ -1095,7 +1095,7 @@ ssize_t ip_append_page(struct sock *sk,
- while (size > 0) {
- int i;
-
-- if (skb_shinfo(skb)->gso_size)
-+ if (skb_is_gso(skb))
- len = size;
- else {
-
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c
---- ../orig-linux-2.6.16.29/net/ipv4/tcp_ipv4.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./net/ipv4/tcp_ipv4.c 2006-09-19 13:59:46.000000000 +0100
-@@ -495,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk,
- }
- }
-
-+int tcp_v4_gso_send_check(struct sk_buff *skb)
-+{
-+ struct iphdr *iph;
-+ struct tcphdr *th;
-+
-+ if (!pskb_may_pull(skb, sizeof(*th)))
-+ return -EINVAL;
-+
-+ iph = skb->nh.iph;
-+ th = skb->h.th;
-+
-+ th->check = 0;
-+ th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
-+ skb->csum = offsetof(struct tcphdr, check);
-+ skb->ip_summed = CHECKSUM_HW;
-+ return 0;
-+}
-+
- /*
- * This routine will send an RST to the other tcp.
- *
-diff -pruN ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c
---- ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv4/xfrm4_output.c 2006-09-19 13:59:46.000000000 +0100
-@@ -195,7 +195,7 @@ static int xfrm4_output_finish(struct sk
- }
- #endif
-
-- if (!skb_shinfo(skb)->gso_size)
-+ if (!skb_is_gso(skb))
- return xfrm4_output_finish2(skb);
-
- skb->protocol = htons(ETH_P_IP);
-diff -pruN ../orig-linux-2.6.16.29/net/ipv6/ip6_output.c ./net/ipv6/ip6_output.c
---- ../orig-linux-2.6.16.29/net/ipv6/ip6_output.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv6/ip6_output.c 2006-09-19 13:59:46.000000000 +0100
-@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *s
-
- int ip6_output(struct sk_buff *skb)
- {
-- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
-+ if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
- dst_allfrag(skb->dst))
- return ip6_fragment(skb, ip6_output2);
- else
-diff -pruN ../orig-linux-2.6.16.29/net/ipv6/xfrm6_output.c ./net/ipv6/xfrm6_output.c
---- ../orig-linux-2.6.16.29/net/ipv6/xfrm6_output.c 2006-09-19 13:59:20.000000000 +0100
-+++ ./net/ipv6/xfrm6_output.c 2006-09-19 13:59:46.000000000 +0100
-@@ -179,7 +179,7 @@ static int xfrm6_output_finish(struct sk
- {
- struct sk_buff *segs;
-
-- if (!skb_shinfo(skb)->gso_size)
-+ if (!skb_is_gso(skb))
- return xfrm6_output_finish2(skb);
-
- skb->protocol = htons(ETH_P_IP);
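The hunks above (evidently net-gso-2-checksum-fix.patch in the series listed further down) convert every open-coded skb_shinfo(skb)->gso_size test on the output paths to skb_is_gso(), and add a gso_send_check hook so a protocol can prepare its checksum before segmentation when a packet arrives without CHECKSUM_HW. The helper itself is not visible here; a minimal sketch of what the callers assume, matching the mainline 2.6.18 definition and presumably supplied by the base GSO patch earlier in the series:

  static inline int skb_is_gso(const struct sk_buff *skb)
  {
      /* Non-zero gso_size marks a packet that still needs software or
       * hardware segmentation, so it must not be IP-fragmented. */
      return skb_shinfo(skb)->gso_size;
  }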
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/include/linux/netdevice.h ./include/linux/netdevice.h
---- ../orig-linux-2.6.16.29/include/linux/netdevice.h 2006-09-19 13:59:46.000000000 +0100
-+++ ./include/linux/netdevice.h 2006-09-19 14:05:28.000000000 +0100
-@@ -930,10 +930,10 @@ static inline void netif_tx_lock_bh(stru
-
- static inline int netif_tx_trylock(struct net_device *dev)
- {
-- int err = spin_trylock(&dev->_xmit_lock);
-- if (!err)
-+ int ok = spin_trylock(&dev->_xmit_lock);
-+ if (likely(ok))
- dev->xmit_lock_owner = smp_processor_id();
-- return err;
-+ return ok;
- }
-
- static inline void netif_tx_unlock(struct net_device *dev)
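The one-hunk patch above (net-gso-3-fix-errorcheck.patch) fixes an inverted test: spin_trylock() returns non-zero on success, so the old code recorded xmit_lock_owner exactly when the lock had not been acquired. A standalone sketch of the corrected sense, with invented stand-ins for the kernel primitives:

  #include <stdio.h>

  /* Stand-in for spin_trylock(): non-zero means the lock was taken. */
  static int spin_trylock_model(int *lock)
  {
      if (*lock)
          return 0;   /* contended: failure */
      *lock = 1;
      return 1;       /* acquired: success */
  }

  int main(void)
  {
      int lock = 0, owner = -1;
      int ok = spin_trylock_model(&lock);

      if (ok)         /* the fix: record the owner on success */
          owner = 7;  /* pretend smp_processor_id() == 7 */
      printf("acquired=%d owner=%d\n", ok, owner);
      return 0;
  }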
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/net/core/dev.c ./net/core/dev.c
---- ../orig-linux-2.6.16.29/net/core/dev.c 2006-09-19 13:59:46.000000000 +0100
-+++ ./net/core/dev.c 2006-09-19 14:05:32.000000000 +0100
-@@ -1087,11 +1087,6 @@ int skb_checksum_help(struct sk_buff *sk
- goto out_set_summed;
-
- if (unlikely(skb_shinfo(skb)->gso_size)) {
-- static int warned;
--
-- WARN_ON(!warned);
-- warned = 1;
--
- /* Let GSO fix up the checksum. */
- goto out_set_summed;
- }
-@@ -1141,11 +1136,6 @@ struct sk_buff *skb_gso_segment(struct s
- __skb_pull(skb, skb->mac_len);
-
- if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
-- static int warned;
--
-- WARN_ON(!warned);
-- warned = 1;
--
- if (skb_header_cloned(skb) &&
- (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
- return ERR_PTR(err);
+++ /dev/null
-diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index 104af5d..1fa1536 100644
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s
- /* skb->len may jitter because of SACKs, even if peer
- * sends good full-sized frames.
- */
-- len = skb->len;
-+ len = skb_shinfo(skb)->gso_size ?: skb->len;
- if (len >= icsk->icsk_ack.rcv_mss) {
- icsk->icsk_ack.rcv_mss = len;
- } else {
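net-gso-5-rcv-mss.patch, above, makes the receive-MSS estimator look at the per-segment payload size for GSO-aggregated packets instead of the inflated skb->len. The `?:` with an empty middle operand is the GNU C extension `a ?: b`, equivalent to `a ? a : b` with `a` evaluated once. A self-contained illustration (values invented):

  #include <stdio.h>

  /* Toy model of the tcp_measure_rcv_mss() change: a GSO-aggregated
   * skb reports gso_size (per-segment payload); a plain skb reports
   * its own length. */
  static unsigned int effective_len(unsigned int gso_size,
                                    unsigned int skb_len)
  {
      return gso_size ?: skb_len;
  }

  int main(void)
  {
      printf("plain skb: %u\n", effective_len(0, 1448));     /* 1448 */
      printf("gso skb:   %u\n", effective_len(1448, 62096)); /* 1448 */
      return 0;
  }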
+++ /dev/null
-diff -Naru a/net/core/skbuff.c b/net/core/skbuff.c
---- a/net/core/skbuff.c 2006-12-14 09:32:04 -08:00
-+++ b/net/core/skbuff.c 2006-12-14 09:32:04 -08:00
-@@ -1946,7 +1946,7 @@
- do {
- struct sk_buff *nskb;
- skb_frag_t *frag;
-- int hsize, nsize;
-+ int hsize;
- int k;
- int size;
-
-@@ -1957,11 +1957,10 @@
- hsize = skb_headlen(skb) - offset;
- if (hsize < 0)
- hsize = 0;
-- nsize = hsize + doffset;
-- if (nsize > len + doffset || !sg)
-- nsize = len + doffset;
-+ if (hsize > len || !sg)
-+ hsize = len;
-
-- nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
-+ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
- if (unlikely(!nskb))
- goto err;
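net-gso-6-linear-segmentation.patch, above, changes skb_segment() to clamp the head size (hsize) itself rather than only the allocation size (nsize). The visible hunk implies the motivation: for a linear skb longer than one segment, the later head copy is sized by hsize, so an unclamped hsize could outgrow the nsize-byte allocation. A toy of the corrected arithmetic (numbers invented):

  #include <stdio.h>

  int main(void)
  {
      int headlen = 60000, offset = 0, doffset = 66, len = 1448, sg = 0;
      int hsize = headlen - offset;

      if (hsize < 0)
          hsize = 0;
      if (hsize > len || !sg)
          hsize = len;    /* the fix: clamp the head copy itself */

      printf("head copy = %d bytes, alloc = %d bytes + headroom\n",
             hsize, hsize + doffset);
      return 0;
  }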
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c
---- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/pci/mmconfig.c 2006-09-21 09:35:27.000000000 +0100
-@@ -12,14 +12,22 @@
- #include <linux/pci.h>
- #include <linux/init.h>
- #include <linux/acpi.h>
-+#include <asm/e820.h>
- #include "pci.h"
-
-+/* aperture is up to 256MB but BIOS may reserve less */
-+#define MMCONFIG_APER_MIN (2 * 1024*1024)
-+#define MMCONFIG_APER_MAX (256 * 1024*1024)
-+
-+/* Assume systems with more busses have correct MCFG */
-+#define MAX_CHECK_BUS 16
-+
- #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
-
- /* The base address of the last MMCONFIG device accessed */
- static u32 mmcfg_last_accessed_device;
-
--static DECLARE_BITMAP(fallback_slots, 32);
-+static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
-
- /*
- * Functions for accessing PCI configuration space with MMCONFIG accesses
-@@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se
- int cfg_num = -1;
- struct acpi_table_mcfg_config *cfg;
-
-- if (seg == 0 && bus == 0 &&
-- test_bit(PCI_SLOT(devfn), fallback_slots))
-+ if (seg == 0 && bus < MAX_CHECK_BUS &&
-+ test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots))
- return 0;
-
- while (1) {
-@@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s
- unsigned long flags;
- u32 base;
-
-- if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
-+ if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
-+ *value = -1;
- return -EINVAL;
-+ }
-
- base = get_base_addr(seg, bus, devfn);
- if (!base)
-@@ -146,30 +156,66 @@ static struct pci_raw_ops pci_mmcfg = {
- Normally this can be expressed in the MCFG by not listing them
- and assigning suitable _SEGs, but this isn't implemented in some BIOS.
- Instead try to discover all devices on bus 0 that are unreachable using MM
-- and fallback for them.
-- We only do this for bus 0/seg 0 */
-+ and fallback for them. */
- static __init void unreachable_devices(void)
- {
-- int i;
-+ int i, k;
- unsigned long flags;
-
-- for (i = 0; i < 32; i++) {
-- u32 val1;
-- u32 addr;
-+ for (k = 0; k < MAX_CHECK_BUS; k++) {
-+ for (i = 0; i < 32; i++) {
-+ u32 val1;
-+ u32 addr;
-+
-+ pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
-+ if (val1 == 0xffffffff)
-+ continue;
-+
-+ /* Locking probably not needed, but safer */
-+ spin_lock_irqsave(&pci_config_lock, flags);
-+ addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
-+ if (addr != 0)
-+ pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
-+ if (addr == 0 ||
-+ readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
-+ set_bit(i + 32*k, fallback_slots);
-+ printk(KERN_NOTICE
-+ "PCI: No mmconfig possible on %x:%x\n", k, i);
-+ }
-+ spin_unlock_irqrestore(&pci_config_lock, flags);
-+ }
-+ }
-+}
-
-- pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1);
-- if (val1 == 0xffffffff)
-+/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. */
-+#ifdef CONFIG_XEN
-+extern struct e820map machine_e820;
-+#define e820 machine_e820
-+#endif
-+static int __init
-+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
-+{
-+ u64 start = s;
-+ u64 end = e;
-+ int i;
-+ for (i = 0; i < e820.nr_map; i++) {
-+ struct e820entry *ei = &e820.map[i];
-+ if (type && ei->type != type)
- continue;
--
-- /* Locking probably not needed, but safer */
-- spin_lock_irqsave(&pci_config_lock, flags);
-- addr = get_base_addr(0, 0, PCI_DEVFN(i, 0));
-- if (addr != 0)
-- pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0));
-- if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1)
-- set_bit(i, fallback_slots);
-- spin_unlock_irqrestore(&pci_config_lock, flags);
-+ /* does this e820 entry overlap the region being checked? */
-+ if (ei->addr >= end || ei->addr + ei->size <= start)
-+ continue;
-+ /* if the region is at the beginning of <start,end> we move
-+ * start to the end of the region since it's ok until there
-+ */
-+ if (ei->addr <= start)
-+ start = ei->addr + ei->size;
-+ /* if start is now at or beyond end, we're done, full
-+ * coverage */
-+ if (start >= end)
-+ return 1; /* we're done */
- }
-+ return 0;
- }
-
- static int __init pci_mmcfg_init(void)
-@@ -183,6 +229,15 @@ static int __init pci_mmcfg_init(void)
- (pci_mmcfg_config[0].base_address == 0))
- goto out;
-
-+ if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
-+ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
-+ E820_RESERVED)) {
-+ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
-+ pci_mmcfg_config[0].base_address);
-+ printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
-+ goto out;
-+ }
-+
- printk(KERN_INFO "PCI: Using MMCONFIG\n");
- raw_pci_ops = &pci_mmcfg;
- pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
-diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c
---- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/x86_64/pci/mmconfig.c 2006-09-21 09:35:40.000000000 +0100
-@@ -9,11 +9,19 @@
- #include <linux/init.h>
- #include <linux/acpi.h>
- #include <linux/bitmap.h>
-+#include <asm/e820.h>
-+
- #include "pci.h"
-
--#define MMCONFIG_APER_SIZE (256*1024*1024)
-+/* aperture is up to 256MB but BIOS may reserve less */
-+#define MMCONFIG_APER_MIN (2 * 1024*1024)
-+#define MMCONFIG_APER_MAX (256 * 1024*1024)
-+
-+/* Verify the first 16 busses. We assume that systems with more busses
-+ get MCFG right. */
-+#define MAX_CHECK_BUS 16
-
--static DECLARE_BITMAP(fallback_slots, 32);
-+static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS);
-
- /* Static virtual mapping of the MMCONFIG aperture */
- struct mmcfg_virt {
-@@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i
- static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
- {
- char __iomem *addr;
-- if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots))
-+ if (seg == 0 && bus < MAX_CHECK_BUS &&
-+ test_bit(32*bus + PCI_SLOT(devfn), fallback_slots))
- return NULL;
- addr = get_virt(seg, bus);
- if (!addr)
-@@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s
- char __iomem *addr;
-
- /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
-- if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
-+ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
-+ *value = -1;
- return -EINVAL;
-+ }
-
- addr = pci_dev_base(seg, bus, devfn);
- if (!addr)
-@@ -129,23 +140,56 @@ static struct pci_raw_ops pci_mmcfg = {
- Normally this can be expressed in the MCFG by not listing them
- and assigning suitable _SEGs, but this isn't implemented in some BIOS.
- Instead try to discover all devices on bus 0 that are unreachable using MM
-- and fallback for them.
-- We only do this for bus 0/seg 0 */
-+ and fallback for them. */
- static __init void unreachable_devices(void)
- {
-- int i;
-- for (i = 0; i < 32; i++) {
-- u32 val1;
-- char __iomem *addr;
-+ int i, k;
-+ /* Use the max bus number from ACPI here? */
-+ for (k = 0; k < MAX_CHECK_BUS; k++) {
-+ for (i = 0; i < 32; i++) {
-+ u32 val1;
-+ char __iomem *addr;
-+
-+ pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1);
-+ if (val1 == 0xffffffff)
-+ continue;
-+ addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
-+ if (addr == NULL|| readl(addr) != val1) {
-+ set_bit(i + 32*k, fallback_slots);
-+ printk(KERN_NOTICE
-+ "PCI: No mmconfig possible on device %x:%x\n",
-+ k, i);
-+ }
-+ }
-+ }
-+}
-
-- pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1);
-- if (val1 == 0xffffffff)
-+/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. */
-+#ifdef CONFIG_XEN
-+extern struct e820map machine_e820;
-+#define e820 machine_e820
-+#endif
-+static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
-+{
-+ int i;
-+ for (i = 0; i < e820.nr_map; i++) {
-+ struct e820entry *ei = &e820.map[i];
-+ if (type && ei->type != type)
- continue;
-- addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0));
-- if (addr == NULL|| readl(addr) != val1) {
-- set_bit(i, &fallback_slots);
-- }
-+ /* does this e820 entry overlap the region being checked? */
-+ if (ei->addr >= end || ei->addr + ei->size <= start)
-+ continue;
-+
-+ /* if the region is at the beginning of <start,end> we move
-+ * start to the end of the region since it's ok until there
-+ */
-+ if (ei->addr <= start)
-+ start = ei->addr + ei->size;
-+ /* if start is now at or beyond end, we're done, full coverage */
-+ if (start >= end)
-+ return 1; /* we're done */
- }
-+ return 0;
- }
-
- static int __init pci_mmcfg_init(void)
-@@ -161,6 +205,15 @@ static int __init pci_mmcfg_init(void)
- (pci_mmcfg_config[0].base_address == 0))
- return 0;
-
-+ if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
-+ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
-+ E820_RESERVED)) {
-+ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
-+ pci_mmcfg_config[0].base_address);
-+ printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
-+ return 0;
-+ }
-+
- /* RED-PEN i386 doesn't do _nocache right now */
- pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
- if (pci_mmcfg_virt == NULL) {
-@@ -169,7 +222,8 @@ static int __init pci_mmcfg_init(void)
- }
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
-- pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE);
-+ pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
-+ MMCONFIG_APER_MAX);
- if (!pci_mmcfg_virt[i].virt) {
- printk("PCI: Cannot map mmconfig aperture for segment %d\n",
- pci_mmcfg_config[i].pci_segment_group_number);
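pci-mmconfig-fix-from-2.6.17.patch, above, makes two changes on both i386 and x86_64: the conf1 cross-check now probes the first 16 buses (MAX_CHECK_BUS), indexing the fallback bitmap as 32*bus + slot, and MMCONFIG is refused outright unless at least MMCONFIG_APER_MIN (2MB) of the MCFG aperture is marked E820_RESERVED. A standalone model of the interval-coverage walk (the e820 map below is invented; the real map is sanitized into ascending, non-overlapping entries before this check runs):

  #include <stdio.h>

  #define E820_RESERVED 2

  struct e820entry { unsigned long long addr, size; unsigned type; };

  /* Invented map: low RAM plus a reserved MCFG aperture at 0xe0000000. */
  static struct e820entry map[] = {
      { 0x00000000ULL, 0x000a0000ULL, 1 },
      { 0xe0000000ULL, 0x10000000ULL, E820_RESERVED },
  };

  /* Same walk as the patch: advance start across each matching entry;
   * [start, end) is fully covered iff start reaches end. */
  static int e820_all_mapped(unsigned long long start,
                             unsigned long long end, unsigned type)
  {
      unsigned i;

      for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
          struct e820entry *ei = &map[i];

          if (type && ei->type != type)
              continue;
          if (ei->addr >= end || ei->addr + ei->size <= start)
              continue;               /* no overlap */
          if (ei->addr <= start)
              start = ei->addr + ei->size;
          if (start >= end)
              return 1;               /* fully covered */
      }
      return 0;
  }

  int main(void)
  {
      /* The MMCONFIG_APER_MIN probe: first 2MB of the aperture. */
      printf("%d\n", e820_all_mapped(0xe0000000ULL,
                                     0xe0000000ULL + 2 * 1024 * 1024,
                                     E820_RESERVED)); /* prints 1 */
      printf("%d\n", e820_all_mapped(0xd0000000ULL,
                                     0xd0000000ULL + 2 * 1024 * 1024,
                                     E820_RESERVED)); /* prints 0 */
      return 0;
  }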
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c
---- ../orig-linux-2.6.16.29/arch/i386/mm/pageattr.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/mm/pageattr.c 2006-09-19 14:05:35.000000000 +0100
-@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns
- unsigned long flags;
-
- set_pte_atomic(kpte, pte); /* change init_mm */
-- if (PTRS_PER_PMD > 1)
-+ if (HAVE_SHARED_KERNEL_PMD)
- return;
-
- spin_lock_irqsave(&pgd_lock, flags);
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
---- ../orig-linux-2.6.16.29/arch/i386/mm/pgtable.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/mm/pgtable.c 2006-09-19 14:05:35.000000000 +0100
-@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
- spin_lock_irqsave(&pgd_lock, flags);
- }
-
-- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-- swapper_pg_dir + USER_PTRS_PER_PGD,
-- KERNEL_PGD_PTRS);
-+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
-+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-+ swapper_pg_dir + USER_PTRS_PER_PGD,
-+ KERNEL_PGD_PTRS);
- if (PTRS_PER_PMD > 1)
- return;
-
-@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
-+
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-+ if (!pmd)
-+ goto out_oom;
-+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
-+ }
-+
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
-+ pgd_t *kpgd = pgd_offset_k(v);
-+ pud_t *kpud = pud_offset(kpgd, v);
-+ pmd_t *kpmd = pmd_offset(kpud, v);
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memcpy(pmd, kpmd, PAGE_SIZE);
-+ }
-+ pgd_list_add(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ }
-+
- return pgd;
-
- out_oom:
-@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd)
- int i;
-
- /* in the PAE case user pgd entries are overwritten before usage */
-- if (PTRS_PER_PMD > 1)
-- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-+ if (PTRS_PER_PMD > 1) {
-+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ pgd_list_del(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ }
-+ }
- /* in the non-PAE case, free_pgtables() clears user pgd entries */
- kmem_cache_free(pgd_cache, pgd);
- }
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h
---- ../orig-linux-2.6.16.29/include/asm-i386/pgtable-2level-defs.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/pgtable-2level-defs.h 2006-09-19 14:05:35.000000000 +0100
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
- #define _I386_PGTABLE_2LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 0
-+
- /*
- * traditional i386 two-level paging structure:
- */
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h
---- ../orig-linux-2.6.16.29/include/asm-i386/pgtable-3level-defs.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/pgtable-3level-defs.h 2006-09-19 14:05:35.000000000 +0100
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
- #define _I386_PGTABLE_3LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 1
-+
- /*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
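pmd-shared.patch, above, introduces HAVE_SHARED_KERNEL_PMD: when it is 0, each PGD gets private kernel-area PMD pages, seeded by byte-copying the reference kernel PMD under pgd_lock, instead of pointing every PGD at one shared kernel PMD. This is groundwork for Xen, where a writable kernel PMD cannot be shared across page tables. A toy userspace model of the allocation policy (sizes and the malloc stand-in are invented; this is not kernel code):

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #define USER_PTRS_PER_PGD 3   /* PAE-style split: 3 user slots... */
  #define PTRS_PER_PGD      4   /* ...plus 1 kernel slot */
  #define PTRS_PER_PMD      512

  static unsigned long kernel_pmd[PTRS_PER_PMD]; /* reference kernel PMD */

  static void pgd_alloc_model(unsigned long *pgd[PTRS_PER_PGD],
                              int shared_pmd)
  {
      int i;

      for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
          if (shared_pmd) {
              pgd[i] = kernel_pmd;  /* every PGD shares one PMD */
          } else {
              pgd[i] = malloc(sizeof(kernel_pmd));
              if (!pgd[i])
                  exit(1);
              /* private page, seeded from the kernel tables */
              memcpy(pgd[i], kernel_pmd, sizeof(kernel_pmd));
          }
      }
  }

  int main(void)
  {
      unsigned long *pgd[PTRS_PER_PGD] = { 0 };

      pgd_alloc_model(pgd, 0);  /* the !HAVE_SHARED_KERNEL_PMD case */
      printf("kernel slot: %s\n",
             pgd[3] == kernel_pmd ? "shared" : "private copy");
      return 0;
  }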
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/include/linux/rcupdate.h ./include/linux/rcupdate.h
---- ../orig-linux-2.6.16.29/include/linux/rcupdate.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/rcupdate.h 2006-09-19 14:05:39.000000000 +0100
-@@ -134,6 +134,7 @@ static inline void rcu_bh_qsctr_inc(int
- }
-
- extern int rcu_pending(int cpu);
-+extern int rcu_needs_cpu(int cpu);
-
- /**
- * rcu_read_lock - mark the beginning of an RCU read-side critical section.
-diff -pruN ../orig-linux-2.6.16.29/kernel/rcupdate.c ./kernel/rcupdate.c
---- ../orig-linux-2.6.16.29/kernel/rcupdate.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./kernel/rcupdate.c 2006-09-19 14:05:39.000000000 +0100
-@@ -485,6 +485,20 @@ int rcu_pending(int cpu)
- __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
- }
-
-+/*
-+ * Check to see if any future RCU-related work will need to be done
-+ * by the current CPU, even if none need be done immediately, returning
-+ * 1 if so. This function is part of the RCU implementation; it is -not-
-+ * an exported member of the RCU API.
-+ */
-+int rcu_needs_cpu(int cpu)
-+{
-+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-+ struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
-+
-+ return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
-+}
-+
- void rcu_check_callbacks(int cpu, int user)
- {
- if (user ||
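rcu_needs_cpu.patch, above, exports a query telling whether a CPU still has RCU callbacks queued (curlist) or other RCU work pending, presumably so the Xen tickless-idle path (cf. fix-hz-suspend.patch in the series list below) can decline to stop the periodic tick while callbacks would otherwise be stranded. A toy model of the decision (struct and caller invented for illustration):

  #include <stdio.h>

  struct rcu_data_model { void *curlist; };

  static int rcu_pending_model(int cpu) { (void)cpu; return 0; }

  /* Mirror of rcu_needs_cpu(): true while any callback batch is
   * queued or RCU work is immediately pending on this CPU. */
  static int rcu_needs_cpu_model(struct rcu_data_model *rdp,
                                 struct rcu_data_model *rdp_bh, int cpu)
  {
      return !!rdp->curlist || !!rdp_bh->curlist || rcu_pending_model(cpu);
  }

  int main(void)
  {
      struct rcu_data_model rdp = { 0 }, rdp_bh = { 0 };
      int cb; /* pretend a callback batch is queued */

      printf("idle, no callbacks: tick %s stop\n",
             rcu_needs_cpu_model(&rdp, &rdp_bh, 0) ? "must not" : "may");
      rdp.curlist = &cb;
      printf("callbacks queued:   tick %s stop\n",
             rcu_needs_cpu_model(&rdp, &rdp_bh, 0) ? "must not" : "may");
      return 0;
  }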
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S
---- ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/entry.S 2006-09-19 14:05:44.000000000 +0100
-@@ -177,7 +177,7 @@ need_resched:
-
- # sysenter call handler stub
- ENTRY(sysenter_entry)
-- movl TSS_sysenter_esp0(%esp),%esp
-+ movl SYSENTER_stack_esp0(%esp),%esp
- sysenter_past_esp:
- sti
- pushl $(__USER_DS)
-@@ -492,7 +492,7 @@ device_not_available_emulate:
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
-- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
-+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
-@@ -504,7 +504,7 @@ device_not_available_emulate:
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
- label: \
-- movl TSS_sysenter_esp0+offset(%esp),%esp; \
-+ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
- pushfl; \
- pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
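The rename above is mechanical, but the "+12" in the fixup macro is worth spelling out: an NMI or debug exception that hits on the sysenter instruction has already pushed eflags, cs and eip, three 32-bit words, so the saved SYSENTER stack pointer sits 12 bytes further along:

  #include <stdio.h>

  int main(void)
  {
      int words_pushed = 3; /* eflags, cs, eip */
      int word_size = 4;    /* bytes, i386 */

      printf("offset = %d\n", words_pushed * word_size); /* 12 */
      return 0;
  }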
+++ /dev/null
-git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
-git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
-git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
-linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
-git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
-linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
-blktap-aio-16_03_06.patch
-device_bind.patch
-fix-hz-suspend.patch
-fix-ide-cd-pio-mode.patch
-i386-mach-io-check-nmi.patch
-ipv6-no-autoconf.patch
-net-csum.patch
-net-gso-0-base.patch
-net-gso-1-check-dodgy.patch
-net-gso-2-checksum-fix.patch
-net-gso-3-fix-errorcheck.patch
-net-gso-4-kill-warnon.patch
-net-gso-5-rcv-mss.patch
-net-gso-6-linear-segmentation.patch
-pci-mmconfig-fix-from-2.6.17.patch
-pmd-shared.patch
-rcu_needs_cpu.patch
-rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
-smp-alts.patch
-tpm_plugin_2.6.17.patch
-x86-increase-interrupt-vector-range.patch
-xen-hotplug.patch
-xenoprof-generic.patch
-x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
-x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
-git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
-x86-elfnote-as-preprocessor-macro.patch
-vsnprintf.patch
-kasprintf.patch
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/Kconfig ./arch/i386/Kconfig
---- ../orig-linux-2.6.16.29/arch/i386/Kconfig 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/Kconfig 2006-09-19 14:05:48.000000000 +0100
-@@ -202,6 +202,19 @@ config SMP
-
- If you don't know what to do here, say N.
-
-+config SMP_ALTERNATIVES
-+ bool "SMP alternatives support (EXPERIMENTAL)"
-+ depends on SMP && EXPERIMENTAL
-+ help
-+ Try to reduce the overhead of running an SMP kernel on a uniprocessor
-+ host slightly by replacing certain key instruction sequences
-+ according to whether we currently have more than one CPU available.
-+ This should provide a noticeable boost to performance when
-+ running SMP kernels on UP machines, and have negligible impact
-+ when running on a true SMP host.
-+
-+ If unsure, say N.
-+
- config NR_CPUS
- int "Maximum number of CPUs (2-255)"
- range 2 255
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile
---- ../orig-linux-2.6.16.29/arch/i386/kernel/Makefile 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/Makefile 2006-09-19 14:05:48.000000000 +0100
-@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
- obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
- obj-$(CONFIG_VM86) += vm86.o
- obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o
-
- EXTRA_AFLAGS := -traditional
-
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c
---- ../orig-linux-2.6.16.29/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100
-+++ ./arch/i386/kernel/smpalts.c 2006-09-19 14:05:48.000000000 +0100
-@@ -0,0 +1,85 @@
-+#include <linux/kernel.h>
-+#include <asm/system.h>
-+#include <asm/smp_alt.h>
-+#include <asm/processor.h>
-+#include <asm/string.h>
-+
-+struct smp_replacement_record {
-+ unsigned char targ_size;
-+ unsigned char smp1_size;
-+ unsigned char smp2_size;
-+ unsigned char up_size;
-+ unsigned char feature;
-+ unsigned char data[0];
-+};
-+
-+struct smp_alternative_record {
-+ void *targ_start;
-+ struct smp_replacement_record *repl;
-+};
-+
-+extern struct smp_alternative_record __start_smp_alternatives_table,
-+ __stop_smp_alternatives_table;
-+extern unsigned long __init_begin, __init_end;
-+
-+void prepare_for_smp(void)
-+{
-+ struct smp_alternative_record *r;
-+ printk(KERN_INFO "Enabling SMP...\n");
-+ for (r = &__start_smp_alternatives_table;
-+ r != &__stop_smp_alternatives_table;
-+ r++) {
-+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
-+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
-+ BUG_ON(r->repl->targ_size < r->repl->up_size);
-+ if (system_state == SYSTEM_RUNNING &&
-+ r->targ_start >= (void *)&__init_begin &&
-+ r->targ_start < (void *)&__init_end)
-+ continue;
-+ if (r->repl->feature != (unsigned char)-1 &&
-+ boot_cpu_has(r->repl->feature)) {
-+ memcpy(r->targ_start,
-+ r->repl->data + r->repl->smp1_size,
-+ r->repl->smp2_size);
-+ memset(r->targ_start + r->repl->smp2_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->smp2_size);
-+ } else {
-+ memcpy(r->targ_start,
-+ r->repl->data,
-+ r->repl->smp1_size);
-+ memset(r->targ_start + r->repl->smp1_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->smp1_size);
-+ }
-+ }
-+ /* Paranoia */
-+ asm volatile ("jmp 1f\n1:");
-+ mb();
-+}
-+
-+void unprepare_for_smp(void)
-+{
-+ struct smp_alternative_record *r;
-+ printk(KERN_INFO "Disabling SMP...\n");
-+ for (r = &__start_smp_alternatives_table;
-+ r != &__stop_smp_alternatives_table;
-+ r++) {
-+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
-+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
-+ BUG_ON(r->repl->targ_size < r->repl->up_size);
-+ if (system_state == SYSTEM_RUNNING &&
-+ r->targ_start >= (void *)&__init_begin &&
-+ r->targ_start < (void *)&__init_end)
-+ continue;
-+ memcpy(r->targ_start,
-+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
-+ r->repl->up_size);
-+ memset(r->targ_start + r->repl->up_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->up_size);
-+ }
-+ /* Paranoia */
-+ asm volatile ("jmp 1f\n1:");
-+ mb();
-+}
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c
---- ../orig-linux-2.6.16.29/arch/i386/kernel/smpboot.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/smpboot.c 2006-09-19 14:05:48.000000000 +0100
-@@ -1218,6 +1218,11 @@ static void __init smp_boot_cpus(unsigne
- if (max_cpus <= cpucount+1)
- continue;
-
-+#ifdef CONFIG_SMP_ALTERNATIVES
-+ if (kicked == 1)
-+ prepare_for_smp();
-+#endif
-+
- if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
-@@ -1396,6 +1401,11 @@ int __devinit __cpu_up(unsigned int cpu)
- return -EIO;
- }
-
-+#ifdef CONFIG_SMP_ALTERNATIVES
-+ if (num_online_cpus() == 1)
-+ prepare_for_smp();
-+#endif
-+
- local_irq_enable();
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Unleash the CPU! */
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:05:48.000000000 +0100
-@@ -34,6 +34,13 @@ SECTIONS
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
- __stop___ex_table = .;
-
-+ . = ALIGN(16);
-+ __start_smp_alternatives_table = .;
-+ __smp_alternatives : { *(__smp_alternatives) }
-+ __stop_smp_alternatives_table = .;
-+
-+ __smp_replacements : { *(__smp_replacements) }
-+
- RODATA
-
- /* writeable */
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/atomic.h ./include/asm-i386/atomic.h
---- ../orig-linux-2.6.16.29/include/asm-i386/atomic.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/atomic.h 2006-09-19 14:05:48.000000000 +0100
-@@ -4,18 +4,13 @@
- #include <linux/config.h>
- #include <linux/compiler.h>
- #include <asm/processor.h>
-+#include <asm/smp_alt.h>
-
- /*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
--#ifdef CONFIG_SMP
--#define LOCK "lock ; "
--#else
--#define LOCK ""
--#endif
--
- /*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/bitops.h ./include/asm-i386/bitops.h
---- ../orig-linux-2.6.16.29/include/asm-i386/bitops.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/bitops.h 2006-09-19 14:05:48.000000000 +0100
-@@ -7,6 +7,7 @@
-
- #include <linux/config.h>
- #include <linux/compiler.h>
-+#include <asm/smp_alt.h>
-
- /*
- * These have to be done with inline assembly: that way the bit-setting
-@@ -16,12 +17,6 @@
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
--#ifdef CONFIG_SMP
--#define LOCK_PREFIX "lock ; "
--#else
--#define LOCK_PREFIX ""
--#endif
--
- #define ADDR (*(volatile long *) addr)
-
- /**
-@@ -41,7 +36,7 @@
- */
- static inline void set_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol
- */
- static inline void clear_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -121,7 +116,7 @@ static inline void __change_bit(int nr,
- */
- static inline void change_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/futex.h ./include/asm-i386/futex.h
---- ../orig-linux-2.6.16.29/include/asm-i386/futex.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/futex.h 2006-09-19 14:05:48.000000000 +0100
-@@ -28,7 +28,7 @@
- "1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
--"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\
-+"2: " LOCK "cmpxchgl %3, %2\n\
- jnz 1b\n\
- 3: .section .fixup,\"ax\"\n\
- 4: mov %5, %1\n\
-@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op,
- #endif
- switch (op) {
- case FUTEX_OP_ADD:
-- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret,
-+ __futex_atomic_op1(LOCK "xaddl %0, %2", ret,
- oldval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h
---- ../orig-linux-2.6.16.29/include/asm-i386/rwsem.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/rwsem.h 2006-09-19 14:05:48.000000000 +0100
-@@ -40,6 +40,7 @@
-
- #include <linux/list.h>
- #include <linux/spinlock.h>
-+#include <asm/smp_alt.h>
-
- struct rwsem_waiter;
-
-@@ -99,7 +100,7 @@ static inline void __down_read(struct rw
- {
- __asm__ __volatile__(
- "# beginning down_read\n\t"
--LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
-+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
- " js 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st
- " movl %1,%2\n\t"
- " addl %3,%2\n\t"
- " jle 2f\n\t"
--LOCK_PREFIX " cmpxchgl %2,%0\n\t"
-+LOCK " cmpxchgl %2,%0\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
-@@ -150,7 +151,7 @@ static inline void __down_write(struct r
- tmp = RWSEM_ACTIVE_WRITE_BIAS;
- __asm__ __volatile__(
- "# beginning down_write\n\t"
--LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
-+LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
- " testl %%edx,%%edx\n\t" /* was the count 0 before? */
- " jnz 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
-@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s
- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
- __asm__ __volatile__(
- "# beginning __up_read\n\t"
--LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
-+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_
- __asm__ __volatile__(
- "# beginning __up_write\n\t"
- " movl %2,%%edx\n\t"
--LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
-+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
- " jnz 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -239,7 +240,7 @@ static inline void __downgrade_write(str
- {
- __asm__ __volatile__(
- "# beginning __downgrade_write\n\t"
--LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
-+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t"
- static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
- {
- __asm__ __volatile__(
--LOCK_PREFIX "addl %1,%0"
-+LOCK "addl %1,%0"
- : "=m"(sem->count)
- : "ir"(delta), "m"(sem->count));
- }
-@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in
- int tmp = delta;
-
- __asm__ __volatile__(
--LOCK_PREFIX "xadd %0,(%2)"
-+LOCK "xadd %0,(%2)"
- : "+r"(tmp), "=m"(sem->count)
- : "r"(sem), "m"(sem->count)
- : "memory");
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h
---- ../orig-linux-2.6.16.29/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100
-+++ ./include/asm-i386/smp_alt.h 2006-09-19 14:05:48.000000000 +0100
-@@ -0,0 +1,32 @@
-+#ifndef __ASM_SMP_ALT_H__
-+#define __ASM_SMP_ALT_H__
-+
-+#include <linux/config.h>
-+
-+#ifdef CONFIG_SMP
-+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-+#define LOCK \
-+ "6677: nop\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6677b\n" \
-+ ".long 6678f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6678: .byte 1\n" \
-+ ".byte 1\n" \
-+ ".byte 0\n" \
-+ ".byte 1\n" \
-+ ".byte -1\n" \
-+ "lock\n" \
-+ "nop\n" \
-+ ".previous\n"
-+void prepare_for_smp(void);
-+void unprepare_for_smp(void);
-+#else
-+#define LOCK "lock ; "
-+#endif
-+#else
-+#define LOCK ""
-+#endif
-+
-+#endif /* __ASM_SMP_ALT_H__ */
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h
---- ../orig-linux-2.6.16.29/include/asm-i386/spinlock.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/spinlock.h 2006-09-19 14:05:48.000000000 +0100
-@@ -6,6 +6,7 @@
- #include <asm/page.h>
- #include <linux/config.h>
- #include <linux/compiler.h>
-+#include <asm/smp_alt.h>
-
- /*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
-@@ -23,7 +24,8 @@
-
- #define __raw_spin_lock_string \
- "\n1:\t" \
-- "lock ; decb %0\n\t" \
-+ LOCK \
-+ "decb %0\n\t" \
- "jns 3f\n" \
- "2:\t" \
- "rep;nop\n\t" \
-@@ -34,7 +36,8 @@
-
- #define __raw_spin_lock_string_flags \
- "\n1:\t" \
-- "lock ; decb %0\n\t" \
-+ LOCK \
-+ "decb %0\n\t" \
- "jns 4f\n\t" \
- "2:\t" \
- "testl $0x200, %1\n\t" \
-@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags
- static inline int __raw_spin_trylock(raw_spinlock_t *lock)
- {
- char oldval;
-+#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
-- "xchgb %b0,%1"
-+ "1:movb %1,%b0\n"
-+ "movb $0,%1\n"
-+ "2:"
-+ ".section __smp_alternatives,\"a\"\n"
-+ ".long 1b\n"
-+ ".long 3f\n"
-+ ".previous\n"
-+ ".section __smp_replacements,\"a\"\n"
-+ "3: .byte 2b - 1b\n"
-+ ".byte 5f-4f\n"
-+ ".byte 0\n"
-+ ".byte 6f-5f\n"
-+ ".byte -1\n"
-+ "4: xchgb %b0,%1\n"
-+ "5: movb %1,%b0\n"
-+ "movb $0,%1\n"
-+ "6:\n"
-+ ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-+#else
-+ __asm__ __volatile__(
-+ "xchgb %b0,%1\n"
-+ :"=q" (oldval), "=m" (lock->slock)
-+ :"0" (0) : "memory");
-+#endif
- return oldval > 0;
- }
-
-@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra
-
- static inline void __raw_read_unlock(raw_rwlock_t *rw)
- {
-- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory");
-+ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
- }
-
- static inline void __raw_write_unlock(raw_rwlock_t *rw)
- {
-- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0"
-+ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
- : "=m" (rw->lock) : : "memory");
- }
-
-diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/system.h ./include/asm-i386/system.h
---- ../orig-linux-2.6.16.29/include/asm-i386/system.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-i386/system.h 2006-09-19 14:05:48.000000000 +0100
-@@ -5,7 +5,7 @@
- #include <linux/kernel.h>
- #include <asm/segment.h>
- #include <asm/cpufeature.h>
--#include <linux/bitops.h> /* for LOCK_PREFIX */
-+#include <asm/smp_alt.h>
-
- #ifdef __KERNEL__
-
-@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo
- unsigned long prev;
- switch (size) {
- case 1:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
-@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc
- unsigned long long new)
- {
- unsigned long long prev;
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
-+ __asm__ __volatile__(LOCK "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
-@@ -503,11 +503,55 @@ struct alt_instr {
- #endif
-
- #ifdef CONFIG_SMP
-+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-+#define smp_alt_mb(instr) \
-+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6667b\n" \
-+ ".long 6673f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6673:.byte 6668b-6667b\n" \
-+ ".byte 6670f-6669f\n" \
-+ ".byte 6671f-6670f\n" \
-+ ".byte 0\n" \
-+ ".byte %c0\n" \
-+ "6669:lock;addl $0,0(%%esp)\n" \
-+ "6670:" instr "\n" \
-+ "6671:\n" \
-+ ".previous\n" \
-+ : \
-+ : "i" (X86_FEATURE_XMM2) \
-+ : "memory")
-+#define smp_rmb() smp_alt_mb("lfence")
-+#define smp_mb() smp_alt_mb("mfence")
-+#define set_mb(var, value) do { \
-+unsigned long __set_mb_temp; \
-+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6667b\n" \
-+ ".long 6673f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6673: .byte 6668b-6667b\n" \
-+ ".byte 6670f-6669f\n" \
-+ ".byte 0\n" \
-+ ".byte 6671f-6670f\n" \
-+ ".byte -1\n" \
-+ "6669: xchg %1, %0\n" \
-+ "6670:movl %1, %0\n" \
-+ "6671:\n" \
-+ ".previous\n" \
-+ : "=m" (var), "=r" (__set_mb_temp) \
-+ : "1" (value) \
-+ : "memory"); } while (0)
-+#else
- #define smp_mb() mb()
- #define smp_rmb() rmb()
-+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-+#endif
- #define smp_wmb() wmb()
- #define smp_read_barrier_depends() read_barrier_depends()
--#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
- #else
- #define smp_mb() barrier()
- #define smp_rmb() barrier()
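smp-alts.patch, above, records every LOCK site in an __smp_alternatives table whose smp_replacement_record carries up to three encodings (smp1, smp2, up) plus a CPU-feature gate; prepare_for_smp()/unprepare_for_smp() copy the chosen encoding over the target and pad the remainder with 0x90 NOPs. The LOCK macro's record is the instructive case: the target is a single NOP placed immediately before the locked instruction, and the SMP encoding is the bare 0xF0 lock prefix, which turns "nop; decb" into "lock decb" in place. A userspace model of the patch step (the buffer and record values mirror the macro above, but this is not kernel code):

  #include <stdio.h>
  #include <string.h>

  /* Layout of struct smp_replacement_record from the patch:
   * data[] holds the smp1 bytes, then smp2, then up. */
  struct repl {
      unsigned char targ_size, smp1_size, smp2_size, up_size, feature;
      unsigned char data[8];
  };

  static void patch(unsigned char *targ, const struct repl *r, int smp)
  {
      if (smp) {
          memcpy(targ, r->data, r->smp1_size);
          memset(targ + r->smp1_size, 0x90, r->targ_size - r->smp1_size);
      } else {
          const unsigned char *up = r->data + r->smp1_size + r->smp2_size;

          memcpy(targ, up, r->up_size);
          memset(targ + r->up_size, 0x90, r->targ_size - r->up_size);
      }
  }

  int main(void)
  {
      /* The LOCK macro's record: 1-byte target (a NOP), smp1 is the
       * 0xF0 lock prefix, up is a NOP again, no feature gate (-1). */
      struct repl lock_rec = { 1, 1, 0, 1, 0xff, { 0xf0, 0x90 } };
      unsigned char text[1] = { 0x90 };

      patch(text, &lock_rec, 1);
      printf("smp: %02x (lock prefix)\n", text[0]);
      patch(text, &lock_rec, 0);
      printf("up:  %02x (nop)\n", text[0]);
      return 0;
  }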
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.c ./drivers/char/tpm/tpm_atmel.c
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/char/tpm/tpm_atmel.c 2006-09-19 14:05:52.000000000 +0100
-@@ -47,12 +47,12 @@ static int tpm_atml_recv(struct tpm_chip
- return -EIO;
-
- for (i = 0; i < 6; i++) {
-- status = ioread8(chip->vendor->iobase + 1);
-+ status = ioread8(chip->vendor.iobase + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev, "error reading header\n");
- return -EIO;
- }
-- *buf++ = ioread8(chip->vendor->iobase);
-+ *buf++ = ioread8(chip->vendor.iobase);
- }
-
- /* size of the data received */
-@@ -63,7 +63,7 @@ static int tpm_atml_recv(struct tpm_chip
- dev_err(chip->dev,
- "Recv size(%d) less than available space\n", size);
- for (; i < size; i++) { /* clear the waiting data anyway */
-- status = ioread8(chip->vendor->iobase + 1);
-+ status = ioread8(chip->vendor.iobase + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev, "error reading data\n");
- return -EIO;
-@@ -74,16 +74,16 @@ static int tpm_atml_recv(struct tpm_chip
-
- /* read all the data available */
- for (; i < size; i++) {
-- status = ioread8(chip->vendor->iobase + 1);
-+ status = ioread8(chip->vendor.iobase + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev, "error reading data\n");
- return -EIO;
- }
-- *buf++ = ioread8(chip->vendor->iobase);
-+ *buf++ = ioread8(chip->vendor.iobase);
- }
-
- /* make sure data available is gone */
-- status = ioread8(chip->vendor->iobase + 1);
-+ status = ioread8(chip->vendor.iobase + 1);
-
- if (status & ATML_STATUS_DATA_AVAIL) {
- dev_err(chip->dev, "data available is stuck\n");
-@@ -100,7 +100,7 @@ static int tpm_atml_send(struct tpm_chip
- dev_dbg(chip->dev, "tpm_atml_send:\n");
- for (i = 0; i < count; i++) {
- dev_dbg(chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]);
-- iowrite8(buf[i], chip->vendor->iobase);
-+ iowrite8(buf[i], chip->vendor.iobase);
- }
-
- return count;
-@@ -108,12 +108,12 @@ static int tpm_atml_send(struct tpm_chip
-
- static void tpm_atml_cancel(struct tpm_chip *chip)
- {
-- iowrite8(ATML_STATUS_ABORT, chip->vendor->iobase + 1);
-+ iowrite8(ATML_STATUS_ABORT, chip->vendor.iobase + 1);
- }
-
- static u8 tpm_atml_status(struct tpm_chip *chip)
- {
-- return ioread8(chip->vendor->iobase + 1);
-+ return ioread8(chip->vendor.iobase + 1);
- }
-
- static struct file_operations atmel_ops = {
-@@ -140,7 +140,7 @@ static struct attribute* atmel_attrs[] =
-
- static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs };
-
--static struct tpm_vendor_specific tpm_atmel = {
-+static const struct tpm_vendor_specific tpm_atmel = {
- .recv = tpm_atml_recv,
- .send = tpm_atml_send,
- .cancel = tpm_atml_cancel,
-@@ -159,10 +159,10 @@ static void atml_plat_remove(void)
- struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
-
- if (chip) {
-- if (chip->vendor->have_region)
-- atmel_release_region(chip->vendor->base,
-- chip->vendor->region_size);
-- atmel_put_base_addr(chip->vendor);
-+ if (chip->vendor.have_region)
-+ atmel_release_region(chip->vendor.base,
-+ chip->vendor.region_size);
-+ atmel_put_base_addr(chip->vendor.iobase);
- tpm_remove_hardware(chip->dev);
- platform_device_unregister(pdev);
- }
-@@ -179,18 +179,22 @@ static struct device_driver atml_drv = {
- static int __init init_atmel(void)
- {
- int rc = 0;
-+ void __iomem *iobase = NULL;
-+ int have_region, region_size;
-+ unsigned long base;
-+ struct tpm_chip *chip;
-
- driver_register(&atml_drv);
-
-- if ((tpm_atmel.iobase = atmel_get_base_addr(&tpm_atmel)) == NULL) {
-+ if ((iobase = atmel_get_base_addr(&base, &region_size)) == NULL) {
- rc = -ENODEV;
- goto err_unreg_drv;
- }
-
-- tpm_atmel.have_region =
-+ have_region =
- (atmel_request_region
-- (tpm_atmel.base, tpm_atmel.region_size,
-- "tpm_atmel0") == NULL) ? 0 : 1;
-+ (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
-+
-
- if (IS_ERR
- (pdev =
-@@ -199,17 +203,25 @@ static int __init init_atmel(void)
- goto err_rel_reg;
- }
-
-- if ((rc = tpm_register_hardware(&pdev->dev, &tpm_atmel)) < 0)
-+ if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_atmel))) {
-+ rc = -ENODEV;
- goto err_unreg_dev;
-+ }
-+
-+ chip->vendor.iobase = iobase;
-+ chip->vendor.base = base;
-+ chip->vendor.have_region = have_region;
-+ chip->vendor.region_size = region_size;
-+
- return 0;
-
- err_unreg_dev:
- platform_device_unregister(pdev);
- err_rel_reg:
-- atmel_put_base_addr(&tpm_atmel);
-- if (tpm_atmel.have_region)
-- atmel_release_region(tpm_atmel.base,
-- tpm_atmel.region_size);
-+ atmel_put_base_addr(iobase);
-+ if (have_region)
-+ atmel_release_region(base,
-+ region_size);
- err_unreg_drv:
- driver_unregister(&atml_drv);
- return rc;
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.h ./drivers/char/tpm/tpm_atmel.h
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/char/tpm/tpm_atmel.h 2006-09-19 14:05:52.000000000 +0100
-@@ -28,13 +28,12 @@
- #define atmel_request_region request_mem_region
- #define atmel_release_region release_mem_region
-
--static inline void atmel_put_base_addr(struct tpm_vendor_specific
-- *vendor)
-+static inline void atmel_put_base_addr(void __iomem *iobase)
- {
-- iounmap(vendor->iobase);
-+ iounmap(iobase);
- }
-
--static void __iomem * atmel_get_base_addr(struct tpm_vendor_specific *vendor)
-+static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size)
- {
- struct device_node *dn;
- unsigned long address, size;
-@@ -71,9 +70,9 @@ static void __iomem * atmel_get_base_add
- else
- size = reg[naddrc];
-
-- vendor->base = address;
-- vendor->region_size = size;
-- return ioremap(vendor->base, vendor->region_size);
-+ *base = address;
-+ *region_size = size;
-+ return ioremap(*base, *region_size);
- }
- #else
- #define atmel_getb(chip, offset) inb(chip->vendor->base + offset)
-@@ -106,14 +105,12 @@ static int atmel_verify_tpm11(void)
- return 0;
- }
-
--static inline void atmel_put_base_addr(struct tpm_vendor_specific
-- *vendor)
-+static inline void atmel_put_base_addr(void __iomem *iobase)
- {
- }
-
- /* Determine where to talk to device */
--static void __iomem * atmel_get_base_addr(struct tpm_vendor_specific
-- *vendor)
-+static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size)
- {
- int lo, hi;
-
-@@ -123,9 +120,9 @@ static void __iomem * atmel_get_base_add
- lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO);
- hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI);
-
-- vendor->base = (hi << 8) | lo;
-- vendor->region_size = 2;
-+ *base = (hi << 8) | lo;
-+ *region_size = 2;
-
-- return ioport_map(vendor->base, vendor->region_size);
-+ return ioport_map(*base, *region_size);
- }
- #endif
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_bios.c ./drivers/char/tpm/tpm_bios.c
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_bios.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/char/tpm/tpm_bios.c 2006-09-19 14:05:52.000000000 +0100
-@@ -29,6 +29,11 @@
- #define MAX_TEXT_EVENT 1000 /* Max event string length */
- #define ACPI_TCPA_SIG "TCPA" /* 0x41504354 /'TCPA' */
-
-+enum bios_platform_class {
-+ BIOS_CLIENT = 0x00,
-+ BIOS_SERVER = 0x01,
-+};
-+
- struct tpm_bios_log {
- void *bios_event_log;
- void *bios_event_log_end;
-@@ -36,9 +41,18 @@ struct tpm_bios_log {
-
- struct acpi_tcpa {
- struct acpi_table_header hdr;
-- u16 reserved;
-- u32 log_max_len __attribute__ ((packed));
-- u32 log_start_addr __attribute__ ((packed));
-+ u16 platform_class;
-+ union {
-+ struct client_hdr {
-+ u32 log_max_len __attribute__ ((packed));
-+ u64 log_start_addr __attribute__ ((packed));
-+ } client;
-+ struct server_hdr {
-+ u16 reserved;
-+ u64 log_max_len __attribute__ ((packed));
-+ u64 log_start_addr __attribute__ ((packed));
-+ } server;
-+ };
- };
-
- struct tcpa_event {
-@@ -91,6 +105,12 @@ static const char* tcpa_event_type_strin
- "Non-Host Info"
- };
-
-+struct tcpa_pc_event {
-+ u32 event_id;
-+ u32 event_size;
-+ u8 event_data[0];
-+};
-+
- enum tcpa_pc_event_ids {
- SMBIOS = 1,
- BIS_CERT,
-@@ -100,14 +120,15 @@ enum tcpa_pc_event_ids {
- NVRAM,
- OPTION_ROM_EXEC,
- OPTION_ROM_CONFIG,
-- OPTION_ROM_MICROCODE,
-+ OPTION_ROM_MICROCODE = 10,
- S_CRTM_VERSION,
- S_CRTM_CONTENTS,
- POST_CONTENTS,
-+ HOST_TABLE_OF_DEVICES,
- };
-
- static const char* tcpa_pc_event_id_strings[] = {
-- ""
-+ "",
- "SMBIOS",
- "BIS Certificate",
- "POST BIOS ",
-@@ -116,10 +137,12 @@ static const char* tcpa_pc_event_id_stri
- "NVRAM",
- "Option ROM",
- "Option ROM config",
-- "Option ROM microcode",
-+ "",
-+ "Option ROM microcode ",
- "S-CRTM Version",
-- "S-CRTM Contents",
-- "S-CRTM POST Contents",
-+ "S-CRTM Contents ",
-+ "POST Contents ",
-+ "Table of Devices",
- };
-
- /* returns pointer to start of pos. entry of tcg log */
-@@ -191,7 +214,7 @@ static int get_event_name(char *dest, st
- const char *name = "";
- char data[40] = "";
- int i, n_len = 0, d_len = 0;
-- u32 event_id;
-+ struct tcpa_pc_event *pc_event;
-
- switch(event->event_type) {
- case PREBOOT:
-@@ -220,31 +243,32 @@ static int get_event_name(char *dest, st
- }
- break;
- case EVENT_TAG:
-- event_id = be32_to_cpu(*((u32 *)event_entry));
-+ pc_event = (struct tcpa_pc_event *)event_entry;
-
- /* ToDo Row data -> Base64 */
-
-- switch (event_id) {
-+ switch (pc_event->event_id) {
- case SMBIOS:
- case BIS_CERT:
- case CMOS:
- case NVRAM:
- case OPTION_ROM_EXEC:
- case OPTION_ROM_CONFIG:
-- case OPTION_ROM_MICROCODE:
- case S_CRTM_VERSION:
-- case S_CRTM_CONTENTS:
-- case POST_CONTENTS:
-- name = tcpa_pc_event_id_strings[event_id];
-+ name = tcpa_pc_event_id_strings[pc_event->event_id];
- n_len = strlen(name);
- break;
-+ /* hash data */
- case POST_BIOS_ROM:
- case ESCD:
-- name = tcpa_pc_event_id_strings[event_id];
-+ case OPTION_ROM_MICROCODE:
-+ case S_CRTM_CONTENTS:
-+ case POST_CONTENTS:
-+ name = tcpa_pc_event_id_strings[pc_event->event_id];
- n_len = strlen(name);
- for (i = 0; i < 20; i++)
-- d_len += sprintf(data, "%02x",
-- event_entry[8 + i]);
-+ d_len += sprintf(&data[2*i], "%02x",
-+ pc_event->event_data[i]);
- break;
- default:
- break;
-@@ -260,52 +284,13 @@ static int get_event_name(char *dest, st
-
- static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v)
- {
-+ struct tcpa_event *event = v;
-+ char *data = v;
-+ int i;
-
-- char *eventname;
-- char data[4];
-- u32 help;
-- int i, len;
-- struct tcpa_event *event = (struct tcpa_event *) v;
-- unsigned char *event_entry =
-- (unsigned char *) (v + sizeof(struct tcpa_event));
--
-- eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
-- if (!eventname) {
-- printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
-- __func__);
-- return -ENOMEM;
-- }
--
-- /* 1st: PCR used is in little-endian format (4 bytes) */
-- help = le32_to_cpu(event->pcr_index);
-- memcpy(data, &help, 4);
-- for (i = 0; i < 4; i++)
-- seq_putc(m, data[i]);
--
-- /* 2nd: SHA1 (20 bytes) */
-- for (i = 0; i < 20; i++)
-- seq_putc(m, event->pcr_value[i]);
--
-- /* 3rd: event type identifier (4 bytes) */
-- help = le32_to_cpu(event->event_type);
-- memcpy(data, &help, 4);
-- for (i = 0; i < 4; i++)
-+ for (i = 0; i < sizeof(struct tcpa_event) + event->event_size; i++)
- seq_putc(m, data[i]);
-
-- len = 0;
--
-- len += get_event_name(eventname, event, event_entry);
--
-- /* 4th: filename <= 255 + \'0' delimiter */
-- if (len > TCG_EVENT_NAME_LEN_MAX)
-- len = TCG_EVENT_NAME_LEN_MAX;
--
-- for (i = 0; i < len; i++)
-- seq_putc(m, eventname[i]);
--
-- /* 5th: delimiter */
-- seq_putc(m, '\0');
--
- return 0;
- }
-
-@@ -353,6 +338,7 @@ static int tpm_ascii_bios_measurements_s
- /* 4th: eventname <= max + \'0' delimiter */
- seq_printf(m, " %s\n", eventname);
-
-+ kfree(eventname);
- return 0;
- }
-
-@@ -376,6 +362,7 @@ static int read_log(struct tpm_bios_log
- struct acpi_tcpa *buff;
- acpi_status status;
- struct acpi_table_header *virt;
-+ u64 len, start;
-
- if (log->bios_event_log != NULL) {
- printk(KERN_ERR
-@@ -396,27 +383,37 @@ static int read_log(struct tpm_bios_log
- return -EIO;
- }
-
-- if (buff->log_max_len == 0) {
-+ switch(buff->platform_class) {
-+ case BIOS_SERVER:
-+ len = buff->server.log_max_len;
-+ start = buff->server.log_start_addr;
-+ break;
-+ case BIOS_CLIENT:
-+ default:
-+ len = buff->client.log_max_len;
-+ start = buff->client.log_start_addr;
-+ break;
-+ }
-+ if (!len) {
- printk(KERN_ERR "%s: ERROR - TCPA log area empty\n", __func__);
- return -EIO;
- }
-
- /* malloc EventLog space */
-- log->bios_event_log = kmalloc(buff->log_max_len, GFP_KERNEL);
-+ log->bios_event_log = kmalloc(len, GFP_KERNEL);
- if (!log->bios_event_log) {
-- printk
-- ("%s: ERROR - Not enough Memory for BIOS measurements\n",
-- __func__);
-+ printk("%s: ERROR - Not enough Memory for BIOS measurements\n",
-+ __func__);
- return -ENOMEM;
- }
-
-- log->bios_event_log_end = log->bios_event_log + buff->log_max_len;
-+ log->bios_event_log_end = log->bios_event_log + len;
-
-- acpi_os_map_memory(buff->log_start_addr, buff->log_max_len, (void *) &virt);
-+ acpi_os_map_memory(start, len, (void *) &virt);
-
-- memcpy(log->bios_event_log, virt, buff->log_max_len);
-+ memcpy(log->bios_event_log, virt, len);
-
-- acpi_os_unmap_memory(virt, buff->log_max_len);
-+ acpi_os_unmap_memory(virt, len);
- return 0;
- }
-
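The tpm_bios.c hunks above teach read_log() about the TCPA table's platform_class: client and server platforms lay out the event-log length and start address differently, and an empty log is rejected for both. A toy of the selection logic (struct layout and values invented; the real table is packed ACPI data mapped from firmware):

  #include <stdio.h>

  enum bios_platform_class { BIOS_CLIENT = 0x00, BIOS_SERVER = 0x01 };

  struct acpi_tcpa_model {
      unsigned short platform_class;
      struct { unsigned int len; unsigned long long start; } client;
      struct { unsigned long long len, start; } server;
  };

  static int select_log(const struct acpi_tcpa_model *t,
                        unsigned long long *len, unsigned long long *start)
  {
      switch (t->platform_class) {
      case BIOS_SERVER:
          *len = t->server.len;
          *start = t->server.start;
          break;
      case BIOS_CLIENT:
      default:
          *len = t->client.len;
          *start = t->client.start;
          break;
      }
      return *len ? 0 : -1; /* mirror the empty-log check */
  }

  int main(void)
  {
      struct acpi_tcpa_model t = {
          BIOS_CLIENT, { 0x10000, 0xbfee0000ULL }, { 0, 0 }
      };
      unsigned long long len, start;

      if (!select_log(&t, &len, &start))
          printf("log: %llx bytes at %llx\n", len, start);
      return 0;
  }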
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_infineon.c ./drivers/char/tpm/tpm_infineon.c
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_infineon.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/char/tpm/tpm_infineon.c 2006-09-19 14:05:52.000000000 +0100
-@@ -15,6 +15,7 @@
- * License.
- */
-
-+#include <linux/init.h>
- #include <linux/pnp.h>
- #include "tpm.h"
-
-@@ -104,7 +105,7 @@ static int empty_fifo(struct tpm_chip *c
-
- if (clear_wrfifo) {
- for (i = 0; i < 4096; i++) {
-- status = inb(chip->vendor->base + WRFIFO);
-+ status = inb(chip->vendor.base + WRFIFO);
- if (status == 0xff) {
- if (check == 5)
- break;
-@@ -124,8 +125,8 @@ static int empty_fifo(struct tpm_chip *c
- */
- i = 0;
- do {
-- status = inb(chip->vendor->base + RDFIFO);
-- status = inb(chip->vendor->base + STAT);
-+ status = inb(chip->vendor.base + RDFIFO);
-+ status = inb(chip->vendor.base + STAT);
- i++;
- if (i == TPM_MAX_TRIES)
- return -EIO;
-@@ -138,7 +139,7 @@ static int wait(struct tpm_chip *chip, i
- int status;
- int i;
- for (i = 0; i < TPM_MAX_TRIES; i++) {
-- status = inb(chip->vendor->base + STAT);
-+ status = inb(chip->vendor.base + STAT);
- /* check the status-register if wait_for_bit is set */
- if (status & 1 << wait_for_bit)
- break;
-@@ -157,7 +158,7 @@ static int wait(struct tpm_chip *chip, i
- static void wait_and_send(struct tpm_chip *chip, u8 sendbyte)
- {
- wait(chip, STAT_XFE);
-- outb(sendbyte, chip->vendor->base + WRFIFO);
-+ outb(sendbyte, chip->vendor.base + WRFIFO);
- }
-
- /* Note: WTX means Waiting-Time-Extension. Whenever the TPM needs more
-@@ -204,7 +205,7 @@ recv_begin:
- ret = wait(chip, STAT_RDA);
- if (ret)
- return -EIO;
-- buf[i] = inb(chip->vendor->base + RDFIFO);
-+ buf[i] = inb(chip->vendor.base + RDFIFO);
- }
-
- if (buf[0] != TPM_VL_VER) {
-@@ -219,7 +220,7 @@ recv_begin:
-
- for (i = 0; i < size; i++) {
- wait(chip, STAT_RDA);
-- buf[i] = inb(chip->vendor->base + RDFIFO);
-+ buf[i] = inb(chip->vendor.base + RDFIFO);
- }
-
- if ((size == 0x6D00) && (buf[1] == 0x80)) {
-@@ -268,7 +269,7 @@ static int tpm_inf_send(struct tpm_chip
- u8 count_high, count_low, count_4, count_3, count_2, count_1;
-
- /* Disabling Reset, LP and IRQC */
-- outb(RESET_LP_IRQC_DISABLE, chip->vendor->base + CMD);
-+ outb(RESET_LP_IRQC_DISABLE, chip->vendor.base + CMD);
-
- ret = empty_fifo(chip, 1);
- if (ret) {
-@@ -319,7 +320,7 @@ static void tpm_inf_cancel(struct tpm_ch
-
- static u8 tpm_inf_status(struct tpm_chip *chip)
- {
-- return inb(chip->vendor->base + STAT);
-+ return inb(chip->vendor.base + STAT);
- }
-
- static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
-@@ -346,7 +347,7 @@ static struct file_operations inf_ops =
- .release = tpm_release,
- };
-
--static struct tpm_vendor_specific tpm_inf = {
-+static const struct tpm_vendor_specific tpm_inf = {
- .recv = tpm_inf_recv,
- .send = tpm_inf_send,
- .cancel = tpm_inf_cancel,
-@@ -375,6 +376,7 @@ static int __devinit tpm_inf_pnp_probe(s
- int version[2];
- int productid[2];
- char chipname[20];
-+ struct tpm_chip *chip;
-
- /* read IO-ports through PnP */
- if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) &&
-@@ -395,14 +397,13 @@ static int __devinit tpm_inf_pnp_probe(s
- goto err_last;
- }
- /* publish my base address and request region */
-- tpm_inf.base = TPM_INF_BASE;
- if (request_region
-- (tpm_inf.base, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) {
-+ (TPM_INF_BASE, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) {
- rc = -EINVAL;
- goto err_last;
- }
-- if (request_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN,
-- "tpm_infineon0") == NULL) {
-+ if (request_region
-+ (TPM_INF_ADDR, TPM_INF_ADDR_LEN, "tpm_infineon0") == NULL) {
- rc = -EINVAL;
- goto err_last;
- }
-@@ -442,9 +443,9 @@ static int __devinit tpm_inf_pnp_probe(s
-
- /* configure TPM with IO-ports */
- outb(IOLIMH, TPM_INF_ADDR);
-- outb(((tpm_inf.base >> 8) & 0xff), TPM_INF_DATA);
-+ outb(((TPM_INF_BASE >> 8) & 0xff), TPM_INF_DATA);
- outb(IOLIML, TPM_INF_ADDR);
-- outb((tpm_inf.base & 0xff), TPM_INF_DATA);
-+ outb((TPM_INF_BASE & 0xff), TPM_INF_DATA);
-
- /* control if IO-ports are set correctly */
- outb(IOLIMH, TPM_INF_ADDR);
-@@ -452,10 +453,10 @@ static int __devinit tpm_inf_pnp_probe(s
- outb(IOLIML, TPM_INF_ADDR);
- iol = inb(TPM_INF_DATA);
-
-- if ((ioh << 8 | iol) != tpm_inf.base) {
-+ if ((ioh << 8 | iol) != TPM_INF_BASE) {
- dev_err(&dev->dev,
-- "Could not set IO-ports to 0x%lx\n",
-- tpm_inf.base);
-+ "Could not set IO-ports to 0x%x\n",
-+ TPM_INF_BASE);
- rc = -EIO;
- goto err_release_region;
- }
-@@ -466,15 +467,15 @@ static int __devinit tpm_inf_pnp_probe(s
- outb(DISABLE_REGISTER_PAIR, TPM_INF_ADDR);
-
- /* disable RESET, LP and IRQC */
-- outb(RESET_LP_IRQC_DISABLE, tpm_inf.base + CMD);
-+ outb(RESET_LP_IRQC_DISABLE, TPM_INF_BASE + CMD);
-
- /* Finally, we're done, print some infos */
- dev_info(&dev->dev, "TPM found: "
- "config base 0x%x, "
- "io base 0x%x, "
-- "chip version %02x%02x, "
-- "vendor id %x%x (Infineon), "
-- "product id %02x%02x"
-+ "chip version 0x%02x%02x, "
-+ "vendor id 0x%x%x (Infineon), "
-+ "product id 0x%02x%02x"
- "%s\n",
- TPM_INF_ADDR,
- TPM_INF_BASE,
-@@ -482,11 +483,10 @@ static int __devinit tpm_inf_pnp_probe(s
- vendorid[0], vendorid[1],
- productid[0], productid[1], chipname);
-
-- rc = tpm_register_hardware(&dev->dev, &tpm_inf);
-- if (rc < 0) {
-- rc = -ENODEV;
-+ if (!(chip = tpm_register_hardware(&dev->dev, &tpm_inf))) {
- goto err_release_region;
- }
-+ chip->vendor.base = TPM_INF_BASE;
- return 0;
- } else {
- rc = -ENODEV;
-@@ -494,7 +494,7 @@ static int __devinit tpm_inf_pnp_probe(s
- }
-
- err_release_region:
-- release_region(tpm_inf.base, TPM_INF_PORT_LEN);
-+ release_region(TPM_INF_BASE, TPM_INF_PORT_LEN);
- release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN);
-
- err_last:
-@@ -506,7 +506,8 @@ static __devexit void tpm_inf_pnp_remove
- struct tpm_chip *chip = pnp_get_drvdata(dev);
-
- if (chip) {
-- release_region(chip->vendor->base, TPM_INF_PORT_LEN);
-+ release_region(TPM_INF_BASE, TPM_INF_PORT_LEN);
-+ release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN);
- tpm_remove_hardware(chip->dev);
- }
- }
-@@ -520,7 +521,7 @@ static struct pnp_driver tpm_inf_pnp = {
- },
- .id_table = tpm_pnp_tbl,
- .probe = tpm_inf_pnp_probe,
-- .remove = tpm_inf_pnp_remove,
-+ .remove = __devexit_p(tpm_inf_pnp_remove),
- };
-
- static int __init init_inf(void)
-@@ -538,5 +539,5 @@ module_exit(cleanup_inf);
-
- MODULE_AUTHOR("Marcel Selhorst <selhorst@crypto.rub.de>");
- MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2");
--MODULE_VERSION("1.7");
-+MODULE_VERSION("1.8");
- MODULE_LICENSE("GPL");
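
The tpm_infineon hunks above all follow one pattern: tpm_vendor_specific becomes a shared const template, and the per-device I/O base moves into the chip structure that tpm_register_hardware() now returns (chip->vendor.base). A standalone model of the idiom, keeping per-device state out of the shared template so a second probe cannot clobber the first; all names here are illustrative:

	#include <assert.h>

	struct vendor { const char *name; };	/* shared, read-only template */
	struct chip { const struct vendor *v; unsigned base; };

	struct chip *register_hw(struct chip *c, const struct vendor *v)
	{
		c->v = v;	/* the template itself is never written */
		return c;
	}

	int main(void)
	{
		static const struct vendor tmpl = { "tpm_infineon" };
		struct chip a, b;

		register_hw(&a, &tmpl)->base = 0x4e00;	/* first probe */
		register_hw(&b, &tmpl)->base = 0x560;	/* second probe */
		assert(a.base == 0x4e00);	/* first device unaffected */
		return 0;
	}
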
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_nsc.c ./drivers/char/tpm/tpm_nsc.c
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_nsc.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/char/tpm/tpm_nsc.c 2006-09-19 14:05:52.000000000 +0100
-@@ -71,7 +71,7 @@ static int wait_for_stat(struct tpm_chip
- unsigned long stop;
-
- /* status immediately available check */
-- *data = inb(chip->vendor->base + NSC_STATUS);
-+ *data = inb(chip->vendor.base + NSC_STATUS);
- if ((*data & mask) == val)
- return 0;
-
-@@ -79,7 +79,7 @@ static int wait_for_stat(struct tpm_chip
- stop = jiffies + 10 * HZ;
- do {
- msleep(TPM_TIMEOUT);
-- *data = inb(chip->vendor->base + 1);
-+ *data = inb(chip->vendor.base + 1);
- if ((*data & mask) == val)
- return 0;
- }
-@@ -94,9 +94,9 @@ static int nsc_wait_for_ready(struct tpm
- unsigned long stop;
-
- /* status immediately available check */
-- status = inb(chip->vendor->base + NSC_STATUS);
-+ status = inb(chip->vendor.base + NSC_STATUS);
- if (status & NSC_STATUS_OBF)
-- status = inb(chip->vendor->base + NSC_DATA);
-+ status = inb(chip->vendor.base + NSC_DATA);
- if (status & NSC_STATUS_RDY)
- return 0;
-
-@@ -104,9 +104,9 @@ static int nsc_wait_for_ready(struct tpm
- stop = jiffies + 100;
- do {
- msleep(TPM_TIMEOUT);
-- status = inb(chip->vendor->base + NSC_STATUS);
-+ status = inb(chip->vendor.base + NSC_STATUS);
- if (status & NSC_STATUS_OBF)
-- status = inb(chip->vendor->base + NSC_DATA);
-+ status = inb(chip->vendor.base + NSC_DATA);
- if (status & NSC_STATUS_RDY)
- return 0;
- }
-@@ -132,7 +132,7 @@ static int tpm_nsc_recv(struct tpm_chip
- return -EIO;
- }
- if ((data =
-- inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) {
-+ inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) {
- dev_err(chip->dev, "not in normal mode (0x%x)\n",
- data);
- return -EIO;
-@@ -148,7 +148,7 @@ static int tpm_nsc_recv(struct tpm_chip
- }
- if (data & NSC_STATUS_F0)
- break;
-- *p = inb(chip->vendor->base + NSC_DATA);
-+ *p = inb(chip->vendor.base + NSC_DATA);
- }
-
- if ((data & NSC_STATUS_F0) == 0 &&
-@@ -156,7 +156,7 @@ static int tpm_nsc_recv(struct tpm_chip
- dev_err(chip->dev, "F0 not set\n");
- return -EIO;
- }
-- if ((data = inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_EOC) {
-+ if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) {
- dev_err(chip->dev,
- "expected end of command(0x%x)\n", data);
- return -EIO;
-@@ -182,7 +182,7 @@ static int tpm_nsc_send(struct tpm_chip
- * fix it. Not sure why this is needed, we followed the flow
- * chart in the manual to the letter.
- */
-- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-+ outb(NSC_COMMAND_CANCEL, chip->vendor.base + NSC_COMMAND);
-
- if (nsc_wait_for_ready(chip) != 0)
- return -EIO;
-@@ -192,7 +192,7 @@ static int tpm_nsc_send(struct tpm_chip
- return -EIO;
- }
-
-- outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND);
-+ outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND);
- if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) {
- dev_err(chip->dev, "IBR timeout\n");
- return -EIO;
-@@ -204,26 +204,26 @@ static int tpm_nsc_send(struct tpm_chip
- "IBF timeout (while writing data)\n");
- return -EIO;
- }
-- outb(buf[i], chip->vendor->base + NSC_DATA);
-+ outb(buf[i], chip->vendor.base + NSC_DATA);
- }
-
- if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
- dev_err(chip->dev, "IBF timeout\n");
- return -EIO;
- }
-- outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND);
-+ outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND);
-
- return count;
- }
-
- static void tpm_nsc_cancel(struct tpm_chip *chip)
- {
-- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-+ outb(NSC_COMMAND_CANCEL, chip->vendor.base + NSC_COMMAND);
- }
-
- static u8 tpm_nsc_status(struct tpm_chip *chip)
- {
-- return inb(chip->vendor->base + NSC_STATUS);
-+ return inb(chip->vendor.base + NSC_STATUS);
- }
-
- static struct file_operations nsc_ops = {
-@@ -250,7 +250,7 @@ static struct attribute * nsc_attrs[] =
-
- static struct attribute_group nsc_attr_grp = { .attrs = nsc_attrs };
-
--static struct tpm_vendor_specific tpm_nsc = {
-+static const struct tpm_vendor_specific tpm_nsc = {
- .recv = tpm_nsc_recv,
- .send = tpm_nsc_send,
- .cancel = tpm_nsc_cancel,
-@@ -268,7 +268,7 @@ static void __devexit tpm_nsc_remove(str
- {
- struct tpm_chip *chip = dev_get_drvdata(dev);
- if ( chip ) {
-- release_region(chip->vendor->base, 2);
-+ release_region(chip->vendor.base, 2);
- tpm_remove_hardware(chip->dev);
- }
- }
-@@ -286,7 +286,8 @@ static int __init init_nsc(void)
- int rc = 0;
- int lo, hi;
- int nscAddrBase = TPM_ADDR;
--
-+ struct tpm_chip *chip;
-+ unsigned long base;
-
- /* verify that it is a National part (SID) */
- if (tpm_read_index(TPM_ADDR, NSC_SID_INDEX) != 0xEF) {
-@@ -300,7 +301,7 @@ static int __init init_nsc(void)
-
- hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI);
- lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO);
-- tpm_nsc.base = (hi<<8) | lo;
-+ base = (hi<<8) | lo;
-
- /* enable the DPM module */
- tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01);
-@@ -320,13 +321,15 @@ static int __init init_nsc(void)
- if ((rc = platform_device_register(pdev)) < 0)
- goto err_free_dev;
-
-- if (request_region(tpm_nsc.base, 2, "tpm_nsc0") == NULL ) {
-+ if (request_region(base, 2, "tpm_nsc0") == NULL ) {
- rc = -EBUSY;
- goto err_unreg_dev;
- }
-
-- if ((rc = tpm_register_hardware(&pdev->dev, &tpm_nsc)) < 0)
-+ if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) {
-+ rc = -ENODEV;
- goto err_rel_reg;
-+ }
-
- dev_dbg(&pdev->dev, "NSC TPM detected\n");
- dev_dbg(&pdev->dev,
-@@ -361,10 +364,12 @@ static int __init init_nsc(void)
- "NSC TPM revision %d\n",
- tpm_read_index(nscAddrBase, 0x27) & 0x1F);
-
-+ chip->vendor.base = base;
-+
- return 0;
-
- err_rel_reg:
-- release_region(tpm_nsc.base, 2);
-+ release_region(base, 2);
- err_unreg_dev:
- platform_device_unregister(pdev);
- err_free_dev:
-diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_tis.c ./drivers/char/tpm/tpm_tis.c
---- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_tis.c 1970-01-01 01:00:00.000000000 +0100
-+++ ./drivers/char/tpm/tpm_tis.c 2006-09-19 14:05:52.000000000 +0100
-@@ -0,0 +1,665 @@
-+/*
-+ * Copyright (C) 2005, 2006 IBM Corporation
-+ *
-+ * Authors:
-+ * Leendert van Doorn <leendert@watson.ibm.com>
-+ * Kylene Hall <kjhall@us.ibm.com>
-+ *
-+ * Device driver for TCG/TCPA TPM (trusted platform module).
-+ * Specifications at www.trustedcomputinggroup.org
-+ *
-+ * This device driver implements the TPM interface as defined in
-+ * the TCG TPM Interface Spec version 1.2, revision 1.0.
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License as
-+ * published by the Free Software Foundation, version 2 of the
-+ * License.
-+ */
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/moduleparam.h>
-+#include <linux/pnp.h>
-+#include <linux/interrupt.h>
-+#include <linux/wait.h>
-+#include "tpm.h"
-+
-+#define TPM_HEADER_SIZE 10
-+
-+enum tis_access {
-+ TPM_ACCESS_VALID = 0x80,
-+ TPM_ACCESS_ACTIVE_LOCALITY = 0x20,
-+ TPM_ACCESS_REQUEST_PENDING = 0x04,
-+ TPM_ACCESS_REQUEST_USE = 0x02,
-+};
-+
-+enum tis_status {
-+ TPM_STS_VALID = 0x80,
-+ TPM_STS_COMMAND_READY = 0x40,
-+ TPM_STS_GO = 0x20,
-+ TPM_STS_DATA_AVAIL = 0x10,
-+ TPM_STS_DATA_EXPECT = 0x08,
-+};
-+
-+enum tis_int_flags {
-+ TPM_GLOBAL_INT_ENABLE = 0x80000000,
-+ TPM_INTF_BURST_COUNT_STATIC = 0x100,
-+ TPM_INTF_CMD_READY_INT = 0x080,
-+ TPM_INTF_INT_EDGE_FALLING = 0x040,
-+ TPM_INTF_INT_EDGE_RISING = 0x020,
-+ TPM_INTF_INT_LEVEL_LOW = 0x010,
-+ TPM_INTF_INT_LEVEL_HIGH = 0x008,
-+ TPM_INTF_LOCALITY_CHANGE_INT = 0x004,
-+ TPM_INTF_STS_VALID_INT = 0x002,
-+ TPM_INTF_DATA_AVAIL_INT = 0x001,
-+};
-+
-+enum tis_defaults {
-+ TIS_MEM_BASE = 0xFED40000,
-+ TIS_MEM_LEN = 0x5000,
-+ TIS_SHORT_TIMEOUT = 750, /* ms */
-+ TIS_LONG_TIMEOUT = 2000, /* 2 sec */
-+};
-+
-+#define TPM_ACCESS(l) (0x0000 | ((l) << 12))
-+#define TPM_INT_ENABLE(l) (0x0008 | ((l) << 12))
-+#define TPM_INT_VECTOR(l) (0x000C | ((l) << 12))
-+#define TPM_INT_STATUS(l) (0x0010 | ((l) << 12))
-+#define TPM_INTF_CAPS(l) (0x0014 | ((l) << 12))
-+#define TPM_STS(l) (0x0018 | ((l) << 12))
-+#define TPM_DATA_FIFO(l) (0x0024 | ((l) << 12))
-+
-+#define TPM_DID_VID(l) (0x0F00 | ((l) << 12))
-+#define TPM_RID(l) (0x0F04 | ((l) << 12))
-+
-+static LIST_HEAD(tis_chips);
-+static DEFINE_SPINLOCK(tis_lock);
-+
-+static int check_locality(struct tpm_chip *chip, int l)
-+{
-+ if ((ioread8(chip->vendor.iobase + TPM_ACCESS(l)) &
-+ (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) ==
-+ (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID))
-+ return chip->vendor.locality = l;
-+
-+ return -1;
-+}
-+
-+static void release_locality(struct tpm_chip *chip, int l, int force)
-+{
-+ if (force || (ioread8(chip->vendor.iobase + TPM_ACCESS(l)) &
-+ (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID)) ==
-+ (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID))
-+ iowrite8(TPM_ACCESS_ACTIVE_LOCALITY,
-+ chip->vendor.iobase + TPM_ACCESS(l));
-+}
-+
-+static int request_locality(struct tpm_chip *chip, int l)
-+{
-+ unsigned long stop;
-+ long rc;
-+
-+ if (check_locality(chip, l) >= 0)
-+ return l;
-+
-+ iowrite8(TPM_ACCESS_REQUEST_USE,
-+ chip->vendor.iobase + TPM_ACCESS(l));
-+
-+ if (chip->vendor.irq) {
-+ rc = wait_event_interruptible_timeout(chip->vendor.int_queue,
-+ (check_locality
-+ (chip, l) >= 0),
-+ chip->vendor.timeout_a);
-+ if (rc > 0)
-+ return l;
-+
-+ } else {
-+ /* wait for burstcount */
-+ stop = jiffies + chip->vendor.timeout_a;
-+ do {
-+ if (check_locality(chip, l) >= 0)
-+ return l;
-+ msleep(TPM_TIMEOUT);
-+ }
-+ while (time_before(jiffies, stop));
-+ }
-+ return -1;
-+}
-+
-+static u8 tpm_tis_status(struct tpm_chip *chip)
-+{
-+ return ioread8(chip->vendor.iobase +
-+ TPM_STS(chip->vendor.locality));
-+}
-+
-+static void tpm_tis_ready(struct tpm_chip *chip)
-+{
-+ /* this causes the current command to be aborted */
-+ iowrite8(TPM_STS_COMMAND_READY,
-+ chip->vendor.iobase + TPM_STS(chip->vendor.locality));
-+}
-+
-+static int get_burstcount(struct tpm_chip *chip)
-+{
-+ unsigned long stop;
-+ int burstcnt;
-+
-+ /* wait for burstcount */
-+ /* which timeout value, spec has 2 answers (c & d) */
-+ stop = jiffies + chip->vendor.timeout_d;
-+ do {
-+ burstcnt = ioread8(chip->vendor.iobase +
-+ TPM_STS(chip->vendor.locality) + 1);
-+ burstcnt += ioread8(chip->vendor.iobase +
-+ TPM_STS(chip->vendor.locality) +
-+ 2) << 8;
-+ if (burstcnt)
-+ return burstcnt;
-+ msleep(TPM_TIMEOUT);
-+ } while (time_before(jiffies, stop));
-+ return -EBUSY;
-+}
-+
-+static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
-+ wait_queue_head_t *queue)
-+{
-+ unsigned long stop;
-+ long rc;
-+ u8 status;
-+
-+ /* check current status */
-+ status = tpm_tis_status(chip);
-+ if ((status & mask) == mask)
-+ return 0;
-+
-+ if (chip->vendor.irq) {
-+ rc = wait_event_interruptible_timeout(*queue,
-+ ((tpm_tis_status
-+ (chip) & mask) ==
-+ mask), timeout);
-+ if (rc > 0)
-+ return 0;
-+ } else {
-+ stop = jiffies + timeout;
-+ do {
-+ msleep(TPM_TIMEOUT);
-+ status = tpm_tis_status(chip);
-+ if ((status & mask) == mask)
-+ return 0;
-+ } while (time_before(jiffies, stop));
-+ }
-+ return -ETIME;
-+}
-+
-+static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count)
-+{
-+ int size = 0, burstcnt;
-+ while (size < count &&
-+ wait_for_stat(chip,
-+ TPM_STS_DATA_AVAIL | TPM_STS_VALID,
-+ chip->vendor.timeout_c,
-+ &chip->vendor.read_queue)
-+ == 0) {
-+ burstcnt = get_burstcount(chip);
-+ for (; burstcnt > 0 && size < count; burstcnt--)
-+ buf[size++] = ioread8(chip->vendor.iobase +
-+ TPM_DATA_FIFO(chip->vendor.
-+ locality));
-+ }
-+ return size;
-+}
-+
-+static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
-+{
-+ int size = 0;
-+ int expected, status;
-+
-+ if (count < TPM_HEADER_SIZE) {
-+ size = -EIO;
-+ goto out;
-+ }
-+
-+ /* read first 10 bytes, including tag, paramsize, and result */
-+ if ((size =
-+ recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) {
-+ dev_err(chip->dev, "Unable to read header\n");
-+ goto out;
-+ }
-+
-+ expected = be32_to_cpu(*(__be32 *) (buf + 2));
-+ if (expected > count) {
-+ size = -EIO;
-+ goto out;
-+ }
-+
-+ if ((size +=
-+ recv_data(chip, &buf[TPM_HEADER_SIZE],
-+ expected - TPM_HEADER_SIZE)) < expected) {
-+ dev_err(chip->dev, "Unable to read remainder of result\n");
-+ size = -ETIME;
-+ goto out;
-+ }
-+
-+ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
-+ &chip->vendor.int_queue);
-+ status = tpm_tis_status(chip);
-+ if (status & TPM_STS_DATA_AVAIL) { /* retry? */
-+ dev_err(chip->dev, "Error left over data\n");
-+ size = -EIO;
-+ goto out;
-+ }
-+
-+out:
-+ tpm_tis_ready(chip);
-+ release_locality(chip, chip->vendor.locality, 0);
-+ return size;
-+}
-+
-+/*
-+ * If interrupts are used (signaled by an irq set in the vendor structure)
-+ * tpm.c can skip polling for the data to be available as the interrupt is
-+ * waited for here
-+ */
-+static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
-+{
-+ int rc, status, burstcnt;
-+ size_t count = 0;
-+ u32 ordinal;
-+
-+ if (request_locality(chip, 0) < 0)
-+ return -EBUSY;
-+
-+ status = tpm_tis_status(chip);
-+ if ((status & TPM_STS_COMMAND_READY) == 0) {
-+ tpm_tis_ready(chip);
-+ if (wait_for_stat
-+ (chip, TPM_STS_COMMAND_READY, chip->vendor.timeout_b,
-+ &chip->vendor.int_queue) < 0) {
-+ rc = -ETIME;
-+ goto out_err;
-+ }
-+ }
-+
-+ while (count < len - 1) {
-+ burstcnt = get_burstcount(chip);
-+ for (; burstcnt > 0 && count < len - 1; burstcnt--) {
-+ iowrite8(buf[count], chip->vendor.iobase +
-+ TPM_DATA_FIFO(chip->vendor.locality));
-+ count++;
-+ }
-+
-+ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
-+ &chip->vendor.int_queue);
-+ status = tpm_tis_status(chip);
-+ if ((status & TPM_STS_DATA_EXPECT) == 0) {
-+ rc = -EIO;
-+ goto out_err;
-+ }
-+ }
-+
-+ /* write last byte */
-+ iowrite8(buf[count],
-+ chip->vendor.iobase +
-+ TPM_DATA_FIFO(chip->vendor.locality));
-+ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
-+ &chip->vendor.int_queue);
-+ status = tpm_tis_status(chip);
-+ if ((status & TPM_STS_DATA_EXPECT) != 0) {
-+ rc = -EIO;
-+ goto out_err;
-+ }
-+
-+ /* go and do it */
-+ iowrite8(TPM_STS_GO,
-+ chip->vendor.iobase + TPM_STS(chip->vendor.locality));
-+
-+ if (chip->vendor.irq) {
-+ ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
-+ if (wait_for_stat
-+ (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID,
-+ tpm_calc_ordinal_duration(chip, ordinal),
-+ &chip->vendor.read_queue) < 0) {
-+ rc = -ETIME;
-+ goto out_err;
-+ }
-+ }
-+ return len;
-+out_err:
-+ tpm_tis_ready(chip);
-+ release_locality(chip, chip->vendor.locality, 0);
-+ return rc;
-+}
-+
-+static struct file_operations tis_ops = {
-+ .owner = THIS_MODULE,
-+ .llseek = no_llseek,
-+ .open = tpm_open,
-+ .read = tpm_read,
-+ .write = tpm_write,
-+ .release = tpm_release,
-+};
-+
-+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
-+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
-+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
-+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
-+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
-+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
-+ NULL);
-+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
-+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
-+
-+static struct attribute *tis_attrs[] = {
-+ &dev_attr_pubek.attr,
-+ &dev_attr_pcrs.attr,
-+ &dev_attr_enabled.attr,
-+ &dev_attr_active.attr,
-+ &dev_attr_owned.attr,
-+ &dev_attr_temp_deactivated.attr,
-+ &dev_attr_caps.attr,
-+ &dev_attr_cancel.attr, NULL,
-+};
-+
-+static struct attribute_group tis_attr_grp = {
-+ .attrs = tis_attrs
-+};
-+
-+static struct tpm_vendor_specific tpm_tis = {
-+ .status = tpm_tis_status,
-+ .recv = tpm_tis_recv,
-+ .send = tpm_tis_send,
-+ .cancel = tpm_tis_ready,
-+ .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
-+ .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
-+ .req_canceled = TPM_STS_COMMAND_READY,
-+ .attr_group = &tis_attr_grp,
-+ .miscdev = {
-+ .fops = &tis_ops,},
-+};
-+
-+static irqreturn_t tis_int_probe(int irq, void *dev_id, struct pt_regs *regs)
-+{
-+ struct tpm_chip *chip = (struct tpm_chip *) dev_id;
-+ u32 interrupt;
-+
-+ interrupt = ioread32(chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+
-+ if (interrupt == 0)
-+ return IRQ_NONE;
-+
-+ chip->vendor.irq = irq;
-+
-+ /* Clear interrupts handled with TPM_EOI */
-+ iowrite32(interrupt,
-+ chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+ return IRQ_HANDLED;
-+}
-+
-+static irqreturn_t tis_int_handler(int irq, void *dev_id, struct pt_regs *regs)
-+{
-+ struct tpm_chip *chip = (struct tpm_chip *) dev_id;
-+ u32 interrupt;
-+ int i;
-+
-+ interrupt = ioread32(chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+
-+ if (interrupt == 0)
-+ return IRQ_NONE;
-+
-+ if (interrupt & TPM_INTF_DATA_AVAIL_INT)
-+ wake_up_interruptible(&chip->vendor.read_queue);
-+ if (interrupt & TPM_INTF_LOCALITY_CHANGE_INT)
-+ for (i = 0; i < 5; i++)
-+ if (check_locality(chip, i) >= 0)
-+ break;
-+ if (interrupt &
-+ (TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_STS_VALID_INT |
-+ TPM_INTF_CMD_READY_INT))
-+ wake_up_interruptible(&chip->vendor.int_queue);
-+
-+ /* Clear interrupts handled with TPM_EOI */
-+ iowrite32(interrupt,
-+ chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+ return IRQ_HANDLED;
-+}
-+
-+static int interrupts = 1;
-+module_param(interrupts, bool, 0444);
-+MODULE_PARM_DESC(interrupts, "Enable interrupts");
-+
-+static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
-+ const struct pnp_device_id *pnp_id)
-+{
-+ u32 vendor, intfcaps, intmask;
-+ int rc, i;
-+ unsigned long start, len;
-+ struct tpm_chip *chip;
-+
-+ start = pnp_mem_start(pnp_dev, 0);
-+ len = pnp_mem_len(pnp_dev, 0);
-+
-+ if (!start)
-+ start = TIS_MEM_BASE;
-+ if (!len)
-+ len = TIS_MEM_LEN;
-+
-+ if (!(chip = tpm_register_hardware(&pnp_dev->dev, &tpm_tis)))
-+ return -ENODEV;
-+
-+ chip->vendor.iobase = ioremap(start, len);
-+ if (!chip->vendor.iobase) {
-+ rc = -EIO;
-+ goto out_err;
-+ }
-+
-+ vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
-+
-+ /* Default timeouts */
-+ chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-+ chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
-+ chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-+ chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-+
-+ dev_info(&pnp_dev->dev,
-+ "1.2 TPM (device-id 0x%X, rev-id %d)\n",
-+ vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
-+
-+ /* Figure out the capabilities */
-+ intfcaps =
-+ ioread32(chip->vendor.iobase +
-+ TPM_INTF_CAPS(chip->vendor.locality));
-+ dev_dbg(&pnp_dev->dev, "TPM interface capabilities (0x%x):\n",
-+ intfcaps);
-+ if (intfcaps & TPM_INTF_BURST_COUNT_STATIC)
-+ dev_dbg(&pnp_dev->dev, "\tBurst Count Static\n");
-+ if (intfcaps & TPM_INTF_CMD_READY_INT)
-+ dev_dbg(&pnp_dev->dev, "\tCommand Ready Int Support\n");
-+ if (intfcaps & TPM_INTF_INT_EDGE_FALLING)
-+ dev_dbg(&pnp_dev->dev, "\tInterrupt Edge Falling\n");
-+ if (intfcaps & TPM_INTF_INT_EDGE_RISING)
-+ dev_dbg(&pnp_dev->dev, "\tInterrupt Edge Rising\n");
-+ if (intfcaps & TPM_INTF_INT_LEVEL_LOW)
-+ dev_dbg(&pnp_dev->dev, "\tInterrupt Level Low\n");
-+ if (intfcaps & TPM_INTF_INT_LEVEL_HIGH)
-+ dev_dbg(&pnp_dev->dev, "\tInterrupt Level High\n");
-+ if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT)
-+ dev_dbg(&pnp_dev->dev, "\tLocality Change Int Support\n");
-+ if (intfcaps & TPM_INTF_STS_VALID_INT)
-+ dev_dbg(&pnp_dev->dev, "\tSts Valid Int Support\n");
-+ if (intfcaps & TPM_INTF_DATA_AVAIL_INT)
-+ dev_dbg(&pnp_dev->dev, "\tData Avail Int Support\n");
-+
-+ if (request_locality(chip, 0) != 0) {
-+ rc = -ENODEV;
-+ goto out_err;
-+ }
-+
-+ /* INTERRUPT Setup */
-+ init_waitqueue_head(&chip->vendor.read_queue);
-+ init_waitqueue_head(&chip->vendor.int_queue);
-+
-+ intmask =
-+ ioread32(chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+
-+ intmask |= TPM_INTF_CMD_READY_INT
-+ | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT
-+ | TPM_INTF_STS_VALID_INT;
-+
-+ iowrite32(intmask,
-+ chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+ if (interrupts) {
-+ chip->vendor.irq =
-+ ioread8(chip->vendor.iobase +
-+ TPM_INT_VECTOR(chip->vendor.locality));
-+
-+ for (i = 3; i < 16 && chip->vendor.irq == 0; i++) {
-+ iowrite8(i, chip->vendor.iobase +
-+ TPM_INT_VECTOR(chip->vendor.locality));
-+ if (request_irq
-+ (i, tis_int_probe, SA_SHIRQ,
-+ chip->vendor.miscdev.name, chip) != 0) {
-+ dev_info(chip->dev,
-+ "Unable to request irq: %d for probe\n",
-+ i);
-+ continue;
-+ }
-+
-+ /* Clear all existing */
-+ iowrite32(ioread32
-+ (chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality)),
-+ chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+
-+ /* Turn on */
-+ iowrite32(intmask | TPM_GLOBAL_INT_ENABLE,
-+ chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+
-+ /* Generate Interrupts */
-+ tpm_gen_interrupt(chip);
-+
-+ /* Turn off */
-+ iowrite32(intmask,
-+ chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+ free_irq(i, chip);
-+ }
-+ }
-+ if (chip->vendor.irq) {
-+ iowrite8(chip->vendor.irq,
-+ chip->vendor.iobase +
-+ TPM_INT_VECTOR(chip->vendor.locality));
-+ if (request_irq
-+ (chip->vendor.irq, tis_int_handler, SA_SHIRQ,
-+ chip->vendor.miscdev.name, chip) != 0) {
-+ dev_info(chip->dev,
-+ "Unable to request irq: %d for use\n",
-+ chip->vendor.irq);
-+ chip->vendor.irq = 0;
-+ } else {
-+ /* Clear all existing */
-+ iowrite32(ioread32
-+ (chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality)),
-+ chip->vendor.iobase +
-+ TPM_INT_STATUS(chip->vendor.locality));
-+
-+ /* Turn on */
-+ iowrite32(intmask | TPM_GLOBAL_INT_ENABLE,
-+ chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+ }
-+ }
-+
-+ INIT_LIST_HEAD(&chip->vendor.list);
-+ spin_lock(&tis_lock);
-+ list_add(&chip->vendor.list, &tis_chips);
-+ spin_unlock(&tis_lock);
-+
-+ tpm_get_timeouts(chip);
-+ tpm_continue_selftest(chip);
-+
-+ return 0;
-+out_err:
-+ if (chip->vendor.iobase)
-+ iounmap(chip->vendor.iobase);
-+ tpm_remove_hardware(chip->dev);
-+ return rc;
-+}
-+
-+static int tpm_tis_pnp_suspend(struct pnp_dev *dev, pm_message_t msg)
-+{
-+ return tpm_pm_suspend(&dev->dev, msg);
-+}
-+
-+static int tpm_tis_pnp_resume(struct pnp_dev *dev)
-+{
-+ return tpm_pm_resume(&dev->dev);
-+}
-+
-+static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = {
-+ {"PNP0C31", 0}, /* TPM */
-+ {"ATM1200", 0}, /* Atmel */
-+ {"IFX0102", 0}, /* Infineon */
-+ {"BCM0101", 0}, /* Broadcom */
-+ {"NSC1200", 0}, /* National */
-+ /* Add new here */
-+ {"", 0}, /* User Specified */
-+ {"", 0} /* Terminator */
-+};
-+
-+static struct pnp_driver tis_pnp_driver = {
-+ .name = "tpm_tis",
-+ .id_table = tpm_pnp_tbl,
-+ .probe = tpm_tis_pnp_init,
-+ .suspend = tpm_tis_pnp_suspend,
-+ .resume = tpm_tis_pnp_resume,
-+};
-+
-+#define TIS_HID_USR_IDX sizeof(tpm_pnp_tbl)/sizeof(struct pnp_device_id) -2
-+module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id,
-+ sizeof(tpm_pnp_tbl[TIS_HID_USR_IDX].id), 0444);
-+MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe");
-+
-+static int __init init_tis(void)
-+{
-+ return pnp_register_driver(&tis_pnp_driver);
-+}
-+
-+static void __exit cleanup_tis(void)
-+{
-+ struct tpm_vendor_specific *i, *j;
-+ struct tpm_chip *chip;
-+ spin_lock(&tis_lock);
-+ list_for_each_entry_safe(i, j, &tis_chips, list) {
-+ chip = to_tpm_chip(i);
-+ iowrite32(~TPM_GLOBAL_INT_ENABLE &
-+ ioread32(chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.
-+ locality)),
-+ chip->vendor.iobase +
-+ TPM_INT_ENABLE(chip->vendor.locality));
-+ release_locality(chip, chip->vendor.locality, 1);
-+ if (chip->vendor.irq)
-+ free_irq(chip->vendor.irq, chip);
-+ iounmap(i->iobase);
-+ list_del(&i->list);
-+ tpm_remove_hardware(chip->dev);
-+ }
-+ spin_unlock(&tis_lock);
-+ pnp_unregister_driver(&tis_pnp_driver);
-+}
-+
-+module_init(init_tis);
-+module_exit(cleanup_tis);
-+MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
-+MODULE_DESCRIPTION("TPM Driver");
-+MODULE_VERSION("2.0");
-+MODULE_LICENSE("GPL");
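
The register macros at the top of the new tpm_tis.c encode the TIS rule that each locality owns a 4 KiB window: a register's offset is its base value OR'ed with (locality << 12), and get_burstcount() above reads its 16-bit burst count from the two bytes following TPM_STS. A standalone sketch of the address arithmetic, with the macros copied from the file:

	#include <stdio.h>

	#define TPM_ACCESS(l)		(0x0000 | ((l) << 12))
	#define TPM_STS(l)		(0x0018 | ((l) << 12))
	#define TPM_DATA_FIFO(l)	(0x0024 | ((l) << 12))

	int main(void)
	{
		int l;

		/* the interrupt handler above checks localities 0..4 */
		for (l = 0; l < 5; l++)
			printf("locality %d: ACCESS=0x%04x STS=0x%04x FIFO=0x%04x\n",
			       l, TPM_ACCESS(l), TPM_STS(l), TPM_DATA_FIFO(l));
		return 0;
	}
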
+++ /dev/null
-commit f796937a062c7aeb44cd0e75e1586c8543634a7d
-Author: Jeremy Fitzhardinge <jeremy@xensource.com>
-Date: Sun Jun 25 05:49:17 2006 -0700
-
- [PATCH] Fix bounds check in vsnprintf, to allow for a 0 size and NULL buffer
-
- This change allows callers to use a 0-byte buffer and a NULL buffer pointer
- with vsnprintf, so it can be used to determine how large the resulting
- formatted string will be.
-
- Previously the code effectively treated a size of 0 as a size of 4G (on
- 32-bit systems), with other checks preventing it from actually trying to
- emit the string - but the terminal \0 would still be written, which would
- crash if the buffer is NULL.
-
-    This change adjusts the boundary check so that 'end' points to the putative
- location of the terminal '\0', which is only written if size > 0.
-
- vsnprintf still allows the buffer size to be set very large, to allow
- unbounded buffer sizes (to implement sprintf, etc).
-
- [akpm@osdl.org: fix long-vs-longlong confusion]
- Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
- Signed-off-by: Chris Wright <chrisw@sous-sol.org>
- Signed-off-by: Andrew Morton <akpm@osdl.org>
- Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-
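
For reference, the userspace C99 analogue of the semantics this patch gives the kernel's vsnprintf: size 0 with a NULL buffer writes nothing and returns the length the formatted string would need, so a caller can size an allocation exactly. A small sketch:

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* probe pass: nothing is written, only the length returned */
		int need = snprintf(NULL, 0, "irq=%d name=%s", 9, "tpm_tis");
		char *buf = malloc(need + 1);	/* +1 for the trailing '\0' */

		if (!buf)
			return 1;
		snprintf(buf, need + 1, "irq=%d name=%s", 9, "tpm_tis");
		puts(buf);
		free(buf);
		return 0;
	}
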
-diff --git a/lib/vsprintf.c b/lib/vsprintf.c
-index b07db5c..f595947 100644
---- a/lib/vsprintf.c
-+++ b/lib/vsprintf.c
-@@ -187,49 +187,49 @@ static char * number(char * buf, char *
- size -= precision;
- if (!(type&(ZEROPAD+LEFT))) {
- while(size-->0) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = ' ';
- ++buf;
- }
- }
- if (sign) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = sign;
- ++buf;
- }
- if (type & SPECIAL) {
- if (base==8) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = '0';
- ++buf;
- } else if (base==16) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = '0';
- ++buf;
-- if (buf <= end)
-+ if (buf < end)
- *buf = digits[33];
- ++buf;
- }
- }
- if (!(type & LEFT)) {
- while (size-- > 0) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = c;
- ++buf;
- }
- }
- while (i < precision--) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = '0';
- ++buf;
- }
- while (i-- > 0) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = tmp[i];
- ++buf;
- }
- while (size-- > 0) {
-- if (buf <= end)
-+ if (buf < end)
- *buf = ' ';
- ++buf;
- }
-@@ -272,7 +272,8 @@ int vsnprintf(char *buf, size_t size, co
- /* 'z' changed to 'Z' --davidm 1/25/99 */
- /* 't' added for ptrdiff_t */
-
-- /* Reject out-of-range values early */
-+ /* Reject out-of-range values early. Large positive sizes are
-+ used for unknown buffer sizes. */
- if (unlikely((int) size < 0)) {
- /* There can be only one.. */
- static int warn = 1;
-@@ -282,16 +283,17 @@ int vsnprintf(char *buf, size_t size, co
- }
-
- str = buf;
-- end = buf + size - 1;
-+ end = buf + size;
-
-- if (end < buf - 1) {
-- end = ((void *) -1);
-- size = end - buf + 1;
-+ /* Make sure end is always >= buf */
-+ if (end < buf) {
-+ end = ((void *)-1);
-+ size = end - buf;
- }
-
- for (; *fmt ; ++fmt) {
- if (*fmt != '%') {
-- if (str <= end)
-+ if (str < end)
- *str = *fmt;
- ++str;
- continue;
-@@ -357,17 +359,17 @@ int vsnprintf(char *buf, size_t size, co
- case 'c':
- if (!(flags & LEFT)) {
- while (--field_width > 0) {
-- if (str <= end)
-+ if (str < end)
- *str = ' ';
- ++str;
- }
- }
- c = (unsigned char) va_arg(args, int);
-- if (str <= end)
-+ if (str < end)
- *str = c;
- ++str;
- while (--field_width > 0) {
-- if (str <= end)
-+ if (str < end)
- *str = ' ';
- ++str;
- }
-@@ -382,18 +384,18 @@ int vsnprintf(char *buf, size_t size, co
-
- if (!(flags & LEFT)) {
- while (len < field_width--) {
-- if (str <= end)
-+ if (str < end)
- *str = ' ';
- ++str;
- }
- }
- for (i = 0; i < len; ++i) {
-- if (str <= end)
-+ if (str < end)
- *str = *s;
- ++str; ++s;
- }
- while (len < field_width--) {
-- if (str <= end)
-+ if (str < end)
- *str = ' ';
- ++str;
- }
-@@ -426,7 +428,7 @@ int vsnprintf(char *buf, size_t size, co
- continue;
-
- case '%':
-- if (str <= end)
-+ if (str < end)
- *str = '%';
- ++str;
- continue;
-@@ -449,11 +451,11 @@ int vsnprintf(char *buf, size_t size, co
- break;
-
- default:
-- if (str <= end)
-+ if (str < end)
- *str = '%';
- ++str;
- if (*fmt) {
-- if (str <= end)
-+ if (str < end)
- *str = *fmt;
- ++str;
- } else {
-@@ -483,14 +485,13 @@ int vsnprintf(char *buf, size_t size, co
- str = number(str, end, num, base,
- field_width, precision, flags);
- }
-- if (str <= end)
-- *str = '\0';
-- else if (size > 0)
-- /* don't write out a null byte if the buf size is zero */
-- *end = '\0';
-- /* the trailing null byte doesn't count towards the total
-- * ++str;
-- */
-+ if (size > 0) {
-+ if (str < end)
-+ *str = '\0';
-+ else
-+ end[-1] = '\0';
-+ }
-+ /* the trailing null byte doesn't count towards the total */
- return str-buf;
- }
-
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/include/linux/elfnote.h ./include/linux/elfnote.h
---- ../orig-linux-2.6.16.29/include/linux/elfnote.h 2006-09-19 14:06:10.000000000 +0100
-+++ ./include/linux/elfnote.h 2006-09-19 14:06:20.000000000 +0100
-@@ -31,22 +31,24 @@
- /*
- * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
- * turn out to be the same size and shape), followed by the name and
-- * desc data with appropriate padding. The 'desc' argument includes
-- * the assembler pseudo op defining the type of the data: .asciz
-- * "hello, world"
-+ * desc data with appropriate padding. The 'desctype' argument is the
-+ * assembler pseudo op defining the type of the data e.g. .asciz while
-+ * 'descdata' is the data itself e.g. "hello, world".
-+ *
-+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
-+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
- */
--.macro ELFNOTE name type desc:vararg
--.pushsection ".note.\name"
-- .align 4
-- .long 2f - 1f /* namesz */
-- .long 4f - 3f /* descsz */
-- .long \type
--1:.asciz "\name"
--2:.align 4
--3:\desc
--4:.align 4
--.popsection
--.endm
-+#define ELFNOTE(name, type, desctype, descdata) \
-+.pushsection .note.name ; \
-+ .align 4 ; \
-+ .long 2f - 1f /* namesz */ ; \
-+ .long 4f - 3f /* descsz */ ; \
-+ .long type ; \
-+1:.asciz "name" ; \
-+2:.align 4 ; \
-+3:desctype descdata ; \
-+4:.align 4 ; \
-+.popsection ;
- #else /* !__ASSEMBLER__ */
- #include <linux/elf.h>
- /*
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S
---- ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S 2006-09-19 14:05:44.000000000 +0100
-+++ ./arch/i386/kernel/entry.S 2006-09-19 14:05:56.000000000 +0100
-@@ -406,7 +406,7 @@ vector=0
- ENTRY(irq_entries_start)
- .rept NR_IRQS
- ALIGN
--1: pushl $vector-256
-+1: pushl $~(vector)
- jmp common_interrupt
- .data
- .long 1b
-@@ -423,7 +423,7 @@ common_interrupt:
-
- #define BUILD_INTERRUPT(name, nr) \
- ENTRY(name) \
-- pushl $nr-256; \
-+ pushl $~(nr); \
- SAVE_ALL \
- movl %esp,%eax; \
- call smp_/**/name; \
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/irq.c ./arch/i386/kernel/irq.c
---- ../orig-linux-2.6.16.29/arch/i386/kernel/irq.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/i386/kernel/irq.c 2006-09-19 14:05:56.000000000 +0100
-@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPU
- */
- fastcall unsigned int do_IRQ(struct pt_regs *regs)
- {
-- /* high bits used in ret_from_ code */
-- int irq = regs->orig_eax & 0xff;
-+ /* high bit used in ret_from_ code */
-+ int irq = ~regs->orig_eax;
- #ifdef CONFIG_4KSTACKS
- union irq_ctx *curctx, *irqctx;
- u32 *isp;
-diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/entry.S ./arch/x86_64/kernel/entry.S
---- ../orig-linux-2.6.16.29/arch/x86_64/kernel/entry.S 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/x86_64/kernel/entry.S 2006-09-19 14:05:56.000000000 +0100
-@@ -596,7 +596,7 @@ retint_kernel:
- */
- .macro apicinterrupt num,func
- INTR_FRAME
-- pushq $\num-256
-+ pushq $~(\num)
- CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
- jmp ret_from_intr
-diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/irq.c ./arch/x86_64/kernel/irq.c
---- ../orig-linux-2.6.16.29/arch/x86_64/kernel/irq.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/x86_64/kernel/irq.c 2006-09-19 14:05:56.000000000 +0100
-@@ -96,8 +96,8 @@ skip:
- */
- asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
- {
-- /* high bits used in ret_from_ code */
-- unsigned irq = regs->orig_rax & 0xff;
-+ /* high bit used in ret_from_ code */
-+ unsigned irq = ~regs->orig_rax;
-
- exit_idle();
- irq_enter();
-diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/smp.c ./arch/x86_64/kernel/smp.c
---- ../orig-linux-2.6.16.29/arch/x86_64/kernel/smp.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/x86_64/kernel/smp.c 2006-09-19 14:05:56.000000000 +0100
-@@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt
-
- cpu = smp_processor_id();
- /*
-- * orig_rax contains the interrupt vector - 256.
-+ * orig_rax contains the negated interrupt vector.
- * Use that to determine where the sender put the data.
- */
-- sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
-+ sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
- f = &per_cpu(flush_state, sender);
-
- if (!cpu_isset(cpu, f->flush_cpumask))
-diff -pruN ../orig-linux-2.6.16.29/include/asm-x86_64/hw_irq.h ./include/asm-x86_64/hw_irq.h
---- ../orig-linux-2.6.16.29/include/asm-x86_64/hw_irq.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-x86_64/hw_irq.h 2006-09-19 14:05:56.000000000 +0100
-@@ -127,7 +127,7 @@ asmlinkage void IRQ_NAME(nr); \
- __asm__( \
- "\n.p2align\n" \
- "IRQ" #nr "_interrupt:\n\t" \
-- "push $" #nr "-256 ; " \
-+ "push $~(" #nr ") ; " \
- "jmp common_interrupt");
-
- #if defined(CONFIG_X86_IO_APIC)
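
These hunks switch the value the interrupt stubs push from vector - 256 to the bitwise complement ~vector; do_IRQ() and smp_invalidate_interrupt() undo it with another complement. A standalone check that the round trip is exact for all 256 vectors and that the pushed value keeps its high bit set, which is what the ret_from_ code relies on:

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned vector;

		for (vector = 0; vector < 256; vector++) {
			long orig_eax = ~(long)vector;	/* what the stub pushes */
			unsigned irq = ~orig_eax;	/* what do_IRQ recovers */

			assert(irq == vector);
			assert(orig_eax < 0);	/* high bit is always set */
		}
		puts("all 256 vectors round-trip through ~");
		return 0;
	}
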
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:05:48.000000000 +0100
-+++ ./arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:06:10.000000000 +0100
-@@ -12,6 +12,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
- OUTPUT_ARCH(i386)
- ENTRY(phys_startup_32)
- jiffies = jiffies_64;
-+
-+PHDRS {
-+ text PT_LOAD FLAGS(5); /* R_E */
-+ data PT_LOAD FLAGS(7); /* RWE */
-+ note PT_NOTE FLAGS(4); /* R__ */
-+}
- SECTIONS
- {
- . = __KERNEL_START;
-@@ -25,7 +31,7 @@ SECTIONS
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
-- } = 0x9090
-+ } :text = 0x9090
-
- _etext = .; /* End of text section */
-
-@@ -47,7 +53,7 @@ SECTIONS
- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
- *(.data)
- CONSTRUCTORS
-- }
-+ } :data
-
- . = ALIGN(4096);
- __nosave_begin = .;
-@@ -154,4 +160,6 @@ SECTIONS
- STABS_DEBUG
-
- DWARF_DEBUG
-+
-+ NOTES
- }
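
The FLAGS() values in the new PHDRS block are plain ELF p_flags bitmasks (PF_X = 1, PF_W = 2, PF_R = 4), so 5 decodes to R_E, 7 to RWE and 4 to R__, matching the comments. A short decoder to confirm the arithmetic:

	#include <stdio.h>

	#define PF_X 0x1
	#define PF_W 0x2
	#define PF_R 0x4

	static void decode(unsigned f)
	{
		printf("FLAGS(%u) = %c%c%c\n", f,
		       (f & PF_R) ? 'R' : '_',
		       (f & PF_W) ? 'W' : '_',
		       (f & PF_X) ? 'E' : '_');
	}

	int main(void)
	{
		decode(5);	/* text: R_E */
		decode(7);	/* data: RWE */
		decode(4);	/* note: R__ */
		return 0;
	}
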
-diff -pruN ../orig-linux-2.6.16.29/include/asm-generic/vmlinux.lds.h ./include/asm-generic/vmlinux.lds.h
---- ../orig-linux-2.6.16.29/include/asm-generic/vmlinux.lds.h 2006-09-12 19:02:10.000000000 +0100
-+++ ./include/asm-generic/vmlinux.lds.h 2006-09-19 14:06:10.000000000 +0100
-@@ -152,3 +152,6 @@
- .stab.index 0 : { *(.stab.index) } \
- .stab.indexstr 0 : { *(.stab.indexstr) } \
- .comment 0 : { *(.comment) }
-+
-+#define NOTES \
-+ .notes : { *(.note.*) } :note
-diff -pruN ../orig-linux-2.6.16.29/include/linux/elfnote.h ./include/linux/elfnote.h
---- ../orig-linux-2.6.16.29/include/linux/elfnote.h 1970-01-01 01:00:00.000000000 +0100
-+++ ./include/linux/elfnote.h 2006-09-19 14:06:10.000000000 +0100
-@@ -0,0 +1,88 @@
-+#ifndef _LINUX_ELFNOTE_H
-+#define _LINUX_ELFNOTE_H
-+/*
-+ * Helper macros to generate ELF Note structures, which are put into a
-+ * PT_NOTE segment of the final vmlinux image. These are useful for
-+ * including name-value pairs of metadata into the kernel binary (or
-+ * modules?) for use by external programs.
-+ *
-+ * Each note has three parts: a name, a type and a desc. The name is
-+ * intended to distinguish the note's originator, so it would be a
-+ * company, project, subsystem, etc; it must be in a suitable form for
-+ * use in a section name. The type is an integer which is used to tag
-+ * the data, and is considered to be within the "name" namespace (so
-+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
-+ * "desc" field is the actual data. There are no constraints on the
-+ * desc field's contents, though typically they're fairly small.
-+ *
-+ * All notes from a given NAME are put into a section named
-+ * .note.NAME. When the kernel image is finally linked, all the notes
-+ * are packed into a single .notes section, which is mapped into the
-+ * PT_NOTE segment. Because notes for a given name are grouped into
-+ * the same section, they'll all be adjacent in the output file.
-+ *
-+ * This file defines macros for both C and assembler use. Their
-+ * syntax is slightly different, but they're semantically similar.
-+ *
-+ * See the ELF specification for more detail about ELF notes.
-+ */
-+
-+#ifdef __ASSEMBLER__
-+/*
-+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
-+ * turn out to be the same size and shape), followed by the name and
-+ * desc data with appropriate padding. The 'desc' argument includes
-+ * the assembler pseudo op defining the type of the data: .asciz
-+ * "hello, world"
-+ */
-+.macro ELFNOTE name type desc:vararg
-+.pushsection ".note.\name"
-+ .align 4
-+ .long 2f - 1f /* namesz */
-+ .long 4f - 3f /* descsz */
-+ .long \type
-+1:.asciz "\name"
-+2:.align 4
-+3:\desc
-+4:.align 4
-+.popsection
-+.endm
-+#else /* !__ASSEMBLER__ */
-+#include <linux/elf.h>
-+/*
-+ * Use an anonymous structure which matches the shape of
-+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
-+ * type of name and desc depend on the macro arguments. "name" must
-+ * be a literal string, and "desc" must be passed by value. You may
-+ * only define one note per line, since __LINE__ is used to generate
-+ * unique symbols.
-+ */
-+#define _ELFNOTE_PASTE(a,b) a##b
-+#define _ELFNOTE(size, name, unique, type, desc) \
-+ static const struct { \
-+ struct elf##size##_note _nhdr; \
-+ unsigned char _name[sizeof(name)] \
-+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
-+ typeof(desc) _desc \
-+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
-+ } _ELFNOTE_PASTE(_note_, unique) \
-+ __attribute_used__ \
-+ __attribute__((section(".note." name), \
-+ aligned(sizeof(Elf##size##_Word)), \
-+ unused)) = { \
-+ { \
-+ sizeof(name), \
-+ sizeof(desc), \
-+ type, \
-+ }, \
-+ name, \
-+ desc \
-+ }
-+#define ELFNOTE(size, name, type, desc) \
-+ _ELFNOTE(size, name, __LINE__, type, desc)
-+
-+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
-+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
-+#endif /* __ASSEMBLER__ */
-+
-+#endif /* _LINUX_ELFNOTE_H */
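
For completeness, a hypothetical use of the C-side macro defined above; the note name, type and payload are invented, following the header's own XYZCo example. The note lands in a .note.XYZCo section and, via the NOTES linker macro, in the PT_NOTE segment (kernel context, not a standalone program):

	#include <linux/elfnote.h>

	ELFNOTE32("XYZCo", 42, 0xdeadbeef);
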
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S
---- ../orig-linux-2.6.16.29/arch/x86_64/kernel/vmlinux.lds.S 2006-09-12 19:02:10.000000000 +0100
-+++ ./arch/x86_64/kernel/vmlinux.lds.S 2006-09-19 14:06:15.000000000 +0100
-@@ -14,6 +14,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86
- OUTPUT_ARCH(i386:x86-64)
- ENTRY(phys_startup_64)
- jiffies_64 = jiffies;
-+PHDRS {
-+ text PT_LOAD FLAGS(5); /* R_E */
-+ data PT_LOAD FLAGS(7); /* RWE */
-+ user PT_LOAD FLAGS(7); /* RWE */
-+ note PT_NOTE FLAGS(4); /* R__ */
-+}
- SECTIONS
- {
- . = __START_KERNEL;
-@@ -26,7 +32,7 @@ SECTIONS
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
-- } = 0x9090
-+ } :text = 0x9090
- /* out-of-line lock text */
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
-
-@@ -43,17 +49,10 @@ SECTIONS
- .data : AT(ADDR(.data) - LOAD_OFFSET) {
- *(.data)
- CONSTRUCTORS
-- }
-+ } :data
-
- _edata = .; /* End of data section */
-
-- __bss_start = .; /* BSS */
-- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-- *(.bss.page_aligned)
-- *(.bss)
-- }
-- __bss_stop = .;
--
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-@@ -75,7 +74,7 @@ SECTIONS
- #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = VSYSCALL_ADDR;
-- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
-+ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
- __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-@@ -118,7 +117,7 @@ SECTIONS
- . = ALIGN(8192); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
-- }
-+ } :data
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-@@ -188,6 +187,14 @@ SECTIONS
- . = ALIGN(4096);
- __nosave_end = .;
-
-+ __bss_start = .; /* BSS */
-+ . = ALIGN(4096);
-+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-+ *(.bss.page_aligned)
-+ *(.bss)
-+ }
-+ __bss_stop = .;
-+
- _end = . ;
-
- /* Sections to be discarded */
-@@ -201,4 +208,6 @@ SECTIONS
- STABS_DEBUG
-
- DWARF_DEBUG
-+
-+ NOTES
- }
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/fs/proc/proc_misc.c ./fs/proc/proc_misc.c
---- ../orig-linux-2.6.16.29/fs/proc/proc_misc.c 2006-09-12 19:02:10.000000000 +0100
-+++ ./fs/proc/proc_misc.c 2006-09-19 14:06:00.000000000 +0100
-@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p,
- (unsigned long long)cputime64_to_clock_t(irq),
- (unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal));
-- for_each_online_cpu(i) {
-+ for_each_cpu(i) {
-
- /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
- user = kstat_cpu(i).cpustat.user;
+++ /dev/null
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
---- ../orig-linux-2.6.16.29/drivers/oprofile/buffer_sync.c 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/buffer_sync.c 2006-11-06 15:16:52.000000000 -0800
-@@ -6,6 +6,10 @@
- *
- * @author John Levon <levon@movementarian.org>
- *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
-+ *
- * This is the core of the buffer management. Each
- * CPU buffer is processed and entered into the
- * global event buffer. Such processing is necessary
-@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_
- static DEFINE_SPINLOCK(task_mortuary);
- static void process_task_mortuary(void);
-
-+static int cpu_current_domain[NR_CPUS];
-
- /* Take ownership of the task struct and place it on the
- * list for processing. Only after two full buffer syncs
-@@ -146,6 +151,11 @@ static void end_sync(void)
- int sync_start(void)
- {
- int err;
-+ int i;
-+
-+ for (i = 0; i < NR_CPUS; i++) {
-+ cpu_current_domain[i] = COORDINATOR_DOMAIN;
-+ }
-
- start_cpu_work();
-
-@@ -275,15 +285,31 @@ static void add_cpu_switch(int i)
- last_cookie = INVALID_COOKIE;
- }
-
--static void add_kernel_ctx_switch(unsigned int in_kernel)
-+static void add_cpu_mode_switch(unsigned int cpu_mode)
- {
- add_event_entry(ESCAPE_CODE);
-- if (in_kernel)
-- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
-- else
-- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
-+ switch (cpu_mode) {
-+ case CPU_MODE_USER:
-+ add_event_entry(USER_ENTER_SWITCH_CODE);
-+ break;
-+ case CPU_MODE_KERNEL:
-+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
-+ break;
-+ case CPU_MODE_XEN:
-+ add_event_entry(XEN_ENTER_SWITCH_CODE);
-+ break;
-+ default:
-+ break;
-+ }
- }
--
-+
-+static void add_domain_switch(unsigned long domain_id)
-+{
-+ add_event_entry(ESCAPE_CODE);
-+ add_event_entry(DOMAIN_SWITCH_CODE);
-+ add_event_entry(domain_id);
-+}
-+
- static void
- add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
- {
-@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc
- * for later lookup from userspace.
- */
- static int
--add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
-+add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
- {
-- if (in_kernel) {
-+ if (cpu_mode >= CPU_MODE_KERNEL) {
- add_sample_entry(s->eip, s->event);
- return 1;
- } else if (mm) {
-@@ -496,15 +522,21 @@ void sync_buffer(int cpu)
- struct mm_struct *mm = NULL;
- struct task_struct * new;
- unsigned long cookie = 0;
-- int in_kernel = 1;
-+ int cpu_mode = 1;
- unsigned int i;
- sync_buffer_state state = sb_buffer_start;
- unsigned long available;
-+ int domain_switch = 0;
-
- down(&buffer_sem);
-
- add_cpu_switch(cpu);
-
-+ /* We need to assign the first samples in this CPU buffer to the
-+ same domain that we were processing at the last sync_buffer */
-+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
-+ add_domain_switch(cpu_current_domain[cpu]);
-+ }
- /* Remember, only we can modify tail_pos */
-
- available = get_slots(cpu_buf);
-@@ -512,16 +544,18 @@ void sync_buffer(int cpu)
- for (i = 0; i < available; ++i) {
- struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
-
-- if (is_code(s->eip)) {
-- if (s->event <= CPU_IS_KERNEL) {
-- /* kernel/userspace switch */
-- in_kernel = s->event;
-+ if (is_code(s->eip) && !domain_switch) {
-+ if (s->event <= CPU_MODE_XEN) {
-+ /* xen/kernel/userspace switch */
-+ cpu_mode = s->event;
- if (state == sb_buffer_start)
- state = sb_sample_start;
-- add_kernel_ctx_switch(s->event);
-+ add_cpu_mode_switch(s->event);
- } else if (s->event == CPU_TRACE_BEGIN) {
- state = sb_bt_start;
- add_trace_begin();
-+ } else if (s->event == CPU_DOMAIN_SWITCH) {
-+ domain_switch = 1;
- } else {
- struct mm_struct * oldmm = mm;
-
-@@ -535,11 +569,21 @@ void sync_buffer(int cpu)
- add_user_ctx_switch(new, cookie);
- }
- } else {
-- if (state >= sb_bt_start &&
-- !add_sample(mm, s, in_kernel)) {
-- if (state == sb_bt_start) {
-- state = sb_bt_ignore;
-- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-+ if (domain_switch) {
-+ cpu_current_domain[cpu] = s->eip;
-+ add_domain_switch(s->eip);
-+ domain_switch = 0;
-+ } else {
-+ if (cpu_current_domain[cpu] !=
-+ COORDINATOR_DOMAIN) {
-+ add_sample_entry(s->eip, s->event);
-+ }
-+ else if (state >= sb_bt_start &&
-+ !add_sample(mm, s, cpu_mode)) {
-+ if (state == sb_bt_start) {
-+ state = sb_bt_ignore;
-+ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-+ }
- }
- }
- }
-@@ -548,6 +592,11 @@ void sync_buffer(int cpu)
- }
- release_mm(mm);
-
-+ /* We reset domain to COORDINATOR at each CPU switch */
-+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
-+ add_domain_switch(COORDINATOR_DOMAIN);
-+ }
-+
- mark_done(cpu);
-
- up(&buffer_sem);
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
---- ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.c 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/cpu_buffer.c 2006-11-06 14:47:55.000000000 -0800
-@@ -6,6 +6,10 @@
- *
- * @author John Levon <levon@movementarian.org>
- *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
-+ *
- * Each CPU has a local buffer that stores PC value/event
- * pairs. We also log context switches when we notice them.
- * Eventually each CPU's buffer is processed into the global
-@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *);
- #define DEFAULT_TIMER_EXPIRE (HZ / 10)
- static int work_enabled;
-
-+static int32_t current_domain = COORDINATOR_DOMAIN;
-+
- void free_cpu_buffers(void)
- {
- int i;
-@@ -58,7 +64,7 @@ int alloc_cpu_buffers(void)
- goto fail;
-
- b->last_task = NULL;
-- b->last_is_kernel = -1;
-+ b->last_cpu_mode = -1;
- b->tracing = 0;
- b->buffer_size = buffer_size;
- b->tail_pos = 0;
-@@ -114,7 +120,7 @@ void cpu_buffer_reset(struct oprofile_cp
- * collected will populate the buffer with proper
- * values to initialize the buffer
- */
-- cpu_buf->last_is_kernel = -1;
-+ cpu_buf->last_cpu_mode = -1;
- cpu_buf->last_task = NULL;
- }
-
-@@ -164,13 +170,13 @@ add_code(struct oprofile_cpu_buffer * bu
- * because of the head/tail separation of the writer and reader
- * of the CPU buffer.
- *
-- * is_kernel is needed because on some architectures you cannot
-+ * cpu_mode is needed because on some architectures you cannot
- * tell if you are in kernel or user space simply by looking at
-- * pc. We tag this in the buffer by generating kernel enter/exit
-- * events whenever is_kernel changes
-+ * pc. We tag this in the buffer by generating kernel/user (and xen)
-+ * enter events whenever cpu_mode changes
- */
- static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-- int is_kernel, unsigned long event)
-+ int cpu_mode, unsigned long event)
- {
- struct task_struct * task;
-
-@@ -181,18 +187,18 @@ static int log_sample(struct oprofile_cp
- return 0;
- }
-
-- is_kernel = !!is_kernel;
--
- task = current;
-
- /* notice a switch from user->kernel or vice versa */
-- if (cpu_buf->last_is_kernel != is_kernel) {
-- cpu_buf->last_is_kernel = is_kernel;
-- add_code(cpu_buf, is_kernel);
-+ if (cpu_buf->last_cpu_mode != cpu_mode) {
-+ cpu_buf->last_cpu_mode = cpu_mode;
-+ add_code(cpu_buf, cpu_mode);
- }
--
-+
- /* notice a task switch */
-- if (cpu_buf->last_task != task) {
-+ /* if not processing other domain samples */
-+ if ((cpu_buf->last_task != task) &&
-+ (current_domain == COORDINATOR_DOMAIN)) {
- cpu_buf->last_task = task;
- add_code(cpu_buf, (unsigned long)task);
- }
-@@ -269,6 +275,25 @@ void oprofile_add_trace(unsigned long pc
- add_sample(cpu_buf, pc, 0);
- }
-
-+int oprofile_add_domain_switch(int32_t domain_id)
-+{
-+ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
-+
-+ /* should have space for switching into and out of domain
-+ (2 slots each) plus one sample and one cpu mode switch */
-+ if (((nr_available_slots(cpu_buf) < 6) &&
-+ (domain_id != COORDINATOR_DOMAIN)) ||
-+ (nr_available_slots(cpu_buf) < 2))
-+ return 0;
-+
-+ add_code(cpu_buf, CPU_DOMAIN_SWITCH);
-+ add_sample(cpu_buf, domain_id, 0);
-+
-+ current_domain = domain_id;
-+
-+ return 1;
-+}
-+
- /*
- * This serves to avoid cpu buffer overflow, and makes sure
- * the task mortuary progresses
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
---- ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.h 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/cpu_buffer.h 2006-11-06 14:47:55.000000000 -0800
-@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
- volatile unsigned long tail_pos;
- unsigned long buffer_size;
- struct task_struct * last_task;
-- int last_is_kernel;
-+ int last_cpu_mode;
- int tracing;
- struct op_sample * buffer;
- unsigned long sample_received;
-@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu
- void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
-
- /* transient events for the CPU buffer -> event buffer */
--#define CPU_IS_KERNEL 1
--#define CPU_TRACE_BEGIN 2
-+#define CPU_MODE_USER 0
-+#define CPU_MODE_KERNEL 1
-+#define CPU_MODE_XEN 2
-+#define CPU_TRACE_BEGIN 3
-+#define CPU_DOMAIN_SWITCH 4
-
- #endif /* OPROFILE_CPU_BUFFER_H */
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
---- ../orig-linux-2.6.16.29/drivers/oprofile/event_buffer.h 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/event_buffer.h 2006-11-06 14:47:55.000000000 -0800
-@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void);
- #define CPU_SWITCH_CODE 2
- #define COOKIE_SWITCH_CODE 3
- #define KERNEL_ENTER_SWITCH_CODE 4
--#define KERNEL_EXIT_SWITCH_CODE 5
-+#define USER_ENTER_SWITCH_CODE 5
- #define MODULE_LOADED_CODE 6
- #define CTX_TGID_CODE 7
- #define TRACE_BEGIN_CODE 8
- #define TRACE_END_CODE 9
-+#define XEN_ENTER_SWITCH_CODE 10
-+#define DOMAIN_SWITCH_CODE 11
-
- #define INVALID_COOKIE ~0UL
- #define NO_COOKIE 0UL
-
-+/* Constant used to refer to coordinator domain (Xen) */
-+#define COORDINATOR_DOMAIN -1
-+
- /* add data to the event buffer */
- void add_event_entry(unsigned long data);
-
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
---- ../orig-linux-2.6.16.29/drivers/oprofile/oprof.c 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/oprof.c 2006-11-06 14:47:55.000000000 -0800
-@@ -5,6 +5,10 @@
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
-+ *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
- #include <linux/kernel.h>
-@@ -19,7 +23,7 @@
- #include "cpu_buffer.h"
- #include "buffer_sync.h"
- #include "oprofile_stats.h"
--
-+
- struct oprofile_operations oprofile_ops;
-
- unsigned long oprofile_started;
-@@ -33,6 +37,32 @@ static DECLARE_MUTEX(start_sem);
- */
- static int timer = 0;
-
-+int oprofile_set_active(int active_domains[], unsigned int adomains)
-+{
-+ int err;
-+
-+ if (!oprofile_ops.set_active)
-+ return -EINVAL;
-+
-+ down(&start_sem);
-+ err = oprofile_ops.set_active(active_domains, adomains);
-+ up(&start_sem);
-+ return err;
-+}
-+
-+int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
-+{
-+ int err;
-+
-+ if (!oprofile_ops.set_passive)
-+ return -EINVAL;
-+
-+ down(&start_sem);
-+ err = oprofile_ops.set_passive(passive_domains, pdomains);
-+ up(&start_sem);
-+ return err;
-+}
-+
- int oprofile_setup(void)
- {
- int err;
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
---- ../orig-linux-2.6.16.29/drivers/oprofile/oprof.h 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/oprof.h 2006-11-06 14:47:55.000000000 -0800
-@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
- void oprofile_timer_init(struct oprofile_operations * ops);
-
- int oprofile_set_backtrace(unsigned long depth);
-+
-+int oprofile_set_active(int active_domains[], unsigned int adomains);
-+int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
-
- #endif /* OPROF_H */
-diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
---- ../orig-linux-2.6.16.29/drivers/oprofile/oprofile_files.c 2006-11-06 14:46:52.000000000 -0800
-+++ ./drivers/oprofile/oprofile_files.c 2006-11-06 14:47:55.000000000 -0800
-@@ -5,15 +5,21 @@
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
-+ *
-+ * Modified by Aravind Menon for Xen
-+ * These modifications are:
-+ * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
- #include <linux/fs.h>
- #include <linux/oprofile.h>
-+#include <asm/uaccess.h>
-+#include <linux/ctype.h>
-
- #include "event_buffer.h"
- #include "oprofile_stats.h"
- #include "oprof.h"
--
-+
- unsigned long fs_buffer_size = 131072;
- unsigned long fs_cpu_buffer_size = 8192;
- unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
-@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
- static struct file_operations dump_fops = {
- .write = dump_write,
- };
--
-+
-+#define TMPBUFSIZE 512
-+
-+static unsigned int adomains = 0;
-+static int active_domains[MAX_OPROF_DOMAINS + 1];
-+static DEFINE_MUTEX(adom_mutex);
-+
-+static ssize_t adomain_write(struct file * file, char const __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char *tmpbuf;
-+ char *startp, *endp;
-+ int i;
-+ unsigned long val;
-+ ssize_t retval = count;
-+
-+ if (*offset)
-+ return -EINVAL;
-+ if (count > TMPBUFSIZE - 1)
-+ return -EINVAL;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ if (copy_from_user(tmpbuf, buf, count)) {
-+ kfree(tmpbuf);
-+ return -EFAULT;
-+ }
-+ tmpbuf[count] = 0;
-+
-+ mutex_lock(&adom_mutex);
-+
-+ startp = tmpbuf;
-+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
-+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
-+ val = simple_strtoul(startp, &endp, 0);
-+ if (endp == startp)
-+ break;
-+ while (ispunct(*endp) || isspace(*endp))
-+ endp++;
-+ active_domains[i] = val;
-+ if (active_domains[i] != val)
-+ /* Overflow, force error below */
-+ i = MAX_OPROF_DOMAINS + 1;
-+ startp = endp;
-+ }
-+ /* Force error on trailing junk */
-+ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
-+
-+ kfree(tmpbuf);
-+
-+ if (adomains > MAX_OPROF_DOMAINS
-+ || oprofile_set_active(active_domains, adomains)) {
-+ adomains = 0;
-+ retval = -EINVAL;
-+ }
-+
-+ mutex_unlock(&adom_mutex);
-+ return retval;
-+}
-+
-+static ssize_t adomain_read(struct file * file, char __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char * tmpbuf;
-+ size_t len;
-+ int i;
-+ ssize_t retval;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ mutex_lock(&adom_mutex);
-+
-+ len = 0;
-+ for (i = 0; i < adomains; i++)
-+ len += snprintf(tmpbuf + len,
-+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
-+ "%u ", active_domains[i]);
-+ WARN_ON(len > TMPBUFSIZE);
-+ if (len != 0 && len <= TMPBUFSIZE)
-+ tmpbuf[len-1] = '\n';
-+
-+ mutex_unlock(&adom_mutex);
-+
-+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
-+
-+ kfree(tmpbuf);
-+ return retval;
-+}
-+
-+
-+static struct file_operations active_domain_ops = {
-+ .read = adomain_read,
-+ .write = adomain_write,
-+};
-+
-+static unsigned int pdomains = 0;
-+static int passive_domains[MAX_OPROF_DOMAINS];
-+static DEFINE_MUTEX(pdom_mutex);
-+
-+static ssize_t pdomain_write(struct file * file, char const __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char *tmpbuf;
-+ char *startp, *endp;
-+ int i;
-+ unsigned long val;
-+ ssize_t retval = count;
-+
-+ if (*offset)
-+ return -EINVAL;
-+ if (count > TMPBUFSIZE - 1)
-+ return -EINVAL;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ if (copy_from_user(tmpbuf, buf, count)) {
-+ kfree(tmpbuf);
-+ return -EFAULT;
-+ }
-+ tmpbuf[count] = 0;
-+
-+ mutex_lock(&pdom_mutex);
-+
-+ startp = tmpbuf;
-+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
-+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
-+ val = simple_strtoul(startp, &endp, 0);
-+ if (endp == startp)
-+ break;
-+ while (ispunct(*endp) || isspace(*endp))
-+ endp++;
-+ passive_domains[i] = val;
-+ if (passive_domains[i] != val)
-+ /* Overflow, force error below */
-+ i = MAX_OPROF_DOMAINS + 1;
-+ startp = endp;
-+ }
-+ /* Force error on trailing junk */
-+ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
-+
-+ kfree(tmpbuf);
-+
-+ if (pdomains > MAX_OPROF_DOMAINS
-+ || oprofile_set_passive(passive_domains, pdomains)) {
-+ pdomains = 0;
-+ retval = -EINVAL;
-+ }
-+
-+ mutex_unlock(&pdom_mutex);
-+ return retval;
-+}
-+
-+static ssize_t pdomain_read(struct file * file, char __user * buf,
-+ size_t count, loff_t * offset)
-+{
-+ char * tmpbuf;
-+ size_t len;
-+ int i;
-+ ssize_t retval;
-+
-+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
-+ return -ENOMEM;
-+
-+ mutex_lock(&pdom_mutex);
-+
-+ len = 0;
-+ for (i = 0; i < pdomains; i++)
-+ len += snprintf(tmpbuf + len,
-+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
-+ "%u ", passive_domains[i]);
-+ WARN_ON(len > TMPBUFSIZE);
-+ if (len != 0 && len <= TMPBUFSIZE)
-+ tmpbuf[len-1] = '\n';
-+
-+ mutex_unlock(&pdom_mutex);
-+
-+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
-+
-+ kfree(tmpbuf);
-+ return retval;
-+}
-+
-+static struct file_operations passive_domain_ops = {
-+ .read = pdomain_read,
-+ .write = pdomain_write,
-+};
-+
- void oprofile_create_files(struct super_block * sb, struct dentry * root)
- {
- oprofilefs_create_file(sb, root, "enable", &enable_fops);
- oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
-+ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
-+ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
- oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
- oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
- oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
-diff -pruN ../orig-linux-2.6.16.29/include/linux/oprofile.h ./include/linux/oprofile.h
---- ../orig-linux-2.6.16.29/include/linux/oprofile.h 2006-11-06 14:46:42.000000000 -0800
-+++ ./include/linux/oprofile.h 2006-11-06 14:47:55.000000000 -0800
-@@ -16,6 +16,8 @@
- #include <linux/types.h>
- #include <linux/spinlock.h>
- #include <asm/atomic.h>
-+
-+#include <xen/interface/xenoprof.h>
-
- struct super_block;
- struct dentry;
-@@ -27,6 +29,11 @@ struct oprofile_operations {
- /* create any necessary configuration files in the oprofile fs.
- * Optional. */
- int (*create_files)(struct super_block * sb, struct dentry * root);
-+ /* setup active domains with Xen */
-+ int (*set_active)(int *active_domains, unsigned int adomains);
-+ /* setup passive domains with Xen */
-+ int (*set_passive)(int *passive_domains, unsigned int pdomains);
-+
- /* Do any necessary interrupt setup. Optional. */
- int (*setup)(void);
- /* Do any necessary interrupt shutdown. Optional. */
-@@ -68,6 +75,8 @@ void oprofile_add_pc(unsigned long pc, i
- /* add a backtrace entry, to be called from the ->backtrace callback */
- void oprofile_add_trace(unsigned long eip);
-
-+/* add a domain switch entry */
-+int oprofile_add_domain_switch(int32_t domain_id);
-
- /**
- * Create a file of the given name as a child of the given root, with
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/fs/aio.c ./fs/aio.c
+--- ../orig-linux-2.6.17/fs/aio.c 2006-06-18 02:49:35.000000000 +0100
++++ ./fs/aio.c 2007-01-08 15:13:31.000000000 +0000
+@@ -34,6 +34,11 @@
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+
++#ifdef CONFIG_EPOLL
++#include <linux/poll.h>
++#include <linux/eventpoll.h>
++#endif
++
+ #if DEBUG > 1
+ #define dprintk printk
+ #else
+@@ -1015,6 +1020,10 @@ put_rq:
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+
++#ifdef CONFIG_EPOLL
++ if (ctx->file && waitqueue_active(&ctx->poll_wait))
++ wake_up(&ctx->poll_wait);
++#endif
+ if (ret)
+ put_ioctx(ctx);
+
+@@ -1024,6 +1033,8 @@ put_rq:
+ /* aio_read_evt
+ * Pull an event off of the ioctx's event ring. Returns the number of
+ * events fetched (0 or 1 ;-)
++ * If the ent parameter is NULL, just returns the number of events
++ * that would be fetched.
+ * FIXME: make this use cmpxchg.
+ * TODO: make the ringbuffer user mmap()able (requires FIXME).
+ */
+@@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i
+
+ head = ring->head % info->nr;
+ if (head != ring->tail) {
+- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+- *ent = *evp;
+- head = (head + 1) % info->nr;
+- smp_mb(); /* finish reading the event before updatng the head */
+- ring->head = head;
+- ret = 1;
+- put_aio_ring_event(evp, KM_USER1);
++ if (ent) { /* event requested */
++ struct io_event *evp =
++ aio_ring_event(info, head, KM_USER1);
++ *ent = *evp;
++ head = (head + 1) % info->nr;
++ /* finish reading the event before updating the head */
++ smp_mb();
++ ring->head = head;
++ ret = 1;
++ put_aio_ring_event(evp, KM_USER1);
++ } else /* only need to know availability */
++ ret = 1;
+ }
+ spin_unlock(&info->ring_lock);
+
+@@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io
+
+ aio_cancel_all(ioctx);
+ wait_for_all_aios(ioctx);
++#ifdef CONFIG_EPOLL
++ /* forget the poll file, but it's up to the user to close it */
++ if (ioctx->file) {
++ ioctx->file->private_data = 0;
++ ioctx->file->private_data = NULL;
++ ioctx->file = NULL;
++#endif
+ put_ioctx(ioctx); /* once for the lookup */
+ }
+
++#ifdef CONFIG_EPOLL
++
++static int aio_queue_fd_close(struct inode *inode, struct file *file)
++{
++ struct kioctx *ioctx = file->private_data;
++ if (ioctx) {
++ file->private_data = NULL;
++ spin_lock_irq(&ioctx->ctx_lock);
++ ioctx->file = NULL;
++ spin_unlock_irq(&ioctx->ctx_lock);
++ }
++ return 0;
++}
++
++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
++{ unsigned int pollflags = 0;
++ struct kioctx *ioctx = file->private_data;
++
++ if (ioctx) {
++
++ spin_lock_irq(&ioctx->ctx_lock);
++ /* Insert inside our poll wait queue */
++ poll_wait(file, &ioctx->poll_wait, wait);
++
++ /* Check our condition */
++ if (aio_read_evt(ioctx, 0))
++ pollflags = POLLIN | POLLRDNORM;
++ spin_unlock_irq(&ioctx->ctx_lock);
++ }
++
++ return pollflags;
++}
++
++static struct file_operations aioq_fops = {
++ .release = aio_queue_fd_close,
++ .poll = aio_queue_fd_poll
++};
++
++/* make_aio_fd:
++ * Create a file descriptor that can be used to poll the event queue.
++ * Based and piggybacked on the excellent epoll code.
++ */
++
++static int make_aio_fd(struct kioctx *ioctx)
++{
++ int error, fd;
++ struct inode *inode;
++ struct file *file;
++
++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
++ if (error)
++ return error;
++
++ /* associate the file with the IO context */
++ file->private_data = ioctx;
++ ioctx->file = file;
++ init_waitqueue_head(&ioctx->poll_wait);
++ return fd;
++}
++#endif
++
++
+ /* sys_io_setup:
+ * Create an aio_context capable of receiving at least nr_events.
+ * ctxp must not point to an aio_context that already exists, and
+@@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io
+ * resources are available. May fail with -EFAULT if an invalid
+ * pointer is passed for ctxp. Will fail with -ENOSYS if not
+ * implemented.
++ *
++ * To request a selectable fd, the user context has to be initialized
++ * to 1, instead of 0, and the return value is the fd.
++ * This keeps the system call compatible, since a non-zero value
++ * was not previously allowed.
+ */
+ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
+ {
+ struct kioctx *ioctx = NULL;
+ unsigned long ctx;
+ long ret;
++ int make_fd = 0;
+
+ ret = get_user(ctx, ctxp);
+ if (unlikely(ret))
+ goto out;
+
+ ret = -EINVAL;
++#ifdef CONFIG_EPOLL
++ if (ctx == 1) {
++ make_fd = 1;
++ ctx = 0;
++ }
++#endif
+ if (unlikely(ctx || nr_events == 0)) {
+ pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
+ ctx, nr_events);
+@@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr
+ ret = PTR_ERR(ioctx);
+ if (!IS_ERR(ioctx)) {
+ ret = put_user(ioctx->user_id, ctxp);
+- if (!ret)
+- return 0;
++#ifdef CONFIG_EPOLL
++ if (make_fd && ret >= 0)
++ ret = make_aio_fd(ioctx);
++#endif
++ if (ret >= 0)
++ return ret;
+
+ get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+ io_destroy(ioctx);
+diff -pruN ../orig-linux-2.6.17/fs/eventpoll.c ./fs/eventpoll.c
+--- ../orig-linux-2.6.17/fs/eventpoll.c 2006-06-18 02:49:35.000000000 +0100
++++ ./fs/eventpoll.c 2007-01-08 15:13:31.000000000 +0000
+@@ -236,8 +236,6 @@ struct ep_pqueue {
+
+ static void ep_poll_safewake_init(struct poll_safewake *psw);
+ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
+-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+- struct eventpoll *ep);
+ static int ep_alloc(struct eventpoll **pep);
+ static void ep_free(struct eventpoll *ep);
+ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
+@@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve
+ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
+ int maxevents, long timeout);
+ static int eventpollfs_delete_dentry(struct dentry *dentry);
+-static struct inode *ep_eventpoll_inode(void);
++static struct inode *ep_eventpoll_inode(struct file_operations *fops);
+ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data);
+@@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz
+ * Creates all the items needed to setup an eventpoll file. That is,
+ * a file structure, and inode and a free file descriptor.
+ */
+- error = ep_getfd(&fd, &inode, &file, ep);
++ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops);
+ if (error)
+ goto eexit_2;
+
+@@ -702,8 +700,8 @@ eexit_1:
+ /*
+ * Creates the file descriptor to be used by the epoll interface.
+ */
+-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+- struct eventpoll *ep)
++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
++ struct eventpoll *ep, struct file_operations *fops)
+ {
+ struct qstr this;
+ char name[32];
+@@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino
+ goto eexit_1;
+
+ /* Allocates an inode from the eventpoll file system */
+- inode = ep_eventpoll_inode();
++ inode = ep_eventpoll_inode(fops);
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto eexit_2;
+@@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino
+
+ file->f_pos = 0;
+ file->f_flags = O_RDONLY;
+- file->f_op = &eventpoll_fops;
++ file->f_op = fops;
+ file->f_mode = FMODE_READ;
+ file->f_version = 0;
+ file->private_data = ep;
+@@ -1566,7 +1564,7 @@ static int eventpollfs_delete_dentry(str
+ }
+
+
+-static struct inode *ep_eventpoll_inode(void)
++static struct inode *ep_eventpoll_inode(struct file_operations *fops)
+ {
+ int error = -ENOMEM;
+ struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
+@@ -1574,7 +1572,7 @@ static struct inode *ep_eventpoll_inode(
+ if (!inode)
+ goto eexit_1;
+
+- inode->i_fop = &eventpoll_fops;
++ inode->i_fop = fops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+diff -pruN ../orig-linux-2.6.17/include/linux/aio.h ./include/linux/aio.h
+--- ../orig-linux-2.6.17/include/linux/aio.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/linux/aio.h 2007-01-08 15:13:32.000000000 +0000
+@@ -191,6 +191,11 @@ struct kioctx {
+ struct aio_ring_info ring_info;
+
+ struct work_struct wq;
++#ifdef CONFIG_EPOLL
++ /* poll integration */
++ wait_queue_head_t poll_wait;
++ struct file *file;
++#endif
+ };
+
+ /* prototypes */
+diff -pruN ../orig-linux-2.6.17/include/linux/eventpoll.h ./include/linux/eventpoll.h
+--- ../orig-linux-2.6.17/include/linux/eventpoll.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/linux/eventpoll.h 2007-01-08 15:13:32.000000000 +0000
+@@ -90,6 +90,12 @@ static inline void eventpoll_release(str
+ eventpoll_release_file(file);
+ }
+
++/*
++ * called by aio code to create a fd that can poll the aio event queue
++ */
++struct eventpoll;
++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
++ struct eventpoll *ep, struct file_operations *fops);
+ #else
+
+ static inline void eventpoll_init_file(struct file *file) {}
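
Taken together, the aio.c and eventpoll.c halves of this patch give userspace a pollable file descriptor for an AIO completion ring; the fd only signals readiness, and reaping still goes through io_getevents(). A minimal userspace sketch of the convention the sys_io_setup() comment above describes — the raw syscall is used deliberately, since the libaio io_setup() wrapper predates this convention, and the buffer size and poll timeout here are illustrative, not part of the patch:

    /*
     * Userspace sketch (not part of the patch): request a selectable fd
     * for an AIO context by priming the context value with 1.
     */
    #include <poll.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            unsigned long ctx = 1;  /* 1 = "also give me a pollable fd" */
            struct pollfd pfd;
            long fd;

            fd = syscall(SYS_io_setup, 128, &ctx);
            if (fd < 0) {
                    perror("io_setup");
                    return 1;
            }
            /* ctx now holds the real context id; fd polls its event ring */
            pfd.fd = (int)fd;
            pfd.events = POLLIN;
            if (poll(&pfd, 1, 0) > 0 && (pfd.revents & POLLIN))
                    printf("completions ready to reap on ctx %#lx\n", ctx);
            return 0;
    }
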
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/kernel/timer.c ./kernel/timer.c
+--- ../orig-linux-2.6.17/kernel/timer.c 2006-06-18 02:49:35.000000000 +0100
++++ ./kernel/timer.c 2007-01-08 15:14:52.000000000 +0000
+@@ -557,6 +557,22 @@ found:
+ if (time_before(expires, jiffies))
+ return jiffies;
+
++ /*
++ * It can happen that other CPUs service timer IRQs and increment
++ * jiffies, but we have not yet got a local timer tick to process
++ * the timer wheels. In that case, the expiry time can be before
++ * jiffies, but since the high-resolution timer here is relative to
++ * jiffies, the default expression when high-resolution timers are
++ * not active,
++ *
++ * time_before(MAX_JIFFY_OFFSET + jiffies, expires)
++ *
++ * would falsely evaluate to true. If that is the case, just
++ * return jiffies so that we can immediately fire the local timer.
++ */
++ if (time_before(expires, jiffies))
++ return jiffies;
++
+ if (time_before(hr_expires, expires))
+ return hr_expires;
+
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
+--- ../orig-linux-2.6.17/drivers/ide/ide-lib.c 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/ide/ide-lib.c 2007-01-08 15:15:41.000000000 +0000
+@@ -410,10 +410,10 @@ void ide_toggle_bounce(ide_drive_t *driv
+ {
+ u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
+
+- if (!PCI_DMA_BUS_IS_PHYS) {
+- addr = BLK_BOUNCE_ANY;
+- } else if (on && drive->media == ide_disk) {
+- if (HWIF(drive)->pci_dev)
++ if (on && drive->media == ide_disk) {
++ if (!PCI_DMA_BUS_IS_PHYS)
++ addr = BLK_BOUNCE_ANY;
++ else if (HWIF(drive)->pci_dev)
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
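
The reorder is a behavioral fix, not a cleanup: the old code forced BLK_BOUNCE_ANY on any IOMMU-backed bus even when bouncing was being switched off or the device was not a disk, while the new code only widens the limit in the on-and-disk case. A standalone sketch of the two decision functions (identifiers are hypothetical, and the pci_dev null-check is elided for brevity):

    #include <assert.h>

    enum { BOUNCE_HIGH, BOUNCE_ANY, DMA_MASK };

    static int old_limit(int bus_is_phys, int on, int is_disk)
    {
            if (!bus_is_phys)
                    return BOUNCE_ANY;      /* even for on == 0! */
            if (on && is_disk)
                    return DMA_MASK;
            return BOUNCE_HIGH;
    }

    static int new_limit(int bus_is_phys, int on, int is_disk)
    {
            if (on && is_disk)
                    return bus_is_phys ? DMA_MASK : BOUNCE_ANY;
            return BOUNCE_HIGH;
    }

    int main(void)
    {
            /* The only change: an IOMMU bus with bouncing off, or a
             * non-disk device, now keeps the BLK_BOUNCE_HIGH default. */
            assert(old_limit(0, 0, 0) == BOUNCE_ANY);
            assert(new_limit(0, 0, 0) == BOUNCE_HIGH);
            assert(old_limit(0, 1, 1) == new_limit(0, 1, 1));
            assert(old_limit(1, 1, 1) == new_limit(1, 1, 1));
            return 0;
    }
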
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-08 15:12:20.000000000 +0000
+@@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kim
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+- /* The segment registers are funny things, they are
+- * automatically loaded from a table, in memory wherever you
+- * set them to a specific selector, but this table is never
+- * accessed again you set the segment to a different selector.
+- *
+- * The more common model is are caches where the behide
+- * the scenes work is done, but is also dropped at arbitrary
+- * times.
++ /* The segment registers are funny things, they have both a
++ * visible and an invisible part. Whenever the visible part is
++ * set to a specific selector, the invisible part is loaded
++ * from a table in memory. At no other time is the
++ * descriptor table in memory accessed.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:12:20.000000000 +0000
+@@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kim
+ __flush_tlb();
+
+
+- /* The segment registers are funny things, they are
+- * automatically loaded from a table, in memory wherever you
+- * set them to a specific selector, but this table is never
+- * accessed again unless you set the segment to a different selector.
+- *
+- * The more common model are caches where the behide
+- * the scenes work is done, but is also dropped at arbitrary
+- * times.
++ /* The segment registers are funny things, they have both a
++ * visible and an invisible part. Whenever the visible part is
++ * set to a specific selector, the invisible part is loaded
++ * from a table in memory. At no other time is the
++ * descriptor table in memory accessed.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
+@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*rel
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
++extern const unsigned char relocate_new_kernel[];
+ extern void relocate_new_kernel_end(void);
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned int relocate_new_kernel_size;
+
+ /*
+ * A architecture hook called to validate the
+diff -pruN ../orig-linux-2.6.17/arch/powerpc/kernel/machine_kexec_32.c ./arch/powerpc/kernel/machine_kexec_32.c
+--- ../orig-linux-2.6.17/arch/powerpc/kernel/machine_kexec_32.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/powerpc/kernel/machine_kexec_32.c 2007-01-08 15:01:25.000000000 +0000
+@@ -30,8 +30,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ */
+ void default_machine_kexec(struct kimage *image)
+ {
+- const extern unsigned char relocate_new_kernel[];
+- const extern unsigned int relocate_new_kernel_size;
++ extern const unsigned char relocate_new_kernel[];
++ extern const unsigned int relocate_new_kernel_size;
+ unsigned long page_list;
+ unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+ relocate_new_kernel_t rnk;
+diff -pruN ../orig-linux-2.6.17/arch/ppc/kernel/machine_kexec.c ./arch/ppc/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/ppc/kernel/machine_kexec.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/ppc/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
+@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long reboot_code_buffer,
+ unsigned long start_address) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned int relocate_new_kernel_size;
+
+ void machine_shutdown(void)
+ {
+diff -pruN ../orig-linux-2.6.17/arch/s390/kernel/machine_kexec.c ./arch/s390/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/s390/kernel/machine_kexec.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/s390/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
+@@ -27,8 +27,8 @@ static void kexec_halt_all_cpus(void *);
+
+ typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long);
+
+-const extern unsigned char relocate_kernel[];
+-const extern unsigned long long relocate_kernel_len;
++extern const unsigned char relocate_kernel[];
++extern const unsigned long long relocate_kernel_len;
+
+ int
+ machine_kexec_prepare(struct kimage *image)
+diff -pruN ../orig-linux-2.6.17/arch/sh/kernel/machine_kexec.c ./arch/sh/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/sh/kernel/machine_kexec.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/sh/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
+@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long start_address,
+ unsigned long vbr_reg) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned int relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned int relocate_new_kernel_size;
+ extern void *gdb_vbr_vector;
+
+ /*
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:01:25.000000000 +0000
+@@ -149,8 +149,8 @@ typedef NORET_TYPE void (*relocate_new_k
+ unsigned long start_address,
+ unsigned long pgtable) ATTRIB_NORET;
+
+-const extern unsigned char relocate_new_kernel[];
+-const extern unsigned long relocate_new_kernel_size;
++extern const unsigned char relocate_new_kernel[];
++extern const unsigned long relocate_new_kernel_size;
+
+ int machine_kexec_prepare(struct kimage *image)
+ {
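
These hunks are pure declaration cleanups: a storage-class specifier such as extern belongs at the start of the declaration specifiers, and C99 6.11.5 makes any other placement obsolescent. A two-line sketch of what the compiler objects to (gcc flags the first form via -Wold-style-declaration, pulled in by -Wextra — flag names from memory, worth double-checking):

    const extern unsigned char relocate_new_kernel[]; /* warns: 'extern' not at start */
    extern const unsigned char relocate_new_kernel[]; /* clean; identical meaning */
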
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c 2007-01-08 15:12:20.000000000 +0000
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-08 15:12:33.000000000 +0000
+@@ -20,70 +20,13 @@
+ #include <asm/system.h>
+
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+-
+-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L2_ATTR (_PAGE_PRESENT)
+-
+-#define LEVEL0_SIZE (1UL << 12UL)
+-
+-#ifndef CONFIG_X86_PAE
+-#define LEVEL1_SIZE (1UL << 22UL)
+-static u32 pgtable_level1[1024] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index;
+- u32 *pgtable_level2;
+-
+- /* Find the current page table */
+- pgtable_level2 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = address / LEVEL1_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level2);
+-}
+-
+-#else
+-#define LEVEL1_SIZE (1UL << 21UL)
+-#define LEVEL2_SIZE (1UL << 30UL)
+-static u64 pgtable_level1[512] PAGE_ALIGNED;
+-static u64 pgtable_level2[512] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index, level3_index;
+- u64 *pgtable_level3;
+-
+- /* Find the current page table */
+- pgtable_level3 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+- level3_index = address / LEVEL2_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+- set_64bit(&pgtable_level3[level3_index],
+- __pa(pgtable_level2) | L2_ATTR);
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level3);
+-}
++static u32 kexec_pgd[1024] PAGE_ALIGNED;
++#ifdef CONFIG_X86_PAE
++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+ #endif
++static u32 kexec_pte0[1024] PAGE_ALIGNED;
++static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+ static void set_idt(void *newidt, __u16 limit)
+ {
+@@ -127,16 +70,6 @@ static void load_segments(void)
+ #undef __STR
+ }
+
+-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+- unsigned long indirection_page,
+- unsigned long reboot_code_buffer,
+- unsigned long start_address,
+- unsigned int has_pae) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern void relocate_new_kernel_end(void);
+-extern const unsigned int relocate_new_kernel_size;
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -169,25 +102,29 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long reboot_code_buffer;
+-
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Compute some offsets */
+- reboot_code_buffer = page_to_pfn(image->control_code_page)
+- << PAGE_SHIFT;
+- page_list = image->head;
+-
+- /* Set up an identity mapping for the reboot_code_buffer */
+- identity_map_page(reboot_code_buffer);
+-
+- /* copy it out */
+- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+- relocate_new_kernel_size);
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++#ifdef CONFIG_X86_PAE
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++#endif
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -206,6 +143,6 @@ NORET_TYPE void machine_kexec(struct kim
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+- rnk = (relocate_new_kernel_t) reboot_code_buffer;
+- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start, cpu_has_pae);
+ }
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.17/arch/i386/kernel/relocate_kernel.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-08 15:12:33.000000000 +0000
+@@ -7,16 +7,138 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
++
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 2)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
++
++ .text
++ .align PAGE_ALIGNED
++ .globl relocate_kernel
++relocate_kernel:
++ movl 8(%esp), %ebp /* list of pages */
++
++#ifdef CONFIG_X86_PAE
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_0)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_1)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#else
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#endif
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
+ relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+- movl 8(%esp), %ebp /* reboot_code_buffer */
++ movl 8(%esp), %ebp /* list of pages */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+@@ -24,11 +146,26 @@ relocate_new_kernel:
+ pushl $0
+ popfl
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%ebp), %esp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
++
++ /* switch to new set of page tables */
++ movl PTR(PA_PGD)(%ebp), %eax
++ movl %eax, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%edi), %esp
+
+- /* store the parameters back on the stack */
+- pushl %edx /* store the start address */
++ /* jump to identity mapped page */
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel), %eax
++ pushl %eax
++ ret
++
++identity_mapped:
++ /* store the start address on the stack */
++ pushl %edx
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+@@ -113,8 +250,3 @@ relocate_new_kernel:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .long relocate_new_kernel_end - relocate_new_kernel
+diff -pruN ../orig-linux-2.6.17/include/asm-i386/kexec.h ./include/asm-i386/kexec.h
+--- ../orig-linux-2.6.17/include/asm-i386/kexec.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-i386/kexec.h 2007-01-08 15:12:33.000000000 +0000
+@@ -1,6 +1,26 @@
+ #ifndef _I386_KEXEC_H
+ #define _I386_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PTE_0 4
++#define VA_PTE_0 5
++#define PA_PTE_1 6
++#define VA_PTE_1 7
++#ifdef CONFIG_X86_PAE
++#define PA_PMD_0 8
++#define VA_PMD_0 9
++#define PA_PMD_1 10
++#define VA_PMD_1 11
++#define PAGES_NR 12
++#else
++#define PAGES_NR 8
++#endif
++
++#ifndef __ASSEMBLY__
++
+ #include <asm/fixmap.h>
+ #include <asm/ptrace.h>
+ #include <asm/string.h>
+@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
+ newregs->eip = (unsigned long)current_text_addr();
+ }
+ }
++asmlinkage NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long control_page,
++ unsigned long start_address,
++ unsigned int has_pae) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
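
The mask-and-shift pairs in relocate_kernel compute byte offsets into the page-table pages passed via page_list: the mask keeps one level's index bits, and shifting right by (index shift minus log2 of the entry size) yields the byte offset directly, with no separate multiply; PTR(x), defined as (x << 2), plays the same trick for indexing the 32-bit page_list array itself. A standalone self-check of that arithmetic (a sketch, not kernel code):

    #include <assert.h>

    int main(void)
    {
            unsigned long addr = 0xC0345678UL;  /* arbitrary test address */

            /* non-PAE PGD: 10-bit index, 4-byte entries */
            assert(((addr & 0xffc00000UL) >> 20) == (addr >> 22) * 4);
            /* non-PAE PTE */
            assert(((addr & 0x003ff000UL) >> 10) == ((addr >> 12) & 0x3ff) * 4);

            /* PAE PGD: 2-bit index, 8-byte entries */
            assert(((addr & 0xc0000000UL) >> 27) == (addr >> 30) * 8);
            /* PAE PMD: 9-bit index */
            assert(((addr & 0x3fe00000UL) >> 18) == ((addr >> 21) & 0x1ff) * 8);
            /* PAE PTE: 9-bit index */
            assert(((addr & 0x001ff000UL) >> 9) == ((addr >> 12) & 0x1ff) * 8);
            return 0;
    }
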
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:12:20.000000000 +0000
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:13:01.000000000 +0000
+@@ -15,6 +15,15 @@
+ #include <asm/mmu_context.h>
+ #include <asm/io.h>
+
++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
++static u64 kexec_pgd[512] PAGE_ALIGNED;
++static u64 kexec_pud0[512] PAGE_ALIGNED;
++static u64 kexec_pmd0[512] PAGE_ALIGNED;
++static u64 kexec_pte0[512] PAGE_ALIGNED;
++static u64 kexec_pud1[512] PAGE_ALIGNED;
++static u64 kexec_pmd1[512] PAGE_ALIGNED;
++static u64 kexec_pte1[512] PAGE_ALIGNED;
++
+ static void init_level2_page(pmd_t *level2p, unsigned long addr)
+ {
+ unsigned long end_addr;
+@@ -144,32 +153,19 @@ static void load_segments(void)
+ );
+ }
+
+-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
+- unsigned long control_code_buffer,
+- unsigned long start_address,
+- unsigned long pgtable) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern const unsigned long relocate_new_kernel_size;
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+- unsigned long start_pgtable, control_code_buffer;
++ unsigned long start_pgtable;
+ int result;
+
+ /* Calculate the offsets */
+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
+
+ /* Setup the identity mapped 64bit page table */
+ result = init_pgtable(image, start_pgtable);
+ if (result)
+ return result;
+
+- /* Place the code in the reboot code buffer */
+- memcpy(__va(control_code_buffer), relocate_new_kernel,
+- relocate_new_kernel_size);
+-
+ return 0;
+ }
+
+@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long control_code_buffer;
+- unsigned long start_pgtable;
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Calculate the offsets */
+- page_list = image->head;
+- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+- control_code_buffer = start_pgtable + PAGE_SIZE;
++ control_page = page_address(image->control_code_page) + PAGE_SIZE;
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+- /* Set the low half of the page table to my identity mapped
+- * page table for kexec. Leave the high half pointing at the
+- * kernel pages. Don't bother to flush the global pages
+- * as that will happen when I fully switch to my identity mapped
+- * page table anyway.
+- */
+- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
+- __flush_tlb();
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++ page_list[PA_PUD_0] = __pa(kexec_pud0);
++ page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PUD_1] = __pa(kexec_pud1);
++ page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
++ page_list[PA_TABLE_PAGE] =
++ (unsigned long)__pa(page_address(image->control_code_page));
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -222,7 +224,8 @@ NORET_TYPE void machine_kexec(struct kim
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
++
+ /* now call it */
+- rnk = (relocate_new_kernel_t) control_code_buffer;
+- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start);
+ }
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/relocate_kernel.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-08 15:13:01.000000000 +0000
+@@ -7,31 +7,169 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 3)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++
++ .text
++ .align PAGE_ALIGNED
+ .code64
++ .globl relocate_kernel
++relocate_kernel:
++ /* %rdi indirection_page
++ * %rsi page_list
++ * %rdx start address
++ */
++
++ /* map the control page at its virtual address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_0)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_0)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ /* identity map the control page at its physical address */
++
++ movq $0x0000ff8000000000, %r10 /* mask */
++ mov $(39 - 3), %cl /* bits to shift */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PGD)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PUD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PUD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PMD_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PMD_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_PTE_1)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
++ shrq $9, %r10
++ sub $9, %cl
++
++ movq %r11, %r9
++ andq %r10, %r9
++ shrq %cl, %r9
++
++ movq PTR(VA_PTE_1)(%rsi), %r8
++ addq %r8, %r9
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++ orq $PAGE_ATTR, %r8
++ movq %r8, (%r9)
++
+ relocate_new_kernel:
+- /* %rdi page_list
+- * %rsi reboot_code_buffer
++ /* %rdi indirection_page
++ * %rsi page_list
+ * %rdx start address
+- * %rcx page_table
+- * %r8 arg5
+- * %r9 arg6
+ */
+
+ /* zero out flags, and disable interrupts */
+ pushq $0
+ popfq
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%rsi), %rsp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
++
++ /* get physical address of page table now too */
++ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
++
++ /* switch to new set of page tables */
++ movq PTR(PA_PGD)(%rsi), %r9
++ movq %r9, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%r8), %rsp
++
++ /* jump to identity mapped page */
++ addq $(identity_mapped - relocate_kernel), %r8
++ pushq %r8
++ ret
+
+- /* store the parameters back on the stack */
+- pushq %rdx /* store the start address */
++identity_mapped:
++ /* store the start address on the stack */
++ pushq %rdx
+
+ /* Set cr0 to a known state:
+ * 31 1 == Paging enabled
+@@ -136,8 +274,3 @@ relocate_new_kernel:
+ xorq %r15, %r15
+
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .quad relocate_new_kernel_end - relocate_new_kernel
+diff -pruN ../orig-linux-2.6.17/include/asm-x86_64/kexec.h ./include/asm-x86_64/kexec.h
+--- ../orig-linux-2.6.17/include/asm-x86_64/kexec.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-x86_64/kexec.h 2007-01-08 15:13:01.000000000 +0000
+@@ -1,6 +1,27 @@
+ #ifndef _X86_64_KEXEC_H
+ #define _X86_64_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PUD_0 4
++#define VA_PUD_0 5
++#define PA_PMD_0 6
++#define VA_PMD_0 7
++#define PA_PTE_0 8
++#define VA_PTE_0 9
++#define PA_PUD_1 10
++#define VA_PUD_1 11
++#define PA_PMD_1 12
++#define VA_PMD_1 13
++#define PA_PTE_1 14
++#define VA_PTE_1 15
++#define PA_TABLE_PAGE 16
++#define PAGES_NR 17
++
++#ifndef __ASSEMBLY__
++
+ #include <linux/string.h>
+
+ #include <asm/page.h>
+@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru
+ newregs->rip = (unsigned long)current_text_addr();
+ }
+ }
++
++NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long page_list,
++ unsigned long start_address) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
++
+ #endif /* _X86_64_KEXEC_H */
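
The x86_64 version applies the same offset trick iteratively: it starts from the PGD index (bits 47..39, 8-byte entries, hence the 39 - 3 shift count) and steps down one level at a time by shifting the mask right 9 and the shift count down 9. A standalone self-check of the loop invariant (a sketch, not kernel code):

    #include <assert.h>

    int main(void)
    {
            unsigned long addr = 0x0000123456789000UL;  /* 48-bit address */
            unsigned long mask = 0x0000ff8000000000UL;  /* bits 47..39 */
            int cl = 39 - 3;            /* index shift - log2(entry size) */
            int level;

            for (level = 0; level < 4; level++) {   /* pgd, pud, pmd, pte */
                    int idx_shift = 39 - 9 * level;
                    assert(((addr & mask) >> cl) ==
                           ((addr >> idx_shift) & 0x1ff) * 8);
                    mask >>= 9;
                    cl -= 9;
            }
            return 0;
    }
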
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/vmlinux.lds.S 2007-01-08 15:37:29.000000000 +0000
++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-08 15:37:43.000000000 +0000
+@@ -18,6 +18,7 @@ PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ user PT_LOAD FLAGS(7); /* RWE */
++ data.init PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(4); /* R__ */
+ }
+ SECTIONS
+@@ -123,7 +124,7 @@ SECTIONS
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+- } :data
++ }:data.init
+
+ . = ALIGN(4096);
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/traps.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/traps.c 2007-01-08 15:15:53.000000000 +0000
+@@ -607,18 +607,11 @@ static void mem_parity_error(unsigned ch
+
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+- unsigned long i;
+-
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+- reason = (reason & 0xf) | 8;
+- outb(reason, 0x61);
+- i = 2000;
+- while (--i) udelay(1000);
+- reason &= ~8;
+- outb(reason, 0x61);
++ clear_io_check_error(reason);
+ }
+
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+diff -pruN ../orig-linux-2.6.17/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h
+--- ../orig-linux-2.6.17/include/asm-i386/mach-default/mach_traps.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-i386/mach-default/mach_traps.h 2007-01-08 15:15:53.000000000 +0000
+@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
+ outb(reason, 0x61);
+ }
+
++static inline void clear_io_check_error(unsigned char reason)
++{
++ unsigned long i;
++
++ reason = (reason & 0xf) | 8;
++ outb(reason, 0x61);
++ i = 2000;
++ while (--i) udelay(1000);
++ reason &= ~8;
++ outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ return inb(0x61);
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/net/ipv6/addrconf.c ./net/ipv6/addrconf.c
+--- ../orig-linux-2.6.17/net/ipv6/addrconf.c 2006-06-18 02:49:35.000000000 +0100
++++ ./net/ipv6/addrconf.c 2007-01-08 15:16:04.000000000 +0000
+@@ -2461,6 +2461,7 @@ static void addrconf_dad_start(struct in
+ spin_lock_bh(&ifp->lock);
+
+ if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
++ !(dev->flags&IFF_MULTICAST) ||
+ !(ifp->flags&IFA_F_TENTATIVE)) {
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+@@ -2545,6 +2546,7 @@ static void addrconf_dad_completed(struc
+ if (ifp->idev->cnf.forwarding == 0 &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0 &&
++ (dev->flags & IFF_MULTICAST) &&
+ (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ struct in6_addr all_routers;
+
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/include/linux/kernel.h ./include/linux/kernel.h
+--- ../orig-linux-2.6.17/include/linux/kernel.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/linux/kernel.h 2007-01-08 15:38:20.000000000 +0000
+@@ -114,6 +114,8 @@ extern int scnprintf(char * buf, size_t
+ __attribute__ ((format (printf, 3, 4)));
+ extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 3, 0)));
++extern char *kasprintf(gfp_t gfp, const char *fmt, ...)
++ __attribute__ ((format (printf, 2, 3)));
+
+ extern int sscanf(const char *, const char *, ...)
+ __attribute__ ((format (scanf, 2, 3)));
+diff -pruN ../orig-linux-2.6.17/lib/vsprintf.c ./lib/vsprintf.c
+--- ../orig-linux-2.6.17/lib/vsprintf.c 2007-01-08 15:38:07.000000000 +0000
++++ ./lib/vsprintf.c 2007-01-08 15:38:20.000000000 +0000
+@@ -849,3 +849,26 @@ int sscanf(const char * buf, const char
+ }
+
+ EXPORT_SYMBOL(sscanf);
++
++
++/* Simplified asprintf. */
++char *kasprintf(gfp_t gfp, const char *fmt, ...)
++{
++ va_list ap;
++ unsigned int len;
++ char *p;
++
++ va_start(ap, fmt);
++ len = vsnprintf(NULL, 0, fmt, ap);
++ va_end(ap);
++
++ p = kmalloc(len+1, gfp);
++ if (!p)
++ return NULL;
++ va_start(ap, fmt);
++ vsnprintf(p, len+1, fmt, ap);
++ va_end(ap);
++ return p;
++}
++
++EXPORT_SYMBOL(kasprintf);
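
kasprintf() sizes the result with a NULL/0 vsnprintf() pass, allocates, and formats a second time, so callers stop guessing buffer sizes; the returned string belongs to the caller. A sketch of a typical (hypothetical) caller:

    #include <linux/kernel.h>
    #include <linux/slab.h>

    static char *make_label(const char *base, int idx)
    {
            char *label = kasprintf(GFP_KERNEL, "%s-%d", base, idx);

            /* kasprintf() returns NULL on allocation failure; the
             * caller owns the string and must kfree() it when done. */
            return label;
    }
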
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/machine_kexec.c 2007-01-08 15:12:33.000000000 +0000
++++ ./arch/i386/kernel/machine_kexec.c 2007-01-08 15:12:46.000000000 +0000
+@@ -28,48 +28,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+-static void set_idt(void *newidt, __u16 limit)
+-{
+- struct Xgt_desc_struct curidt;
+-
+- /* ia32 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- load_idt(&curidt);
+-};
+-
+-
+-static void set_gdt(void *newgdt, __u16 limit)
+-{
+- struct Xgt_desc_struct curgdt;
+-
+- /* ia32 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- load_gdt(&curgdt);
+-};
+-
+-static void load_segments(void)
+-{
+-#define __STR(X) #X
+-#define STR(X) __STR(X)
+-
+- __asm__ __volatile__ (
+- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+- "\t1:\n"
+- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
+- "\tmovl %%eax,%%ds\n"
+- "\tmovl %%eax,%%es\n"
+- "\tmovl %%eax,%%fs\n"
+- "\tmovl %%eax,%%gs\n"
+- "\tmovl %%eax,%%ss\n"
+- ::: "eax", "memory");
+-#undef STR
+-#undef __STR
+-}
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -126,23 +84,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start, cpu_has_pae);
+ }
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.17/arch/i386/kernel/relocate_kernel.S 2007-01-08 15:12:33.000000000 +0000
++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-08 15:12:46.000000000 +0000
+@@ -154,14 +154,45 @@ relocate_new_kernel:
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, %cr3
+
++ /* setup idt */
++ movl %edi, %eax
++ addl $(idt_48 - relocate_kernel), %eax
++ lidtl (%eax)
++
++ /* setup gdt */
++ movl %edi, %eax
++ addl $(gdt - relocate_kernel), %eax
++ movl %edi, %esi
++ addl $((gdt_48 - relocate_kernel) + 2), %esi
++ movl %eax, (%esi)
++
++ movl %edi, %eax
++ addl $(gdt_48 - relocate_kernel), %eax
++ lgdtl (%eax)
++
++ /* setup data segment registers */
++ mov $(gdt_ds - gdt), %eax
++ mov %eax, %ds
++ mov %eax, %es
++ mov %eax, %fs
++ mov %eax, %gs
++ mov %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%edi), %esp
+
+- /* jump to identity mapped page */
+- movl %edi, %eax
+- addl $(identity_mapped - relocate_kernel), %eax
+- pushl %eax
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movl %edi, %esi
++ xorl %eax, %eax
++ pushl %eax
++ pushl %esi
++ pushl %eax
++ movl $(gdt_cs - gdt), %eax
++ pushl %eax
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel),%eax
++ pushl %eax
++ iretl
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -250,3 +281,20 @@ identity_mapped:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
++gdt_ds:
++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
++gdt_end:
++
++gdt_48:
++ .word gdt_end - gdt - 1 /* limit */
++ .long 0 /* base - filled in by code above */
++
++idt_48:
++ .word 0 /* limit */
++ .long 0 /* base */
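
The descriptor quads pack base and limit into non-contiguous fields, which makes the "kernel 4GB code/data at 0x00000000" comments hard to verify by eye. A standalone decoder (a sketch, not kernel code) that unpacks them:

    #include <stdint.h>
    #include <stdio.h>

    static void decode(uint64_t d)
    {
            uint32_t base  = ((d >> 16) & 0xffffff) | (((d >> 56) & 0xff) << 24);
            uint32_t limit = (d & 0xffff) | (((d >> 48) & 0xf) << 16);
            unsigned access = (d >> 40) & 0xff;
            unsigned flags  = (d >> 52) & 0xf;

            if (flags & 0x8)        /* G bit: limit counted in 4K pages */
                    limit = (limit << 12) | 0xfff;
            printf("base=0x%08x limit=0x%08x access=0x%02x (%s, ring %u)\n",
                   base, limit, access,
                   (access & 0x08) ? "code" : "data", (access >> 5) & 3);
    }

    int main(void)
    {
            decode(0x00cf9a000000ffffULL);  /* gdt_cs: 4GB code, base 0 */
            decode(0x00cf92000000ffffULL);  /* gdt_ds: 4GB data, base 0 */
            return 0;
    }
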
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:13:01.000000000 +0000
++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-08 15:13:18.000000000 +0000
+@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i
+ return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
+ }
+
+-static void set_idt(void *newidt, u16 limit)
+-{
+- struct desc_ptr curidt;
+-
+-	/* x86-64 supports unaligned loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- __asm__ __volatile__ (
+- "lidtq %0\n"
+- : : "m" (curidt)
+- );
+-};
+-
+-
+-static void set_gdt(void *newgdt, u16 limit)
+-{
+- struct desc_ptr curgdt;
+-
+- /* x86-64 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- __asm__ __volatile__ (
+- "lgdtq %0\n"
+- : : "m" (curgdt)
+- );
+-};
+-
+-static void load_segments(void)
+-{
+- __asm__ __volatile__ (
+- "\tmovl %0,%%ds\n"
+- "\tmovl %0,%%es\n"
+- "\tmovl %0,%%ss\n"
+- "\tmovl %0,%%fs\n"
+- "\tmovl %0,%%gs\n"
+- : : "a" (__KERNEL_DS) : "memory"
+- );
+-}
+-
+ int machine_kexec_prepare(struct kimage *image)
+ {
+ unsigned long start_pgtable;
+@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_TABLE_PAGE] =
+ (unsigned long)__pa(page_address(image->control_code_page));
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+-	 * from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start);
+ }
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/relocate_kernel.S 2007-01-08 15:13:01.000000000 +0000
++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-08 15:13:18.000000000 +0000
+@@ -159,13 +159,39 @@ relocate_new_kernel:
+ movq PTR(PA_PGD)(%rsi), %r9
+ movq %r9, %cr3
+
++ /* setup idt */
++ movq %r8, %rax
++ addq $(idt_80 - relocate_kernel), %rax
++ lidtq (%rax)
++
++ /* setup gdt */
++ movq %r8, %rax
++ addq $(gdt - relocate_kernel), %rax
++ movq %r8, %r9
++ addq $((gdt_80 - relocate_kernel) + 2), %r9
++ movq %rax, (%r9)
++
++ movq %r8, %rax
++ addq $(gdt_80 - relocate_kernel), %rax
++ lgdtq (%rax)
++
++ /* setup data segment registers */
++ xorl %eax, %eax
++ movl %eax, %ds
++ movl %eax, %es
++ movl %eax, %fs
++ movl %eax, %gs
++ movl %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%r8), %rsp
+
+- /* jump to identity mapped page */
+- addq $(identity_mapped - relocate_kernel), %r8
+- pushq %r8
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movq %r8, %rax
++ addq $(identity_mapped - relocate_kernel), %rax
++ pushq $(gdt_cs - gdt)
++ pushq %rax
++ lretq
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -272,5 +298,19 @@ identity_mapped:
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+-
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00af9a000000ffff
++gdt_end:
++
++gdt_80:
++ .word gdt_end - gdt - 1 /* limit */
++ .quad 0 /* base - filled in by code above */
++
++idt_80:
++ .word 0 /* limit */
++ .quad 0 /* base */
--- /dev/null
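The x86_64 version follows the same scheme but reloads CS with lretq and only needs a code descriptor, since the null selector is legal in the 64-bit data segment registers and their bases/limits are ignored anyway. The gdt_80/idt_80 blobs are the 10-byte pseudo-descriptor operand that lgdtq/lidtq expect, which is why the fixup code writes the runtime base at offset +2. A minimal sketch (illustrative, not from the patch) pins down that layout:

/* Illustrative, not part of the patch: the lgdtq/lidtq operand is a
 * 16-bit limit immediately followed by a 64-bit linear base, so the
 * "(gdt_80 - relocate_kernel) + 2" fixup targets the base field. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct pseudo_desc {
	uint16_t limit;		/* .word gdt_end - gdt - 1 */
	uint64_t base;		/* .quad 0, patched at runtime */
} __attribute__((packed));

int main(void)
{
	assert(sizeof(struct pseudo_desc) == 10);
	assert(offsetof(struct pseudo_desc, base) == 2);
	puts("pseudo-descriptor: 2-byte limit at 0, 8-byte base at 2");
	return 0;
}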
+diff -pruN ../orig-linux-2.6.17/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- ../orig-linux-2.6.17/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-06-18 02:49:35.000000000 +0100
++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2007-01-08 15:16:15.000000000 +0000
+@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb,
+ if (hdrsize < sizeof(*hdr))
+ return 1;
+
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++#ifdef CONFIG_XEN
++ if ((*pskb)->proto_csum_blank)
++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
++ else
++#endif
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(oldport ^ 0xFFFF,
+ newport,
+ hdr->check));
+diff -pruN ../orig-linux-2.6.17/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c
+--- ../orig-linux-2.6.17/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-06-18 02:49:35.000000000 +0100
++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2007-01-08 15:16:15.000000000 +0000
+@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb,
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+- if (hdr->check) /* 0 is a special case meaning no checksum */
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++ if (hdr->check) { /* 0 is a special case meaning no checksum */
++#ifdef CONFIG_XEN
++ if ((*pskb)->proto_csum_blank)
++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
++ else
++#endif
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(*portptr ^ 0xFFFF,
+ newport,
+ hdr->check));
++ }
+ *portptr = newport;
+ return 1;
+ }
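Both NAT hunks lean on the same incremental-update identity (cf. RFC 1624): folding the one's-complement negation of the old 32-bit value plus the new value into an existing checksum equals recomputing it from scratch, which is what ip_nat_cheat_check(~oldip, newip, check) does. The Xen branch passes (oldip, ~newip) instead because with proto_csum_blank the field holds a not-yet-inverted partial pseudo-header sum, so the adjustment runs with the complements swapped. A self-contained sketch of the identity (user space, illustrative names):

/* Illustrative, not part of the patch: RFC 1624-style incremental
 * checksum update, verified against a full recompute. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

static uint16_t full_check(const uint16_t *words, int n)
{
	uint32_t sum = 0;
	while (n--)
		sum += *words++;
	return (uint16_t)~fold(sum);
}

/* Update 'check' after one 32-bit field changed, without touching the
 * rest of the data (cf. ip_nat_cheat_check(oldvalinv, newval, check)). */
static uint16_t cheat_check(uint32_t oldinv, uint32_t newval, uint16_t check)
{
	uint32_t sum = (uint16_t)~check;

	sum += (oldinv & 0xffff) + (oldinv >> 16);
	sum += (newval & 0xffff) + (newval >> 16);
	return (uint16_t)~fold(sum);
}

int main(void)
{
	uint16_t pkt[4] = { 0x1234, 0x5678, 0xdead, 0xbeef };
	uint16_t check = full_check(pkt, 4);
	uint32_t oldip = ((uint32_t)pkt[0] << 16) | pkt[1];
	uint32_t newip = 0x0a000001;	/* the rewritten address */

	pkt[0] = (uint16_t)(newip >> 16);
	pkt[1] = (uint16_t)newip;
	assert(cheat_check(~oldip, newip, check) == full_check(pkt, 4));
	puts("incremental update matches full recompute");
	return 0;
}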
+diff -pruN ../orig-linux-2.6.17/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c
+--- ../orig-linux-2.6.17/net/ipv4/xfrm4_output.c 2006-06-18 02:49:35.000000000 +0100
++++ ./net/ipv4/xfrm4_output.c 2007-01-08 15:16:15.000000000 +0000
+@@ -17,6 +17,8 @@
+ #include <net/xfrm.h>
+ #include <net/icmp.h>
+
++extern int skb_checksum_setup(struct sk_buff *skb);
++
+ /* Add encapsulation header.
+ *
+ * In transport mode, the IP header will be moved forward to make space
+@@ -103,6 +105,10 @@ static int xfrm4_output_one(struct sk_bu
+ struct xfrm_state *x = dst->xfrm;
+ int err;
+
++ err = skb_checksum_setup(skb);
++ if (err)
++ goto error_nolock;
++
+ if (skb->ip_summed == CHECKSUM_HW) {
+ err = skb_checksum_help(skb, 0);
+ if (err)
--- /dev/null
+Index: tmp-linux-2.6.17/Documentation/networking/netdevices.txt
+===================================================================
+--- tmp-linux-2.6.17.orig/Documentation/networking/netdevices.txt 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/Documentation/networking/netdevices.txt 2007-01-08 16:48:05.000000000 +0000
+@@ -42,9 +42,9 @@
+ Context: nominally process, but don't sleep inside an rwlock
+
+ dev->hard_start_xmit:
+- Synchronization: dev->xmit_lock spinlock.
++ Synchronization: netif_tx_lock spinlock.
+ When the driver sets NETIF_F_LLTX in dev->features this will be
+- called without holding xmit_lock. In this case the driver
++ called without holding netif_tx_lock. In this case the driver
+ has to lock by itself when needed. It is recommended to use a try lock
+ for this and return -1 when the spin lock fails.
+ The locking there should also properly protect against
+@@ -62,12 +62,12 @@
+ Only valid when NETIF_F_LLTX is set.
+
+ dev->tx_timeout:
+- Synchronization: dev->xmit_lock spinlock.
++ Synchronization: netif_tx_lock spinlock.
+ Context: BHs disabled
+ Notes: netif_queue_stopped() is guaranteed true
+
+ dev->set_multicast_list:
+- Synchronization: dev->xmit_lock spinlock.
++ Synchronization: netif_tx_lock spinlock.
+ Context: BHs disabled
+
+ dev->poll:
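The documentation change is mechanical (xmit_lock becomes the netif_tx_lock wrapper introduced later in this patch), but the NETIF_F_LLTX paragraph is worth anchoring: a lockless-TX driver supplies its own lock and should try-lock inside hard_start_xmit, returning -1 (NETDEV_TX_LOCKED) so the core requeues the packet. A hypothetical skeleton of that contract, mocked up in user space with a pthread mutex standing in for the driver's private spinlock:

/* Illustrative skeleton, not part of the patch; names are made up. */
#include <pthread.h>
#include <stdio.h>

#define NETDEV_TX_OK	 0
#define NETDEV_TX_LOCKED -1	/* "return -1 when the spin lock fails" */

struct my_priv {
	pthread_mutex_t tx_lock;	/* driver-private, replaces netif_tx_lock */
	int tail;
};

static int my_hard_start_xmit(struct my_priv *p /*, skb, dev */)
{
	if (pthread_mutex_trylock(&p->tx_lock))
		return NETDEV_TX_LOCKED;	/* core will retry the skb */

	p->tail++;				/* queue the packet to hardware */

	pthread_mutex_unlock(&p->tx_lock);
	return NETDEV_TX_OK;
}

int main(void)
{
	struct my_priv p = { PTHREAD_MUTEX_INITIALIZER, 0 };

	printf("xmit -> %d\n", my_hard_start_xmit(&p));
	return 0;
}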
+Index: tmp-linux-2.6.17/drivers/block/aoe/aoenet.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/block/aoe/aoenet.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/block/aoe/aoenet.c 2007-01-08 16:48:05.000000000 +0000
+@@ -116,8 +116,7 @@
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return 0;
+- if (skb_is_nonlinear(skb))
+- if (skb_linearize(skb, GFP_ATOMIC) < 0)
++ if (skb_linearize(skb) < 0)
+ goto exit;
+ if (!is_aoe_netif(ifp))
+ goto exit;
+Index: tmp-linux-2.6.17/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-08 16:48:05.000000000 +0000
+@@ -821,7 +821,8 @@
+
+ ipoib_mcast_stop_thread(dev, 0);
+
+- spin_lock_irqsave(&dev->xmit_lock, flags);
++ local_irq_save(flags);
++ netif_tx_lock(dev);
+ spin_lock(&priv->lock);
+
+ /*
+@@ -896,7 +897,8 @@
+ }
+
+ spin_unlock(&priv->lock);
+- spin_unlock_irqrestore(&dev->xmit_lock, flags);
++ netif_tx_unlock(dev);
++ local_irq_restore(flags);
+
+ /* We have to cancel outside of the spinlock */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+Index: tmp-linux-2.6.17/drivers/media/dvb/dvb-core/dvb_net.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/media/dvb/dvb-core/dvb_net.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/media/dvb/dvb-core/dvb_net.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1052,7 +1052,7 @@
+
+ dvb_net_feed_stop(dev);
+ priv->rx_mode = RX_MODE_UNI;
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+
+ if (dev->flags & IFF_PROMISC) {
+ dprintk("%s: promiscuous mode\n", dev->name);
+@@ -1077,7 +1077,7 @@
+ }
+ }
+
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ dvb_net_feed_start(dev);
+ }
+
+Index: tmp-linux-2.6.17/drivers/net/8139cp.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/8139cp.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/8139cp.c 2007-01-08 16:48:05.000000000 +0000
+@@ -792,7 +792,7 @@
+ entry = cp->tx_head;
+ eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
+ if (dev->features & NETIF_F_TSO)
+- mss = skb_shinfo(skb)->tso_size;
++ mss = skb_shinfo(skb)->gso_size;
+
+ if (skb_shinfo(skb)->nr_frags == 0) {
+ struct cp_desc *txd = &cp->tx_ring[entry];
+Index: tmp-linux-2.6.17/drivers/net/bnx2.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/bnx2.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/bnx2.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1638,7 +1638,7 @@
+ skb = tx_buf->skb;
+ #ifdef BCM_TSO
+ /* partial BD completions possible with TSO packets */
+- if (skb_shinfo(skb)->tso_size) {
++ if (skb_shinfo(skb)->gso_size) {
+ u16 last_idx, last_ring_idx;
+
+ last_idx = sw_cons +
+@@ -2009,7 +2009,7 @@
+ return 1;
+ }
+
+-/* Called with rtnl_lock from vlan functions and also dev->xmit_lock
++/* Called with rtnl_lock from vlan functions and also netif_tx_lock
+ * from set_multicast.
+ */
+ static void
+@@ -4252,7 +4252,7 @@
+ }
+ #endif
+
+-/* Called with dev->xmit_lock.
++/* Called with netif_tx_lock.
+ * hard_start_xmit is pseudo-lockless - a lock is only required when
+ * the tx queue is full. This way, we get the benefit of lockless
+ * operations most of the time without the complexities to handle
+@@ -4290,7 +4290,7 @@
+ (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16));
+ }
+ #ifdef BCM_TSO
+- if ((mss = skb_shinfo(skb)->tso_size) &&
++ if ((mss = skb_shinfo(skb)->gso_size) &&
+ (skb->len > (bp->dev->mtu + ETH_HLEN))) {
+ u32 tcp_opt_len, ip_tcp_len;
+
+Index: tmp-linux-2.6.17/drivers/net/bonding/bond_main.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/bonding/bond_main.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/bonding/bond_main.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1199,8 +1199,7 @@
+ }
+
+ #define BOND_INTERSECT_FEATURES \
+- (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\
+- NETIF_F_TSO|NETIF_F_UFO)
++ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO)
+
+ /*
+ * Compute the common dev->feature set available to all slaves. Some
+@@ -1218,9 +1217,7 @@
+ features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
+
+ if ((features & NETIF_F_SG) &&
+- !(features & (NETIF_F_IP_CSUM |
+- NETIF_F_NO_CSUM |
+- NETIF_F_HW_CSUM)))
++ !(features & NETIF_F_ALL_CSUM))
+ features &= ~NETIF_F_SG;
+
+ /*
+@@ -4191,7 +4188,7 @@
+ */
+ bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
+
+- /* don't acquire bond device's xmit_lock when
++ /* don't acquire bond device's netif_tx_lock when
+ * transmitting */
+ bond_dev->features |= NETIF_F_LLTX;
+
+Index: tmp-linux-2.6.17/drivers/net/chelsio/sge.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/chelsio/sge.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/chelsio/sge.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1418,7 +1418,7 @@
+ struct cpl_tx_pkt *cpl;
+
+ #ifdef NETIF_F_TSO
+- if (skb_shinfo(skb)->tso_size) {
++ if (skb_shinfo(skb)->gso_size) {
+ int eth_type;
+ struct cpl_tx_pkt_lso *hdr;
+
+@@ -1433,7 +1433,7 @@
+ hdr->ip_hdr_words = skb->nh.iph->ihl;
+ hdr->tcp_hdr_words = skb->h.th->doff;
+ hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
+- skb_shinfo(skb)->tso_size));
++ skb_shinfo(skb)->gso_size));
+ hdr->len = htonl(skb->len - sizeof(*hdr));
+ cpl = (struct cpl_tx_pkt *)hdr;
+ sge->stats.tx_lso_pkts++;
+Index: tmp-linux-2.6.17/drivers/net/e1000/e1000_main.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/e1000/e1000_main.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/e1000/e1000_main.c 2007-01-08 16:48:05.000000000 +0000
+@@ -2413,7 +2413,7 @@
+ uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
+ int err;
+
+- if (skb_shinfo(skb)->tso_size) {
++ if (skb_shinfo(skb)->gso_size) {
+ if (skb_header_cloned(skb)) {
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (err)
+@@ -2421,7 +2421,7 @@
+ }
+
+ hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+- mss = skb_shinfo(skb)->tso_size;
++ mss = skb_shinfo(skb)->gso_size;
+ if (skb->protocol == ntohs(ETH_P_IP)) {
+ skb->nh.iph->tot_len = 0;
+ skb->nh.iph->check = 0;
+@@ -2538,7 +2538,7 @@
+ * tso gets written back prematurely before the data is fully
+ * DMA'd to the controller */
+ if (!skb->data_len && tx_ring->last_tx_tso &&
+- !skb_shinfo(skb)->tso_size) {
++ !skb_shinfo(skb)->gso_size) {
+ tx_ring->last_tx_tso = 0;
+ size -= 4;
+ }
+@@ -2776,7 +2776,7 @@
+ }
+
+ #ifdef NETIF_F_TSO
+- mss = skb_shinfo(skb)->tso_size;
++ mss = skb_shinfo(skb)->gso_size;
+ /* The controller does a simple calculation to
+ * make sure there is enough room in the FIFO before
+ * initiating the DMA for each buffer. The calc is:
+@@ -2826,7 +2826,7 @@
+ #ifdef NETIF_F_TSO
+ /* Controller Erratum workaround */
+ if (!skb->data_len && tx_ring->last_tx_tso &&
+- !skb_shinfo(skb)->tso_size)
++ !skb_shinfo(skb)->gso_size)
+ count++;
+ #endif
+
+Index: tmp-linux-2.6.17/drivers/net/forcedeth.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/forcedeth.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/forcedeth.c 2007-01-08 16:48:05.000000000 +0000
+@@ -533,9 +533,9 @@
+ * critical parts:
+ * - rx is (pseudo-) lockless: it relies on the single-threading provided
+ * by the arch code for interrupts.
+- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission
++ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
+ * needs dev->priv->lock :-(
+- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock.
++ * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
+ */
+
+ /* in dev: base, irq */
+@@ -1213,7 +1213,7 @@
+
+ /*
+ * nv_start_xmit: dev->hard_start_xmit function
+- * Called with dev->xmit_lock held.
++ * Called with netif_tx_lock held.
+ */
+ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+@@ -1303,8 +1303,8 @@
+ np->tx_skbuff[nr] = skb;
+
+ #ifdef NETIF_F_TSO
+- if (skb_shinfo(skb)->tso_size)
+- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
++ if (skb_shinfo(skb)->gso_size)
++ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
+ else
+ #endif
+ tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+@@ -1407,7 +1407,7 @@
+
+ /*
+ * nv_tx_timeout: dev->tx_timeout function
+- * Called with dev->xmit_lock held.
++ * Called with netif_tx_lock held.
+ */
+ static void nv_tx_timeout(struct net_device *dev)
+ {
+@@ -1737,7 +1737,7 @@
+ * Changing the MTU is a rare event, it shouldn't matter.
+ */
+ nv_disable_irq(dev);
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ spin_lock(&np->lock);
+ /* stop engines */
+ nv_stop_rx(dev);
+@@ -1768,7 +1768,7 @@
+ nv_start_rx(dev);
+ nv_start_tx(dev);
+ spin_unlock(&np->lock);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ nv_enable_irq(dev);
+ }
+ return 0;
+@@ -1803,7 +1803,7 @@
+ memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN);
+
+ if (netif_running(dev)) {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ spin_lock_irq(&np->lock);
+
+ /* stop rx engine */
+@@ -1815,7 +1815,7 @@
+ /* restart rx engine */
+ nv_start_rx(dev);
+ spin_unlock_irq(&np->lock);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ } else {
+ nv_copy_mac_to_hw(dev);
+ }
+@@ -1824,7 +1824,7 @@
+
+ /*
+ * nv_set_multicast: dev->set_multicast function
+- * Called with dev->xmit_lock held.
++ * Called with netif_tx_lock held.
+ */
+ static void nv_set_multicast(struct net_device *dev)
+ {
+Index: tmp-linux-2.6.17/drivers/net/hamradio/6pack.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/hamradio/6pack.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/hamradio/6pack.c 2007-01-08 16:48:05.000000000 +0000
+@@ -308,9 +308,9 @@
+ {
+ struct sockaddr_ax25 *sa = addr;
+
+- spin_lock_irq(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
+- spin_unlock_irq(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+
+ return 0;
+ }
+@@ -767,9 +767,9 @@
+ break;
+ }
+
+- spin_lock_irq(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN);
+- spin_unlock_irq(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+
+ err = 0;
+ break;
+Index: tmp-linux-2.6.17/drivers/net/hamradio/mkiss.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/hamradio/mkiss.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/hamradio/mkiss.c 2007-01-08 16:48:05.000000000 +0000
+@@ -357,9 +357,9 @@
+ {
+ struct sockaddr_ax25 *sa = addr;
+
+- spin_lock_irq(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
+- spin_unlock_irq(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+
+ return 0;
+ }
+@@ -886,9 +886,9 @@
+ break;
+ }
+
+- spin_lock_irq(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ memcpy(dev->dev_addr, addr, AX25_ADDR_LEN);
+- spin_unlock_irq(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+
+ err = 0;
+ break;
+Index: tmp-linux-2.6.17/drivers/net/ifb.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/ifb.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/ifb.c 2007-01-08 16:48:05.000000000 +0000
+@@ -76,13 +76,13 @@
+ dp->st_task_enter++;
+ if ((skb = skb_peek(&dp->tq)) == NULL) {
+ dp->st_txq_refl_try++;
+- if (spin_trylock(&_dev->xmit_lock)) {
++ if (netif_tx_trylock(_dev)) {
+ dp->st_rxq_enter++;
+ while ((skb = skb_dequeue(&dp->rq)) != NULL) {
+ skb_queue_tail(&dp->tq, skb);
+ dp->st_rx2tx_tran++;
+ }
+- spin_unlock(&_dev->xmit_lock);
++ netif_tx_unlock(_dev);
+ } else {
+ /* reschedule */
+ dp->st_rxq_notenter++;
+@@ -110,7 +110,7 @@
+ }
+ }
+
+- if (spin_trylock(&_dev->xmit_lock)) {
++ if (netif_tx_trylock(_dev)) {
+ dp->st_rxq_check++;
+ if ((skb = skb_peek(&dp->rq)) == NULL) {
+ dp->tasklet_pending = 0;
+@@ -118,10 +118,10 @@
+ netif_wake_queue(_dev);
+ } else {
+ dp->st_rxq_rsch++;
+- spin_unlock(&_dev->xmit_lock);
++ netif_tx_unlock(_dev);
+ goto resched;
+ }
+- spin_unlock(&_dev->xmit_lock);
++ netif_tx_unlock(_dev);
+ } else {
+ resched:
+ dp->tasklet_pending = 1;
+Index: tmp-linux-2.6.17/drivers/net/irda/vlsi_ir.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/irda/vlsi_ir.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/irda/vlsi_ir.c 2007-01-08 16:48:05.000000000 +0000
+@@ -959,7 +959,7 @@
+ || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec))
+ break;
+ udelay(100);
+- /* must not sleep here - we are called under xmit_lock! */
++ /* must not sleep here - called under netif_tx_lock! */
+ }
+ }
+
+Index: tmp-linux-2.6.17/drivers/net/ixgb/ixgb_main.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/ixgb/ixgb_main.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/ixgb/ixgb_main.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1168,7 +1168,7 @@
+ uint16_t ipcse, tucse, mss;
+ int err;
+
+- if(likely(skb_shinfo(skb)->tso_size)) {
++ if(likely(skb_shinfo(skb)->gso_size)) {
+ if (skb_header_cloned(skb)) {
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (err)
+@@ -1176,7 +1176,7 @@
+ }
+
+ hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+- mss = skb_shinfo(skb)->tso_size;
++ mss = skb_shinfo(skb)->gso_size;
+ skb->nh.iph->tot_len = 0;
+ skb->nh.iph->check = 0;
+ skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
+Index: tmp-linux-2.6.17/drivers/net/loopback.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/loopback.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/loopback.c 2007-01-08 16:48:05.000000000 +0000
+@@ -74,7 +74,7 @@
+ struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
+ unsigned int doffset = (iph->ihl + th->doff) * 4;
+- unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
++ unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
+ unsigned int offset = 0;
+ u32 seq = ntohl(th->seq);
+ u16 id = ntohs(iph->id);
+@@ -139,7 +139,7 @@
+ #endif
+
+ #ifdef LOOPBACK_TSO
+- if (skb_shinfo(skb)->tso_size) {
++ if (skb_shinfo(skb)->gso_size) {
+ BUG_ON(skb->protocol != htons(ETH_P_IP));
+ BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
+
+Index: tmp-linux-2.6.17/drivers/net/mv643xx_eth.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/mv643xx_eth.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/mv643xx_eth.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1200,7 +1200,7 @@
+ }
+
+ if (has_tiny_unaligned_frags(skb)) {
+- if ((skb_linearize(skb, GFP_ATOMIC) != 0)) {
++ if (__skb_linearize(skb)) {
+ stats->tx_dropped++;
+ printk(KERN_DEBUG "%s: failed to linearize tiny "
+ "unaligned fragment\n", dev->name);
+Index: tmp-linux-2.6.17/drivers/net/natsemi.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/natsemi.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/natsemi.c 2007-01-08 16:48:05.000000000 +0000
+@@ -318,12 +318,12 @@
+ The rx process only runs in the interrupt handler. Access from outside
+ the interrupt handler is only permitted after disable_irq().
+
+-The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap
++The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap
+ is set, then access is permitted under spin_lock_irq(&np->lock).
+
+ Thus configuration functions that want to access everything must call
+ disable_irq(dev->irq);
+- spin_lock_bh(dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ spin_lock_irq(&np->lock);
+
+ IV. Notes
+Index: tmp-linux-2.6.17/drivers/net/r8169.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/r8169.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/r8169.c 2007-01-08 16:48:05.000000000 +0000
+@@ -2171,7 +2171,7 @@
+ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
+ {
+ if (dev->features & NETIF_F_TSO) {
+- u32 mss = skb_shinfo(skb)->tso_size;
++ u32 mss = skb_shinfo(skb)->gso_size;
+
+ if (mss)
+ return LargeSend | ((mss & MSSMask) << MSSShift);
+Index: tmp-linux-2.6.17/drivers/net/s2io.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/s2io.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/s2io.c 2007-01-08 16:48:05.000000000 +0000
+@@ -3564,8 +3564,8 @@
+ txdp->Control_1 = 0;
+ txdp->Control_2 = 0;
+ #ifdef NETIF_F_TSO
+- mss = skb_shinfo(skb)->tso_size;
+- if (mss) {
++ mss = skb_shinfo(skb)->gso_size;
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
+ txdp->Control_1 |= TXD_TCP_LSO_EN;
+ txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
+ }
+@@ -3585,10 +3585,10 @@
+ }
+
+ frg_len = skb->len - skb->data_len;
+- if (skb_shinfo(skb)->ufo_size) {
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
+ int ufo_size;
+
+- ufo_size = skb_shinfo(skb)->ufo_size;
++ ufo_size = skb_shinfo(skb)->gso_size;
+ ufo_size &= ~7;
+ txdp->Control_1 |= TXD_UFO_EN;
+ txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
+@@ -3614,7 +3614,7 @@
+ txdp->Host_Control = (unsigned long) skb;
+ txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
+
+- if (skb_shinfo(skb)->ufo_size)
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
+ txdp->Control_1 |= TXD_UFO_EN;
+
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+@@ -3629,12 +3629,12 @@
+ (sp->pdev, frag->page, frag->page_offset,
+ frag->size, PCI_DMA_TODEVICE);
+ txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
+- if (skb_shinfo(skb)->ufo_size)
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
+ txdp->Control_1 |= TXD_UFO_EN;
+ }
+ txdp->Control_1 |= TXD_GATHER_CODE_LAST;
+
+- if (skb_shinfo(skb)->ufo_size)
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
+ frg_cnt++; /* as Txd0 was used for inband header */
+
+ tx_fifo = mac_control->tx_FIFO_start[queue];
+@@ -3648,7 +3648,7 @@
+ if (mss)
+ val64 |= TX_FIFO_SPECIAL_FUNC;
+ #endif
+- if (skb_shinfo(skb)->ufo_size)
++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
+ val64 |= TX_FIFO_SPECIAL_FUNC;
+ writeq(val64, &tx_fifo->List_Control);
+
+Index: tmp-linux-2.6.17/drivers/net/sky2.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/sky2.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/sky2.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1160,7 +1160,7 @@
+ count = sizeof(dma_addr_t) / sizeof(u32);
+ count += skb_shinfo(skb)->nr_frags * count;
+
+- if (skb_shinfo(skb)->tso_size)
++ if (skb_shinfo(skb)->gso_size)
+ ++count;
+
+ if (skb->ip_summed == CHECKSUM_HW)
+@@ -1232,7 +1232,7 @@
+ }
+
+ /* Check for TCP Segmentation Offload */
+- mss = skb_shinfo(skb)->tso_size;
++ mss = skb_shinfo(skb)->gso_size;
+ if (mss != 0) {
+ /* just drop the packet if non-linear expansion fails */
+ if (skb_header_cloned(skb) &&
+Index: tmp-linux-2.6.17/drivers/net/tg3.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/tg3.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/tg3.c 2007-01-08 16:48:53.000000000 +0000
+@@ -3743,7 +3743,7 @@
+ #if TG3_TSO_SUPPORT != 0
+ mss = 0;
+ if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
+- (mss = skb_shinfo(skb)->tso_size) != 0) {
++ (mss = skb_shinfo(skb)->gso_size) != 0) {
+ int tcp_opt_len, ip_tcp_len;
+
+ if (skb_header_cloned(skb) &&
+@@ -3871,7 +3871,7 @@
+ #if TG3_TSO_SUPPORT != 0
+ mss = 0;
+ if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
+- (mss = skb_shinfo(skb)->tso_size) != 0) {
++ (mss = skb_shinfo(skb)->gso_size) != 0) {
+ int tcp_opt_len, ip_tcp_len;
+
+ if (skb_header_cloned(skb) &&
+Index: tmp-linux-2.6.17/drivers/net/tulip/winbond-840.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/tulip/winbond-840.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/tulip/winbond-840.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1605,11 +1605,11 @@
+ * - get_stats:
+ * spin_lock_irq(np->lock), doesn't touch hw if not present
+ * - hard_start_xmit:
+- * netif_stop_queue + spin_unlock_wait(&dev->xmit_lock);
++ * synchronize_irq + netif_tx_disable;
+ * - tx_timeout:
+- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
++ * netif_device_detach + netif_tx_disable;
+ * - set_multicast_list
+- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
++ * netif_device_detach + netif_tx_disable;
+ * - interrupt handler
+ * doesn't touch hw if not present, synchronize_irq waits for
+ * running instances of the interrupt handler.
+@@ -1635,11 +1635,10 @@
+ netif_device_detach(dev);
+ update_csr6(dev, 0);
+ iowrite32(0, ioaddr + IntrEnable);
+- netif_stop_queue(dev);
+ spin_unlock_irq(&np->lock);
+
+- spin_unlock_wait(&dev->xmit_lock);
+ synchronize_irq(dev->irq);
++ netif_tx_disable(dev);
+
+ np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff;
+
+Index: tmp-linux-2.6.17/drivers/net/typhoon.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/typhoon.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/typhoon.c 2007-01-08 16:48:05.000000000 +0000
+@@ -340,7 +340,7 @@
+ #endif
+
+ #if defined(NETIF_F_TSO)
+-#define skb_tso_size(x) (skb_shinfo(x)->tso_size)
++#define skb_tso_size(x) (skb_shinfo(x)->gso_size)
+ #define TSO_NUM_DESCRIPTORS 2
+ #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT
+ #else
+Index: tmp-linux-2.6.17/drivers/net/via-velocity.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/via-velocity.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/via-velocity.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1899,6 +1899,13 @@
+
+ int pktlen = skb->len;
+
++#ifdef VELOCITY_ZERO_COPY_SUPPORT
++ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
++ kfree_skb(skb);
++ return 0;
++ }
++#endif
++
+ spin_lock_irqsave(&vptr->lock, flags);
+
+ index = vptr->td_curr[qnum];
+@@ -1914,8 +1921,6 @@
+ */
+ if (pktlen < ETH_ZLEN) {
+ /* Cannot occur until ZC support */
+- if(skb_linearize(skb, GFP_ATOMIC))
+- return 0;
+ pktlen = ETH_ZLEN;
+ memcpy(tdinfo->buf, skb->data, skb->len);
+ memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
+@@ -1933,7 +1938,6 @@
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ tdinfo->skb = skb;
+ if (nfrags > 6) {
+- skb_linearize(skb, GFP_ATOMIC);
+ memcpy(tdinfo->buf, skb->data, skb->len);
+ tdinfo->skb_dma[0] = tdinfo->buf_dma;
+ td_ptr->tdesc0.pktsize =
+Index: tmp-linux-2.6.17/drivers/net/wireless/orinoco.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/net/wireless/orinoco.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/net/wireless/orinoco.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1833,7 +1833,9 @@
+ /* Set promiscuity / multicast*/
+ priv->promiscuous = 0;
+ priv->mc_count = 0;
+- __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */
++
++ /* FIXME: what about netif_tx_lock */
++ __orinoco_set_multicast_list(dev);
+
+ return 0;
+ }
+Index: tmp-linux-2.6.17/drivers/s390/net/qeth_eddp.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/s390/net/qeth_eddp.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/s390/net/qeth_eddp.c 2007-01-08 16:48:05.000000000 +0000
+@@ -420,7 +420,7 @@
+ }
+ tcph = eddp->skb->h.th;
+ while (eddp->skb_offset < eddp->skb->len) {
+- data_len = min((int)skb_shinfo(eddp->skb)->tso_size,
++ data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
+ (int)(eddp->skb->len - eddp->skb_offset));
+ /* prepare qdio hdr */
+ if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){
+@@ -515,20 +515,20 @@
+
+ QETH_DBF_TEXT(trace, 5, "eddpcanp");
+ /* can we put multiple skbs in one page? */
+- skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len);
++ skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len);
+ if (skbs_per_page > 1){
+- ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) /
++ ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) /
+ skbs_per_page + 1;
+ ctx->elements_per_skb = 1;
+ } else {
+ /* no -> how many elements per skb? */
+- ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len +
++ ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len +
+ PAGE_SIZE) >> PAGE_SHIFT;
+ ctx->num_pages = ctx->elements_per_skb *
+- (skb_shinfo(skb)->tso_segs + 1);
++ (skb_shinfo(skb)->gso_segs + 1);
+ }
+ ctx->num_elements = ctx->elements_per_skb *
+- (skb_shinfo(skb)->tso_segs + 1);
++ (skb_shinfo(skb)->gso_segs + 1);
+ }
+
+ static inline struct qeth_eddp_context *
+Index: tmp-linux-2.6.17/drivers/s390/net/qeth_main.c
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/s390/net/qeth_main.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/s390/net/qeth_main.c 2007-01-08 16:48:05.000000000 +0000
+@@ -4417,7 +4417,7 @@
+ struct qeth_eddp_context *ctx = NULL;
+ int tx_bytes = skb->len;
+ unsigned short nr_frags = skb_shinfo(skb)->nr_frags;
+- unsigned short tso_size = skb_shinfo(skb)->tso_size;
++ unsigned short gso_size = skb_shinfo(skb)->gso_size;
+ int rc;
+
+ QETH_DBF_TEXT(trace, 6, "sendpkt");
+@@ -4453,7 +4453,7 @@
+ queue = card->qdio.out_qs
+ [qeth_get_priority_queue(card, skb, ipv, cast_type)];
+
+- if (skb_shinfo(skb)->tso_size)
++ if (skb_shinfo(skb)->gso_size)
+ large_send = card->options.large_send;
+
+ /*are we able to do TSO ? If so ,prepare and send it from here */
+@@ -4500,7 +4500,7 @@
+ card->stats.tx_packets++;
+ card->stats.tx_bytes += tx_bytes;
+ #ifdef CONFIG_QETH_PERF_STATS
+- if (tso_size &&
++ if (gso_size &&
+ !(large_send == QETH_LARGE_SEND_NO)) {
+ card->perf_stats.large_send_bytes += tx_bytes;
+ card->perf_stats.large_send_cnt++;
+Index: tmp-linux-2.6.17/drivers/s390/net/qeth_tso.h
+===================================================================
+--- tmp-linux-2.6.17.orig/drivers/s390/net/qeth_tso.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/drivers/s390/net/qeth_tso.h 2007-01-08 16:48:05.000000000 +0000
+@@ -51,7 +51,7 @@
+ hdr->ext.hdr_version = 1;
+ hdr->ext.hdr_len = 28;
+ /*insert non-fix values */
+- hdr->ext.mss = skb_shinfo(skb)->tso_size;
++ hdr->ext.mss = skb_shinfo(skb)->gso_size;
+ hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
+ hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
+ sizeof(struct qeth_hdr_tso));
+Index: tmp-linux-2.6.17/include/linux/ethtool.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/linux/ethtool.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/linux/ethtool.h 2007-01-08 16:48:05.000000000 +0000
+@@ -408,6 +408,8 @@
+ #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
+ #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
+ #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
++#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */
++#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */
+
+ /* compatibility with older code */
+ #define SPARC_ETH_GSET ETHTOOL_GSET
+Index: tmp-linux-2.6.17/include/linux/netdevice.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/linux/netdevice.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/linux/netdevice.h 2007-01-08 16:48:05.000000000 +0000
+@@ -232,6 +232,7 @@
+ __LINK_STATE_RX_SCHED,
+ __LINK_STATE_LINKWATCH_PENDING,
+ __LINK_STATE_DORMANT,
++ __LINK_STATE_QDISC_RUNNING,
+ };
+
+
+@@ -307,9 +308,17 @@
+ #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
+ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
+ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+-#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
++#define NETIF_F_GSO 2048 /* Enable software GSO. */
+ #define NETIF_F_LLTX 4096 /* LockLess TX */
+-#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
++
++ /* Segmentation offload features */
++#define NETIF_F_GSO_SHIFT 16
++#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
++#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
++#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
++
++#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
++#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
+
+ struct net_device *next_sched;
+
+@@ -398,6 +407,9 @@
+ struct list_head qdisc_list;
+ unsigned long tx_queue_len; /* Max frames per queue allowed */
+
++ /* Partially transmitted GSO packet. */
++ struct sk_buff *gso_skb;
++
+ /* ingress path synchronizer */
+ spinlock_t ingress_lock;
+ struct Qdisc *qdisc_ingress;
+@@ -406,7 +418,7 @@
+ * One part is mostly used on xmit path (device)
+ */
+ /* hard_start_xmit synchronizer */
+- spinlock_t xmit_lock ____cacheline_aligned_in_smp;
++ spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
+ /* cpu id of processor entered to hard_start_xmit or -1,
+ if nobody entered there.
+ */
+@@ -532,6 +544,8 @@
+ struct net_device *,
+ struct packet_type *,
+ struct net_device *);
++ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
++ int features);
+ void *af_packet_priv;
+ struct list_head list;
+ };
+@@ -679,7 +693,8 @@
+ extern int dev_set_mtu(struct net_device *, int);
+ extern int dev_set_mac_address(struct net_device *,
+ struct sockaddr *);
+-extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
++extern int dev_hard_start_xmit(struct sk_buff *skb,
++ struct net_device *dev);
+
+ extern void dev_init(void);
+
+@@ -889,11 +904,43 @@
+ clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
+ }
+
++static inline void netif_tx_lock(struct net_device *dev)
++{
++ spin_lock(&dev->_xmit_lock);
++ dev->xmit_lock_owner = smp_processor_id();
++}
++
++static inline void netif_tx_lock_bh(struct net_device *dev)
++{
++ spin_lock_bh(&dev->_xmit_lock);
++ dev->xmit_lock_owner = smp_processor_id();
++}
++
++static inline int netif_tx_trylock(struct net_device *dev)
++{
++ int err = spin_trylock(&dev->_xmit_lock);
++ if (!err)
++ dev->xmit_lock_owner = smp_processor_id();
++ return err;
++}
++
++static inline void netif_tx_unlock(struct net_device *dev)
++{
++ dev->xmit_lock_owner = -1;
++ spin_unlock(&dev->_xmit_lock);
++}
++
++static inline void netif_tx_unlock_bh(struct net_device *dev)
++{
++ dev->xmit_lock_owner = -1;
++ spin_unlock_bh(&dev->_xmit_lock);
++}
++
+ static inline void netif_tx_disable(struct net_device *dev)
+ {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ netif_stop_queue(dev);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ }
+
+ /* These functions live elsewhere (drivers/net/net_init.c, but related) */
+@@ -921,6 +968,7 @@
+ extern int weight_p;
+ extern int netdev_set_master(struct net_device *dev, struct net_device *master);
+ extern int skb_checksum_help(struct sk_buff *skb, int inward);
++extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
+ #ifdef CONFIG_BUG
+ extern void netdev_rx_csum_fault(struct net_device *dev);
+ #else
+@@ -940,6 +988,18 @@
+
+ extern void linkwatch_run_queue(void);
+
++static inline int skb_gso_ok(struct sk_buff *skb, int features)
++{
++ int feature = skb_shinfo(skb)->gso_size ?
++ skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0;
++ return (features & feature) == feature;
++}
++
++static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
++{
++ return !skb_gso_ok(skb, dev->features);
++}
++
+ #endif /* __KERNEL__ */
+
+ #endif /* _LINUX_DEV_H */
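The key move in this header is re-deriving the per-protocol offload flags from the SKB_GSO_* type bits shifted up by NETIF_F_GSO_SHIFT, so that skb_gso_ok() collapses to one mask comparison between what the skb needs and what the device offers. (The real helper also treats gso_size == 0 as "no segmentation needed", which the sketch below leaves out.) Illustrative, self-contained:

/* Illustrative, not part of the patch: the bit alignment behind
 * skb_gso_ok() / netif_needs_gso(). */
#include <assert.h>

#define SKB_GSO_TCPV4		(1 << 0)
#define SKB_GSO_UDPV4		(1 << 1)
#define SKB_GSO_DODGY		(1 << 2)

#define NETIF_F_GSO_SHIFT	16
#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO		(SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)

static int gso_ok(int gso_type, int features)
{
	int needed = gso_type << NETIF_F_GSO_SHIFT;

	return (features & needed) == needed;
}

int main(void)
{
	int dev_features = NETIF_F_TSO;			/* TSO-capable NIC */

	assert(gso_ok(SKB_GSO_TCPV4, dev_features));	/* hardware TSO */
	/* An untrusted (DODGY) TSO skb also needs NETIF_F_GSO_ROBUST,
	 * otherwise netif_needs_gso() forces software segmentation. */
	assert(!gso_ok(SKB_GSO_TCPV4 | SKB_GSO_DODGY, dev_features));
	assert(gso_ok(SKB_GSO_TCPV4 | SKB_GSO_DODGY,
		      dev_features | NETIF_F_GSO_ROBUST));
	return 0;
}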
+Index: tmp-linux-2.6.17/include/linux/skbuff.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/linux/skbuff.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/linux/skbuff.h 2007-01-08 16:48:05.000000000 +0000
+@@ -134,9 +134,10 @@
+ struct skb_shared_info {
+ atomic_t dataref;
+ unsigned short nr_frags;
+- unsigned short tso_size;
+- unsigned short tso_segs;
+- unsigned short ufo_size;
++ unsigned short gso_size;
++ /* Warning: this field is not always filled in (UFO)! */
++ unsigned short gso_segs;
++ unsigned short gso_type;
+ unsigned int ip6_frag_id;
+ struct sk_buff *frag_list;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+@@ -168,6 +169,14 @@
+ SKB_FCLONE_CLONE,
+ };
+
++enum {
++ SKB_GSO_TCPV4 = 1 << 0,
++ SKB_GSO_UDPV4 = 1 << 1,
++
++ /* This indicates the skb is from an untrusted source. */
++ SKB_GSO_DODGY = 1 << 2,
++};
++
+ /**
+ * struct sk_buff - socket buffer
+ * @next: Next buffer in list
+@@ -1161,18 +1170,34 @@
+ return 0;
+ }
+
++static inline int __skb_linearize(struct sk_buff *skb)
++{
++ return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
++}
++
+ /**
+ * skb_linearize - convert paged skb to linear one
+ * @skb: buffer to linearize
+- * @gfp: allocation mode
+ *
+ * If there is no free memory -ENOMEM is returned, otherwise zero
+ * is returned and the old skb data released.
+ */
+-extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp);
+-static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp)
++static inline int skb_linearize(struct sk_buff *skb)
++{
++ return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
++}
++
++/**
++ * skb_linearize_cow - make sure skb is linear and writable
++ * @skb: buffer to process
++ *
++ * If there is no free memory -ENOMEM is returned, otherwise zero
++ * is returned and the old skb data released.
++ */
++static inline int skb_linearize_cow(struct sk_buff *skb)
+ {
+- return __skb_linearize(skb, gfp);
++ return skb_is_nonlinear(skb) || skb_cloned(skb) ?
++ __skb_linearize(skb) : 0;
+ }
+
+ /**
+@@ -1269,6 +1294,7 @@
+ struct sk_buff *skb1, const u32 len);
+
+ extern void skb_release_data(struct sk_buff *skb);
++extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
+
+ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *buffer)
+Index: tmp-linux-2.6.17/include/net/pkt_sched.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/net/pkt_sched.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/net/pkt_sched.h 2007-01-08 16:48:05.000000000 +0000
+@@ -218,12 +218,13 @@
+ struct rtattr *tab);
+ extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+
+-extern int qdisc_restart(struct net_device *dev);
++extern void __qdisc_run(struct net_device *dev);
+
+ static inline void qdisc_run(struct net_device *dev)
+ {
+- while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
+- /* NOTHING */;
++ if (!netif_queue_stopped(dev) &&
++ !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
++ __qdisc_run(dev);
+ }
+
+ extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
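The qdisc_run() change replaces the open-coded restart loop with a per-device __LINK_STATE_QDISC_RUNNING bit: of all CPUs contending in dev_queue_xmit, only the one whose test_and_set_bit wins enters __qdisc_run() as the dequeuer; the rest just leave their packet on the queue. The same run-once shape, modelled with a C11 atomic flag (illustrative; the real __qdisc_run loops until the queue empties or the device stalls, then clears the bit):

/* Illustrative analogue, not part of the patch. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag qdisc_running = ATOMIC_FLAG_INIT;
static _Atomic int queued;

static void __qdisc_run(void)
{
	int n = atomic_exchange(&queued, 0);

	printf("dequeuer drained %d packets\n", n);
	atomic_flag_clear(&qdisc_running);	/* kernel: clear_bit */
}

static void qdisc_run(void)
{
	/* test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state) */
	if (!atomic_flag_test_and_set(&qdisc_running))
		__qdisc_run();			/* we are the only dequeuer */
}

int main(void)
{
	atomic_fetch_add(&queued, 3);	/* three dev_queue_xmit() calls */
	qdisc_run();			/* wins the bit, drains the queue */
	qdisc_run();			/* would be a no-op were the first
					 * still running on another CPU */
	return 0;
}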
+Index: tmp-linux-2.6.17/include/net/protocol.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/net/protocol.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/net/protocol.h 2007-01-08 16:48:05.000000000 +0000
+@@ -37,6 +37,8 @@
+ struct net_protocol {
+ int (*handler)(struct sk_buff *skb);
+ void (*err_handler)(struct sk_buff *skb, u32 info);
++ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
++ int features);
+ int no_policy;
+ };
+
+Index: tmp-linux-2.6.17/include/net/sock.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/net/sock.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/net/sock.h 2007-01-08 16:48:05.000000000 +0000
+@@ -1032,9 +1032,13 @@
+ {
+ __sk_dst_set(sk, dst);
+ sk->sk_route_caps = dst->dev->features;
++ if (sk->sk_route_caps & NETIF_F_GSO)
++ sk->sk_route_caps |= NETIF_F_TSO;
+ if (sk->sk_route_caps & NETIF_F_TSO) {
+ if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
+ sk->sk_route_caps &= ~NETIF_F_TSO;
++ else
++ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ }
+ }
+
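The sk_setup_caps() hunk is what lets unmodified TCP use GSO: a route over a NETIF_F_GSO device is promoted to TSO-capable, and a socket that keeps TSO may also assume scatter-gather and hardware checksumming, since software segmentation happens after those stages anyway. A sketch of the fix-up logic (illustrative; constants taken from this patch, with SOCK_NO_LARGESEND and dst->header_len reduced to plain flags):

/* Illustrative, not part of the patch. */
#include <assert.h>

#define NETIF_F_SG	1
#define NETIF_F_HW_CSUM	8
#define NETIF_F_GSO	2048
#define NETIF_F_TSO	(1 << 16)

static int route_caps(int dev_features, int no_largesend, int header_len)
{
	int caps = dev_features;

	if (caps & NETIF_F_GSO)
		caps |= NETIF_F_TSO;
	if (caps & NETIF_F_TSO) {
		if (no_largesend || header_len)
			caps &= ~NETIF_F_TSO;
		else
			caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
	}
	return caps;
}

int main(void)
{
	/* A plain software device still gets TSO via GSO emulation... */
	assert(route_caps(NETIF_F_GSO, 0, 0) & NETIF_F_TSO);
	/* ...but not when the route adds encapsulation headers. */
	assert(!(route_caps(NETIF_F_GSO, 0, 14) & NETIF_F_TSO));
	return 0;
}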
+Index: tmp-linux-2.6.17/include/net/tcp.h
+===================================================================
+--- tmp-linux-2.6.17.orig/include/net/tcp.h 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/include/net/tcp.h 2007-01-08 16:48:05.000000000 +0000
+@@ -565,13 +565,13 @@
+ */
+ static inline int tcp_skb_pcount(const struct sk_buff *skb)
+ {
+- return skb_shinfo(skb)->tso_segs;
++ return skb_shinfo(skb)->gso_segs;
+ }
+
+ /* This is valid iff tcp_skb_pcount() > 1. */
+ static inline int tcp_skb_mss(const struct sk_buff *skb)
+ {
+- return skb_shinfo(skb)->tso_size;
++ return skb_shinfo(skb)->gso_size;
+ }
+
+ static inline void tcp_dec_pcount_approx(__u32 *count,
+@@ -1076,6 +1076,8 @@
+
+ extern int tcp_v4_destroy_sock(struct sock *sk);
+
++extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
++
+ #ifdef CONFIG_PROC_FS
+ extern int tcp4_proc_init(void);
+ extern void tcp4_proc_exit(void);
+Index: tmp-linux-2.6.17/net/atm/clip.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/atm/clip.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/atm/clip.c 2007-01-08 16:48:05.000000000 +0000
+@@ -98,7 +98,7 @@
+ printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
+ return;
+ }
+- spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */
++ netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
+ entry->neigh->used = jiffies;
+ for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
+ if (*walk == clip_vcc) {
+@@ -122,7 +122,7 @@
+ printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
+ "0x%p)\n", entry, clip_vcc);
+ out:
+- spin_unlock_bh(&entry->neigh->dev->xmit_lock);
++ netif_tx_unlock_bh(entry->neigh->dev);
+ }
+
+ /* The neighbour entry n->lock is held. */
+Index: tmp-linux-2.6.17/net/bridge/br_device.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/bridge/br_device.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/bridge/br_device.c 2007-01-08 16:48:05.000000000 +0000
+@@ -145,9 +145,9 @@
+ struct net_bridge *br = netdev_priv(dev);
+
+ if (data)
+- br->feature_mask |= NETIF_F_IP_CSUM;
++ br->feature_mask |= NETIF_F_NO_CSUM;
+ else
+- br->feature_mask &= ~NETIF_F_IP_CSUM;
++ br->feature_mask &= ~NETIF_F_ALL_CSUM;
+
+ br_features_recompute(br);
+ return 0;
+@@ -184,6 +184,6 @@
+ dev->set_mac_address = br_set_mac_address;
+ dev->priv_flags = IFF_EBRIDGE;
+
+- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
+- | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
++ NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
+ }
+Index: tmp-linux-2.6.17/net/bridge/br_forward.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/bridge/br_forward.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/bridge/br_forward.c 2007-01-08 16:48:05.000000000 +0000
+@@ -38,7 +38,7 @@
+ int br_dev_queue_push_xmit(struct sk_buff *skb)
+ {
+ /* drop mtu oversized packets except tso */
+- if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
++ if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
+ kfree_skb(skb);
+ else {
+ #ifdef CONFIG_BRIDGE_NETFILTER
+Index: tmp-linux-2.6.17/net/bridge/br_if.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/bridge/br_if.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/bridge/br_if.c 2007-01-08 16:48:05.000000000 +0000
+@@ -372,17 +372,28 @@
+ struct net_bridge_port *p;
+ unsigned long features, checksum;
+
+- features = br->feature_mask &~ NETIF_F_IP_CSUM;
+- checksum = br->feature_mask & NETIF_F_IP_CSUM;
++ checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0;
++ features = br->feature_mask & ~NETIF_F_ALL_CSUM;
+
+ list_for_each_entry(p, &br->port_list, list) {
+- if (!(p->dev->features
+- & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
++ unsigned long feature = p->dev->features;
++
++ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
++ checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
++ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
++ checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
++ if (!(feature & NETIF_F_IP_CSUM))
+ checksum = 0;
+- features &= p->dev->features;
++
++ if (feature & NETIF_F_GSO)
++ feature |= NETIF_F_TSO;
++ feature |= NETIF_F_GSO;
++
++ features &= feature;
+ }
+
+- br->dev->features = features | checksum | NETIF_F_LLTX;
++ br->dev->features = features | checksum | NETIF_F_LLTX |
++ NETIF_F_GSO_ROBUST;
+ }
+
+ /* called with RTNL */
+Index: tmp-linux-2.6.17/net/bridge/br_netfilter.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/bridge/br_netfilter.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/bridge/br_netfilter.c 2007-01-08 16:48:05.000000000 +0000
+@@ -769,7 +769,7 @@
+ {
+ if (skb->protocol == htons(ETH_P_IP) &&
+ skb->len > skb->dev->mtu &&
+- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
++ !skb_shinfo(skb)->gso_size)
+ return ip_fragment(skb, br_dev_queue_push_xmit);
+ else
+ return br_dev_queue_push_xmit(skb);
+Index: tmp-linux-2.6.17/net/core/dev.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/dev.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/dev.c 2007-01-08 16:48:05.000000000 +0000
+@@ -115,6 +115,7 @@
+ #include <net/iw_handler.h>
+ #include <asm/current.h>
+ #include <linux/audit.h>
++#include <linux/err.h>
+
+ /*
+ * The list of packet types we will receive (as opposed to discard)
+@@ -1041,7 +1042,7 @@
+ * taps currently in use.
+ */
+
+-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
++static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct packet_type *ptype;
+
+@@ -1179,6 +1180,45 @@
+ return ret;
+ }
+
++/**
++ * skb_gso_segment - Perform segmentation on skb.
++ * @skb: buffer to segment
++ * @features: features for the output path (see dev->features)
++ *
++ * This function segments the given skb and returns a list of segments.
++ *
++ * It may return NULL if the skb requires no segmentation. This is
++ * only possible when GSO is used for verifying header integrity.
++ */
++struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
++{
++ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
++ struct packet_type *ptype;
++ int type = skb->protocol;
++
++ BUG_ON(skb_shinfo(skb)->frag_list);
++ BUG_ON(skb->ip_summed != CHECKSUM_HW);
++
++ skb->mac.raw = skb->data;
++ skb->mac_len = skb->nh.raw - skb->data;
++ __skb_pull(skb, skb->mac_len);
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
++ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
++ segs = ptype->gso_segment(skb, features);
++ break;
++ }
++ }
++ rcu_read_unlock();
++
++ __skb_push(skb, skb->data - skb->mac.raw);
++
++ return segs;
++}
++
++EXPORT_SYMBOL(skb_gso_segment);
++
+ /* Take action when hardware reception checksum errors are detected. */
+ #ifdef CONFIG_BUG
+ void netdev_rx_csum_fault(struct net_device *dev)
+@@ -1215,75 +1255,107 @@
+ #define illegal_highdma(dev, skb) (0)
+ #endif
+
+-/* Keep head the same: replace data */
+-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
++struct dev_gso_cb {
++ void (*destructor)(struct sk_buff *skb);
++};
++
++#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
++
++static void dev_gso_skb_destructor(struct sk_buff *skb)
+ {
+- unsigned int size;
+- u8 *data;
+- long offset;
+- struct skb_shared_info *ninfo;
+- int headerlen = skb->data - skb->head;
+- int expand = (skb->tail + skb->data_len) - skb->end;
+-
+- if (skb_shared(skb))
+- BUG();
+-
+- if (expand <= 0)
+- expand = 0;
+-
+- size = skb->end - skb->head + expand;
+- size = SKB_DATA_ALIGN(size);
+- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+- if (!data)
+- return -ENOMEM;
+-
+- /* Copy entire thing */
+- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
+- BUG();
+-
+- /* Set up shinfo */
+- ninfo = (struct skb_shared_info*)(data + size);
+- atomic_set(&ninfo->dataref, 1);
+- ninfo->tso_size = skb_shinfo(skb)->tso_size;
+- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+- ninfo->nr_frags = 0;
+- ninfo->frag_list = NULL;
+-
+- /* Offset between the two in bytes */
+- offset = data - skb->head;
+-
+- /* Free old data. */
+- skb_release_data(skb);
+-
+- skb->head = data;
+- skb->end = data + size;
+-
+- /* Set up new pointers */
+- skb->h.raw += offset;
+- skb->nh.raw += offset;
+- skb->mac.raw += offset;
+- skb->tail += offset;
+- skb->data += offset;
++ struct dev_gso_cb *cb;
+
+- /* We are no longer a clone, even if we were. */
+- skb->cloned = 0;
++ do {
++ struct sk_buff *nskb = skb->next;
+
+- skb->tail += skb->data_len;
+- skb->data_len = 0;
++ skb->next = nskb->next;
++ nskb->next = NULL;
++ kfree_skb(nskb);
++ } while (skb->next);
++
++ cb = DEV_GSO_CB(skb);
++ if (cb->destructor)
++ cb->destructor(skb);
++}
++
++/**
++ * dev_gso_segment - Perform emulated hardware segmentation on skb.
++ * @skb: buffer to segment
++ *
++ * This function segments the given skb and stores the list of segments
++ * in skb->next.
++ */
++static int dev_gso_segment(struct sk_buff *skb)
++{
++ struct net_device *dev = skb->dev;
++ struct sk_buff *segs;
++ int features = dev->features & ~(illegal_highdma(dev, skb) ?
++ NETIF_F_SG : 0);
++
++ segs = skb_gso_segment(skb, features);
++
++ /* Verifying header integrity only. */
++ if (!segs)
++ return 0;
++
++ if (unlikely(IS_ERR(segs)))
++ return PTR_ERR(segs);
++
++ skb->next = segs;
++ DEV_GSO_CB(skb)->destructor = skb->destructor;
++ skb->destructor = dev_gso_skb_destructor;
++ return 0;
++}
++
++int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ if (likely(!skb->next)) {
++ if (netdev_nit)
++ dev_queue_xmit_nit(skb, dev);
++
++ if (netif_needs_gso(dev, skb)) {
++ if (unlikely(dev_gso_segment(skb)))
++ goto out_kfree_skb;
++ if (skb->next)
++ goto gso;
++ }
++
++ return dev->hard_start_xmit(skb, dev);
++ }
++
++gso:
++ do {
++ struct sk_buff *nskb = skb->next;
++ int rc;
++
++ skb->next = nskb->next;
++ nskb->next = NULL;
++ rc = dev->hard_start_xmit(nskb, dev);
++ if (unlikely(rc)) {
++ nskb->next = skb->next;
++ skb->next = nskb;
++ return rc;
++ }
++ if (unlikely(netif_queue_stopped(dev) && skb->next))
++ return NETDEV_TX_BUSY;
++ } while (skb->next);
++
++ skb->destructor = DEV_GSO_CB(skb)->destructor;
++
++out_kfree_skb:
++ kfree_skb(skb);
+ return 0;
+ }
+
+ #define HARD_TX_LOCK(dev, cpu) { \
+ if ((dev->features & NETIF_F_LLTX) == 0) { \
+- spin_lock(&dev->xmit_lock); \
+- dev->xmit_lock_owner = cpu; \
++ netif_tx_lock(dev); \
+ } \
+ }
+
+ #define HARD_TX_UNLOCK(dev) { \
+ if ((dev->features & NETIF_F_LLTX) == 0) { \
+- dev->xmit_lock_owner = -1; \
+- spin_unlock(&dev->xmit_lock); \
++ netif_tx_unlock(dev); \
+ } \
+ }
+
+@@ -1319,9 +1391,13 @@
+ struct Qdisc *q;
+ int rc = -ENOMEM;
+
++ /* GSO will handle the following emulations directly. */
++ if (netif_needs_gso(dev, skb))
++ goto gso;
++
+ if (skb_shinfo(skb)->frag_list &&
+ !(dev->features & NETIF_F_FRAGLIST) &&
+- __skb_linearize(skb, GFP_ATOMIC))
++ __skb_linearize(skb))
+ goto out_kfree_skb;
+
+ /* Fragmented skb is linearized if device does not support SG,
+@@ -1330,25 +1406,26 @@
+ */
+ if (skb_shinfo(skb)->nr_frags &&
+ (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+- __skb_linearize(skb, GFP_ATOMIC))
++ __skb_linearize(skb))
+ goto out_kfree_skb;
+
+ /* If packet is not checksummed and device does not support
+ * checksumming for this protocol, complete checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_HW &&
+- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
++ (!(dev->features & NETIF_F_GEN_CSUM) &&
+ (!(dev->features & NETIF_F_IP_CSUM) ||
+ skb->protocol != htons(ETH_P_IP))))
+ if (skb_checksum_help(skb, 0))
+ goto out_kfree_skb;
+
++gso:
+ spin_lock_prefetch(&dev->queue_lock);
+
+ /* Disable soft irqs for various locks below. Also
+ * stops preemption for RCU.
+ */
+- local_bh_disable();
++ rcu_read_lock_bh();
+
+ /* Updates of qdisc are serialized by queue_lock.
+ * The struct Qdisc which is pointed to by qdisc is now a
+@@ -1382,8 +1459,8 @@
+ /* The device has no queue. Common case for software devices:
+ loopback, all the sorts of tunnels...
+
+- Really, it is unlikely that xmit_lock protection is necessary here.
+- (f.e. loopback and IP tunnels are clean ignoring statistics
++ Really, it is unlikely that netif_tx_lock protection is necessary
++ here. (f.e. loopback and IP tunnels are clean ignoring statistics
+ counters.)
+ However, it is possible, that they rely on protection
+ made by us here.
+@@ -1399,11 +1476,8 @@
+ HARD_TX_LOCK(dev, cpu);
+
+ if (!netif_queue_stopped(dev)) {
+- if (netdev_nit)
+- dev_queue_xmit_nit(skb, dev);
+-
+ rc = 0;
+- if (!dev->hard_start_xmit(skb, dev)) {
++ if (!dev_hard_start_xmit(skb, dev)) {
+ HARD_TX_UNLOCK(dev);
+ goto out;
+ }
+@@ -1422,13 +1496,13 @@
+ }
+
+ rc = -ENETDOWN;
+- local_bh_enable();
++ rcu_read_unlock_bh();
+
+ out_kfree_skb:
+ kfree_skb(skb);
+ return rc;
+ out:
+- local_bh_enable();
++ rcu_read_unlock_bh();
+ return rc;
+ }
+
+@@ -2785,7 +2859,7 @@
+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
+ spin_lock_init(&dev->queue_lock);
+- spin_lock_init(&dev->xmit_lock);
++ spin_lock_init(&dev->_xmit_lock);
+ dev->xmit_lock_owner = -1;
+ #ifdef CONFIG_NET_CLS_ACT
+ spin_lock_init(&dev->ingress_lock);
+@@ -2829,9 +2903,7 @@
+
+ /* Fix illegal SG+CSUM combinations. */
+ if ((dev->features & NETIF_F_SG) &&
+- !(dev->features & (NETIF_F_IP_CSUM |
+- NETIF_F_NO_CSUM |
+- NETIF_F_HW_CSUM))) {
++ !(dev->features & NETIF_F_ALL_CSUM)) {
+ printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+ dev->name);
+ dev->features &= ~NETIF_F_SG;
+@@ -3371,7 +3443,6 @@
+ EXPORT_SYMBOL(__dev_get_by_index);
+ EXPORT_SYMBOL(__dev_get_by_name);
+ EXPORT_SYMBOL(__dev_remove_pack);
+-EXPORT_SYMBOL(__skb_linearize);
+ EXPORT_SYMBOL(dev_valid_name);
+ EXPORT_SYMBOL(dev_add_pack);
+ EXPORT_SYMBOL(dev_alloc_name);
+Index: tmp-linux-2.6.17/net/core/dev_mcast.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/dev_mcast.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/dev_mcast.c 2007-01-08 16:48:05.000000000 +0000
+@@ -62,7 +62,7 @@
+ * Device mc lists are changed by bh at least if IPv6 is enabled,
+ * so that it must be bh protected.
+ *
+- * We block accesses to device mc filters with dev->xmit_lock.
++ * We block accesses to device mc filters with netif_tx_lock.
+ */
+
+ /*
+@@ -93,9 +93,9 @@
+
+ void dev_mc_upload(struct net_device *dev)
+ {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ __dev_mc_upload(dev);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ }
+
+ /*
+@@ -107,7 +107,7 @@
+ int err = 0;
+ struct dev_mc_list *dmi, **dmip;
+
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+
+ for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+ /*
+@@ -139,13 +139,13 @@
+ */
+ __dev_mc_upload(dev);
+
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ return 0;
+ }
+ }
+ err = -ENOENT;
+ done:
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ return err;
+ }
+
+@@ -160,7 +160,7 @@
+
+ dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+ if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+ dmi->dmi_addrlen == alen) {
+@@ -176,7 +176,7 @@
+ }
+
+ if ((dmi = dmi1) == NULL) {
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ return -ENOMEM;
+ }
+ memcpy(dmi->dmi_addr, addr, alen);
+@@ -189,11 +189,11 @@
+
+ __dev_mc_upload(dev);
+
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ return 0;
+
+ done:
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ kfree(dmi1);
+ return err;
+ }
+@@ -204,7 +204,7 @@
+
+ void dev_mc_discard(struct net_device *dev)
+ {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+
+ while (dev->mc_list != NULL) {
+ struct dev_mc_list *tmp = dev->mc_list;
+@@ -215,7 +215,7 @@
+ }
+ dev->mc_count = 0;
+
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ }
+
+ #ifdef CONFIG_PROC_FS
+@@ -250,7 +250,7 @@
+ struct dev_mc_list *m;
+ struct net_device *dev = v;
+
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+@@ -262,7 +262,7 @@
+
+ seq_putc(seq, '\n');
+ }
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ return 0;
+ }
+
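The conversions above all follow one shape: every open-coded
spin_lock_bh(&dev->xmit_lock) plus owner bookkeeping collapses into a
single named helper. A minimal userspace sketch of that wrapper idiom
(the struct and helpers are illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>

struct fake_dev {
	pthread_mutex_t tx_lock;
	int tx_owner;			/* -1 while unlocked */
};

static void fake_tx_lock(struct fake_dev *dev, int cpu)
{
	pthread_mutex_lock(&dev->tx_lock);
	dev->tx_owner = cpu;
}

static void fake_tx_unlock(struct fake_dev *dev)
{
	dev->tx_owner = -1;
	pthread_mutex_unlock(&dev->tx_lock);
}

int main(void)
{
	struct fake_dev dev = { PTHREAD_MUTEX_INITIALIZER, -1 };

	fake_tx_lock(&dev, 0);
	printf("multicast list updated by cpu %d\n", dev.tx_owner);
	fake_tx_unlock(&dev);
	return 0;
}

Centralising the discipline this way is what lets a later patch adjust
the lock's behaviour, or fix its trylock semantics, in one place.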
+Index: tmp-linux-2.6.17/net/core/ethtool.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/ethtool.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/ethtool.c 2007-01-08 16:48:05.000000000 +0000
+@@ -30,7 +30,7 @@
+
+ u32 ethtool_op_get_tx_csum(struct net_device *dev)
+ {
+- return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
++ return (dev->features & NETIF_F_ALL_CSUM) != 0;
+ }
+
+ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+@@ -551,9 +551,7 @@
+ return -EFAULT;
+
+ if (edata.data &&
+- !(dev->features & (NETIF_F_IP_CSUM |
+- NETIF_F_NO_CSUM |
+- NETIF_F_HW_CSUM)))
++ !(dev->features & NETIF_F_ALL_CSUM))
+ return -EINVAL;
+
+ return __ethtool_set_sg(dev, edata.data);
+@@ -587,7 +585,7 @@
+
+ static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
+ {
+-	struct ethtool_value edata = { ETHTOOL_GTSO };
++	struct ethtool_value edata = { ETHTOOL_GUFO };
+
+ 	if (!dev->ethtool_ops->get_ufo)
+ 		return -EOPNOTSUPP;
+@@ -615,6 +613,29 @@
+ return dev->ethtool_ops->set_ufo(dev, edata.data);
+ }
+
++static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
++{
++ struct ethtool_value edata = { ETHTOOL_GGSO };
++
++ edata.data = dev->features & NETIF_F_GSO;
++ if (copy_to_user(useraddr, &edata, sizeof(edata)))
++ return -EFAULT;
++ return 0;
++}
++
++static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
++{
++ struct ethtool_value edata;
++
++ if (copy_from_user(&edata, useraddr, sizeof(edata)))
++ return -EFAULT;
++ if (edata.data)
++ dev->features |= NETIF_F_GSO;
++ else
++ dev->features &= ~NETIF_F_GSO;
++ return 0;
++}
++
+ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
+ {
+ struct ethtool_test test;
+@@ -906,6 +927,12 @@
+ case ETHTOOL_SUFO:
+ rc = ethtool_set_ufo(dev, useraddr);
+ break;
++ case ETHTOOL_GGSO:
++ rc = ethtool_get_gso(dev, useraddr);
++ break;
++ case ETHTOOL_SGSO:
++ rc = ethtool_set_gso(dev, useraddr);
++ break;
+ default:
+ rc = -EOPNOTSUPP;
+ }
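The new ETHTOOL_GGSO/SGSO pair reduces to reading and writing one
feature bit through a { cmd, data } value, with no driver callback
involved. A minimal model (constants and struct are made up):

#include <stdio.h>

#define FAKE_F_GSO	(1u << 5)

struct fake_value {
	unsigned int cmd;
	unsigned int data;
};

static unsigned int features;

static void get_gso(struct fake_value *v)
{
	v->data = features & FAKE_F_GSO;
}

static void set_gso(const struct fake_value *v)
{
	if (v->data)
		features |= FAKE_F_GSO;
	else
		features &= ~FAKE_F_GSO;
}

int main(void)
{
	struct fake_value v = { 0, 1 };

	set_gso(&v);
	get_gso(&v);
	printf("gso is %s\n", v.data ? "on" : "off");
	return 0;
}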
+Index: tmp-linux-2.6.17/net/core/netpoll.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/netpoll.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/netpoll.c 2007-01-08 16:48:05.000000000 +0000
+@@ -273,24 +273,21 @@
+
+ do {
+ npinfo->tries--;
+- spin_lock(&np->dev->xmit_lock);
+- np->dev->xmit_lock_owner = smp_processor_id();
++ netif_tx_lock(np->dev);
+
+ /*
+ * network drivers do not expect to be called if the queue is
+ * stopped.
+ */
+ if (netif_queue_stopped(np->dev)) {
+- np->dev->xmit_lock_owner = -1;
+- spin_unlock(&np->dev->xmit_lock);
++ netif_tx_unlock(np->dev);
+ netpoll_poll(np);
+ udelay(50);
+ continue;
+ }
+
+ status = np->dev->hard_start_xmit(skb, np->dev);
+- np->dev->xmit_lock_owner = -1;
+- spin_unlock(&np->dev->xmit_lock);
++ netif_tx_unlock(np->dev);
+
+ /* success */
+ if(!status) {
+Index: tmp-linux-2.6.17/net/core/pktgen.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/pktgen.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/pktgen.c 2007-01-08 16:48:05.000000000 +0000
+@@ -2897,7 +2897,7 @@
+ }
+ }
+
+- spin_lock_bh(&odev->xmit_lock);
++ netif_tx_lock_bh(odev);
+ if (!netif_queue_stopped(odev)) {
+
+ atomic_inc(&(pkt_dev->skb->users));
+@@ -2942,7 +2942,7 @@
+ pkt_dev->next_tx_ns = 0;
+ }
+
+- spin_unlock_bh(&odev->xmit_lock);
++ netif_tx_unlock_bh(odev);
+
+ /* If pkt_dev->count is zero, then run forever */
+ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
+Index: tmp-linux-2.6.17/net/core/skbuff.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/core/skbuff.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/core/skbuff.c 2007-01-08 16:48:05.000000000 +0000
+@@ -172,9 +172,9 @@
+ shinfo = skb_shinfo(skb);
+ atomic_set(&shinfo->dataref, 1);
+ shinfo->nr_frags = 0;
+- shinfo->tso_size = 0;
+- shinfo->tso_segs = 0;
+- shinfo->ufo_size = 0;
++ shinfo->gso_size = 0;
++ shinfo->gso_segs = 0;
++ shinfo->gso_type = 0;
+ shinfo->ip6_frag_id = 0;
+ shinfo->frag_list = NULL;
+
+@@ -238,8 +238,9 @@
+
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+- skb_shinfo(skb)->tso_size = 0;
+- skb_shinfo(skb)->tso_segs = 0;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_segs = 0;
++ skb_shinfo(skb)->gso_type = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+ out:
+ return skb;
+@@ -527,8 +528,9 @@
+ new->tc_index = old->tc_index;
+ #endif
+ atomic_set(&new->users, 1);
+- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
+- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
++ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
++ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
++ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
+ }
+
+ /**
+@@ -1826,6 +1828,133 @@
+
+ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+
++/**
++ * skb_segment - Perform protocol segmentation on skb.
++ * @skb: buffer to segment
++ * @features: features for the output path (see dev->features)
++ *
++ * This function performs segmentation on the given skb. It returns
++ * a pointer to the first in a list of new skbs for the segments.
++ * In case of error it returns ERR_PTR(err).
++ */
++struct sk_buff *skb_segment(struct sk_buff *skb, int features)
++{
++ struct sk_buff *segs = NULL;
++ struct sk_buff *tail = NULL;
++ unsigned int mss = skb_shinfo(skb)->gso_size;
++ unsigned int doffset = skb->data - skb->mac.raw;
++ unsigned int offset = doffset;
++ unsigned int headroom;
++ unsigned int len;
++ int sg = features & NETIF_F_SG;
++ int nfrags = skb_shinfo(skb)->nr_frags;
++ int err = -ENOMEM;
++ int i = 0;
++ int pos;
++
++ __skb_push(skb, doffset);
++ headroom = skb_headroom(skb);
++ pos = skb_headlen(skb);
++
++ do {
++ struct sk_buff *nskb;
++ skb_frag_t *frag;
++ int hsize, nsize;
++ int k;
++ int size;
++
++ len = skb->len - offset;
++ if (len > mss)
++ len = mss;
++
++ hsize = skb_headlen(skb) - offset;
++ if (hsize < 0)
++ hsize = 0;
++ nsize = hsize + doffset;
++ if (nsize > len + doffset || !sg)
++ nsize = len + doffset;
++
++ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
++ if (unlikely(!nskb))
++ goto err;
++
++ if (segs)
++ tail->next = nskb;
++ else
++ segs = nskb;
++ tail = nskb;
++
++ nskb->dev = skb->dev;
++ nskb->priority = skb->priority;
++ nskb->protocol = skb->protocol;
++ nskb->dst = dst_clone(skb->dst);
++ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
++ nskb->pkt_type = skb->pkt_type;
++ nskb->mac_len = skb->mac_len;
++
++ skb_reserve(nskb, headroom);
++ nskb->mac.raw = nskb->data;
++ nskb->nh.raw = nskb->data + skb->mac_len;
++ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
++ memcpy(skb_put(nskb, doffset), skb->data, doffset);
++
++ if (!sg) {
++ nskb->csum = skb_copy_and_csum_bits(skb, offset,
++ skb_put(nskb, len),
++ len, 0);
++ continue;
++ }
++
++ frag = skb_shinfo(nskb)->frags;
++ k = 0;
++
++ nskb->ip_summed = CHECKSUM_HW;
++ nskb->csum = skb->csum;
++ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
++
++ while (pos < offset + len) {
++ BUG_ON(i >= nfrags);
++
++ *frag = skb_shinfo(skb)->frags[i];
++ get_page(frag->page);
++ size = frag->size;
++
++ if (pos < offset) {
++ frag->page_offset += offset - pos;
++ frag->size -= offset - pos;
++ }
++
++ k++;
++
++ if (pos + size <= offset + len) {
++ i++;
++ pos += size;
++ } else {
++ frag->size -= pos + size - (offset + len);
++ break;
++ }
++
++ frag++;
++ }
++
++ skb_shinfo(nskb)->nr_frags = k;
++ nskb->data_len = len - hsize;
++ nskb->len += nskb->data_len;
++ nskb->truesize += nskb->data_len;
++ } while ((offset += len) < skb->len);
++
++ return segs;
++
++err:
++ while ((skb = segs)) {
++ segs = skb->next;
++		kfree_skb(skb);
++ }
++ return ERR_PTR(err);
++}
++
++EXPORT_SYMBOL_GPL(skb_segment);
++
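The arithmetic inside skb_segment() is easier to see with concrete
numbers. A standalone model (all sizes made up) of how a payload is cut
into gso_size pieces, each carrying its own copy of the headers:

#include <stdio.h>

int main(void)
{
	unsigned int payload = 4300;	/* bytes after the headers */
	unsigned int mss = 1448;	/* gso_size */
	unsigned int doffset = 54;	/* MAC + IP + TCP header bytes */
	unsigned int offset = 0, n = 0;

	while (offset < payload) {
		unsigned int len = payload - offset;

		if (len > mss)
			len = mss;
		printf("segment %u: %u header + %u payload bytes\n",
		       n++, doffset, len);
		offset += len;
	}
	/* n == (payload + mss - 1) / mss, the round-up stored in gso_segs */
	printf("segments: %u\n", n);
	return 0;
}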
+ void __init skb_init(void)
+ {
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+Index: tmp-linux-2.6.17/net/decnet/dn_nsp_in.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/decnet/dn_nsp_in.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/decnet/dn_nsp_in.c 2007-01-08 16:48:05.000000000 +0000
+@@ -801,8 +801,7 @@
+ * We linearize everything except data segments here.
+ */
+ if (cb->nsp_flags & ~0x60) {
+- if (unlikely(skb_is_nonlinear(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0)
++ if (unlikely(skb_linearize(skb)))
+ goto free_out;
+ }
+
+Index: tmp-linux-2.6.17/net/decnet/dn_route.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/decnet/dn_route.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/decnet/dn_route.c 2007-01-08 16:48:05.000000000 +0000
+@@ -629,8 +629,7 @@
+ padlen);
+
+ if (flags & DN_RT_PKT_CNTL) {
+- if (unlikely(skb_is_nonlinear(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0)
++ if (unlikely(skb_linearize(skb)))
+ goto dump_it;
+
+ switch(flags & DN_RT_CNTL_MSK) {
+Index: tmp-linux-2.6.17/net/ipv4/af_inet.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/af_inet.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/af_inet.c 2007-01-08 16:48:05.000000000 +0000
+@@ -68,6 +68,7 @@
+ */
+
+ #include <linux/config.h>
++#include <linux/err.h>
+ #include <linux/errno.h>
+ #include <linux/types.h>
+ #include <linux/socket.h>
+@@ -1096,6 +1097,54 @@
+
+ EXPORT_SYMBOL(inet_sk_rebuild_header);
+
++static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
++{
++ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ struct iphdr *iph;
++ struct net_protocol *ops;
++ int proto;
++ int ihl;
++ int id;
++
++ if (!pskb_may_pull(skb, sizeof(*iph)))
++ goto out;
++
++ iph = skb->nh.iph;
++ ihl = iph->ihl * 4;
++ if (ihl < sizeof(*iph))
++ goto out;
++
++ if (!pskb_may_pull(skb, ihl))
++ goto out;
++
++ skb->h.raw = __skb_pull(skb, ihl);
++ iph = skb->nh.iph;
++ id = ntohs(iph->id);
++ proto = iph->protocol & (MAX_INET_PROTOS - 1);
++ segs = ERR_PTR(-EPROTONOSUPPORT);
++
++ rcu_read_lock();
++ ops = rcu_dereference(inet_protos[proto]);
++ if (ops && ops->gso_segment)
++ segs = ops->gso_segment(skb, features);
++ rcu_read_unlock();
++
++ if (!segs || unlikely(IS_ERR(segs)))
++ goto out;
++
++ skb = segs;
++ do {
++ iph = skb->nh.iph;
++ iph->id = htons(id++);
++ iph->tot_len = htons(skb->len - skb->mac_len);
++ iph->check = 0;
++ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
++ } while ((skb = skb->next));
++
++out:
++ return segs;
++}
++
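inet_gso_segment() dispatches on the IP protocol number through the
inet_protos table. The lookup idiom, reduced to a standalone sketch
(table size and handler names are illustrative):

#include <stdio.h>

#define MAX_PROTOS 256

typedef int (*segment_fn)(int payload_len);

static segment_fn protos[MAX_PROTOS];

static int tcp_segment(int payload_len)
{
	printf("segmenting %d bytes of TCP payload\n", payload_len);
	return 0;
}

int main(void)
{
	int proto = 6;			/* IPPROTO_TCP */

	protos[proto] = tcp_segment;	/* like the tcp_protocol entry */
	if (protos[proto])
		return protos[proto](4300);
	printf("-EPROTONOSUPPORT\n");
	return 1;
}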
+ #ifdef CONFIG_IP_MULTICAST
+ static struct net_protocol igmp_protocol = {
+ .handler = igmp_rcv,
+@@ -1105,6 +1154,7 @@
+ static struct net_protocol tcp_protocol = {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
++ .gso_segment = tcp_tso_segment,
+ .no_policy = 1,
+ };
+
+@@ -1150,6 +1200,7 @@
+ static struct packet_type ip_packet_type = {
+ .type = __constant_htons(ETH_P_IP),
+ .func = ip_rcv,
++ .gso_segment = inet_gso_segment,
+ };
+
+ static int __init inet_init(void)
+Index: tmp-linux-2.6.17/net/ipv4/ip_output.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/ip_output.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/ip_output.c 2007-01-08 16:48:05.000000000 +0000
+@@ -210,8 +210,7 @@
+ return dst_output(skb);
+ }
+ #endif
+- if (skb->len > dst_mtu(skb->dst) &&
+- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
++ if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
+ return ip_fragment(skb, ip_finish_output2);
+ else
+ return ip_finish_output2(skb);
+@@ -362,7 +361,7 @@
+ }
+
+ ip_select_ident_more(iph, &rt->u.dst, sk,
+- (skb_shinfo(skb)->tso_segs ?: 1) - 1);
++ (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+
+ /* Add an IP checksum. */
+ ip_send_check(iph);
+@@ -743,7 +742,8 @@
+ (length - transhdrlen));
+ if (!err) {
+ /* specify the length of each IP datagram fragment*/
+- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+
+ return 0;
+@@ -839,7 +839,7 @@
+ */
+ if (transhdrlen &&
+ length + fragheaderlen <= mtu &&
+- rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
++ rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
+ !exthdrlen)
+ csummode = CHECKSUM_HW;
+
+@@ -1086,14 +1086,16 @@
+
+ inet->cork.length += size;
+ if ((sk->sk_protocol == IPPROTO_UDP) &&
+- (rt->u.dst.dev->features & NETIF_F_UFO))
+- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
++ (rt->u.dst.dev->features & NETIF_F_UFO)) {
++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
++ }
+
+
+ while (size > 0) {
+ int i;
+
+- if (skb_shinfo(skb)->ufo_size)
++ if (skb_shinfo(skb)->gso_size)
+ len = size;
+ else {
+
+Index: tmp-linux-2.6.17/net/ipv4/ipcomp.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/ipcomp.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/ipcomp.c 2007-01-08 16:48:05.000000000 +0000
+@@ -84,7 +84,7 @@
+ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
+ {
+ u8 nexthdr;
+- int err = 0;
++ int err = -ENOMEM;
+ struct iphdr *iph;
+ union {
+ struct iphdr iph;
+@@ -92,11 +92,8 @@
+ } tmp_iph;
+
+
+- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0) {
+- err = -ENOMEM;
++ if (skb_linearize_cow(skb))
+ goto out;
+- }
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+@@ -171,10 +168,8 @@
+ goto out_ok;
+ }
+
+- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0) {
++ if (skb_linearize_cow(skb))
+ goto out_ok;
+- }
+
+ err = ipcomp_compress(x, skb);
+ iph = skb->nh.iph;
+Index: tmp-linux-2.6.17/net/ipv4/tcp.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/tcp.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/tcp.c 2007-01-08 16:48:05.000000000 +0000
+@@ -258,6 +258,7 @@
+ #include <linux/random.h>
+ #include <linux/bootmem.h>
+ #include <linux/cache.h>
++#include <linux/err.h>
+
+ #include <net/icmp.h>
+ #include <net/tcp.h>
+@@ -571,7 +572,7 @@
+ skb->ip_summed = CHECKSUM_HW;
+ tp->write_seq += copy;
+ TCP_SKB_CB(skb)->end_seq += copy;
+- skb_shinfo(skb)->tso_segs = 0;
++ skb_shinfo(skb)->gso_segs = 0;
+
+ if (!copied)
+ TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+@@ -622,14 +623,10 @@
+ ssize_t res;
+ struct sock *sk = sock->sk;
+
+-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
+-
+ if (!(sk->sk_route_caps & NETIF_F_SG) ||
+- !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
++ !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
+ return sock_no_sendpage(sock, page, offset, size, flags);
+
+-#undef TCP_ZC_CSUM_FLAGS
+-
+ lock_sock(sk);
+ TCP_CHECK_TIMER(sk);
+ res = do_tcp_sendpages(sk, &page, offset, size, flags);
+@@ -726,9 +723,7 @@
+ /*
+ * Check whether we can use HW checksum.
+ */
+- if (sk->sk_route_caps &
+- (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
+- NETIF_F_HW_CSUM))
++ if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
+ skb->ip_summed = CHECKSUM_HW;
+
+ skb_entail(sk, tp, skb);
+@@ -824,7 +819,7 @@
+
+ tp->write_seq += copy;
+ TCP_SKB_CB(skb)->end_seq += copy;
+- skb_shinfo(skb)->tso_segs = 0;
++ skb_shinfo(skb)->gso_segs = 0;
+
+ from += copy;
+ copied += copy;
+@@ -2071,6 +2066,71 @@
+ EXPORT_SYMBOL(compat_tcp_getsockopt);
+ #endif
+
++struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
++{
++ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ struct tcphdr *th;
++ unsigned thlen;
++ unsigned int seq;
++ unsigned int delta;
++ unsigned int oldlen;
++ unsigned int len;
++
++ if (!pskb_may_pull(skb, sizeof(*th)))
++ goto out;
++
++ th = skb->h.th;
++ thlen = th->doff * 4;
++ if (thlen < sizeof(*th))
++ goto out;
++
++ if (!pskb_may_pull(skb, thlen))
++ goto out;
++
++ segs = NULL;
++ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
++ goto out;
++
++ oldlen = (u16)~skb->len;
++ __skb_pull(skb, thlen);
++
++ segs = skb_segment(skb, features);
++ if (IS_ERR(segs))
++ goto out;
++
++ len = skb_shinfo(skb)->gso_size;
++ delta = htonl(oldlen + (thlen + len));
++
++ skb = segs;
++ th = skb->h.th;
++ seq = ntohl(th->seq);
++
++ do {
++ th->fin = th->psh = 0;
++
++ th->check = ~csum_fold(th->check + delta);
++ if (skb->ip_summed != CHECKSUM_HW)
++ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
++ skb->csum));
++
++ seq += len;
++ skb = skb->next;
++ th = skb->h.th;
++
++ th->seq = htonl(seq);
++ th->cwr = 0;
++ } while (skb->next);
++
++ delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
++ th->check = ~csum_fold(th->check + delta);
++ if (skb->ip_summed != CHECKSUM_HW)
++ th->check = csum_fold(csum_partial(skb->h.raw, thlen,
++ skb->csum));
++
++out:
++ return segs;
++}
++
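tcp_tso_segment() patches each segment's checksum with a delta rather
than recomputing it from scratch. A self-contained demonstration of that
incremental update (RFC 1624 style) on a made-up packet, cross-checked
against a full recomputation:

#include <stdio.h>
#include <stdint.h>

static uint32_t sum16(const uint16_t *p, int n, uint32_t sum)
{
	while (n--)
		sum += *p++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

int main(void)
{
	uint16_t pkt[4] = { 0x1234, 0x0400 /* length field */, 0xbeef, 0 };
	uint16_t csum = ~sum16(pkt, 4, 0) & 0xffff;

	/* Change the length field, as cutting a superpacket into MSS-sized
	 * segments does, and adjust the checksum incrementally:
	 * HC' = ~(~HC + ~m + m')   (RFC 1624, Eqn. 3). */
	uint16_t m = pkt[1], m2 = 0x05a8;
	uint32_t sum = (uint16_t)~csum;

	sum += (uint16_t)~m;
	sum += m2;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	pkt[1] = m2;
	csum = ~sum & 0xffff;

	printf("incremental %04x, recomputed %04x\n",
	       csum, (unsigned)(~sum16(pkt, 4, 0) & 0xffff));
	return 0;
}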
+ extern void __skb_cb_too_small_for_tcp(int, int);
+ extern struct tcp_congestion_ops tcp_reno;
+
+Index: tmp-linux-2.6.17/net/ipv4/tcp_input.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/tcp_input.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/tcp_input.c 2007-01-08 16:48:05.000000000 +0000
+@@ -1072,7 +1072,7 @@
+ else
+ pkt_len = (end_seq -
+ TCP_SKB_CB(skb)->seq);
+- if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
++ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
+ break;
+ pcount = tcp_skb_pcount(skb);
+ }
+Index: tmp-linux-2.6.17/net/ipv4/tcp_output.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/tcp_output.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv4/tcp_output.c 2007-01-08 16:48:05.000000000 +0000
+@@ -511,15 +511,17 @@
+ /* Avoid the costly divide in the normal
+ * non-TSO case.
+ */
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+ } else {
+ unsigned int factor;
+
+ factor = skb->len + (mss_now - 1);
+ factor /= mss_now;
+- skb_shinfo(skb)->tso_segs = factor;
+- skb_shinfo(skb)->tso_size = mss_now;
++ skb_shinfo(skb)->gso_segs = factor;
++ skb_shinfo(skb)->gso_size = mss_now;
++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ }
+ }
+
+@@ -910,7 +912,7 @@
+
+ if (!tso_segs ||
+ (tso_segs > 1 &&
+- skb_shinfo(skb)->tso_size != mss_now)) {
++ tcp_skb_mss(skb) != mss_now)) {
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tso_segs = tcp_skb_pcount(skb);
+ }
+@@ -1720,8 +1722,9 @@
+ tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+ if (!pskb_trim(skb, 0)) {
+ TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum = 0;
+ }
+@@ -1926,8 +1929,9 @@
+ skb->csum = 0;
+ TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
+ TCP_SKB_CB(skb)->sacked = 0;
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+
+ /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
+ TCP_SKB_CB(skb)->seq = tp->write_seq;
+@@ -1959,8 +1963,9 @@
+ skb->csum = 0;
+ TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
+ TCP_SKB_CB(skb)->sacked = 0;
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+
+ /* Send it off. */
+ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+@@ -2043,8 +2048,9 @@
+ TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+ TCP_SKB_CB(skb)->sacked = 0;
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+ th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
+ if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+@@ -2148,8 +2154,9 @@
+ TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
+ TCP_ECN_send_syn(sk, tp, buff);
+ TCP_SKB_CB(buff)->sacked = 0;
+- skb_shinfo(buff)->tso_segs = 1;
+- skb_shinfo(buff)->tso_size = 0;
++ skb_shinfo(buff)->gso_segs = 1;
++ skb_shinfo(buff)->gso_size = 0;
++ skb_shinfo(buff)->gso_type = 0;
+ buff->csum = 0;
+ TCP_SKB_CB(buff)->seq = tp->write_seq++;
+ TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+@@ -2253,8 +2260,9 @@
+ buff->csum = 0;
+ TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
+ TCP_SKB_CB(buff)->sacked = 0;
+- skb_shinfo(buff)->tso_segs = 1;
+- skb_shinfo(buff)->tso_size = 0;
++ skb_shinfo(buff)->gso_segs = 1;
++ skb_shinfo(buff)->gso_size = 0;
++ skb_shinfo(buff)->gso_type = 0;
+
+ /* Send it off, this clears delayed acks for us. */
+ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+@@ -2289,8 +2297,9 @@
+ skb->csum = 0;
+ TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+ TCP_SKB_CB(skb)->sacked = urgent;
+- skb_shinfo(skb)->tso_segs = 1;
+- skb_shinfo(skb)->tso_size = 0;
++ skb_shinfo(skb)->gso_segs = 1;
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
+
+ /* Use a previous sequence. This should cause the other
+ * end to send an ack. Don't queue or clone SKB, just
+Index: tmp-linux-2.6.17/net/ipv4/xfrm4_output.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv4/xfrm4_output.c 2007-01-08 16:48:05.000000000 +0000
++++ tmp-linux-2.6.17/net/ipv4/xfrm4_output.c 2007-01-08 16:48:05.000000000 +0000
+@@ -9,6 +9,8 @@
+ */
+
+ #include <linux/compiler.h>
++#include <linux/if_ether.h>
++#include <linux/kernel.h>
+ #include <linux/skbuff.h>
+ #include <linux/spinlock.h>
+ #include <linux/netfilter_ipv4.h>
+@@ -158,16 +160,10 @@
+ goto out_exit;
+ }
+
+-static int xfrm4_output_finish(struct sk_buff *skb)
++static int xfrm4_output_finish2(struct sk_buff *skb)
+ {
+ int err;
+
+-#ifdef CONFIG_NETFILTER
+- if (!skb->dst->xfrm) {
+- IPCB(skb)->flags |= IPSKB_REROUTED;
+- return dst_output(skb);
+- }
+-#endif
+ while (likely((err = xfrm4_output_one(skb)) == 0)) {
+ nf_reset(skb);
+
+@@ -180,7 +176,7 @@
+ return dst_output(skb);
+
+ err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+- skb->dst->dev, xfrm4_output_finish);
++ skb->dst->dev, xfrm4_output_finish2);
+ if (unlikely(err != 1))
+ break;
+ }
+@@ -188,6 +184,48 @@
+ return err;
+ }
+
++static int xfrm4_output_finish(struct sk_buff *skb)
++{
++ struct sk_buff *segs;
++
++#ifdef CONFIG_NETFILTER
++ if (!skb->dst->xfrm) {
++ IPCB(skb)->flags |= IPSKB_REROUTED;
++ return dst_output(skb);
++ }
++#endif
++
++ if (!skb_shinfo(skb)->gso_size)
++ return xfrm4_output_finish2(skb);
++
++ skb->protocol = htons(ETH_P_IP);
++ segs = skb_gso_segment(skb, 0);
++ kfree_skb(skb);
++ if (unlikely(IS_ERR(segs)))
++ return PTR_ERR(segs);
++
++ do {
++ struct sk_buff *nskb = segs->next;
++ int err;
++
++ segs->next = NULL;
++ err = xfrm4_output_finish2(segs);
++
++ if (unlikely(err)) {
++ while ((segs = nskb)) {
++ nskb = segs->next;
++ segs->next = NULL;
++ kfree_skb(segs);
++ }
++ return err;
++ }
++
++ segs = nskb;
++ } while (segs);
++
++ return 0;
++}
++
+ int xfrm4_output(struct sk_buff *skb)
+ {
+ return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+Index: tmp-linux-2.6.17/net/ipv6/ip6_output.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv6/ip6_output.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv6/ip6_output.c 2007-01-08 16:48:05.000000000 +0000
+@@ -147,7 +147,7 @@
+
+ int ip6_output(struct sk_buff *skb)
+ {
+- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
++ if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
+ dst_allfrag(skb->dst))
+ return ip6_fragment(skb, ip6_output2);
+ else
+@@ -830,8 +830,9 @@
+ struct frag_hdr fhdr;
+
+ /* specify the length of each IP datagram fragment*/
+- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
+- sizeof(struct frag_hdr);
++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
++ sizeof(struct frag_hdr);
++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ ipv6_select_ident(skb, &fhdr);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+Index: tmp-linux-2.6.17/net/ipv6/ipcomp6.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv6/ipcomp6.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv6/ipcomp6.c 2007-01-08 16:48:05.000000000 +0000
+@@ -65,7 +65,7 @@
+
+ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
+ {
+- int err = 0;
++ int err = -ENOMEM;
+ u8 nexthdr = 0;
+ int hdr_len = skb->h.raw - skb->nh.raw;
+ unsigned char *tmp_hdr = NULL;
+@@ -76,11 +76,8 @@
+ struct crypto_tfm *tfm;
+ int cpu;
+
+- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0) {
+- err = -ENOMEM;
++ if (skb_linearize_cow(skb))
+ goto out;
+- }
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+@@ -159,10 +156,8 @@
+ goto out_ok;
+ }
+
+- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+- skb_linearize(skb, GFP_ATOMIC) != 0) {
++ if (skb_linearize_cow(skb))
+ goto out_ok;
+- }
+
+ /* compression */
+ plen = skb->len - hdr_len;
+Index: tmp-linux-2.6.17/net/ipv6/xfrm6_output.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/ipv6/xfrm6_output.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/ipv6/xfrm6_output.c 2007-01-08 16:48:05.000000000 +0000
+@@ -151,7 +151,7 @@
+ goto out_exit;
+ }
+
+-static int xfrm6_output_finish(struct sk_buff *skb)
++static int xfrm6_output_finish2(struct sk_buff *skb)
+ {
+ int err;
+
+@@ -167,7 +167,7 @@
+ return dst_output(skb);
+
+ err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+- skb->dst->dev, xfrm6_output_finish);
++ skb->dst->dev, xfrm6_output_finish2);
+ if (unlikely(err != 1))
+ break;
+ }
+@@ -175,6 +175,41 @@
+ return err;
+ }
+
++static int xfrm6_output_finish(struct sk_buff *skb)
++{
++ struct sk_buff *segs;
++
++ if (!skb_shinfo(skb)->gso_size)
++ return xfrm6_output_finish2(skb);
++
++ skb->protocol = htons(ETH_P_IP);
++ segs = skb_gso_segment(skb, 0);
++ kfree_skb(skb);
++ if (unlikely(IS_ERR(segs)))
++ return PTR_ERR(segs);
++
++ do {
++ struct sk_buff *nskb = segs->next;
++ int err;
++
++ segs->next = NULL;
++ err = xfrm6_output_finish2(segs);
++
++ if (unlikely(err)) {
++ while ((segs = nskb)) {
++ nskb = segs->next;
++ segs->next = NULL;
++ kfree_skb(segs);
++ }
++ return err;
++ }
++
++ segs = nskb;
++ } while (segs);
++
++ return 0;
++}
++
+ int xfrm6_output(struct sk_buff *skb)
+ {
+ return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
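Both xfrm output paths now share the same shape: segment once, run the
per-packet transform on each piece, and free whatever remains if one
piece fails. The control flow as a standalone sketch (types and
process_one() are made up; process_one() consumes its packet either way):

#include <stdio.h>
#include <stdlib.h>

struct pkt {
	int id;
	struct pkt *next;
};

static int process_one(struct pkt *p)
{
	printf("transformed packet %d\n", p->id);
	free(p);
	return 0;		/* make this nonzero to see the cleanup */
}

static int process_list(struct pkt *segs)
{
	while (segs) {
		struct pkt *next = segs->next;

		segs->next = NULL;
		if (process_one(segs)) {
			while ((segs = next)) {	/* free the remainder */
				next = segs->next;
				free(segs);
			}
			return -1;
		}
		segs = next;
	}
	return 0;
}

int main(void)
{
	struct pkt *head = NULL;

	for (int i = 3; i >= 1; i--) {
		struct pkt *p = malloc(sizeof(*p));
		p->id = i;
		p->next = head;
		head = p;
	}
	return process_list(head);
}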
+Index: tmp-linux-2.6.17/net/sched/sch_generic.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/sched/sch_generic.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/sched/sch_generic.c 2007-01-08 16:48:05.000000000 +0000
+@@ -72,9 +72,9 @@
+ dev->queue_lock serializes queue accesses for this device
+ AND dev->qdisc pointer itself.
+
+- dev->xmit_lock serializes accesses to device driver.
++ netif_tx_lock serializes accesses to device driver.
+
+- dev->queue_lock and dev->xmit_lock are mutually exclusive,
++ dev->queue_lock and netif_tx_lock are mutually exclusive,
+ if one is grabbed, another must be free.
+ */
+
+@@ -90,14 +90,17 @@
+ NOTE: Called under dev->queue_lock with locally disabled BH.
+ */
+
+-int qdisc_restart(struct net_device *dev)
++static inline int qdisc_restart(struct net_device *dev)
+ {
+ struct Qdisc *q = dev->qdisc;
+ struct sk_buff *skb;
+
+ /* Dequeue packet */
+- if ((skb = q->dequeue(q)) != NULL) {
++ if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
+ unsigned nolock = (dev->features & NETIF_F_LLTX);
++
++ dev->gso_skb = NULL;
++
+ /*
+ * When the driver has LLTX set it does its own locking
+ * in start_xmit. No need to add additional overhead by
+@@ -108,7 +111,7 @@
+ * will be requeued.
+ */
+ if (!nolock) {
+- if (!spin_trylock(&dev->xmit_lock)) {
++ if (!netif_tx_trylock(dev)) {
+ collision:
+ /* So, someone grabbed the driver. */
+
+@@ -126,8 +129,6 @@
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+ goto requeue;
+ }
+- /* Remember that the driver is grabbed by us. */
+- dev->xmit_lock_owner = smp_processor_id();
+ }
+
+ {
+@@ -136,14 +137,11 @@
+
+ if (!netif_queue_stopped(dev)) {
+ int ret;
+- if (netdev_nit)
+- dev_queue_xmit_nit(skb, dev);
+
+- ret = dev->hard_start_xmit(skb, dev);
++ ret = dev_hard_start_xmit(skb, dev);
+ if (ret == NETDEV_TX_OK) {
+ if (!nolock) {
+- dev->xmit_lock_owner = -1;
+- spin_unlock(&dev->xmit_lock);
++ netif_tx_unlock(dev);
+ }
+ spin_lock(&dev->queue_lock);
+ return -1;
+@@ -157,8 +155,7 @@
+ /* NETDEV_TX_BUSY - we need to requeue */
+ /* Release the driver */
+ if (!nolock) {
+- dev->xmit_lock_owner = -1;
+- spin_unlock(&dev->xmit_lock);
++ netif_tx_unlock(dev);
+ }
+ spin_lock(&dev->queue_lock);
+ q = dev->qdisc;
+@@ -175,7 +172,10 @@
+ */
+
+ requeue:
+- q->ops->requeue(skb, q);
++ if (skb->next)
++ dev->gso_skb = skb;
++ else
++ q->ops->requeue(skb, q);
+ netif_schedule(dev);
+ return 1;
+ }
+@@ -183,11 +183,23 @@
+ return q->q.qlen;
+ }
+
++void __qdisc_run(struct net_device *dev)
++{
++ if (unlikely(dev->qdisc == &noop_qdisc))
++ goto out;
++
++ while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
++ /* NOTHING */;
++
++out:
++ clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
++}
++
+ static void dev_watchdog(unsigned long arg)
+ {
+ struct net_device *dev = (struct net_device *)arg;
+
+- spin_lock(&dev->xmit_lock);
++ netif_tx_lock(dev);
+ if (dev->qdisc != &noop_qdisc) {
+ if (netif_device_present(dev) &&
+ netif_running(dev) &&
+@@ -203,7 +215,7 @@
+ dev_hold(dev);
+ }
+ }
+- spin_unlock(&dev->xmit_lock);
++ netif_tx_unlock(dev);
+
+ dev_put(dev);
+ }
+@@ -227,17 +239,17 @@
+
+ static void dev_watchdog_up(struct net_device *dev)
+ {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ __netdev_watchdog_up(dev);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ }
+
+ static void dev_watchdog_down(struct net_device *dev)
+ {
+- spin_lock_bh(&dev->xmit_lock);
++ netif_tx_lock_bh(dev);
+ if (del_timer(&dev->watchdog_timer))
+ dev_put(dev);
+- spin_unlock_bh(&dev->xmit_lock);
++ netif_tx_unlock_bh(dev);
+ }
+
+ void netif_carrier_on(struct net_device *dev)
+@@ -579,10 +591,17 @@
+
+ dev_watchdog_down(dev);
+
+- while (test_bit(__LINK_STATE_SCHED, &dev->state))
++ /* Wait for outstanding dev_queue_xmit calls. */
++ synchronize_rcu();
++
++ /* Wait for outstanding qdisc_run calls. */
++ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+ yield();
+
+- spin_unlock_wait(&dev->xmit_lock);
++ if (dev->gso_skb) {
++ kfree_skb(dev->gso_skb);
++ dev->gso_skb = NULL;
++ }
+ }
+
+ void dev_init_scheduler(struct net_device *dev)
+@@ -624,6 +643,5 @@
+ EXPORT_SYMBOL(qdisc_alloc);
+ EXPORT_SYMBOL(qdisc_destroy);
+ EXPORT_SYMBOL(qdisc_reset);
+-EXPORT_SYMBOL(qdisc_restart);
+ EXPORT_SYMBOL(qdisc_lock_tree);
+ EXPORT_SYMBOL(qdisc_unlock_tree);
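The qdisc_restart() change gives a parked GSO remainder strict priority
over the qdisc: dev->gso_skb, when set, is consumed before anything new
is dequeued. A toy model of that single-slot stash (queue contents and
names are illustrative):

#include <stdio.h>

static const char *queue[] = { "pkt-A", "pkt-B" };
static int qhead;
static const char *gso_stash;		/* models dev->gso_skb */

static const char *dequeue(void)
{
	const char *p = gso_stash;

	if (p) {
		gso_stash = NULL;	/* like dev->gso_skb = NULL */
		return p;
	}
	return qhead < 2 ? queue[qhead++] : NULL;
}

int main(void)
{
	gso_stash = "gso-remainder";
	for (const char *p; (p = dequeue()); )
		printf("xmit %s\n", p);
	return 0;
}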
+Index: tmp-linux-2.6.17/net/sched/sch_teql.c
+===================================================================
+--- tmp-linux-2.6.17.orig/net/sched/sch_teql.c 2006-06-18 02:49:35.000000000 +0100
++++ tmp-linux-2.6.17/net/sched/sch_teql.c 2007-01-08 16:48:05.000000000 +0000
+@@ -302,20 +302,17 @@
+
+ switch (teql_resolve(skb, skb_res, slave)) {
+ case 0:
+- if (spin_trylock(&slave->xmit_lock)) {
+- slave->xmit_lock_owner = smp_processor_id();
++ if (netif_tx_trylock(slave)) {
+ if (!netif_queue_stopped(slave) &&
+ slave->hard_start_xmit(skb, slave) == 0) {
+- slave->xmit_lock_owner = -1;
+- spin_unlock(&slave->xmit_lock);
++ netif_tx_unlock(slave);
+ master->slaves = NEXT_SLAVE(q);
+ netif_wake_queue(dev);
+ master->stats.tx_packets++;
+ master->stats.tx_bytes += len;
+ return 0;
+ }
+- slave->xmit_lock_owner = -1;
+- spin_unlock(&slave->xmit_lock);
++ netif_tx_unlock(slave);
+ }
+ if (netif_queue_stopped(dev))
+ busy = 1;
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/net/ipv4/tcp.c ./net/ipv4/tcp.c
+--- ../orig-linux-2.6.17/net/ipv4/tcp.c 2007-01-08 15:21:15.000000000 +0000
++++ ./net/ipv4/tcp.c 2007-01-08 15:24:49.000000000 +0000
+@@ -2087,13 +2087,19 @@ struct sk_buff *tcp_tso_segment(struct s
+ if (!pskb_may_pull(skb, thlen))
+ goto out;
+
+- segs = NULL;
+- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+- goto out;
+-
+ oldlen = (u16)~skb->len;
+ __skb_pull(skb, thlen);
+
++ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
++ /* Packet is from an untrusted source, reset gso_segs. */
++ int mss = skb_shinfo(skb)->gso_size;
++
++ skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
++
++ segs = NULL;
++ goto out;
++ }
++
+ segs = skb_segment(skb, features);
+ if (IS_ERR(segs))
+ goto out;
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/drivers/net/bnx2.c ./drivers/net/bnx2.c
+--- ../orig-linux-2.6.17/drivers/net/bnx2.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/bnx2.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1638,7 +1638,7 @@ bnx2_tx_int(struct bnx2 *bp)
+ skb = tx_buf->skb;
+ #ifdef BCM_TSO
+ /* partial BD completions possible with TSO packets */
+- if (skb_shinfo(skb)->gso_size) {
++ if (skb_is_gso(skb)) {
+ u16 last_idx, last_ring_idx;
+
+ last_idx = sw_cons +
+diff -pruN ../orig-linux-2.6.17/drivers/net/chelsio/sge.c ./drivers/net/chelsio/sge.c
+--- ../orig-linux-2.6.17/drivers/net/chelsio/sge.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/chelsio/sge.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1418,7 +1418,7 @@ int t1_start_xmit(struct sk_buff *skb, s
+ struct cpl_tx_pkt *cpl;
+
+ #ifdef NETIF_F_TSO
+- if (skb_shinfo(skb)->gso_size) {
++ if (skb_is_gso(skb)) {
+ int eth_type;
+ struct cpl_tx_pkt_lso *hdr;
+
+diff -pruN ../orig-linux-2.6.17/drivers/net/e1000/e1000_main.c ./drivers/net/e1000/e1000_main.c
+--- ../orig-linux-2.6.17/drivers/net/e1000/e1000_main.c 2007-01-08 15:22:36.000000000 +0000
++++ ./drivers/net/e1000/e1000_main.c 2007-01-08 15:26:24.000000000 +0000
+@@ -2413,7 +2413,7 @@ e1000_tso(struct e1000_adapter *adapter,
+ uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
+ int err;
+
+- if (skb_shinfo(skb)->gso_size) {
++ if (skb_is_gso(skb)) {
+ if (skb_header_cloned(skb)) {
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (err)
+@@ -2538,7 +2538,7 @@ e1000_tx_map(struct e1000_adapter *adapt
+ * tso gets written back prematurely before the data is fully
+ * DMA'd to the controller */
+ if (!skb->data_len && tx_ring->last_tx_tso &&
+- !skb_shinfo(skb)->gso_size) {
++ !skb_is_gso(skb)) {
+ tx_ring->last_tx_tso = 0;
+ size -= 4;
+ }
+@@ -2825,8 +2825,7 @@ e1000_xmit_frame(struct sk_buff *skb, st
+
+ #ifdef NETIF_F_TSO
+ /* Controller Erratum workaround */
+- if (!skb->data_len && tx_ring->last_tx_tso &&
+- !skb_shinfo(skb)->gso_size)
++ if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
+ count++;
+ #endif
+
+diff -pruN ../orig-linux-2.6.17/drivers/net/forcedeth.c ./drivers/net/forcedeth.c
+--- ../orig-linux-2.6.17/drivers/net/forcedeth.c 2007-01-08 15:23:05.000000000 +0000
++++ ./drivers/net/forcedeth.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1303,7 +1303,7 @@ static int nv_start_xmit(struct sk_buff
+ np->tx_skbuff[nr] = skb;
+
+ #ifdef NETIF_F_TSO
+- if (skb_shinfo(skb)->gso_size)
++ if (skb_is_gso(skb))
+ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
+ else
+ #endif
+diff -pruN ../orig-linux-2.6.17/drivers/net/ixgb/ixgb_main.c ./drivers/net/ixgb/ixgb_main.c
+--- ../orig-linux-2.6.17/drivers/net/ixgb/ixgb_main.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/ixgb/ixgb_main.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1168,7 +1168,7 @@ ixgb_tso(struct ixgb_adapter *adapter, s
+ uint16_t ipcse, tucse, mss;
+ int err;
+
+- if(likely(skb_shinfo(skb)->gso_size)) {
++ if (likely(skb_is_gso(skb))) {
+ if (skb_header_cloned(skb)) {
+ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (err)
+diff -pruN ../orig-linux-2.6.17/drivers/net/loopback.c ./drivers/net/loopback.c
+--- ../orig-linux-2.6.17/drivers/net/loopback.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/loopback.c 2007-01-08 15:25:03.000000000 +0000
+@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff
+ #endif
+
+ #ifdef LOOPBACK_TSO
+- if (skb_shinfo(skb)->gso_size) {
++ if (skb_is_gso(skb)) {
+ BUG_ON(skb->protocol != htons(ETH_P_IP));
+ BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
+
+diff -pruN ../orig-linux-2.6.17/drivers/net/sky2.c ./drivers/net/sky2.c
+--- ../orig-linux-2.6.17/drivers/net/sky2.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/sky2.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1160,7 +1160,7 @@ static unsigned tx_le_req(const struct s
+ count = sizeof(dma_addr_t) / sizeof(u32);
+ count += skb_shinfo(skb)->nr_frags * count;
+
+- if (skb_shinfo(skb)->gso_size)
++ if (skb_is_gso(skb))
+ ++count;
+
+ if (skb->ip_summed == CHECKSUM_HW)
+diff -pruN ../orig-linux-2.6.17/drivers/net/typhoon.c ./drivers/net/typhoon.c
+--- ../orig-linux-2.6.17/drivers/net/typhoon.c 2007-01-08 15:16:27.000000000 +0000
++++ ./drivers/net/typhoon.c 2007-01-08 15:25:03.000000000 +0000
+@@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, st
+ * If problems develop with TSO, check this first.
+ */
+ numDesc = skb_shinfo(skb)->nr_frags + 1;
+- if(skb_tso_size(skb))
++ if (skb_is_gso(skb))
+ numDesc++;
+
+ /* When checking for free space in the ring, we need to also
+@@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, st
+ TYPHOON_TX_PF_VLAN_TAG_SHIFT);
+ }
+
+- if(skb_tso_size(skb)) {
++ if (skb_is_gso(skb)) {
+ first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT;
+ first_txd->numDesc++;
+
+diff -pruN ../orig-linux-2.6.17/drivers/s390/net/qeth_main.c ./drivers/s390/net/qeth_main.c
+--- ../orig-linux-2.6.17/drivers/s390/net/qeth_main.c 2007-01-08 15:23:29.000000000 +0000
++++ ./drivers/s390/net/qeth_main.c 2007-01-08 15:26:49.000000000 +0000
+@@ -4417,7 +4417,6 @@ qeth_send_packet(struct qeth_card *card,
+ struct qeth_eddp_context *ctx = NULL;
+ int tx_bytes = skb->len;
+ unsigned short nr_frags = skb_shinfo(skb)->nr_frags;
+- unsigned short gso_size = skb_shinfo(skb)->gso_size;
+ int rc;
+
+ QETH_DBF_TEXT(trace, 6, "sendpkt");
+@@ -4453,7 +4452,7 @@ qeth_send_packet(struct qeth_card *card,
+ queue = card->qdio.out_qs
+ [qeth_get_priority_queue(card, skb, ipv, cast_type)];
+
+- if (skb_shinfo(skb)->gso_size)
++ if (skb_is_gso(skb))
+ large_send = card->options.large_send;
+
+ /*are we able to do TSO ? If so ,prepare and send it from here */
+@@ -4500,8 +4499,7 @@ qeth_send_packet(struct qeth_card *card,
+ card->stats.tx_packets++;
+ card->stats.tx_bytes += tx_bytes;
+ #ifdef CONFIG_QETH_PERF_STATS
+- if (gso_size &&
+- !(large_send == QETH_LARGE_SEND_NO)) {
++ if (skb_is_gso(skb) && !(large_send == QETH_LARGE_SEND_NO)) {
+ card->perf_stats.large_send_bytes += tx_bytes;
+ card->perf_stats.large_send_cnt++;
+ }
+diff -pruN ../orig-linux-2.6.17/include/linux/netdevice.h ./include/linux/netdevice.h
+--- ../orig-linux-2.6.17/include/linux/netdevice.h 2007-01-08 15:24:25.000000000 +0000
++++ ./include/linux/netdevice.h 2007-01-08 15:25:03.000000000 +0000
+@@ -546,6 +546,7 @@ struct packet_type {
+ struct net_device *);
+ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
+ int features);
++ int (*gso_send_check)(struct sk_buff *skb);
+ void *af_packet_priv;
+ struct list_head list;
+ };
+@@ -990,14 +991,15 @@ extern void linkwatch_run_queue(void);
+
+ static inline int skb_gso_ok(struct sk_buff *skb, int features)
+ {
+- int feature = skb_shinfo(skb)->gso_size ?
+- skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0;
++ int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT;
+ return (features & feature) == feature;
+ }
+
+ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+ {
+- return !skb_gso_ok(skb, dev->features);
++ return skb_is_gso(skb) &&
++ (!skb_gso_ok(skb, dev->features) ||
++ unlikely(skb->ip_summed != CHECKSUM_HW));
+ }
+
+ #endif /* __KERNEL__ */
+diff -pruN ../orig-linux-2.6.17/include/linux/skbuff.h ./include/linux/skbuff.h
+--- ../orig-linux-2.6.17/include/linux/skbuff.h 2007-01-08 15:16:27.000000000 +0000
++++ ./include/linux/skbuff.h 2007-01-08 15:25:03.000000000 +0000
+@@ -1422,5 +1422,10 @@ static inline void nf_reset(struct sk_bu
+ static inline void nf_reset(struct sk_buff *skb) {}
+ #endif /* CONFIG_NETFILTER */
+
++static inline int skb_is_gso(const struct sk_buff *skb)
++{
++ return skb_shinfo(skb)->gso_size;
++}
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SKBUFF_H */
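skb_is_gso() replaces scattered skb_shinfo(skb)->gso_size tests with one
predicate. The same idiom in miniature, with a made-up struct standing
in for the skb:

#include <stdio.h>

struct fake_skb {
	unsigned int gso_size;	/* 0 means "not a GSO packet" */
	unsigned int len;
};

static inline int fake_is_gso(const struct fake_skb *skb)
{
	return skb->gso_size != 0;
}

int main(void)
{
	struct fake_skb a = { 0, 1500 }, b = { 1448, 64000 };

	printf("a: %s, b: %s\n",
	       fake_is_gso(&a) ? "gso" : "plain",
	       fake_is_gso(&b) ? "gso" : "plain");
	return 0;
}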
+diff -pruN ../orig-linux-2.6.17/include/net/protocol.h ./include/net/protocol.h
+--- ../orig-linux-2.6.17/include/net/protocol.h 2007-01-08 15:16:27.000000000 +0000
++++ ./include/net/protocol.h 2007-01-08 15:25:03.000000000 +0000
+@@ -37,6 +37,7 @@
+ struct net_protocol {
+ int (*handler)(struct sk_buff *skb);
+ void (*err_handler)(struct sk_buff *skb, u32 info);
++ int (*gso_send_check)(struct sk_buff *skb);
+ struct sk_buff *(*gso_segment)(struct sk_buff *skb,
+ int features);
+ int no_policy;
+diff -pruN ../orig-linux-2.6.17/include/net/tcp.h ./include/net/tcp.h
+--- ../orig-linux-2.6.17/include/net/tcp.h 2007-01-08 15:16:27.000000000 +0000
++++ ./include/net/tcp.h 2007-01-08 15:25:03.000000000 +0000
+@@ -1076,6 +1076,7 @@ extern struct request_sock_ops tcp_reque
+
+ extern int tcp_v4_destroy_sock(struct sock *sk);
+
++extern int tcp_v4_gso_send_check(struct sk_buff *skb);
+ extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
+
+ #ifdef CONFIG_PROC_FS
+diff -pruN ../orig-linux-2.6.17/net/bridge/br_forward.c ./net/bridge/br_forward.c
+--- ../orig-linux-2.6.17/net/bridge/br_forward.c 2007-01-08 15:22:15.000000000 +0000
++++ ./net/bridge/br_forward.c 2007-01-08 15:25:32.000000000 +0000
+@@ -38,7 +38,7 @@ static inline unsigned packet_length(con
+ int br_dev_queue_push_xmit(struct sk_buff *skb)
+ {
+ /* drop mtu oversized packets except tso */
+- if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
++ if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
+ kfree_skb(skb);
+ else {
+ #ifdef CONFIG_BRIDGE_NETFILTER
+diff -pruN ../orig-linux-2.6.17/net/bridge/br_netfilter.c ./net/bridge/br_netfilter.c
+--- ../orig-linux-2.6.17/net/bridge/br_netfilter.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/bridge/br_netfilter.c 2007-01-08 15:25:03.000000000 +0000
+@@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct s
+ {
+ if (skb->protocol == htons(ETH_P_IP) &&
+ skb->len > skb->dev->mtu &&
+- !skb_shinfo(skb)->gso_size)
++ !skb_is_gso(skb))
+ return ip_fragment(skb, br_dev_queue_push_xmit);
+ else
+ return br_dev_queue_push_xmit(skb);
+diff -pruN ../orig-linux-2.6.17/net/core/dev.c ./net/core/dev.c
+--- ../orig-linux-2.6.17/net/core/dev.c 2007-01-08 15:20:44.000000000 +0000
++++ ./net/core/dev.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1156,9 +1156,17 @@ int skb_checksum_help(struct sk_buff *sk
+ unsigned int csum;
+ int ret = 0, offset = skb->h.raw - skb->data;
+
+- if (inward) {
+- skb->ip_summed = CHECKSUM_NONE;
+- goto out;
++ if (inward)
++ goto out_set_summed;
++
++ if (unlikely(skb_shinfo(skb)->gso_size)) {
++ static int warned;
++
++ WARN_ON(!warned);
++ warned = 1;
++
++ /* Let GSO fix up the checksum. */
++ goto out_set_summed;
+ }
+
+ if (skb_cloned(skb)) {
+@@ -1175,6 +1183,8 @@ int skb_checksum_help(struct sk_buff *sk
+ BUG_ON(skb->csum + 2 > offset);
+
+ *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
++
++out_set_summed:
+ skb->ip_summed = CHECKSUM_NONE;
+ out:
+ return ret;
+@@ -1195,17 +1205,35 @@ struct sk_buff *skb_gso_segment(struct s
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_type *ptype;
+ int type = skb->protocol;
++ int err;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+- BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+ skb->mac.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->data;
+ __skb_pull(skb, skb->mac_len);
+
++ if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
++ static int warned;
++
++ WARN_ON(!warned);
++ warned = 1;
++
++ if (skb_header_cloned(skb) &&
++ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
++ return ERR_PTR(err);
++ }
++
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
++ if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
++ err = ptype->gso_send_check(skb);
++ segs = ERR_PTR(err);
++ if (err || skb_gso_ok(skb, features))
++ break;
++ __skb_push(skb, skb->data - skb->nh.raw);
++ }
+ segs = ptype->gso_segment(skb, features);
+ break;
+ }
+diff -pruN ../orig-linux-2.6.17/net/ipv4/af_inet.c ./net/ipv4/af_inet.c
+--- ../orig-linux-2.6.17/net/ipv4/af_inet.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv4/af_inet.c 2007-01-08 15:25:03.000000000 +0000
+@@ -1097,6 +1097,40 @@ int inet_sk_rebuild_header(struct sock *
+
+ EXPORT_SYMBOL(inet_sk_rebuild_header);
+
++static int inet_gso_send_check(struct sk_buff *skb)
++{
++ struct iphdr *iph;
++ struct net_protocol *ops;
++ int proto;
++ int ihl;
++ int err = -EINVAL;
++
++ if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
++ goto out;
++
++ iph = skb->nh.iph;
++ ihl = iph->ihl * 4;
++ if (ihl < sizeof(*iph))
++ goto out;
++
++ if (unlikely(!pskb_may_pull(skb, ihl)))
++ goto out;
++
++ skb->h.raw = __skb_pull(skb, ihl);
++ iph = skb->nh.iph;
++ proto = iph->protocol & (MAX_INET_PROTOS - 1);
++ err = -EPROTONOSUPPORT;
++
++ rcu_read_lock();
++ ops = rcu_dereference(inet_protos[proto]);
++ if (likely(ops && ops->gso_send_check))
++ err = ops->gso_send_check(skb);
++ rcu_read_unlock();
++
++out:
++ return err;
++}
++
+ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+ {
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+@@ -1154,6 +1188,7 @@ static struct net_protocol igmp_protocol
+ static struct net_protocol tcp_protocol = {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
++ .gso_send_check = tcp_v4_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .no_policy = 1,
+ };
+@@ -1200,6 +1235,7 @@ static int ipv4_proc_init(void);
+ static struct packet_type ip_packet_type = {
+ .type = __constant_htons(ETH_P_IP),
+ .func = ip_rcv,
++ .gso_send_check = inet_gso_send_check,
+ .gso_segment = inet_gso_segment,
+ };
+
+diff -pruN ../orig-linux-2.6.17/net/ipv4/ip_output.c ./net/ipv4/ip_output.c
+--- ../orig-linux-2.6.17/net/ipv4/ip_output.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv4/ip_output.c 2007-01-08 15:25:03.000000000 +0000
+@@ -210,7 +210,7 @@ static inline int ip_finish_output(struc
+ return dst_output(skb);
+ }
+ #endif
+- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
++ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+ return ip_fragment(skb, ip_finish_output2);
+ else
+ return ip_finish_output2(skb);
+@@ -1095,7 +1095,7 @@ ssize_t ip_append_page(struct sock *sk,
+ while (size > 0) {
+ int i;
+
+- if (skb_shinfo(skb)->gso_size)
++ if (skb_is_gso(skb))
+ len = size;
+ else {
+
+diff -pruN ../orig-linux-2.6.17/net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c
+--- ../orig-linux-2.6.17/net/ipv4/tcp_ipv4.c 2006-06-18 02:49:35.000000000 +0100
++++ ./net/ipv4/tcp_ipv4.c 2007-01-08 15:25:03.000000000 +0000
+@@ -495,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk,
+ }
+ }
+
++int tcp_v4_gso_send_check(struct sk_buff *skb)
++{
++ struct iphdr *iph;
++ struct tcphdr *th;
++
++ if (!pskb_may_pull(skb, sizeof(*th)))
++ return -EINVAL;
++
++ iph = skb->nh.iph;
++ th = skb->h.th;
++
++ th->check = 0;
++ th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
++ skb->csum = offsetof(struct tcphdr, check);
++ skb->ip_summed = CHECKSUM_HW;
++ return 0;
++}
++
+ /*
+ * This routine will send an RST to the other tcp.
+ *
+diff -pruN ../orig-linux-2.6.17/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c
+--- ../orig-linux-2.6.17/net/ipv4/xfrm4_output.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv4/xfrm4_output.c 2007-01-08 15:25:03.000000000 +0000
+@@ -195,7 +195,7 @@ static int xfrm4_output_finish(struct sk
+ }
+ #endif
+
+- if (!skb_shinfo(skb)->gso_size)
++ if (!skb_is_gso(skb))
+ return xfrm4_output_finish2(skb);
+
+ skb->protocol = htons(ETH_P_IP);
+diff -pruN ../orig-linux-2.6.17/net/ipv6/ip6_output.c ./net/ipv6/ip6_output.c
+--- ../orig-linux-2.6.17/net/ipv6/ip6_output.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv6/ip6_output.c 2007-01-08 15:25:03.000000000 +0000
+@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *s
+
+ int ip6_output(struct sk_buff *skb)
+ {
+- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
++ if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
+ dst_allfrag(skb->dst))
+ return ip6_fragment(skb, ip6_output2);
+ else
+diff -pruN ../orig-linux-2.6.17/net/ipv6/xfrm6_output.c ./net/ipv6/xfrm6_output.c
+--- ../orig-linux-2.6.17/net/ipv6/xfrm6_output.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv6/xfrm6_output.c 2007-01-08 15:25:03.000000000 +0000
+@@ -179,7 +179,7 @@ static int xfrm6_output_finish(struct sk
+ {
+ struct sk_buff *segs;
+
+- if (!skb_shinfo(skb)->gso_size)
++ if (!skb_is_gso(skb))
+ return xfrm6_output_finish2(skb);
+
+ skb->protocol = htons(ETH_P_IP);
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/include/linux/netdevice.h ./include/linux/netdevice.h
+--- ../orig-linux-2.6.17/include/linux/netdevice.h 2007-01-08 15:25:03.000000000 +0000
++++ ./include/linux/netdevice.h 2007-01-08 15:27:08.000000000 +0000
+@@ -919,10 +919,10 @@ static inline void netif_tx_lock_bh(stru
+
+ static inline int netif_tx_trylock(struct net_device *dev)
+ {
+- int err = spin_trylock(&dev->_xmit_lock);
+- if (!err)
++ int ok = spin_trylock(&dev->_xmit_lock);
++ if (likely(ok))
+ dev->xmit_lock_owner = smp_processor_id();
+- return err;
++ return ok;
+ }
+
+ static inline void netif_tx_unlock(struct net_device *dev)
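The err-to-ok rename matters because spin_trylock() returns nonzero on
success, while a 0-on-success convention (the one
pthread_mutex_trylock() uses) is exactly the reading the original code
fell into. A small userspace sketch of the corrected pattern:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int owner = -1;

static int tx_trylock(int cpu)
{
	int ok = (pthread_mutex_trylock(&lock) == 0);

	if (ok)			/* record the owner only on success */
		owner = cpu;
	return ok;	/* nonzero means "got it", like spin_trylock() */
}

int main(void)
{
	if (tx_trylock(0))
		printf("locked, owner %d recorded\n", owner);
	if (!tx_trylock(1))
		printf("second trylock correctly fails\n");
	return 0;
}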
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/net/core/dev.c ./net/core/dev.c
+--- ../orig-linux-2.6.17/net/core/dev.c 2007-01-08 15:25:03.000000000 +0000
++++ ./net/core/dev.c 2007-01-08 15:27:22.000000000 +0000
+@@ -1160,11 +1160,6 @@ int skb_checksum_help(struct sk_buff *sk
+ goto out_set_summed;
+
+ if (unlikely(skb_shinfo(skb)->gso_size)) {
+- static int warned;
+-
+- WARN_ON(!warned);
+- warned = 1;
+-
+ /* Let GSO fix up the checksum. */
+ goto out_set_summed;
+ }
+@@ -1214,11 +1209,6 @@ struct sk_buff *skb_gso_segment(struct s
+ __skb_pull(skb, skb->mac_len);
+
+ if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+- static int warned;
+-
+- WARN_ON(!warned);
+- warned = 1;
+-
+ if (skb_header_cloned(skb) &&
+ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ return ERR_PTR(err);
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/net/ipv4/tcp_input.c ./net/ipv4/tcp_input.c
+--- ../orig-linux-2.6.17/net/ipv4/tcp_input.c 2007-01-08 15:16:27.000000000 +0000
++++ ./net/ipv4/tcp_input.c 2007-01-08 15:27:35.000000000 +0000
+@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s
+ /* skb->len may jitter because of SACKs, even if peer
+ * sends good full-sized frames.
+ */
+- len = skb->len;
++ len = skb_shinfo(skb)->gso_size ?: skb->len;
+ if (len >= icsk->icsk_ack.rcv_mss) {
+ icsk->icsk_ack.rcv_mss = len;
+ } else {
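The ?: form above is GCC's two-operand conditional: a ?: b evaluates to
a when a is nonzero, else b, so the receive-MSS estimator samples the
per-segment gso_size instead of the merged super-packet length. Made-up
numbers:

#include <stdio.h>

int main(void)
{
	unsigned int skb_len = 64000;
	unsigned int gso_size = 1448;
	unsigned int len;

	len = gso_size ?: skb_len;	/* a ?: b  ==  a ? a : b */
	printf("rcv_mss sample: %u\n", len);

	gso_size = 0;			/* ordinary, non-GSO packet */
	len = gso_size ?: skb_len;
	printf("rcv_mss sample: %u\n", len);
	return 0;
}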
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/net/core/skbuff.c ./net/core/skbuff.c
+--- ../orig-linux-2.6.17/net/core/skbuff.c 2007-01-08 15:21:05.000000000 +0000
++++ ./net/core/skbuff.c 2007-01-08 15:27:48.000000000 +0000
+@@ -1859,7 +1859,7 @@ struct sk_buff *skb_segment(struct sk_bu
+ do {
+ struct sk_buff *nskb;
+ skb_frag_t *frag;
+- int hsize, nsize;
++ int hsize;
+ int k;
+ int size;
+
+@@ -1870,11 +1870,10 @@ struct sk_buff *skb_segment(struct sk_bu
+ hsize = skb_headlen(skb) - offset;
+ if (hsize < 0)
+ hsize = 0;
+- nsize = hsize + doffset;
+- if (nsize > len + doffset || !sg)
+- nsize = len + doffset;
++ if (hsize > len || !sg)
++ hsize = len;
+
+- nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
++ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
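Worked numbers for the fix above (sizes made up): the allocation total
is unchanged (min(hsize, len) + doffset either way), but hsize itself is
now clamped before it is used to copy the linear chunk, so a mostly
linear skb can no longer spill more than one MSS of payload into a
single segment.

#include <stdio.h>

int main(void)
{
	int headlen = 2000, offset = 54, doffset = 54, len = 1448, sg = 1;

	int hsize = headlen - offset;	/* 1946 linear bytes remain */
	if (hsize < 0)
		hsize = 0;
	if (hsize > len || !sg)
		hsize = len;		/* clamped to one MSS: 1448 */

	printf("copy %d linear bytes, allocate %d\n",
	       hsize, hsize + doffset);
	return 0;
}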
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c
+--- ../orig-linux-2.6.17/arch/i386/mm/pageattr.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/mm/pageattr.c 2007-01-08 15:32:50.000000000 +0000
+@@ -85,7 +85,7 @@ static void set_pmd_pte(pte_t *kpte, uns
+ unsigned long flags;
+
+ set_pte_atomic(kpte, pte); /* change init_mm */
+- if (PTRS_PER_PMD > 1)
++ if (HAVE_SHARED_KERNEL_PMD)
+ return;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+diff -pruN ../orig-linux-2.6.17/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c
+--- ../orig-linux-2.6.17/arch/i386/mm/pgtable.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/mm/pgtable.c 2007-01-08 15:32:50.000000000 +0000
+@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
+ spin_lock_irqsave(&pgd_lock, flags);
+ }
+
+- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir + USER_PTRS_PER_PGD,
++ KERNEL_PGD_PTRS);
+ if (PTRS_PER_PMD > 1)
+ return;
+
+@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
++
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++ if (!pmd)
++ goto out_oom;
++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
++ }
++
++ spin_lock_irqsave(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
++ pgd_t *kpgd = pgd_offset_k(v);
++ pud_t *kpud = pud_offset(kpgd, v);
++ pmd_t *kpmd = pmd_offset(kpud, v);
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memcpy(pmd, kpmd, PAGE_SIZE);
++ }
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
++
+ return pgd;
+
+ out_oom:
+@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd)
+ int i;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+- if (PTRS_PER_PMD > 1)
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
++ if (PTRS_PER_PMD > 1) {
++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++ spin_lock_irqsave(&pgd_lock, flags);
++ pgd_list_del(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ }
++ }
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
+ kmem_cache_free(pgd_cache, pgd);
+ }
+diff -pruN ../orig-linux-2.6.17/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h
+--- ../orig-linux-2.6.17/include/asm-i386/pgtable-2level-defs.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-i386/pgtable-2level-defs.h 2007-01-08 15:32:50.000000000 +0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
+ #define _I386_PGTABLE_2LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 0
++
+ /*
+ * traditional i386 two-level paging structure:
+ */
+diff -pruN ../orig-linux-2.6.17/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h
+--- ../orig-linux-2.6.17/include/asm-i386/pgtable-3level-defs.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-i386/pgtable-3level-defs.h 2007-01-08 15:32:50.000000000 +0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
+ #define _I386_PGTABLE_3LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 1
++
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
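+
+Background for pmd-shared: with PAE the kernel pmd is normally shared by
+every pgd, so a kernel mapping update touches one page. HAVE_SHARED_KERNEL_PMD
+parameterises that assumption so a Xen pagetable flavour can run with
+per-pgd kernel pmds: under PAE, USER_PTRS_PER_PGD is 3 and PTRS_PER_PGD is
+4, so the !HAVE_SHARED_KERNEL_PMD path in pgd_alloc() gives each pgd one
+private kernel pmd, seeds it from the live kernel mappings under pgd_lock,
+and links the pgd into pgd_list. set_pmd_pte() then propagates later
+kernel mapping changes to every tracked pgd; the code following the hunk
+shown is, from memory of 2.6.17's pageattr.c, roughly:
+
+    spin_lock_irqsave(&pgd_lock, flags);
+    for (page = pgd_list; page; page = (struct page *)page->index) {
+        pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(address);
+        pud_t *pud = pud_offset(pgd, address);
+        pmd_t *pmd = pmd_offset(pud, address);
+        set_pte_atomic((pte_t *)pmd, pte);
+    }
+    spin_unlock_irqrestore(&pgd_lock, flags);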
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S
+--- ../orig-linux-2.6.17/arch/i386/kernel/entry.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/entry.S 2007-01-08 15:33:41.000000000 +0000
+@@ -177,7 +177,7 @@ need_resched:
+
+ # sysenter call handler stub
+ ENTRY(sysenter_entry)
+- movl TSS_sysenter_esp0(%esp),%esp
++ movl SYSENTER_stack_esp0(%esp),%esp
+ sysenter_past_esp:
+ sti
+ pushl $(__USER_DS)
+@@ -496,7 +496,7 @@ device_not_available_emulate:
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+@@ -508,7 +508,7 @@ device_not_available_emulate:
+ cmpw $__KERNEL_CS,4(%esp); \
+ jne ok; \
+ label: \
+- movl TSS_sysenter_esp0+offset(%esp),%esp; \
++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ pushfl; \
+ pushl $__KERNEL_CS; \
+ pushl $sysenter_past_esp
--- /dev/null
+git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
+git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
+git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
+linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
+git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch
+linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch
+blktap-aio-16_03_06.patch
+fix-hz-suspend.patch
+fix-ide-cd-pio-mode.patch
+i386-mach-io-check-nmi.patch
+ipv6-no-autoconf.patch
+net-csum.patch
+net-gso-0-base.patch
+net-gso-1-check-dodgy.patch
+net-gso-2-checksum-fix.patch
+net-gso-3-fix-errorcheck.patch
+net-gso-4-kill-warnon.patch
+net-gso-5-rcv-mss.patch
+net-gso-6-linear-segmentation.patch
+pmd-shared.patch
+rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
+x86-increase-interrupt-vector-range.patch
+xen-hotplug.patch
+xenoprof-generic.patch
+x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
+x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
+git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch
+x86-elfnote-as-preprocessor-macro.patch
+vsnprintf.patch
+kasprintf.patch
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/lib/vsprintf.c ./lib/vsprintf.c
+--- ../orig-linux-2.6.17/lib/vsprintf.c 2006-06-18 02:49:35.000000000 +0100
++++ ./lib/vsprintf.c 2007-01-08 15:38:07.000000000 +0000
+@@ -187,49 +187,49 @@ static char * number(char * buf, char *
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT))) {
+ while(size-->0) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ if (sign) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = sign;
+ ++buf;
+ }
+ if (type & SPECIAL) {
+ if (base==8) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = '0';
+ ++buf;
+ } else if (base==16) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = '0';
+ ++buf;
+- if (buf <= end)
++ if (buf < end)
+ *buf = digits[33];
+ ++buf;
+ }
+ }
+ if (!(type & LEFT)) {
+ while (size-- > 0) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ while (i < precision--) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = '0';
+ ++buf;
+ }
+ while (i-- > 0) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ while (size-- > 0) {
+- if (buf <= end)
++ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+@@ -272,7 +272,8 @@ int vsnprintf(char *buf, size_t size, co
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+ /* 't' added for ptrdiff_t */
+
+- /* Reject out-of-range values early */
++ /* Reject out-of-range values early. Large positive sizes are
++ used for unknown buffer sizes. */
+ if (unlikely((int) size < 0)) {
+ /* There can be only one.. */
+ static int warn = 1;
+@@ -282,16 +283,17 @@ int vsnprintf(char *buf, size_t size, co
+ }
+
+ str = buf;
+- end = buf + size - 1;
++ end = buf + size;
+
+- if (end < buf - 1) {
+- end = ((void *) -1);
+- size = end - buf + 1;
++ /* Make sure end is always >= buf */
++ if (end < buf) {
++ end = ((void *)-1);
++ size = end - buf;
+ }
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+- if (str <= end)
++ if (str < end)
+ *str = *fmt;
+ ++str;
+ continue;
+@@ -357,17 +359,17 @@ int vsnprintf(char *buf, size_t size, co
+ case 'c':
+ if (!(flags & LEFT)) {
+ while (--field_width > 0) {
+- if (str <= end)
++ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+- if (str <= end)
++ if (str < end)
+ *str = c;
+ ++str;
+ while (--field_width > 0) {
+- if (str <= end)
++ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+@@ -382,18 +384,18 @@ int vsnprintf(char *buf, size_t size, co
+
+ if (!(flags & LEFT)) {
+ while (len < field_width--) {
+- if (str <= end)
++ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+- if (str <= end)
++ if (str < end)
+ *str = *s;
+ ++str; ++s;
+ }
+ while (len < field_width--) {
+- if (str <= end)
++ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+@@ -426,7 +428,7 @@ int vsnprintf(char *buf, size_t size, co
+ continue;
+
+ case '%':
+- if (str <= end)
++ if (str < end)
+ *str = '%';
+ ++str;
+ continue;
+@@ -449,11 +451,11 @@ int vsnprintf(char *buf, size_t size, co
+ break;
+
+ default:
+- if (str <= end)
++ if (str < end)
+ *str = '%';
+ ++str;
+ if (*fmt) {
+- if (str <= end)
++ if (str < end)
+ *str = *fmt;
+ ++str;
+ } else {
+@@ -483,14 +485,13 @@ int vsnprintf(char *buf, size_t size, co
+ str = number(str, end, num, base,
+ field_width, precision, flags);
+ }
+- if (str <= end)
+- *str = '\0';
+- else if (size > 0)
+- /* don't write out a null byte if the buf size is zero */
+- *end = '\0';
+- /* the trailing null byte doesn't count towards the total
+- * ++str;
+- */
++ if (size > 0) {
++ if (str < end)
++ *str = '\0';
++ else
++ end[-1] = '\0';
++ }
++ /* the trailing null byte doesn't count towards the total */
+ return str-buf;
+ }
+
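+
+The conversion above moves vsnprintf() from an inclusive end pointer (the
+last writable byte, hence every "buf <= end" test and the "buf + size - 1"
+underflow hazard) to the exclusive one-past-the-end convention: end = buf +
+size, write only while the cursor is strictly below end, and on truncation
+terminate at end[-1]. The return value still counts what would have been
+written given enough room. For example, via snprintf(), which wraps it:
+
+    char out[4];
+    int n = snprintf(out, sizeof(out), "%d", 123456);
+    /* out = "123" (terminated via end[-1]), n == 6 */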
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/include/linux/elfnote.h ./include/linux/elfnote.h
+--- ../orig-linux-2.6.17/include/linux/elfnote.h 2007-01-08 15:37:15.000000000 +0000
++++ ./include/linux/elfnote.h 2007-01-08 15:37:52.000000000 +0000
+@@ -31,22 +31,24 @@
+ /*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+- * desc data with appropriate padding. The 'desc' argument includes
+- * the assembler pseudo op defining the type of the data: .asciz
+- * "hello, world"
++ * desc data with appropriate padding. The 'desctype' argument is the
++ * assembler pseudo op defining the type of the data e.g. .asciz while
++ * 'descdata' is the data itself e.g. "hello, world".
++ *
++ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
++ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+-.macro ELFNOTE name type desc:vararg
+-.pushsection ".note.\name"
+- .align 4
+- .long 2f - 1f /* namesz */
+- .long 4f - 3f /* descsz */
+- .long \type
+-1:.asciz "\name"
+-2:.align 4
+-3:\desc
+-4:.align 4
+-.popsection
+-.endm
++#define ELFNOTE(name, type, desctype, descdata) \
++.pushsection .note.name ; \
++ .align 4 ; \
++ .long 2f - 1f /* namesz */ ; \
++ .long 4f - 3f /* descsz */ ; \
++ .long type ; \
++1:.asciz "name" ; \
++2:.align 4 ; \
++3:desctype descdata ; \
++4:.align 4 ; \
++.popsection ;
+ #else /* !__ASSEMBLER__ */
+ #include <linux/elf.h>
+ /*
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S
+--- ../orig-linux-2.6.17/arch/i386/kernel/entry.S 2007-01-08 15:33:41.000000000 +0000
++++ ./arch/i386/kernel/entry.S 2007-01-08 15:36:17.000000000 +0000
+@@ -410,7 +410,7 @@ vector=0
+ ENTRY(irq_entries_start)
+ .rept NR_IRQS
+ ALIGN
+-1: pushl $vector-256
++1: pushl $~(vector)
+ jmp common_interrupt
+ .data
+ .long 1b
+@@ -427,7 +427,7 @@ common_interrupt:
+
+ #define BUILD_INTERRUPT(name, nr) \
+ ENTRY(name) \
+- pushl $nr-256; \
++ pushl $~(nr); \
+ SAVE_ALL \
+ movl %esp,%eax; \
+ call smp_/**/name; \
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/irq.c ./arch/i386/kernel/irq.c
+--- ../orig-linux-2.6.17/arch/i386/kernel/irq.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/irq.c 2007-01-08 15:36:18.000000000 +0000
+@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPU
+ */
+ fastcall unsigned int do_IRQ(struct pt_regs *regs)
+ {
+- /* high bits used in ret_from_ code */
+- int irq = regs->orig_eax & 0xff;
++ /* high bit used in ret_from_ code */
++ int irq = ~regs->orig_eax;
+ #ifdef CONFIG_4KSTACKS
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/entry.S ./arch/x86_64/kernel/entry.S
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/entry.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/entry.S 2007-01-08 15:36:18.000000000 +0000
+@@ -596,7 +596,7 @@ retint_kernel:
+ */
+ .macro apicinterrupt num,func
+ INTR_FRAME
+- pushq $\num-256
++ pushq $~(\num)
+ CFI_ADJUST_CFA_OFFSET 8
+ interrupt \func
+ jmp ret_from_intr
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/irq.c ./arch/x86_64/kernel/irq.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/irq.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/irq.c 2007-01-08 15:36:18.000000000 +0000
+@@ -91,8 +91,8 @@ skip:
+ */
+ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+ {
+- /* high bits used in ret_from_ code */
+- unsigned irq = regs->orig_rax & 0xff;
++ /* high bit used in ret_from_ code */
++ unsigned irq = ~regs->orig_rax;
+
+ exit_idle();
+ irq_enter();
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/smp.c ./arch/x86_64/kernel/smp.c
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/smp.c 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/smp.c 2007-01-08 15:36:18.000000000 +0000
+@@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt
+
+ cpu = smp_processor_id();
+ /*
+- * orig_rax contains the interrupt vector - 256.
++ * orig_rax contains the negated interrupt vector.
+ * Use that to determine where the sender put the data.
+ */
+- sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
++ sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
+ f = &per_cpu(flush_state, sender);
+
+ if (!cpu_isset(cpu, f->flush_cpumask))
+diff -pruN ../orig-linux-2.6.17/include/asm-x86_64/hw_irq.h ./include/asm-x86_64/hw_irq.h
+--- ../orig-linux-2.6.17/include/asm-x86_64/hw_irq.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-x86_64/hw_irq.h 2007-01-08 15:36:18.000000000 +0000
+@@ -127,7 +127,7 @@ asmlinkage void IRQ_NAME(nr); \
+ __asm__( \
+ "\n.p2align\n" \
+ "IRQ" #nr "_interrupt:\n\t" \
+- "push $" #nr "-256 ; " \
++ "push $~(" #nr ") ; " \
+ "jmp common_interrupt");
+
+ #if defined(CONFIG_X86_IO_APIC)
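+
+All of the hunks above switch the interrupt stubs from pushing the value
+"vector - 256" to pushing "~vector" into orig_eax/orig_rax. Either encoding
+leaves the sign bit set, which is how the ret_from_ code tells an interrupt
+frame from a syscall, but the old one was undone with "& 0xff" and so could
+only name 256 vectors; the bitwise complement round-trips any vector
+number. In miniature:
+
+    long orig_eax = ~0x123L;          /* stub: vector 0x123, past the old limit */
+    unsigned int irq = ~orig_eax;     /* do_IRQ(): irq == 0x123 */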
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.17/arch/i386/kernel/vmlinux.lds.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/i386/kernel/vmlinux.lds.S 2007-01-08 15:37:15.000000000 +0000
+@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
+ OUTPUT_ARCH(i386)
+ ENTRY(phys_startup_32)
+ jiffies = jiffies_64;
++
++PHDRS {
++ text PT_LOAD FLAGS(5); /* R_E */
++ data PT_LOAD FLAGS(7); /* RWE */
++ note PT_NOTE FLAGS(4); /* R__ */
++}
+ SECTIONS
+ {
+ . = __KERNEL_START;
+@@ -26,7 +32,7 @@ SECTIONS
+ KPROBES_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+- } = 0x9090
++ } :text = 0x9090
+
+ _etext = .; /* End of text section */
+
+@@ -41,7 +47,7 @@ SECTIONS
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
+ *(.data)
+ CONSTRUCTORS
+- }
++ } :data
+
+ . = ALIGN(4096);
+ __nosave_begin = .;
+@@ -168,4 +174,6 @@ SECTIONS
+ STABS_DEBUG
+
+ DWARF_DEBUG
++
++ NOTES
+ }
+diff -pruN ../orig-linux-2.6.17/include/asm-generic/vmlinux.lds.h ./include/asm-generic/vmlinux.lds.h
+--- ../orig-linux-2.6.17/include/asm-generic/vmlinux.lds.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/asm-generic/vmlinux.lds.h 2007-01-08 15:37:15.000000000 +0000
+@@ -166,3 +166,6 @@
+ .stab.index 0 : { *(.stab.index) } \
+ .stab.indexstr 0 : { *(.stab.indexstr) } \
+ .comment 0 : { *(.comment) }
++
++#define NOTES \
++ .notes : { *(.note.*) } :note
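+
+PHDRS declares the ELF program headers explicitly, the ":text", ":data" and
+":note" suffixes steer each output section into its segment, and the NOTES
+macro collects every .note.* input section into a single .notes section in
+the PT_NOTE segment. The payoff is that consumers (boot loaders, dump
+tools) can locate the notes from the program headers alone, with no section
+table, along these lines (sketch; parse_notes() stands in for the
+consumer's own walker):
+
+    /* ehdr/phdr point into an ELF file mapped at 'image' */
+    for (i = 0; i < ehdr->e_phnum; i++)
+        if (phdr[i].p_type == PT_NOTE)
+            parse_notes(image + phdr[i].p_offset, phdr[i].p_filesz);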
+diff -pruN ../orig-linux-2.6.17/include/linux/elfnote.h ./include/linux/elfnote.h
+--- ../orig-linux-2.6.17/include/linux/elfnote.h 1970-01-01 01:00:00.000000000 +0100
++++ ./include/linux/elfnote.h 2007-01-08 15:37:15.000000000 +0000
+@@ -0,0 +1,88 @@
++#ifndef _LINUX_ELFNOTE_H
++#define _LINUX_ELFNOTE_H
++/*
++ * Helper macros to generate ELF Note structures, which are put into a
++ * PT_NOTE segment of the final vmlinux image. These are useful for
++ * including name-value pairs of metadata into the kernel binary (or
++ * modules?) for use by external programs.
++ *
++ * Each note has three parts: a name, a type and a desc. The name is
++ * intended to distinguish the note's originator, so it would be a
++ * company, project, subsystem, etc; it must be in a suitable form for
++ * use in a section name. The type is an integer which is used to tag
++ * the data, and is considered to be within the "name" namespace (so
++ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
++ * "desc" field is the actual data. There are no constraints on the
++ * desc field's contents, though typically they're fairly small.
++ *
++ * All notes from a given NAME are put into a section named
++ * .note.NAME. When the kernel image is finally linked, all the notes
++ * are packed into a single .notes section, which is mapped into the
++ * PT_NOTE segment. Because notes for a given name are grouped into
++ * the same section, they'll all be adjacent in the output file.
++ *
++ * This file defines macros for both C and assembler use. Their
++ * syntax is slightly different, but they're semantically similar.
++ *
++ * See the ELF specification for more detail about ELF notes.
++ */
++
++#ifdef __ASSEMBLER__
++/*
++ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
++ * turn out to be the same size and shape), followed by the name and
++ * desc data with appropriate padding. The 'desc' argument includes
++ * the assembler pseudo op defining the type of the data: .asciz
++ * "hello, world"
++ */
++.macro ELFNOTE name type desc:vararg
++.pushsection ".note.\name"
++ .align 4
++ .long 2f - 1f /* namesz */
++ .long 4f - 3f /* descsz */
++ .long \type
++1:.asciz "\name"
++2:.align 4
++3:\desc
++4:.align 4
++.popsection
++.endm
++#else /* !__ASSEMBLER__ */
++#include <linux/elf.h>
++/*
++ * Use an anonymous structure which matches the shape of
++ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
++ * type of name and desc depend on the macro arguments. "name" must
++ * be a literal string, and "desc" must be passed by value. You may
++ * only define one note per line, since __LINE__ is used to generate
++ * unique symbols.
++ */
++#define _ELFNOTE_PASTE(a,b) a##b
++#define _ELFNOTE(size, name, unique, type, desc) \
++ static const struct { \
++ struct elf##size##_note _nhdr; \
++ unsigned char _name[sizeof(name)] \
++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
++ typeof(desc) _desc \
++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
++ } _ELFNOTE_PASTE(_note_, unique) \
++ __attribute_used__ \
++ __attribute__((section(".note." name), \
++ aligned(sizeof(Elf##size##_Word)), \
++ unused)) = { \
++ { \
++ sizeof(name), \
++ sizeof(desc), \
++ type, \
++ }, \
++ name, \
++ desc \
++ }
++#define ELFNOTE(size, name, type, desc) \
++ _ELFNOTE(size, name, __LINE__, type, desc)
++
++#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
++#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
++#endif /* __ASSEMBLER__ */
++
++#endif /* _LINUX_ELFNOTE_H */
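+
+A C-side use of the structure-generating flavour, for illustration only
+(the name and type are made up; real callers arrived with the later Xen
+note patches):
+
+    #include <linux/elfnote.h>
+
+    ELFNOTE32("XYZCo", 0x2a, 0xdeadbeef);   /* one note in .note.XYZCo */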
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S
+--- ../orig-linux-2.6.17/arch/x86_64/kernel/vmlinux.lds.S 2006-06-18 02:49:35.000000000 +0100
++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-08 15:37:29.000000000 +0000
+@@ -14,6 +14,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86
+ OUTPUT_ARCH(i386:x86-64)
+ ENTRY(phys_startup_64)
+ jiffies_64 = jiffies;
++PHDRS {
++ text PT_LOAD FLAGS(5); /* R_E */
++ data PT_LOAD FLAGS(7); /* RWE */
++ user PT_LOAD FLAGS(7); /* RWE */
++ note PT_NOTE FLAGS(4); /* R__ */
++}
+ SECTIONS
+ {
+ . = __START_KERNEL;
+@@ -32,7 +38,7 @@ SECTIONS
+ KPROBES_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+- } = 0x9090
++ } :text = 0x9090
+ /* out-of-line lock text */
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
+
+@@ -49,17 +55,10 @@ SECTIONS
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ *(.data)
+ CONSTRUCTORS
+- }
++ } :data
+
+ _edata = .; /* End of data section */
+
+- __bss_start = .; /* BSS */
+- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+- *(.bss.page_aligned)
+- *(.bss)
+- }
+- __bss_stop = .;
+-
+ . = ALIGN(PAGE_SIZE);
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+@@ -81,7 +80,7 @@ SECTIONS
+ #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+ . = VSYSCALL_ADDR;
+- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
++ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
+ __vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+@@ -124,7 +123,7 @@ SECTIONS
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+- }
++ } :data
+
+ . = ALIGN(4096);
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+@@ -194,6 +193,14 @@ SECTIONS
+ . = ALIGN(4096);
+ __nosave_end = .;
+
++ __bss_start = .; /* BSS */
++ . = ALIGN(4096);
++ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
++ *(.bss.page_aligned)
++ *(.bss)
++ }
++ __bss_stop = .;
++
+ _end = . ;
+
+ /* Sections to be discarded */
+@@ -207,4 +214,6 @@ SECTIONS
+ STABS_DEBUG
+
+ DWARF_DEBUG
++
++ NOTES
+ }
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/fs/proc/proc_misc.c ./fs/proc/proc_misc.c
+--- ../orig-linux-2.6.17/fs/proc/proc_misc.c 2006-06-18 02:49:35.000000000 +0100
++++ ./fs/proc/proc_misc.c 2007-01-08 15:36:49.000000000 +0000
+@@ -467,7 +467,7 @@ static int show_stat(struct seq_file *p,
+ (unsigned long long)cputime64_to_clock_t(irq),
+ (unsigned long long)cputime64_to_clock_t(softirq),
+ (unsigned long long)cputime64_to_clock_t(steal));
+- for_each_online_cpu(i) {
++ for_each_cpu(i) {
+
+ /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+ user = kstat_cpu(i).cpustat.user;
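+
+In 2.6.17, for_each_cpu() iterated cpu_possible_map whereas
+for_each_online_cpu() iterates cpu_online_map (the former was later renamed
+for_each_possible_cpu). After this hunk /proc/stat keeps a row for every
+possible CPU, so its layout stays stable across Xen vCPU hotplug while some
+CPUs are offline. The era's definitions were, from memory of
+include/linux/cpumask.h:
+
+    #define for_each_cpu(cpu)         for_each_cpu_mask((cpu), cpu_possible_map)
+    #define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)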
--- /dev/null
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
+--- ../orig-linux-2.6.17/drivers/oprofile/buffer_sync.c 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/buffer_sync.c 2007-01-08 15:36:54.000000000 +0000
+@@ -6,6 +6,10 @@
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+ * This is the core of the buffer management. Each
+ * CPU buffer is processed and entered into the
+ * global event buffer. Such processing is necessary
+@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_
+ static DEFINE_SPINLOCK(task_mortuary);
+ static void process_task_mortuary(void);
+
++static int cpu_current_domain[NR_CPUS];
+
+ /* Take ownership of the task struct and place it on the
+ * list for processing. Only after two full buffer syncs
+@@ -146,6 +151,11 @@ static void end_sync(void)
+ int sync_start(void)
+ {
+ int err;
++ int i;
++
++ for (i = 0; i < NR_CPUS; i++) {
++ cpu_current_domain[i] = COORDINATOR_DOMAIN;
++ }
+
+ start_cpu_work();
+
+@@ -275,15 +285,31 @@ static void add_cpu_switch(int i)
+ last_cookie = INVALID_COOKIE;
+ }
+
+-static void add_kernel_ctx_switch(unsigned int in_kernel)
++static void add_cpu_mode_switch(unsigned int cpu_mode)
+ {
+ add_event_entry(ESCAPE_CODE);
+- if (in_kernel)
+- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+- else
+- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
++ switch (cpu_mode) {
++ case CPU_MODE_USER:
++ add_event_entry(USER_ENTER_SWITCH_CODE);
++ break;
++ case CPU_MODE_KERNEL:
++ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
++ break;
++ case CPU_MODE_XEN:
++ add_event_entry(XEN_ENTER_SWITCH_CODE);
++ break;
++ default:
++ break;
++ }
+ }
+-
++
++static void add_domain_switch(unsigned long domain_id)
++{
++ add_event_entry(ESCAPE_CODE);
++ add_event_entry(DOMAIN_SWITCH_CODE);
++ add_event_entry(domain_id);
++}
++
+ static void
+ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+ {
+@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc
+ * for later lookup from userspace.
+ */
+ static int
+-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
+ {
+- if (in_kernel) {
++ if (cpu_mode >= CPU_MODE_KERNEL) {
+ add_sample_entry(s->eip, s->event);
+ return 1;
+ } else if (mm) {
+@@ -496,15 +522,21 @@ void sync_buffer(int cpu)
+ struct mm_struct *mm = NULL;
+ struct task_struct * new;
+ unsigned long cookie = 0;
+- int in_kernel = 1;
++ int cpu_mode = 1;
+ unsigned int i;
+ sync_buffer_state state = sb_buffer_start;
+ unsigned long available;
++ int domain_switch = 0;
+
+ down(&buffer_sem);
+
+ add_cpu_switch(cpu);
+
++ /* We need to assign the first samples in this CPU buffer to the same
++ domain that we were processing at the end of the last sync_buffer */
++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
++ add_domain_switch(cpu_current_domain[cpu]);
++ }
+ /* Remember, only we can modify tail_pos */
+
+ available = get_slots(cpu_buf);
+@@ -512,16 +544,18 @@ void sync_buffer(int cpu)
+ for (i = 0; i < available; ++i) {
+ struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
+
+- if (is_code(s->eip)) {
+- if (s->event <= CPU_IS_KERNEL) {
+- /* kernel/userspace switch */
+- in_kernel = s->event;
++ if (is_code(s->eip) && !domain_switch) {
++ if (s->event <= CPU_MODE_XEN) {
++ /* xen/kernel/userspace switch */
++ cpu_mode = s->event;
+ if (state == sb_buffer_start)
+ state = sb_sample_start;
+- add_kernel_ctx_switch(s->event);
++ add_cpu_mode_switch(s->event);
+ } else if (s->event == CPU_TRACE_BEGIN) {
+ state = sb_bt_start;
+ add_trace_begin();
++ } else if (s->event == CPU_DOMAIN_SWITCH) {
++ domain_switch = 1;
+ } else {
+ struct mm_struct * oldmm = mm;
+
+@@ -535,11 +569,21 @@ void sync_buffer(int cpu)
+ add_user_ctx_switch(new, cookie);
+ }
+ } else {
+- if (state >= sb_bt_start &&
+- !add_sample(mm, s, in_kernel)) {
+- if (state == sb_bt_start) {
+- state = sb_bt_ignore;
+- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
++ if (domain_switch) {
++ cpu_current_domain[cpu] = s->eip;
++ add_domain_switch(s->eip);
++ domain_switch = 0;
++ } else {
++ if (cpu_current_domain[cpu] !=
++ COORDINATOR_DOMAIN) {
++ add_sample_entry(s->eip, s->event);
++ }
++ else if (state >= sb_bt_start &&
++ !add_sample(mm, s, cpu_mode)) {
++ if (state == sb_bt_start) {
++ state = sb_bt_ignore;
++ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
++ }
+ }
+ }
+ }
+@@ -548,6 +592,11 @@ void sync_buffer(int cpu)
+ }
+ release_mm(mm);
+
++ /* We reset domain to COORDINATOR at each CPU switch */
++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
++ add_domain_switch(COORDINATOR_DOMAIN);
++ }
++
+ mark_done(cpu);
+
+ up(&buffer_sem);
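+
+The event buffer handed to the userspace daemon is a flat stream of
+unsigned longs in which ESCAPE_CODE introduces an out-of-band record. The
+changes above extend that framing with domain switches, and samples logged
+while a passive (non-coordinator) domain is current are emitted raw, since
+dom0 has no mm against which to resolve their addresses. Schematically,
+the daemon sees:
+
+    /* ... ordinary samples for the coordinator domain ...
+     * ESCAPE_CODE, DOMAIN_SWITCH_CODE, <domid>    enter passive domain
+     * <eip>, <event>                              raw passive-domain sample
+     * ESCAPE_CODE, DOMAIN_SWITCH_CODE, COORDINATOR_DOMAIN
+     *                                             reset at the CPU switch
+     */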
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
+--- ../orig-linux-2.6.17/drivers/oprofile/cpu_buffer.c 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/cpu_buffer.c 2007-01-08 15:36:54.000000000 +0000
+@@ -6,6 +6,10 @@
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
+ * Each CPU has a local buffer that stores PC value/event
+ * pairs. We also log context switches when we notice them.
+ * Eventually each CPU's buffer is processed into the global
+@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *);
+ #define DEFAULT_TIMER_EXPIRE (HZ / 10)
+ static int work_enabled;
+
++static int32_t current_domain = COORDINATOR_DOMAIN;
++
+ void free_cpu_buffers(void)
+ {
+ int i;
+@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void)
+ goto fail;
+
+ b->last_task = NULL;
+- b->last_is_kernel = -1;
++ b->last_cpu_mode = -1;
+ b->tracing = 0;
+ b->buffer_size = buffer_size;
+ b->tail_pos = 0;
+@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp
+ * collected will populate the buffer with proper
+ * values to initialize the buffer
+ */
+- cpu_buf->last_is_kernel = -1;
++ cpu_buf->last_cpu_mode = -1;
+ cpu_buf->last_task = NULL;
+ }
+
+@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu
+ * because of the head/tail separation of the writer and reader
+ * of the CPU buffer.
+ *
+- * is_kernel is needed because on some architectures you cannot
++ * cpu_mode is needed because on some architectures you cannot
+ * tell if you are in kernel or user space simply by looking at
+- * pc. We tag this in the buffer by generating kernel enter/exit
+- * events whenever is_kernel changes
++ * pc. We tag this in the buffer by generating kernel/user (and xen)
++ * enter events whenever cpu_mode changes
+ */
+ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
+- int is_kernel, unsigned long event)
++ int cpu_mode, unsigned long event)
+ {
+ struct task_struct * task;
+
+@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp
+ return 0;
+ }
+
+- is_kernel = !!is_kernel;
+-
+ task = current;
+
+ /* notice a switch from user->kernel or vice versa */
+- if (cpu_buf->last_is_kernel != is_kernel) {
+- cpu_buf->last_is_kernel = is_kernel;
+- add_code(cpu_buf, is_kernel);
++ if (cpu_buf->last_cpu_mode != cpu_mode) {
++ cpu_buf->last_cpu_mode = cpu_mode;
++ add_code(cpu_buf, cpu_mode);
+ }
+-
++
+ /* notice a task switch */
+- if (cpu_buf->last_task != task) {
++ /* if not processing other domain samples */
++ if ((cpu_buf->last_task != task) &&
++ (current_domain == COORDINATOR_DOMAIN)) {
+ cpu_buf->last_task = task;
+ add_code(cpu_buf, (unsigned long)task);
+ }
+@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc
+ add_sample(cpu_buf, pc, 0);
+ }
+
++int oprofile_add_domain_switch(int32_t domain_id)
++{
++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
++
++ /* should have space for switching into and out of a domain
++ (2 slots each), plus one sample and one cpu mode switch */
++ if (((nr_available_slots(cpu_buf) < 6) &&
++ (domain_id != COORDINATOR_DOMAIN)) ||
++ (nr_available_slots(cpu_buf) < 2))
++ return 0;
++
++ add_code(cpu_buf, CPU_DOMAIN_SWITCH);
++ add_sample(cpu_buf, domain_id, 0);
++
++ current_domain = domain_id;
++
++ return 1;
++}
++
+ /*
+ * This serves to avoid cpu buffer overflow, and makes sure
+ * the task mortuary progresses
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
+--- ../orig-linux-2.6.17/drivers/oprofile/cpu_buffer.h 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/cpu_buffer.h 2007-01-08 15:36:54.000000000 +0000
+@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
+ volatile unsigned long tail_pos;
+ unsigned long buffer_size;
+ struct task_struct * last_task;
+- int last_is_kernel;
++ int last_cpu_mode;
+ int tracing;
+ struct op_sample * buffer;
+ unsigned long sample_received;
+@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu
+ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
+
+ /* transient events for the CPU buffer -> event buffer */
+-#define CPU_IS_KERNEL 1
+-#define CPU_TRACE_BEGIN 2
++#define CPU_MODE_USER 0
++#define CPU_MODE_KERNEL 1
++#define CPU_MODE_XEN 2
++#define CPU_TRACE_BEGIN 3
++#define CPU_DOMAIN_SWITCH 4
+
+ #endif /* OPROFILE_CPU_BUFFER_H */
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
+--- ../orig-linux-2.6.17/drivers/oprofile/event_buffer.h 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/event_buffer.h 2007-01-08 15:36:54.000000000 +0000
+@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void);
+ #define CPU_SWITCH_CODE 2
+ #define COOKIE_SWITCH_CODE 3
+ #define KERNEL_ENTER_SWITCH_CODE 4
+-#define KERNEL_EXIT_SWITCH_CODE 5
++#define USER_ENTER_SWITCH_CODE 5
+ #define MODULE_LOADED_CODE 6
+ #define CTX_TGID_CODE 7
+ #define TRACE_BEGIN_CODE 8
+ #define TRACE_END_CODE 9
++#define XEN_ENTER_SWITCH_CODE 10
++#define DOMAIN_SWITCH_CODE 11
+
+ #define INVALID_COOKIE ~0UL
+ #define NO_COOKIE 0UL
+
++/* Constant used to refer to coordinator domain (Xen) */
++#define COORDINATOR_DOMAIN -1
++
+ /* add data to the event buffer */
+ void add_event_entry(unsigned long data);
+
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
+--- ../orig-linux-2.6.17/drivers/oprofile/oprof.c 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/oprof.c 2007-01-08 15:36:54.000000000 +0000
+@@ -5,6 +5,10 @@
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+ #include <linux/kernel.h>
+@@ -19,7 +23,7 @@
+ #include "cpu_buffer.h"
+ #include "buffer_sync.h"
+ #include "oprofile_stats.h"
+-
++
+ struct oprofile_operations oprofile_ops;
+
+ unsigned long oprofile_started;
+@@ -33,6 +37,32 @@ static DECLARE_MUTEX(start_sem);
+ */
+ static int timer = 0;
+
++int oprofile_set_active(int active_domains[], unsigned int adomains)
++{
++ int err;
++
++ if (!oprofile_ops.set_active)
++ return -EINVAL;
++
++ down(&start_sem);
++ err = oprofile_ops.set_active(active_domains, adomains);
++ up(&start_sem);
++ return err;
++}
++
++int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
++{
++ int err;
++
++ if (!oprofile_ops.set_passive)
++ return -EINVAL;
++
++ down(&start_sem);
++ err = oprofile_ops.set_passive(passive_domains, pdomains);
++ up(&start_sem);
++ return err;
++}
++
+ int oprofile_setup(void)
+ {
+ int err;
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
+--- ../orig-linux-2.6.17/drivers/oprofile/oprof.h 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/oprof.h 2007-01-08 15:36:54.000000000 +0000
+@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
+ void oprofile_timer_init(struct oprofile_operations * ops);
+
+ int oprofile_set_backtrace(unsigned long depth);
++
++int oprofile_set_active(int active_domains[], unsigned int adomains);
++int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
+
+ #endif /* OPROF_H */
+diff -pruN ../orig-linux-2.6.17/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
+--- ../orig-linux-2.6.17/drivers/oprofile/oprofile_files.c 2006-06-18 02:49:35.000000000 +0100
++++ ./drivers/oprofile/oprofile_files.c 2007-01-08 15:36:54.000000000 +0000
+@@ -5,15 +5,21 @@
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
++ *
++ * Modified by Aravind Menon for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+ #include <linux/fs.h>
+ #include <linux/oprofile.h>
++#include <asm/uaccess.h>
++#include <linux/ctype.h>
+
+ #include "event_buffer.h"
+ #include "oprofile_stats.h"
+ #include "oprof.h"
+-
++
+ unsigned long fs_buffer_size = 131072;
+ unsigned long fs_cpu_buffer_size = 8192;
+ unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
+ static struct file_operations dump_fops = {
+ .write = dump_write,
+ };
+-
++
++#define TMPBUFSIZE 512
++
++static unsigned int adomains = 0;
++static int active_domains[MAX_OPROF_DOMAINS + 1];
++static DEFINE_MUTEX(adom_mutex);
++
++static ssize_t adomain_write(struct file * file, char const __user * buf,
++ size_t count, loff_t * offset)
++{
++ char *tmpbuf;
++ char *startp, *endp;
++ int i;
++ unsigned long val;
++ ssize_t retval = count;
++
++ if (*offset)
++ return -EINVAL;
++ if (count > TMPBUFSIZE - 1)
++ return -EINVAL;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ if (copy_from_user(tmpbuf, buf, count)) {
++ kfree(tmpbuf);
++ return -EFAULT;
++ }
++ tmpbuf[count] = 0;
++
++ mutex_lock(&adom_mutex);
++
++ startp = tmpbuf;
++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
++ val = simple_strtoul(startp, &endp, 0);
++ if (endp == startp)
++ break;
++ while (ispunct(*endp) || isspace(*endp))
++ endp++;
++ active_domains[i] = val;
++ if (active_domains[i] != val)
++ /* Overflow, force error below */
++ i = MAX_OPROF_DOMAINS + 1;
++ startp = endp;
++ }
++ /* Force error on trailing junk */
++ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
++
++ kfree(tmpbuf);
++
++ if (adomains > MAX_OPROF_DOMAINS
++ || oprofile_set_active(active_domains, adomains)) {
++ adomains = 0;
++ retval = -EINVAL;
++ }
++
++ mutex_unlock(&adom_mutex);
++ return retval;
++}
++
++static ssize_t adomain_read(struct file * file, char __user * buf,
++ size_t count, loff_t * offset)
++{
++ char * tmpbuf;
++ size_t len;
++ int i;
++ ssize_t retval;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ mutex_lock(&adom_mutex);
++
++ len = 0;
++ for (i = 0; i < adomains; i++)
++ len += snprintf(tmpbuf + len,
++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
++ "%u ", active_domains[i]);
++ WARN_ON(len > TMPBUFSIZE);
++ if (len != 0 && len <= TMPBUFSIZE)
++ tmpbuf[len-1] = '\n';
++
++ mutex_unlock(&adom_mutex);
++
++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
++
++ kfree(tmpbuf);
++ return retval;
++}
++
++
++static struct file_operations active_domain_ops = {
++ .read = adomain_read,
++ .write = adomain_write,
++};
++
++static unsigned int pdomains = 0;
++static int passive_domains[MAX_OPROF_DOMAINS];
++static DEFINE_MUTEX(pdom_mutex);
++
++static ssize_t pdomain_write(struct file * file, char const __user * buf,
++ size_t count, loff_t * offset)
++{
++ char *tmpbuf;
++ char *startp, *endp;
++ int i;
++ unsigned long val;
++ ssize_t retval = count;
++
++ if (*offset)
++ return -EINVAL;
++ if (count > TMPBUFSIZE - 1)
++ return -EINVAL;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ if (copy_from_user(tmpbuf, buf, count)) {
++ kfree(tmpbuf);
++ return -EFAULT;
++ }
++ tmpbuf[count] = 0;
++
++ mutex_lock(&pdom_mutex);
++
++ startp = tmpbuf;
++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
++ val = simple_strtoul(startp, &endp, 0);
++ if (endp == startp)
++ break;
++ while (ispunct(*endp) || isspace(*endp))
++ endp++;
++ passive_domains[i] = val;
++ if (passive_domains[i] != val)
++ /* Overflow, force error below */
++ i = MAX_OPROF_DOMAINS + 1;
++ startp = endp;
++ }
++ /* Force error on trailing junk */
++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
++
++ kfree(tmpbuf);
++
++ if (pdomains > MAX_OPROF_DOMAINS
++ || oprofile_set_passive(passive_domains, pdomains)) {
++ pdomains = 0;
++ retval = -EINVAL;
++ }
++
++ mutex_unlock(&pdom_mutex);
++ return retval;
++}
++
++static ssize_t pdomain_read(struct file * file, char __user * buf,
++ size_t count, loff_t * offset)
++{
++ char * tmpbuf;
++ size_t len;
++ int i;
++ ssize_t retval;
++
++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
++ return -ENOMEM;
++
++ mutex_lock(&pdom_mutex);
++
++ len = 0;
++ for (i = 0; i < pdomains; i++)
++ len += snprintf(tmpbuf + len,
++ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
++ "%u ", passive_domains[i]);
++ WARN_ON(len > TMPBUFSIZE);
++ if (len != 0 && len <= TMPBUFSIZE)
++ tmpbuf[len-1] = '\n';
++
++ mutex_unlock(&pdom_mutex);
++
++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
++
++ kfree(tmpbuf);
++ return retval;
++}
++
++static struct file_operations passive_domain_ops = {
++ .read = pdomain_read,
++ .write = pdomain_write,
++};
++
+ void oprofile_create_files(struct super_block * sb, struct dentry * root)
+ {
+ oprofilefs_create_file(sb, root, "enable", &enable_fops);
+ oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
++ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
+ oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
+ oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
+ oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
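+
+The two new control files accept a list of domain ids separated by
+whitespace or punctuation, rejecting trailing junk and lists longer than
+MAX_OPROF_DOMAINS; reading a file back returns the accepted list. A
+userspace sketch (assuming oprofilefs is mounted at /dev/oprofile, as the
+oprofile daemon does):
+
+    #include <fcntl.h>
+    #include <unistd.h>
+
+    static int set_passive_domains(void)
+    {
+        int fd = open("/dev/oprofile/passive_domains", O_WRONLY);
+        if (fd < 0)
+            return -1;
+        write(fd, "1 2", 3);    /* profile domains 1 and 2 passively */
+        close(fd);
+        return 0;
+    }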
+diff -pruN ../orig-linux-2.6.17/include/linux/oprofile.h ./include/linux/oprofile.h
+--- ../orig-linux-2.6.17/include/linux/oprofile.h 2006-06-18 02:49:35.000000000 +0100
++++ ./include/linux/oprofile.h 2007-01-08 15:36:54.000000000 +0000
+@@ -16,6 +16,8 @@
+ #include <linux/types.h>
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
++
++#include <xen/interface/xenoprof.h>
+
+ struct super_block;
+ struct dentry;
+@@ -27,6 +29,11 @@ struct oprofile_operations {
+ /* create any necessary configuration files in the oprofile fs.
+ * Optional. */
+ int (*create_files)(struct super_block * sb, struct dentry * root);
++ /* setup active domains with Xen */
++ int (*set_active)(int *active_domains, unsigned int adomains);
++ /* setup passive domains with Xen */
++ int (*set_passive)(int *passive_domains, unsigned int pdomains);
++
+ /* Do any necessary interrupt setup. Optional. */
+ int (*setup)(void);
+ /* Do any necessary interrupt shutdown. Optional. */
+@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i
+ /* add a backtrace entry, to be called from the ->backtrace callback */
+ void oprofile_add_trace(unsigned long eip);
+
++/* add a domain switch entry */
++int oprofile_add_domain_switch(int32_t domain_id);
+
+ /**
+ * Create a file of the given name as a child of the given root, with