Merge branch 'tracing/core-v2' into tracing-for-linus

author Ingo Molnar <mingo@elte.hu>

Wed, 1 Apr 2009 19:54:19 +0000 (21:54 +0200)

committer Ingo Molnar <mingo@elte.hu>

Wed, 1 Apr 2009 22:49:02 +0000 (00:49 +0200)
author Ingo Molnar <mingo@elte.hu>
Wed, 1 Apr 2009 19:54:19 +0000 (21:54 +0200)
committer Ingo Molnar <mingo@elte.hu>
Wed, 1 Apr 2009 22:49:02 +0000 (00:49 +0200)
diff --combined Documentation/kernel-parameters.txt

index 240257dd4238fc55608989541b79c37e40101abc,7643483bdd6af4e2729c34df7594903f45292a09..ebdeb7c4330e032f95e104e607b3dcb34a5c27ab
--- 1/Documentation/kernel-parameters.txt
--- 2/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -44,12 -44,12 +44,13 @@@ parameter is applicable
         FB      The frame buffer device is enabled.
         HW      Appropriate hardware is enabled.
         IA-64   IA-64 architecture is enabled.
+ +      IMA     Integrity measurement architecture is enabled.
         IOSCHED More than one I/O scheduler is enabled.
         IP_PNP  IP DHCP, BOOTP, or RARP is enabled.
         ISAPNP  ISA PnP code is enabled.
         ISDN    Appropriate ISDN support is enabled.
         JOY     Appropriate joystick support is enabled.
+       KMEMTRACE kmemtrace is enabled.
         LIBATA  Libata driver is enabled
         LP      Printer support is enabled.
         LOOP    Loopback device support is enabled.
@@@ -492,23 -492,11 +493,23 @@@ and is between 256 and 4096 characters
                         Range: 0 - 8192
                         Default: 64
   
+ +      dma_debug=off   If the kernel is compiled with DMA_API_DEBUG support
+ +                      this option disables the debugging code at boot.
+ +
+ +      dma_debug_entries=<number>
+ +                      This option allows tuning the number of preallocated
+ +                      entries for DMA-API debugging code. One entry is
+ +                      required per DMA-API allocation. Use this if the
+ +                      DMA-API debugging code disables itself because the
+ +                      architectural default is too low.
+ +
         hpet=           [X86-32,HPET] option to control HPET usage
- -                      Format: { enable (default) | disable | force }
+ +                      Format: { enable (default) | disable | force |
+ +                              verbose }
                         disable: disable HPET and use PIT instead
                         force: allow force enabled of undocumented chips (ICH4,
                         VIA, nVidia)
+ +                      verbose: show contents of HPET registers during setup
   
         com20020=       [HW,NET] ARCnet - COM20020 chipset
                         Format:
@@@ -842,15 -830,6 +843,15 @@@
   
         hvc_iucv=       [S390] Number of z/VM IUCV hypervisor console (HVC)
                                terminal devices. Valid values: 0..8
+ +      hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
+ +                             If specified, z/VM IUCV HVC accepts connections
+ +                             from listed z/VM user IDs only.
+ +
+ +      i2c_bus=        [HW] Override the default board specific I2C bus speed
+ +                           or register an additional I2C bus that is not
+ +                           registered from board initialization code.
+ +                           Format:
+ +                           <bus_id>,<clkrate>
   
         i8042.debug     [HW] Toggle i8042 debug mode
         i8042.direct    [HW] Put keyboard port into non-translated mode
@@@ -924,15 -903,6 +925,15 @@@
         ihash_entries=  [KNL]
                         Set number of hash buckets for inode cache.
   
+ +      ima_audit=      [IMA]
+ +                      Format: { "0" | "1" }
+ +                      0 -- basic integrity auditing messages. (Default)
+ +                      1 -- enable informational integrity auditing messages.
+ +
+ +      ima_hash=       [IMA]
+ +                      Format: { "sha1" | "md5" }
+ +                      default: "sha1"
+ +
         in2000=         [HW,SCSI]
                         See header of drivers/scsi/in2000.c.
   
@@@ -1078,6 -1048,15 +1079,15 @@@
                         use the HighMem zone if it exists, and the Normal
                         zone if it does not.
   
+       kmemtrace.enable=       [KNL,KMEMTRACE] Format: { yes | no }
+                               Controls whether kmemtrace is enabled
+                               at boot-time.
+ 
+       kmemtrace.subbufs=n     [KNL,KMEMTRACE] Overrides the number of
+                       subbufs kmemtrace's relay channel has. Set this
+                       higher than default (KMEMTRACE_N_SUBBUFS in code) if
+                       you experience buffer overruns.
+ 
         movablecore=nn[KMG]     [KNL,X86-32,IA-64,PPC,X86-64] This parameter
                         is similar to kernelcore except it specifies the
                         amount of memory used for migratable allocations.
@@@ -1695,8 -1674,6 +1705,8 @@@
                         See also Documentation/blockdev/paride.txt.
   
         pci=option[,option...]  [PCI] various PCI subsystem options:
+ +              earlydump       [X86] dump PCI config space before the kernel
+ +                              changes anything
                 off             [X86] don't probe for the PCI bus
                 bios            [X86-32] force use of PCI BIOS, don't access
                                 the hardware directly. Use this if your machine
@@@ -1796,15 -1773,6 +1806,15 @@@
                 cbmemsize=nn[KMG]       The fixed amount of bus space which is
                                 reserved for the CardBus bridge's memory
                                 window. The default value is 64 megabytes.
+ +              resource_alignment=
+ +                              Format:
+ +                              [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+ +                              Specifies alignment and device to reassign
+ +                              aligned memory resources.
+ +                              If <order of align> is not specified,
+ +                              PAGE_SIZE is used as alignment.
+ +                              PCI-PCI bridge can be specified, if resource
+ +                              windows need to be expanded.
   
         pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
                         Management.
@@@ -1863,6 -1831,11 +1873,6 @@@
                         autoconfiguration.
                         Ranges are in pairs (memory base and size).
   
- -      dynamic_printk  Enables pr_debug()/dev_dbg() calls if
- -                      CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
- -                      These can also be switched on/off via
- -                      <debugfs>/dynamic_printk/modules
- -
         print-fatal-signals=
                         [KNL] debug: print fatal signals
                         print-fatal-signals=1: print segfault info to
@@@ -2051,6 -2024,15 +2061,6 @@@
                         If enabled at boot time, /selinux/disable can be used
                         later to disable prior to initial policy load.
   
- -      selinux_compat_net =
- -                      [SELINUX] Set initial selinux_compat_net flag value.
- -                        Format: { "0" | "1" }
- -                        0 -- use new secmark-based packet controls
- -                        1 -- use legacy packet controls
- -                        Default value is 0 (preferred).
- -                        Value can be changed at runtime via
- -                        /selinux/compat_net.
- -
         serialnumber    [BUGS=X86-32]
   
         shapers=        [NET]
@@@ -2362,6 -2344,8 +2372,8 @@@
   
         tp720=          [HW,PS2]
   
+       trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
+ 
         trix=           [HW,OSS] MediaTrix AudioTrix Pro
                         Format:
                         <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
diff --combined Documentation/sysrq.txt

index afa2946892da0e8c785d6b1d2fe879751b397499,535aeb936dbce90acda756ada8988f9dd43a88a2..cf42b820ff9d5002fdf8a9cd280731a593544957
--- 1/Documentation/sysrq.txt
--- 2/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@@ -81,8 -81,6 +81,8 @@@ On all -  write a character to /proc/sy
   
   'i'     - Send a SIGKILL to all processes, except for init.
   
+ +'j'     - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl.
+ +
   'k'     - Secure Access Key (SAK) Kills all programs on the current virtual
             console. NOTE: See important comments below in SAK section.
   
@@@ -115,6 -113,8 +115,8 @@@
   
   'x'   - Used by xmon interface on ppc/powerpc platforms.
   
+ 'z'   - Dump the ftrace buffer
+ 
   '0'-'9' - Sets the console log level, controlling which kernel messages
             will be printed to your console. ('0', for example would make
             it so that only emergency messages like PANICs or OOPSes would
@@@ -162,9 -162,6 +164,9 @@@ t'E'rm and k'I'll are useful if you hav
   are unable to kill any other way, especially if it's spawning other
   processes.
   
+ +"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen
+ +(probably root) filesystem via the FIFREEZE ioctl.
+ +
   *  Sometimes SysRq seems to get 'stuck' after using it, what can I do?
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   That happens to me, also. I've found that tapping shift, alt, and control
diff --combined MAINTAINERS

index 068f5fb900209102a96e99d79a0adcece8238d6c,dd3c11c4c3d2a261b6696caf1f6cf924757a87af..25a17b49cfe10ca8dc8111636bec3950566a6ee9
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -357,7 -357,6 +357,7 @@@ S: Odd Fixes for 2.4; Maintained for 2.
   P:    Ivan Kokshaysky
   M:    ink@jurassic.park.msu.ru
   S:    Maintained for 2.4; PCI support for 2.6.
+ +L:    linux-alpha@vger.kernel.org
   
   AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
   P:    Thomas Dahlmann
@@@ -503,13 -502,6 +503,13 @@@ P:       Richard Purdi
   M:    rpurdie@rpsys.net
   S:    Maintained
   
+ +ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE
+ +P:    Paulius Zaleckas
+ +M:    paulius.zaleckas@teltonika.lt
+ +L:    linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+ +T:    git gitorious.org/linux-gemini/mainline.git
+ +S:    Maintained
+ +
   ARM/EZX SMARTPHONES (A780, A910, A1200, E680, ROKR E2 and ROKR E6)
   P:    Daniel Ribeiro
   M:    drwyrm@gmail.com
@@@ -521,12 -513,6 +521,12 @@@ L:       openezx-devel@lists.openezx.org (sub
   W:    http://www.openezx.org/
   S:    Maintained
   
+ +ARM/FARADAY FA526 PORT
+ +P:    Paulius Zaleckas
+ +M:    paulius.zaleckas@teltonika.lt
+ +L:    linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+ +S:    Maintained
+ +
   ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
   P:    Sascha Hauer
   M:    kernel@pengutronix.de
@@@ -636,7 -622,7 +636,7 @@@ P: Dirk Opfe
   M:    dirk@opfer-online.de
   S:    Maintained
   
- -ARM/PALMTX SUPPORT
+ +ARM/PALMTX,PALMT5,PALMLD SUPPORT
   P:    Marek Vasut
   M:    marek.vasut@gmail.com
   W:    http://hackndev.com
@@@ -779,14 -765,6 +779,14 @@@ L:       linux-wireless@vger.kernel.or
   L:    ath9k-devel@lists.ath9k.org
   S:    Supported
   
+ +ATHEROS AR9170 WIRELESS DRIVER
+ +P:    Christian Lamparter
+ +M:    chunkeey@web.de
+ +L:    linux-wireless@vger.kernel.org
+ +W:    http://wireless.kernel.org/en/users/Drivers/ar9170
+ +S:    Maintained
+ +F:    drivers/net/wireless/ar9170/
+ +
   ATI_REMOTE2 DRIVER
   P:    Ville Syrjala
   M:    syrjala@sci.fi
@@@ -1033,8 -1011,6 +1033,8 @@@ L:      netdev@vger.kernel.or
   S:    Supported
   
   BROADCOM TG3 GIGABIT ETHERNET DRIVER
+ +P:    Matt Carlson
+ +M:    mcarlson@broadcom.com
   P:    Michael Chan
   M:    mchan@broadcom.com
   L:    netdev@vger.kernel.org
@@@ -1064,6 -1040,7 +1064,6 @@@ BTTV VIDEO4LINUX DRIVE
   P:    Mauro Carvalho Chehab
   M:    mchehab@infradead.org
   L:    linux-media@vger.kernel.org
- -L:    video4linux-list@redhat.com
   W:    http://linuxtv.org
   T:    git kernel.org:/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
   S:    Maintained
@@@ -1292,12 -1269,6 +1292,12 @@@ L:    linux-crypto@vger.kernel.or
   T:    git kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
   S:    Maintained
   
+ +CRYPTOGRAPHIC RANDOM NUMBER GENERATOR
+ +P:    Neil Horman
+ +M:    nhorman@tuxdriver.com
+ +L:    linux-crypto@vger.kernel.org
+ +S:    Maintained
+ +
   CS5535 Audio ALSA driver
   P:    Jaya Kumar
   M:    jayakumar.alsa@gmail.com
@@@ -2202,12 -2173,25 +2202,12 @@@ L:   linux-ide@vger.kernel.or
   T:    quilt kernel.org/pub/linux/kernel/people/bart/pata-2.6/
   S:    Maintained
   
- -IDE/ATAPI CDROM DRIVER
+ +IDE/ATAPI DRIVERS
   P:    Borislav Petkov
   M:    petkovbb@gmail.com
   L:    linux-ide@vger.kernel.org
   S:    Maintained
   
- -IDE/ATAPI FLOPPY DRIVERS
- -P:    Paul Bristow
- -M:    Paul Bristow <paul@paulbristow.net>
- -W:    http://paulbristow.net/linux/idefloppy.html
- -L:    linux-kernel@vger.kernel.org
- -S:    Maintained
- -
- -IDE/ATAPI TAPE DRIVERS
- -P:    Gadi Oxman
- -M:    Gadi Oxman <gadio@netvision.net.il>
- -L:    linux-kernel@vger.kernel.org
- -S:    Maintained
- -
   IDLE-I7300
   P:    Andy Henroid
   M:    andrew.d.henroid@intel.com
@@@ -2232,11 -2216,6 +2232,11 @@@ M:    stefanr@s5r6.in-berlin.d
   L:    linux1394-devel@lists.sourceforge.net
   S:    Maintained
   
+ +INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
+ +P:    Mimi Zohar
+ +M:    zohar@us.ibm.com
+ +S:    Supported
+ +
   IMS TWINTURBO FRAMEBUFFER DRIVER
   L:    linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
   S:    Orphan
@@@ -2642,6 -2621,12 +2642,12 @@@ M:    jason.wessel@windriver.co
   L:    kgdb-bugreport@lists.sourceforge.net
   S:    Maintained
   
+ KMEMTRACE
+ P:    Eduard - Gabriel Munteanu
+ M:    eduard.munteanu@linux360.ro
+ L:    linux-kernel@vger.kernel.org
+ S:    Maintained
+ 
   KPROBES
   P:    Ananth N Mavinakayanahalli
   M:    ananth@in.ibm.com
@@@ -2853,7 -2838,7 +2859,7 @@@ P:      Roman Zippe
   M:    zippel@linux-m68k.org
   L:    linux-m68k@lists.linux-m68k.org
   W:    http://www.linux-m68k.org/
- -W:    http://linux-m68k-cvs.ubb.ca/
+ +T:    git git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git
   S:    Maintained
   
   M68K ON APPLE MACINTOSH
@@@ -3310,16 -3295,6 +3316,16 @@@ L:    orinoco-devel@lists.sourceforge.ne
   W:    http://www.nongnu.org/orinoco/
   S:    Maintained
   
+ +OSD LIBRARY
+ +P:    Boaz Harrosh
+ +M:    bharrosh@panasas.com
+ +P:    Benny Halevy
+ +M:    bhalevy@panasas.com
+ +L:    osd-dev@open-osd.org
+ +W:    http://open-osd.org
+ +T:    git://git.open-osd.org/open-osd.git
+ +S:    Maintained
+ +
   P54 WIRELESS DRIVER
   P:    Michael Wu
   M:    flamingice@sourmilk.net
@@@ -3570,22 -3545,6 +3576,22 @@@ M:    linux@arm.linux.org.u
   L:    linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
   S:    Maintained
   
+ +PXA168 SUPPORT
+ +P:    Eric Miao
+ +M:    eric.miao@marvell.com
+ +P:    Jason Chagas
+ +M:    jason.chagas@marvell.com
+ +L:    linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+ +T:    git kernel.org:/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
+ +S:    Supported
+ +
+ +PXA910 SUPPORT
+ +P:    Eric Miao
+ +M:    eric.miao@marvell.com
+ +L:    linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+ +T:    git kernel.org:/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
+ +S:    Supported
+ +
   PXA MMCI DRIVER
   S:    Orphan
   
@@@ -3636,7 -3595,7 +3642,7 @@@ S:      Maintaine
   RALINK RT2X00 WIRELESS LAN DRIVER
   P:    rt2x00 project
   L:    linux-wireless@vger.kernel.org
- -L:    rt2400-devel@lists.sourceforge.net
+ +L:    users@rt2x00.serialmonkey.com
   W:    http://rt2x00.serialmonkey.com/
   S:    Maintained
   T:    git kernel.org:/pub/scm/linux/kernel/git/ivd/rt2x00.git
@@@ -3682,12 -3641,6 +3688,12 @@@ M:    florian.fainelli@telecomint.e
   L:    netdev@vger.kernel.org
   S:    Maintained
   
+ +RDS - RELIABLE DATAGRAM SOCKETS
+ +P:    Andy Grover
+ +M:    andy.grover@oracle.com
+ +L:    rds-devel@oss.oracle.com
+ +S:    Supported
+ +
   READ-COPY UPDATE (RCU)
   P:    Dipankar Sarma
   M:    dipankar@in.ibm.com
@@@ -3779,15 -3732,6 +3785,15 @@@ L:    linux-s390@vger.kernel.or
   W:    http://www.ibm.com/developerworks/linux/linux390/
   S:    Supported
   
+ +S390 ZCRYPT DRIVER
+ +P:    Felix Beck
+ +M:    felix.beck@de.ibm.com
+ +P:    Ralph Wuerthner
+ +M:    ralph.wuerthner@de.ibm.com
+ +M:    linux390@de.ibm.com
+ +L:    linux-s390@vger.kernel.org
+ +S:    Supported
+ +
   S390 ZFCP DRIVER
   P:    Christof Schmitt
   M:    christof.schmitt@de.ibm.com
@@@ -3906,7 -3850,6 +3912,7 @@@ M:      jmorris@namei.or
   L:    linux-kernel@vger.kernel.org
   L:    linux-security-module@vger.kernel.org (suggested Cc:)
   T:    git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git
+ +W:    http://security.wiki.kernel.org/
   S:    Supported
   
   SECURITY CONTACT
@@@ -4348,19 -4291,6 +4354,19 @@@ L:    tlan-devel@lists.sourceforge.net (su
   W:    http://sourceforge.net/projects/tlan/
   S:    Maintained
   
+ +TOMOYO SECURITY MODULE
+ +P:    Kentaro Takeda
+ +M:    takedakn@nttdata.co.jp
+ +P:    Tetsuo Handa
+ +M:    penguin-kernel@I-love.SAKURA.ne.jp
+ +L:    linux-kernel@vger.kernel.org (kernel issues)
+ +L:    tomoyo-users-en@lists.sourceforge.jp (subscribers-only, for developers and users in English)
+ +L:    tomoyo-dev@lists.sourceforge.jp (subscribers-only, for developers in Japanese)
+ +L:    tomoyo-users@lists.sourceforge.jp (subscribers-only, for users in Japanese)
+ +W:    http://tomoyo.sourceforge.jp/
+ +T:    quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.2.x/tomoyo-lsm/patches/
+ +S:    Maintained
+ +
   TOSHIBA ACPI EXTRAS DRIVER
   P:    John Belmonte
   M:    toshiba_acpi@memebeam.org
@@@ -4822,6 -4752,7 +4828,6 @@@ VIDEO FOR LINUX (V4L
   P:    Mauro Carvalho Chehab
   M:    mchehab@infradead.org
   L:    linux-media@vger.kernel.org
- -L:    video4linux-list@redhat.com
   W:    http://linuxtv.org
   T:    git kernel.org:/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
   S:    Maintained
diff --combined arch/Kconfig

index 830c16a2b801d38795139b941f277011e760ba37,a092dc77c24d637f9fecaba0d8c4c017c001ea6d..dc81b34c5d82e72c574217d4b63bfda319e14530
--- 1/arch/Kconfig
--- 2/arch/Kconfig
+++ b/arch/Kconfig
@@@ -6,6 -6,7 +6,7 @@@ config OPROFIL
         tristate "OProfile system profiling (EXPERIMENTAL)"
         depends on PROFILING
         depends on HAVE_OPROFILE
+       depends on TRACING_SUPPORT
         select TRACING
         select RING_BUFFER
         help
@@@ -106,5 -107,3 +107,5 @@@ config HAVE_CL
           The <linux/clk.h> calls support software clock gating and
           thus are a key power management tool on many systems.
   
+ +config HAVE_DMA_API_DEBUG
+ +      bool
diff --combined arch/ia64/kernel/Makefile

index f2778f2c4fd9307f75c989fc8553364efe1b9253,ab6e7ec0bba39714011fec0b54cb9052bacda8bc..dc62df02167374c0124484aa852aa6ff48172837
--- 1/arch/ia64/kernel/Makefile
--- 2/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@@ -2,12 -2,16 +2,16 @@@
   # Makefile for the linux kernel.
   #
   
+ ifdef CONFIG_DYNAMIC_FTRACE
+ CFLAGS_REMOVE_ftrace.o = -pg
+ endif
+ 
   extra-y       := head.o init_task.o vmlinux.lds
   
   obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o     \
          irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o          \
          salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- -       unwind.o mca.o mca_asm.o topology.o
+ +       unwind.o mca.o mca_asm.o topology.o dma-mapping.o
   
   obj-$(CONFIG_IA64_BRL_EMU)    += brl_emu.o
   obj-$(CONFIG_IA64_GENERIC)    += acpi-ext.o
@@@ -28,6 -32,7 +32,7 @@@ obj-$(CONFIG_IA64_CYCLONE)    += cyclone.
   obj-$(CONFIG_CPU_FREQ)                += cpufreq/
   obj-$(CONFIG_IA64_MCA_RECOVERY)       += mca_recovery.o
   obj-$(CONFIG_KPROBES)         += kprobes.o jprobes.o
+ obj-$(CONFIG_DYNAMIC_FTRACE)  += ftrace.o
   obj-$(CONFIG_KEXEC)           += machine_kexec.o relocate_kernel.o crash.o
   obj-$(CONFIG_CRASH_DUMP)      += crash_dump.o
   obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
@@@ -43,7 -48,9 +48,7 @@@ ifneq ($(CONFIG_IA64_ESI),
   obj-y                         += esi_stub.o   # must be in kernel proper
   endif
   obj-$(CONFIG_DMAR)            += pci-dma.o
- -ifeq ($(CONFIG_DMAR), y)
   obj-$(CONFIG_SWIOTLB)         += pci-swiotlb.o
- -endif
   
   # The gate DSO image is built using a special linker script.
   targets += gate.so gate-syms.o
diff --combined arch/x86/Kconfig

index 748e50a1a15257ac6226eca13869b47e32a59de2,1a3150570785f2285e37c3c5c811d742c753697c..0885245e68080a99e0b7b29b025ed236802daf5f
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -34,13 -34,14 +34,15 @@@ config X8
         select HAVE_FUNCTION_TRACER
         select HAVE_FUNCTION_GRAPH_TRACER
         select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+       select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
+       select HAVE_FTRACE_SYSCALLS
         select HAVE_KVM
         select HAVE_ARCH_KGDB
         select HAVE_ARCH_TRACEHOOK
         select HAVE_GENERIC_DMA_COHERENT if X86_32
         select HAVE_EFFICIENT_UNALIGNED_ACCESS
         select USER_STACKTRACE_SUPPORT
+ +      select HAVE_DMA_API_DEBUG
         select HAVE_KERNEL_GZIP
         select HAVE_KERNEL_BZIP2
         select HAVE_KERNEL_LZMA
@@@ -165,17 -166,11 +167,17 @@@ config AUDIT_ARC
   config ARCH_SUPPORTS_OPTIMIZED_INLINING
         def_bool y
   
+ +config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ +      def_bool y
+ +
   # Use the generic interrupt handling code in kernel/irq/:
   config GENERIC_HARDIRQS
         bool
         default y
   
+ +config GENERIC_HARDIRQS_NO__DO_IRQ
+ +       def_bool y
+ +
   config GENERIC_IRQ_PROBE
         bool
         default y
@@@ -1136,7 -1131,7 +1138,7 @@@ config NUMA_EM
   
   config NODES_SHIFT
         int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
- -      range 1 9   if X86_64
+ +      range 1 9
         default "9" if MAXSMP
         default "6" if X86_64
         default "4" if X86_NUMAQ
diff --combined arch/x86/include/asm/cacheflush.h

index b3894bf52fcddf68f8d16a38e2705fc1b3ec519d,eb2221d5add28a9c22e65c6e936f155c57b991b3..e55dfc1ad453be7b84215a6016a444b497656034
--- 1/arch/x86/include/asm/cacheflush.h
--- 2/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@@ -90,9 -90,6 +90,9 @@@ int set_memory_4k(unsigned long addr, i
   int set_memory_array_uc(unsigned long *addr, int addrinarray);
   int set_memory_array_wb(unsigned long *addr, int addrinarray);
   
+ +int set_pages_array_uc(struct page **pages, int addrinarray);
+ +int set_pages_array_wb(struct page **pages, int addrinarray);
+ +
   /*
    * For legacy compatibility with the old APIs, a few functions
    * are provided that work on a "struct page".
@@@ -126,6 -123,11 +126,11 @@@ void clflush_cache_range(void *addr, un
   #ifdef CONFIG_DEBUG_RODATA
   void mark_rodata_ro(void);
   extern const int rodata_test_data;
+ void set_kernel_text_rw(void);
+ void set_kernel_text_ro(void);
+ #else
+ static inline void set_kernel_text_rw(void) { }
+ static inline void set_kernel_text_ro(void) { }
   #endif
   
   #ifdef CONFIG_DEBUG_RODATA_TEST
diff --combined arch/x86/kernel/Makefile

index c611ad64137f67c1fc06789049e8093365bf1ed7,84000eb931ffd82c9a15cef95b1984e303ba7867..145cce75cda70dcc5f90560902eff37cc6ddd3fc
--- 1/arch/x86/kernel/Makefile
--- 2/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@@ -66,10 -66,12 +66,11 @@@ obj-$(CONFIG_X86_MPPARSE)  += mpparse.
   obj-y                         += apic/
   obj-$(CONFIG_X86_REBOOTFIXUPS)        += reboot_fixups_32.o
   obj-$(CONFIG_DYNAMIC_FTRACE)  += ftrace.o
- obj-$(CONFIG_FUNCTION_GRAPH_TRACER)   += ftrace.o
+ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
   obj-$(CONFIG_KEXEC)           += machine_kexec_$(BITS).o
   obj-$(CONFIG_KEXEC)           += relocate_kernel_$(BITS).o crash.o
   obj-$(CONFIG_CRASH_DUMP)      += crash_dump_$(BITS).o
- -obj-$(CONFIG_X86_VSMP)                += vsmp_64.o
   obj-$(CONFIG_KPROBES)         += kprobes.o
   obj-$(CONFIG_MODULES)         += module_$(BITS).o
   obj-$(CONFIG_EFI)             += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@@ -105,7 -107,7 +106,7 @@@ obj-$(CONFIG_MICROCODE)                    += microcode.
   
   obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
   
- -obj-$(CONFIG_SWIOTLB)                 += pci-swiotlb_64.o # NB rename without _64
+ +obj-$(CONFIG_SWIOTLB)                 += pci-swiotlb.o
   
   ###
   # 64 bit specific files
@@@ -119,5 -121,4 +120,5 @@@ ifeq ($(CONFIG_X86_64),y
         obj-$(CONFIG_AMD_IOMMU)         += amd_iommu_init.o amd_iommu.o
   
         obj-$(CONFIG_PCI_MMCONFIG)      += mmconf-fam10h_64.o
+ +      obj-y                           += vsmp_64.o
   endif
diff --combined arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c

index 23da96e57b17ed610b7a2a940c9055e402590e11,5e40f54171e70da2470df453ec0f16b1a4062de9..05209b5cc6ca47959c063fbd1b8c480ab7b0f7cd
--- 1/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
--- 2/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@@ -1,5 -1,5 +1,5 @@@
   /*
- - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
+ + * acpi-cpufreq.c - ACPI Processor P-States Driver
    *
    *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
    *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@@ -33,21 -33,19 +33,21 @@@
   #include <linux/cpufreq.h>
   #include <linux/compiler.h>
   #include <linux/dmi.h>
- #include <linux/ftrace.h>
+ #include <trace/power.h>
   
   #include <linux/acpi.h>
+ +#include <linux/io.h>
+ +#include <linux/delay.h>
+ +#include <linux/uaccess.h>
+ +
   #include <acpi/processor.h>
   
- -#include <asm/io.h>
   #include <asm/msr.h>
   #include <asm/processor.h>
   #include <asm/cpufeature.h>
- -#include <asm/delay.h>
- -#include <asm/uaccess.h>
   
- -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+ +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ +              "acpi-cpufreq", msg)
   
   MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
   MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@@ -72,6 -70,8 +72,8 @@@ struct acpi_cpufreq_data 
   
   static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
   
+ DEFINE_TRACE(power_mark);
+ 
   /* acpi_perf_data is a pointer to percpu data. */
   static struct acpi_processor_performance *acpi_perf_data;
   
@@@ -97,7 -97,7 +99,7 @@@ static unsigned extract_io(u32 value, s
   
         perf = data->acpi_data;
   
- -      for (i=0; i<perf->state_count; i++) {
+ +      for (i = 0; i < perf->state_count; i++) {
                 if (value == perf->states[i].status)
                         return data->freq_table[i].frequency;
         }
@@@ -112,7 -112,7 +114,7 @@@ static unsigned extract_msr(u32 msr, st
         msr &= INTEL_MSR_RANGE;
         perf = data->acpi_data;
   
- -      for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ +      for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
                 if (msr == perf->states[data->freq_table[i].index].status)
                         return data->freq_table[i].frequency;
         }
@@@ -140,13 -140,15 +142,13 @@@ struct io_addr 
         u8 bit_width;
   };
   
- -typedef union {
- -      struct msr_addr msr;
- -      struct io_addr io;
- -} drv_addr_union;
- -
   struct drv_cmd {
         unsigned int type;
         const struct cpumask *mask;
- -      drv_addr_union addr;
+ +      union {
+ +              struct msr_addr msr;
+ +              struct io_addr io;
+ +      } addr;
         u32 val;
   };
   
@@@ -369,7 -371,7 +371,7 @@@ static unsigned int check_freqs(const s
         unsigned int cur_freq;
         unsigned int i;
   
- -      for (i=0; i<100; i++) {
+ +      for (i = 0; i < 100; i++) {
                 cur_freq = extract_freq(get_cur_val(mask), data);
                 if (cur_freq == freq)
                         return 1;
@@@ -494,7 -496,7 +496,7 @@@ acpi_cpufreq_guess_freq(struct acpi_cpu
                 unsigned long freq;
                 unsigned long freqn = perf->states[0].core_frequency * 1000;
   
- -              for (i=0; i<(perf->state_count-1); i++) {
+ +              for (i = 0; i < (perf->state_count-1); i++) {
                         freq = freqn;
                         freqn = perf->states[i+1].core_frequency * 1000;
                         if ((2 * cpu_khz) > (freqn + freq)) {
@@@ -673,7 -675,7 +675,7 @@@ static int acpi_cpufreq_cpu_init(struc
   
         /* detect transition latency */
         policy->cpuinfo.transition_latency = 0;
- -      for (i=0; i<perf->state_count; i++) {
+ +      for (i = 0; i < perf->state_count; i++) {
                 if ((perf->states[i].transition_latency * 1000) >
                     policy->cpuinfo.transition_latency)
                         policy->cpuinfo.transition_latency =
@@@ -682,8 -684,8 +684,8 @@@
   
         data->max_freq = perf->states[0].core_frequency * 1000;
         /* table init */
- -      for (i=0; i<perf->state_count; i++) {
- -              if (i>0 && perf->states[i].core_frequency >=
+ +      for (i = 0; i < perf->state_count; i++) {
+ +              if (i > 0 && perf->states[i].core_frequency >=
                     data->freq_table[valid_states-1].frequency / 1000)
                         continue;
   
diff --combined arch/x86/kernel/kprobes.c

index 55b94614e34845cbab6e340754295e0d74462fa8,759095d53a06f9e2913663cfedff5f57dc773818..7b5169d2b00026272ed26874913c42ff315befb3
--- 1/arch/x86/kernel/kprobes.c
--- 2/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@@ -193,7 -193,7 +193,7 @@@ static int __kprobes can_boost(kprobe_o
         kprobe_opcode_t opcode;
         kprobe_opcode_t *orig_opcodes = opcodes;
   
- -      if (search_exception_tables(opcodes))
+ +      if (search_exception_tables((unsigned long)opcodes))
                 return 0;       /* Page fault may occur on this address. */
   
   retry:
@@@ -638,13 -638,13 +638,13 @@@ static void __used __kprobes kretprobe_
   #else
                         "       pushf\n"
                         /*
-                        * Skip cs, ip, orig_ax.
+                        * Skip cs, ip, orig_ax and gs.
                          * trampoline_handler() will plug in these values
                          */
-                       "       subl $12, %esp\n"
+                       "       subl $16, %esp\n"
                         "       pushl %fs\n"
-                       "       pushl %ds\n"
                         "       pushl %es\n"
+                       "       pushl %ds\n"
                         "       pushl %eax\n"
                         "       pushl %ebp\n"
                         "       pushl %edi\n"
@@@ -655,10 -655,10 +655,10 @@@
                         "       movl %esp, %eax\n"
                         "       call trampoline_handler\n"
                         /* Move flags to cs */
-                       "       movl 52(%esp), %edx\n"
-                       "       movl %edx, 48(%esp)\n"
+                       "       movl 56(%esp), %edx\n"
+                       "       movl %edx, 52(%esp)\n"
                         /* Replace saved flags with true return address. */
-                       "       movl %eax, 52(%esp)\n"
+                       "       movl %eax, 56(%esp)\n"
                         "       popl %ebx\n"
                         "       popl %ecx\n"
                         "       popl %edx\n"
@@@ -666,8 -666,8 +666,8 @@@
                         "       popl %edi\n"
                         "       popl %ebp\n"
                         "       popl %eax\n"
-                       /* Skip ip, orig_ax, es, ds, fs */
-                       "       addl $20, %esp\n"
+                       /* Skip ds, es, fs, gs, orig_ax and ip */
+                       "       addl $24, %esp\n"
                         "       popf\n"
   #endif
                         "       ret\n");
@@@ -691,6 -691,7 +691,7 @@@ static __used __kprobes void *trampolin
         regs->cs = __KERNEL_CS;
   #else
         regs->cs = __KERNEL_CS | get_kernel_rpl();
+       regs->gs = 0;
   #endif
         regs->ip = trampoline_address;
         regs->orig_ax = ~0UL;
diff --combined arch/x86/kernel/process.c

index 156f87582c6cd5e0fcb677b7c685d92c68443d72,8c037051b3537c4f5da2d02e5b69e0fc4ec6aa2b..62fc75b67e450cb368e41c4bf34685f0cbe069bd
--- 1/arch/x86/kernel/process.c
--- 2/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@@ -8,7 -8,7 +8,7 @@@
   #include <linux/module.h>
   #include <linux/pm.h>
   #include <linux/clockchips.h>
- #include <linux/ftrace.h>
+ #include <trace/power.h>
   #include <asm/system.h>
   #include <asm/apic.h>
   #include <asm/idle.h>
@@@ -22,6 -22,9 +22,9 @@@ EXPORT_SYMBOL(idle_nomwait)
   
   struct kmem_cache *task_xstate_cachep;
   
+ DEFINE_TRACE(power_start);
+ DEFINE_TRACE(power_end);
+ 
   int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
   {
         *dst = *src;
@@@ -65,11 -68,11 +68,11 @@@ void exit_thread(void
   {
         struct task_struct *me = current;
         struct thread_struct *t = &me->thread;
+ +      unsigned long *bp = t->io_bitmap_ptr;
   
- -      if (me->thread.io_bitmap_ptr) {
+ +      if (bp) {
                 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
   
- -              kfree(t->io_bitmap_ptr);
                 t->io_bitmap_ptr = NULL;
                 clear_thread_flag(TIF_IO_BITMAP);
                 /*
@@@ -78,7 -81,6 +81,7 @@@
                 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                 t->io_bitmap_max = 0;
                 put_cpu();
+ +              kfree(bp);
         }
   
         ds_exit_thread(current);
diff --combined arch/x86/kernel/ptrace.c

index 19378715f4157b5202d823f9d751dd1f07c08b38,99749d6e87a88212fbb563c66145b58c49740adf..5c6e46320db18feb28b37d0276edc3d03648651f
--- 1/arch/x86/kernel/ptrace.c
--- 2/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@@ -21,6 -21,7 +21,7 @@@
   #include <linux/audit.h>
   #include <linux/seccomp.h>
   #include <linux/signal.h>
+ #include <linux/ftrace.h>
   
   #include <asm/uaccess.h>
   #include <asm/pgtable.h>
@@@ -685,8 -686,9 +686,8 @@@ static int ptrace_bts_config(struct tas
                 if (!cfg.signal)
                         return -EINVAL;
   
- -              return -EOPNOTSUPP;
- -
                 child->thread.bts_ovfl_signal = cfg.signal;
+ +              return -EOPNOTSUPP;
         }
   
         if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
@@@ -1415,6 -1417,9 +1416,9 @@@ asmregparm long syscall_trace_enter(str
             tracehook_report_syscall_entry(regs))
                 ret = -1L;
   
+       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+               ftrace_syscall_enter(regs);
+ 
         if (unlikely(current->audit_context)) {
                 if (IS_IA32)
                         audit_syscall_entry(AUDIT_ARCH_I386,
@@@ -1438,6 -1443,9 +1442,9 @@@ asmregparm void syscall_trace_leave(str
         if (unlikely(current->audit_context))
                 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
   
+       if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+               ftrace_syscall_exit(regs);
+ 
         if (test_thread_flag(TIF_SYSCALL_TRACE))
                 tracehook_report_syscall_exit(regs, 0);
   
diff --combined arch/x86/kvm/Kconfig

index 0a303c3ed11fa991902096d845a9a0c6efe7aafa,c7da3683f4c517a64288660066cfe408802a8e78..a58504ea78ccb90d766e8efbcc5563ef0b0d1303
--- 1/arch/x86/kvm/Kconfig
--- 2/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@@ -4,10 -4,6 +4,10 @@@
   config HAVE_KVM
          bool
   
+ +config HAVE_KVM_IRQCHIP
+ +       bool
+ +       default y
+ +
   menuconfig VIRTUALIZATION
         bool "Virtualization"
         depends on HAVE_KVM || X86
@@@ -59,7 -55,8 +59,8 @@@ config KVM_AM
   
   config KVM_TRACE
         bool "KVM trace support"
-       depends on KVM && MARKERS && SYSFS
+       depends on KVM && SYSFS
+       select MARKERS
         select RELAY
         select DEBUG_FS
         default n
diff --combined drivers/char/sysrq.c

index ebea9b2c30a583737a0e7800122217ff4eaf273c,30659ce9bcf4993d299e24019c67ff6a47caea13..6de020d078e1c4a746a56e281c2db2a19bc610fa
--- 1/drivers/char/sysrq.c
--- 2/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@@ -35,7 -35,7 +35,7 @@@
   #include <linux/vt_kern.h>
   #include <linux/workqueue.h>
   #include <linux/kexec.h>
- -#include <linux/irq.h>
+ +#include <linux/interrupt.h>
   #include <linux/hrtimer.h>
   #include <linux/oom.h>
   
@@@ -283,7 -283,7 +283,7 @@@ static void sysrq_ftrace_dump(int key, 
   }
   static struct sysrq_key_op sysrq_ftrace_dump_op = {
         .handler        = sysrq_ftrace_dump,
-       .help_msg       = "dumpZ-ftrace-buffer",
+       .help_msg       = "dump-ftrace-buffer(Z)",
         .action_msg     = "Dump ftrace buffer",
         .enable_mask    = SYSRQ_ENABLE_DUMP,
   };
@@@ -346,19 -346,6 +346,19 @@@ static struct sysrq_key_op sysrq_moom_o
         .enable_mask    = SYSRQ_ENABLE_SIGNAL,
   };
   
+ +#ifdef CONFIG_BLOCK
+ +static void sysrq_handle_thaw(int key, struct tty_struct *tty)
+ +{
+ +      emergency_thaw_all();
+ +}
+ +static struct sysrq_key_op sysrq_thaw_op = {
+ +      .handler        = sysrq_handle_thaw,
+ +      .help_msg       = "thaw-filesystems(J)",
+ +      .action_msg     = "Emergency Thaw of all frozen filesystems",
+ +      .enable_mask    = SYSRQ_ENABLE_SIGNAL,
+ +};
+ +#endif
+ +
   static void sysrq_handle_kill(int key, struct tty_struct *tty)
   {
         send_sig_all(SIGKILL);
@@@ -409,13 -396,9 +409,13 @@@ static struct sysrq_key_op *sysrq_key_t
         &sysrq_moom_op,                 /* f */
         /* g: May be registered by ppc for kgdb */
         NULL,                           /* g */
- -      NULL,                           /* h */
+ +      NULL,                           /* h - reserved for help */
         &sysrq_kill_op,                 /* i */
+ +#ifdef CONFIG_BLOCK
+ +      &sysrq_thaw_op,                 /* j */
+ +#else
         NULL,                           /* j */
+ +#endif
         &sysrq_SAK_op,                  /* k */
   #ifdef CONFIG_SMP
         &sysrq_showallcpus_op,          /* l */
diff --combined fs/partitions/check.c

index 38e337d51ced527842a1b86c8193f7983be260b0,8a17f7edcc74f5191114fb964b722c162207be76..99e33ef40be477902faf9b02f6cd8bfc5e20293a
--- 1/fs/partitions/check.c
--- 2/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@@ -19,6 -19,7 +19,7 @@@
   #include <linux/kmod.h>
   #include <linux/ctype.h>
   #include <linux/genhd.h>
+ #include <linux/blktrace_api.h>
   
   #include "check.h"
   
@@@ -294,6 -295,9 +295,9 @@@ static struct attribute_group part_attr
   
   static struct attribute_group *part_attr_groups[] = {
         &part_attr_group,
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+       &blk_trace_attr_group,
+ #endif
         NULL
   };
   
@@@ -400,7 -404,7 +404,7 @@@ struct hd_struct *add_partition(struct 
         pdev->devt = devt;
   
         /* delay uevent until 'holders' subdir is created */
- -      pdev->uevent_suppress = 1;
+ +      dev_set_uevent_suppress(pdev, 1);
         err = device_add(pdev);
         if (err)
                 goto out_put;
@@@ -410,7 -414,7 +414,7 @@@
         if (!p->holder_dir)
                 goto out_del;
   
- -      pdev->uevent_suppress = 0;
+ +      dev_set_uevent_suppress(pdev, 0);
         if (flags & ADDPART_FLAG_WHOLEDISK) {
                 err = device_create_file(pdev, &dev_attr_whole_disk);
                 if (err)
@@@ -422,7 -426,7 +426,7 @@@
         rcu_assign_pointer(ptbl->part[partno], p);
   
         /* suppress uevent if the disk supresses it */
- -      if (!ddev->uevent_suppress)
+ +      if (!dev_get_uevent_suppress(ddev))
                 kobject_uevent(&pdev->kobj, KOBJ_ADD);
   
         return p;
@@@ -455,7 -459,7 +459,7 @@@ void register_disk(struct gendisk *disk
         dev_set_name(ddev, disk->disk_name);
   
         /* delay uevents, until we scanned partition table */
- -      ddev->uevent_suppress = 1;
+ +      dev_set_uevent_suppress(ddev, 1);
   
         if (device_add(ddev))
                 return;
@@@ -490,7 -494,7 +494,7 @@@
   
   exit:
         /* announce disk after possible partitions are created */
- -      ddev->uevent_suppress = 0;
+ +      dev_set_uevent_suppress(ddev, 0);
         kobject_uevent(&ddev->kobj, KOBJ_ADD);
   
         /* announce possible partitions */
diff --combined include/asm-generic/vmlinux.lds.h

index a654d724d3b05b7d1e6a280d994f2a3f4df58dd1,d3bc3c86df6a76efaaf2c025470661dc44d37e4c..7fa660fd449ca9a3c4a84013f035893ba715a067
--- 1/include/asm-generic/vmlinux.lds.h
--- 2/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@@ -61,6 -61,30 +61,30 @@@
   #define BRANCH_PROFILE()
   #endif
   
+ #ifdef CONFIG_EVENT_TRACER
+ #define FTRACE_EVENTS()       VMLINUX_SYMBOL(__start_ftrace_events) = .;      \
+                       *(_ftrace_events)                               \
+                       VMLINUX_SYMBOL(__stop_ftrace_events) = .;
+ #else
+ #define FTRACE_EVENTS()
+ #endif
+ 
+ #ifdef CONFIG_TRACING
+ #define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;      \
+                        *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \
+                        VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
+ #else
+ #define TRACE_PRINTKS()
+ #endif
+ 
+ #ifdef CONFIG_FTRACE_SYSCALLS
+ #define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .;       \
+                        *(__syscalls_metadata)                         \
+                        VMLINUX_SYMBOL(__stop_syscalls_metadata) = .;
+ #else
+ #define TRACE_SYSCALLS()
+ #endif
+ 
   /* .data section */
   #define DATA_DATA                                                     \
         *(.data)                                                        \
@@@ -80,13 -104,11 +104,16 @@@
         VMLINUX_SYMBOL(__start___tracepoints) = .;                      \
         *(__tracepoints)                                                \
         VMLINUX_SYMBOL(__stop___tracepoints) = .;                       \
+ +      /* implement dynamic printk debug */                            \
+ +      . = ALIGN(8);                                                   \
+ +      VMLINUX_SYMBOL(__start___verbose) = .;                          \
+ +      *(__verbose)                                                    \
+ +      VMLINUX_SYMBOL(__stop___verbose) = .;                           \
         LIKELY_PROFILE()                                                \
-       BRANCH_PROFILE()
+       BRANCH_PROFILE()                                                \
+       TRACE_PRINTKS()                                                 \
+       FTRACE_EVENTS()                                                 \
+       TRACE_SYSCALLS()
   
   #define RO_DATA(align)                                                        \
         . = ALIGN((align));                                             \
@@@ -314,7 -336,15 +341,7 @@@
         CPU_DISCARD(init.data)                                          \
         CPU_DISCARD(init.rodata)                                        \
         MEM_DISCARD(init.data)                                          \
- -      MEM_DISCARD(init.rodata)                                        \
- -      /* implement dynamic printk debug */                            \
- -      VMLINUX_SYMBOL(__start___verbose_strings) = .;                  \
- -      *(__verbose_strings)                                            \
- -      VMLINUX_SYMBOL(__stop___verbose_strings) = .;                   \
- -      . = ALIGN(8);                                                   \
- -      VMLINUX_SYMBOL(__start___verbose) = .;                          \
- -      *(__verbose)                                                    \
- -      VMLINUX_SYMBOL(__stop___verbose) = .;
+ +      MEM_DISCARD(init.rodata)
   
   #define INIT_TEXT                                                     \
         *(.init.text)                                                   \
diff --combined include/linux/interrupt.h

index c68bffd182bbe4d977848fd3507150da83be4d7b,9b7e9d743476fa096e0c5b156dca5abc85a296d3..ce2c07d99fc3a54934d140214cd46a0f7542937b
--- 1/include/linux/interrupt.h
--- 2/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@@ -61,17 -61,6 +61,17 @@@
   
   typedef irqreturn_t (*irq_handler_t)(int, void *);
   
+ +/**
+ + * struct irqaction - per interrupt action descriptor
+ + * @handler:  interrupt handler function
+ + * @flags:    flags (see IRQF_* above)
+ + * @mask:     no comment as it is useless and about to be removed
+ + * @name:     name of the device
+ + * @dev_id:   cookie to identify the device
+ + * @next:     pointer to the next irqaction for shared interrupts
+ + * @irq:      interrupt number
+ + * @dir:      pointer to the proc/irq/NN/name entry
+ + */
   struct irqaction {
         irq_handler_t handler;
         unsigned long flags;
@@@ -117,15 -106,6 +117,15 @@@ extern void disable_irq_nosync(unsigne
   extern void disable_irq(unsigned int irq);
   extern void enable_irq(unsigned int irq);
   
+ +/* The following three functions are for the core kernel use only. */
+ +extern void suspend_device_irqs(void);
+ +extern void resume_device_irqs(void);
+ +#ifdef CONFIG_PM_SLEEP
+ +extern int check_wakeup_irqs(void);
+ +#else
+ +static inline int check_wakeup_irqs(void) { return 0; }
+ +#endif
+ +
   #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
   
   extern cpumask_var_t irq_default_affinity;
@@@ -278,6 -258,11 +278,11 @@@ enu
         NR_SOFTIRQS
   };
   
+ /* map softirq index to softirq name. update 'softirq_to_name' in
+  * kernel/softirq.c when adding a new softirq.
+  */
+ extern char *softirq_to_name[NR_SOFTIRQS];
+ 
   /* softirq mask and active fields moved to irq_cpustat_t in
    * asm/hardirq.h to get better cache usage.  KAO
    */
@@@ -482,12 -467,6 +487,12 @@@ static inline void init_irq_proc(void
   }
   #endif
   
+ +#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ)
+ +extern void debug_poll_all_shared_irqs(void);
+ +#else
+ +static inline void debug_poll_all_shared_irqs(void) { }
+ +#endif
+ +
   int show_interrupts(struct seq_file *p, void *v);
   
   struct irq_desc;
diff --combined include/linux/kernel.h

index e720b0da77517639a473118f87a996114fb668fa,1daca3b062bb27495e6d5ba4df588e9cd4d17a59..e81f2637fdef622e654a780a21487809a96d7de8
--- 1/include/linux/kernel.h
--- 2/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@@ -16,7 -16,7 +16,7 @@@
   #include <linux/log2.h>
   #include <linux/typecheck.h>
   #include <linux/ratelimit.h>
- -#include <linux/dynamic_printk.h>
+ +#include <linux/dynamic_debug.h>
   #include <asm/byteorder.h>
   #include <asm/bug.h>
   
@@@ -242,6 -242,19 +242,19 @@@ extern struct ratelimit_state printk_ra
   extern int printk_ratelimit(void);
   extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
                                    unsigned int interval_msec);
+ 
+ /*
+  * Print a one-time message (analogous to WARN_ONCE() et al):
+  */
+ #define printk_once(x...) ({                  \
+       static int __print_once = 1;            \
+                                               \
+       if (__print_once) {                     \
+               __print_once = 0;               \
+               printk(x);                      \
+       }                                       \
+ })
+ 
   #else
   static inline int vprintk(const char *s, va_list args)
         __attribute__ ((format (printf, 1, 0)));
@@@ -253,6 -266,10 +266,10 @@@ static inline int printk_ratelimit(void
   static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
                                           unsigned int interval_msec)   \
                 { return false; }
+ 
+ /* No effect, but we still get type checking even in the !PRINTK case: */
+ #define printk_once(x...) printk(x)
+ 
   #endif
   
   extern int printk_needs_cpu(int cpu);
@@@ -353,23 -370,153 +370,156 @@@ static inline char *pack_hex_byte(char 
           printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
   #define pr_info(fmt, ...) \
           printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+ +#define pr_cont(fmt, ...) \
+ +      printk(KERN_CONT fmt, ##__VA_ARGS__)
   
   /* If you are writing a driver, please use dev_dbg instead */
   #if defined(DEBUG)
   #define pr_debug(fmt, ...) \
         printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
- -#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+ +#elif defined(CONFIG_DYNAMIC_DEBUG)
+ +/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
   #define pr_debug(fmt, ...) do { \
- -      dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+ +      dynamic_pr_debug(fmt, ##__VA_ARGS__); \
         } while (0)
   #else
   #define pr_debug(fmt, ...) \
         ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
   #endif
   
+ /*
+  * General tracing related utility functions - trace_printk(),
+  * tracing_on/tracing_off and tracing_start()/tracing_stop
+  *
+  * Use tracing_on/tracing_off when you want to quickly turn on or off
+  * tracing. It simply enables or disables the recording of the trace events.
+  * This also corresponds to the user space debugfs/tracing/tracing_on
+  * file, which gives a means for the kernel and userspace to interact.
+  * Place a tracing_off() in the kernel where you want tracing to end.
+  * From user space, examine the trace, and then echo 1 > tracing_on
+  * to continue tracing.
+  *
+  * tracing_stop/tracing_start has slightly more overhead. It is used
+  * by things like suspend to ram where disabling the recording of the
+  * trace is not enough, but tracing must actually stop because things
+  * like calling smp_processor_id() may crash the system.
+  *
+  * Most likely, you want to use tracing_on/tracing_off.
+  */
+ #ifdef CONFIG_RING_BUFFER
+ void tracing_on(void);
+ void tracing_off(void);
+ /* tracing_off_permanent stops recording with no way to bring it back */
+ void tracing_off_permanent(void);
+ int tracing_is_on(void);
+ #else
+ static inline void tracing_on(void) { }
+ static inline void tracing_off(void) { }
+ static inline void tracing_off_permanent(void) { }
+ static inline int tracing_is_on(void) { return 0; }
+ #endif
+ #ifdef CONFIG_TRACING
+ extern void tracing_start(void);
+ extern void tracing_stop(void);
+ extern void ftrace_off_permanent(void);
+ 
+ extern void
+ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+ 
+ static inline void __attribute__ ((format (printf, 1, 2)))
+ ____trace_printk_check_format(const char *fmt, ...)
+ {
+ }
+ #define __trace_printk_check_format(fmt, args...)                     \
+ do {                                                                  \
+       if (0)                                                          \
+               ____trace_printk_check_format(fmt, ##args);             \
+ } while (0)
+ 
+ /**
+  * trace_printk - printf formatting in the ftrace buffer
+  * @fmt: the printf format for printing
+  *
+  * Note: __trace_printk is an internal function for trace_printk and
+  *       the @ip is passed in via the trace_printk macro.
+  *
+  * This function allows a kernel developer to debug fast path sections
+  * that printk is not appropriate for. By scattering in various
+  * printk like tracing in the code, a developer can quickly see
+  * where problems are occurring.
+  *
+  * This is intended as a debugging tool for the developer only.
+  * Please refrain from leaving trace_printks scattered around in
+  * your code.
+  */
+ 
+ #define trace_printk(fmt, args...)                                    \
+ do {                                                                  \
+       __trace_printk_check_format(fmt, ##args);                       \
+       if (__builtin_constant_p(fmt)) {                                \
+               static const char *trace_printk_fmt                     \
+                 __attribute__((section("__trace_printk_fmt"))) =      \
+                       __builtin_constant_p(fmt) ? fmt : NULL;         \
+                                                                       \
+               __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args);   \
+       } else                                                          \
+               __trace_printk(_THIS_IP_, fmt, ##args);         \
+ } while (0)
+ 
+ extern int
+ __trace_bprintk(unsigned long ip, const char *fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+ 
+ extern int
+ __trace_printk(unsigned long ip, const char *fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+ 
+ /*
+  * The double __builtin_constant_p is needed because gcc gives an error
+  * if we try to initialize the static variable with fmt when fmt is not
+  * a constant, even inside the outer if statement.
+  */
+ #define ftrace_vprintk(fmt, vargs)                                    \
+ do {                                                                  \
+       if (__builtin_constant_p(fmt)) {                                \
+               static const char *trace_printk_fmt                     \
+                 __attribute__((section("__trace_printk_fmt"))) =      \
+                       __builtin_constant_p(fmt) ? fmt : NULL;         \
+                                                                       \
+               __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs);  \
+       } else                                                          \
+               __ftrace_vprintk(_THIS_IP_, fmt, vargs);                \
+ } while (0)
+ 
+ extern int
+ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap);
+ 
+ extern int
+ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
+ 
+ extern void ftrace_dump(void);
+ #else
+ static inline void
+ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+ static inline int
+ trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
+ 
+ static inline void tracing_start(void) { }
+ static inline void tracing_stop(void) { }
+ static inline void ftrace_off_permanent(void) { }
+ static inline int
+ trace_printk(const char *fmt, ...)
+ {
+       return 0;
+ }
+ static inline int
+ ftrace_vprintk(const char *fmt, va_list ap)
+ {
+       return 0;
+ }
+ static inline void ftrace_dump(void) { }
+ #endif /* CONFIG_TRACING */
+ 
   /*
    *      Display an IP address in readable format.
    */
@@@ -381,6 -528,18 +531,6 @@@
         ((unsigned char *)&addr)[3]
   #define NIPQUAD_FMT "%u.%u.%u.%u"
   
- -#if defined(__LITTLE_ENDIAN)
- -#define HIPQUAD(addr) \
- -      ((unsigned char *)&addr)[3], \
- -      ((unsigned char *)&addr)[2], \
- -      ((unsigned char *)&addr)[1], \
- -      ((unsigned char *)&addr)[0]
- -#elif defined(__BIG_ENDIAN)
- -#define HIPQUAD       NIPQUAD
- -#else
- -#error "Please fix asm/byteorder.h"
- -#endif /* __LITTLE_ENDIAN */
- -
   /*
    * min()/max()/clamp() macros that also do
    * strict type-checking.. See the
diff --combined include/linux/sched.h

index 481fad3a9b4251eda862e93633368792e3ee1707,471e36d3012352ecf5e01d2bd237be1467b01c66..5a50fdef5be5a208af6e66b5aacca1e3871287b1
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -137,6 -137,8 +137,8 @@@ extern unsigned long nr_uninterruptible
   extern unsigned long nr_active(void);
   extern unsigned long nr_iowait(void);
   
+ extern unsigned long get_parent_ip(unsigned long addr);
+ 
   struct seq_file;
   struct cfs_rq;
   struct task_group;
@@@ -391,15 -393,8 +393,15 @@@ extern void arch_unmap_area_topdown(str
                 (mm)->hiwater_vm = (mm)->total_vm;      \
   } while (0)
   
- -#define get_mm_hiwater_rss(mm)        max((mm)->hiwater_rss, get_mm_rss(mm))
- -#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm)
+ +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
+ +{
+ +      return max(mm->hiwater_rss, get_mm_rss(mm));
+ +}
+ +
+ +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
+ +{
+ +      return max(mm->hiwater_vm, mm->total_vm);
+ +}
   
   extern void set_dumpable(struct mm_struct *mm, int value);
   extern int get_dumpable(struct mm_struct *mm);
@@@ -1007,7 -1002,6 +1009,7 @@@ struct sched_class 
                               struct rq *busiest, struct sched_domain *sd,
                               enum cpu_idle_type idle);
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ +      int (*needs_post_schedule) (struct rq *this_rq);
         void (*post_schedule) (struct rq *this_rq);
         void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
   
@@@ -1062,10 -1056,6 +1064,10 @@@ struct sched_entity 
         u64                     last_wakeup;
         u64                     avg_overlap;
   
+ +      u64                     start_runtime;
+ +      u64                     avg_wakeup;
+ +      u64                     nr_migrations;
+ +
   #ifdef CONFIG_SCHEDSTATS
         u64                     wait_start;
         u64                     wait_max;
@@@ -1081,6 -1071,7 +1083,6 @@@
         u64                     exec_max;
         u64                     slice_max;
   
- -      u64                     nr_migrations;
         u64                     nr_migrations_cold;
         u64                     nr_failed_migrations_affine;
         u64                     nr_failed_migrations_running;
@@@ -1177,7 -1168,6 +1179,7 @@@ struct task_struct 
   #endif
   
         struct list_head tasks;
+ +      struct plist_node pushable_tasks;
   
         struct mm_struct *mm, *active_mm;
   
@@@ -1189,8 -1179,6 +1191,8 @@@
         /* ??? */
         unsigned int personality;
         unsigned did_exec:1;
+ +      unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
+ +                               * execve */
         pid_t pid;
         pid_t tgid;
   
@@@ -1421,6 -1409,8 +1423,8 @@@
         int curr_ret_stack;
         /* Stack of return addresses for return function tracing */
         struct ftrace_ret_stack *ret_stack;
+       /* time stamp for last schedule */
+       unsigned long long ftrace_timestamp;
         /*
          * Number of functions that haven't been traced
          * because of depth overrun.
diff --combined include/linux/slub_def.h

index e37b6aa8a9fba3272deeb5cd4fc3f139bbb50579,9e3a575b2c30272ced3b97c115fb2b87925db392..a1f90528e70bf5e3e637b95549b4e08e62cbbec2
--- 1/include/linux/slub_def.h
--- 2/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@@ -10,6 -10,7 +10,7 @@@
   #include <linux/gfp.h>
   #include <linux/workqueue.h>
   #include <linux/kobject.h>
+ #include <trace/kmemtrace.h>
   
   enum stat_item {
         ALLOC_FASTPATH,         /* Allocation from cpu slab */
@@@ -46,6 -47,7 +47,6 @@@ struct kmem_cache_cpu 
   struct kmem_cache_node {
         spinlock_t list_lock;   /* Protect partial list and nr_partial */
         unsigned long nr_partial;
- -      unsigned long min_partial;
         struct list_head partial;
   #ifdef CONFIG_SLUB_DEBUG
         atomic_long_t nr_slabs;
@@@ -88,7 -90,6 +89,7 @@@ struct kmem_cache 
         void (*ctor)(void *);
         int inuse;              /* Offset to metadata */
         int align;              /* Alignment */
+ +      unsigned long min_partial;
         const char *name;       /* Name (only for display!) */
         struct list_head list;  /* List of slab caches */
   #ifdef CONFIG_SLUB_DEBUG
@@@ -129,9 -130,9 +130,9 @@@
    * This should be dropped to PAGE_SIZE / 2 once the page allocator
    * "fastpath" becomes competitive with the slab allocator fastpaths.
    */
- -#define SLUB_MAX_SIZE (PAGE_SIZE)
+ +#define SLUB_MAX_SIZE (2 * PAGE_SIZE)
   
- -#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1)
+ +#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
   
   /*
    * We keep the general caches in an array of slab caches that are used for
@@@ -217,13 -218,31 +218,31 @@@ static __always_inline struct kmem_cach
   void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
   void *__kmalloc(size_t size, gfp_t flags);
   
+ #ifdef CONFIG_KMEMTRACE
+ extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+ #else
+ static __always_inline void *
+ kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+ {
+       return kmem_cache_alloc(s, gfpflags);
+ }
+ #endif
+ 
   static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
   {
-       return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+       unsigned int order = get_order(size);
+       void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+ 
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+                            size, PAGE_SIZE << order, flags);
+ 
+       return ret;
   }
   
   static __always_inline void *kmalloc(size_t size, gfp_t flags)
   {
+       void *ret;
+ 
         if (__builtin_constant_p(size)) {
                 if (size > SLUB_MAX_SIZE)
                         return kmalloc_large(size, flags);
@@@ -234,7 -253,13 +253,13 @@@
                         if (!s)
                                 return ZERO_SIZE_PTR;
   
-                       return kmem_cache_alloc(s, flags);
+                       ret = kmem_cache_alloc_notrace(s, flags);
+ 
+                       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+                                            _THIS_IP_, ret,
+                                            size, s->size, flags);
+ 
+                       return ret;
                 }
         }
         return __kmalloc(size, flags);
@@@ -244,8 -269,24 +269,24 @@@
   void *__kmalloc_node(size_t size, gfp_t flags, int node);
   void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
   
+ #ifdef CONFIG_KMEMTRACE
+ extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                                          gfp_t gfpflags,
+                                          int node);
+ #else
+ static __always_inline void *
+ kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                             gfp_t gfpflags,
+                             int node)
+ {
+       return kmem_cache_alloc_node(s, gfpflags, node);
+ }
+ #endif
+ 
   static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
   {
+       void *ret;
+ 
         if (__builtin_constant_p(size) &&
                 size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
                         struct kmem_cache *s = kmalloc_slab(size);
@@@ -253,7 -294,13 +294,13 @@@
                 if (!s)
                         return ZERO_SIZE_PTR;
   
-               return kmem_cache_alloc_node(s, flags, node);
+               ret = kmem_cache_alloc_node_notrace(s, flags, node);
+ 
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _THIS_IP_, ret,
+                                         size, s->size, flags, node);
+ 
+               return ret;
         }
         return __kmalloc_node(size, flags, node);
   }
diff --combined include/linux/string.h

index 8852739f36dfe94529ad3bdb09555741b0829b9e,27ac31784ad27196f5530f8f409724c1d5a75bee..3c877d686375022f9b69773465e8b80eb92ac864
--- 1/include/linux/string.h
--- 2/include/linux/string.h
+++ b/include/linux/string.h
@@@ -10,9 -10,9 +10,10 @@@
   #include <linux/compiler.h>   /* for inline */
   #include <linux/types.h>      /* for size_t */
   #include <linux/stddef.h>     /* for NULL */
+ #include <stdarg.h>
   
   extern char *strndup_user(const char __user *, long);
+ +extern void *memdup_user(const void __user *, size_t);
   
   /*
    * Include machine specific inline routines
@@@ -112,6 -112,12 +113,12 @@@ extern void argv_free(char **argv)
   
   extern bool sysfs_streq(const char *s1, const char *s2);
   
+ #ifdef CONFIG_BINARY_PRINTF
+ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
+ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
+ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
+ #endif
+ 
   extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
                         const void *from, size_t available);
   
diff --combined init/Kconfig

index 14c483d2b7c90b4a5713b6ed116fa16260f1228d,69d5190918e566467ec9dcfc43c07e71f4d113b6..9d8cf2d2f8402b626de0cece40a1c11dc36fa1e2
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -1005,7 -1005,7 +1005,7 @@@ config TRACEPOINT
   
   config MARKERS
         bool "Activate markers"
-       depends on TRACEPOINTS
+       select TRACEPOINTS
         help
           Place an empty function call at each marker site. Can be
           dynamically changed for a probe function.
@@@ -1026,6 -1026,7 +1026,6 @@@ config SLABINF
   
   config RT_MUTEXES
         boolean
- -      select PLIST
   
   config BASE_SMALL
         int
diff --combined init/main.c

index 07c8658ffca54d1cb6da86a12b9f7512575590c0,b0097d2b63ae8411881a9ee7f7d9c46dd2c597ae..3585f073d636453a0c98d5efdf44f0a26568dd02
--- 1/init/main.c
--- 2/init/main.c
+++ b/init/main.c
@@@ -71,6 -71,7 +71,7 @@@
   #include <asm/setup.h>
   #include <asm/sections.h>
   #include <asm/cacheflush.h>
+ #include <trace/kmemtrace.h>
   
   #ifdef CONFIG_X86_LOCAL_APIC
   #include <asm/smp.h>
@@@ -407,7 -408,8 +408,7 @@@ static void __init smp_init(void
          * Set up the current CPU as possible to migrate to.
          * The other ones will be done by cpu_up/cpu_down()
          */
- -      cpu = smp_processor_id();
- -      cpu_set(cpu, cpu_active_map);
+ +      set_cpu_active(smp_processor_id(), true);
   
         /* FIXME: This should be done in userspace --RR */
         for_each_present_cpu(cpu) {
@@@ -648,6 -650,7 +649,7 @@@ asmlinkage void __init start_kernel(voi
         enable_debug_pagealloc();
         cpu_hotplug_init();
         kmem_cache_init();
+       kmemtrace_init();
         debug_objects_mem_init();
         idr_init_cache();
         setup_per_cpu_pageset();
@@@ -769,6 -772,7 +771,7 @@@ static void __init do_basic_setup(void
   {
         rcu_init_sched(); /* needed by module_init stage. */
         init_workqueues();
+       cpuset_init_smp();
         usermodehelper_init();
         driver_init();
         init_irq_proc();
@@@ -793,7 -797,6 +796,7 @@@ static void run_init_process(char *init
    * makes it inline to init() and it becomes part of init.text section
    */
   static noinline int init_post(void)
+ +      __releases(kernel_lock)
   {
         /* need to finish all async __init code before freeing the memory */
         async_synchronize_full();
@@@ -842,7 -845,7 +845,7 @@@ static int __init kernel_init(void * un
         /*
          * init can run on any cpu.
          */
- -      set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR);
+ +      set_cpus_allowed_ptr(current, cpu_all_mask);
         /*
          * Tell the world that we're going to be the grim
          * reaper of innocent orphaned children.
@@@ -863,8 -866,6 +866,6 @@@
         smp_init();
         sched_init_smp();
   
-       cpuset_init_smp();
- 
         do_basic_setup();
   
         /*
diff --combined kernel/irq/handle.c

index 9ebf77968871550a365713d7a29a6f131f231ee5,412370ab9a34cdc8ed5ec0fac33a75b3bbeb7d0d..343acecae629ff37c32013daede12da10b21f7fb
--- 1/kernel/irq/handle.c
--- 2/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@@ -17,6 -17,7 +17,7 @@@
   #include <linux/kernel_stat.h>
   #include <linux/rculist.h>
   #include <linux/hash.h>
+ #include <trace/irq.h>
   #include <linux/bootmem.h>
   
   #include "internals.h"
@@@ -82,21 -83,19 +83,21 @@@ static struct irq_desc irq_desc_init = 
   
   void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
   {
- -      unsigned long bytes;
- -      char *ptr;
         int node;
- -
- -      /* Compute how many bytes we need per irq and allocate them */
- -      bytes = nr * sizeof(unsigned int);
+ +      void *ptr;
   
         node = cpu_to_node(cpu);
- -      ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
- -      printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n", cpu, node);
+ +      ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
   
- -      if (ptr)
- -              desc->kstat_irqs = (unsigned int *)ptr;
+ +      /*
+ +       * don't overwrite if we cannot get a new one:
+ +       * init_copy_kstat_irqs() could still use the old one
+ +       */
+ +      if (ptr) {
+ +              printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
+ +                       cpu, node);
+ +              desc->kstat_irqs = ptr;
+ +      }
   }
   
   static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@@ -239,7 -238,6 +240,7 @@@ struct irq_desc irq_desc[NR_IRQS] __cac
         }
   };
   
+ +static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
   int __init early_irq_init(void)
   {
         struct irq_desc *desc;
@@@ -256,7 -254,6 +257,7 @@@
         for (i = 0; i < count; i++) {
                 desc[i].irq = i;
                 init_alloc_desc_masks(&desc[i], 0, true);
+ +              desc[i].kstat_irqs = kstat_irqs_all[i];
         }
         return arch_early_irq_init();
   }
@@@ -272,11 -269,6 +273,11 @@@ struct irq_desc *irq_to_desc_alloc_cpu(
   }
   #endif /* !CONFIG_SPARSE_IRQ */
   
+ +void clear_kstat_irqs(struct irq_desc *desc)
+ +{
+ +      memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+ +}
+ +
   /*
    * What should we do if we get a hw irq event on an illegal vector?
    * Each architecture has to answer this themself.
@@@ -338,6 -330,9 +339,9 @@@ irqreturn_t no_action(int cpl, void *de
         return IRQ_NONE;
   }
   
+ DEFINE_TRACE(irq_handler_entry);
+ DEFINE_TRACE(irq_handler_exit);
+ 
   /**
    * handle_IRQ_event - irq action chain handler
    * @irq:      the interrupt number
@@@ -350,13 -345,13 +354,15 @@@ irqreturn_t handle_IRQ_event(unsigned i
         irqreturn_t ret, retval = IRQ_NONE;
         unsigned int status = 0;
   
+ +      WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
+ +
         if (!(action->flags & IRQF_DISABLED))
                 local_irq_enable_in_hardirq();
   
         do {
+               trace_irq_handler_entry(irq, action);
                 ret = action->handler(irq, action->dev_id);
+               trace_irq_handler_exit(irq, action, ret);
                 if (ret == IRQ_HANDLED)
                         status |= action->flags;
                 retval |= ret;
@@@ -371,11 -366,6 +377,11 @@@
   }
   
   #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+ +
+ +#ifdef CONFIG_ENABLE_WARN_DEPRECATED
+ +# warning __do_IRQ is deprecated. Please convert to proper flow handlers
+ +#endif
+ +
   /**
    * __do_IRQ - original all in one highlevel IRQ handler
    * @irq:      the interrupt number
@@@ -496,10 -486,12 +502,10 @@@ void early_init_irq_lock_class(void
         }
   }
   
- -#ifdef CONFIG_SPARSE_IRQ
   unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         return desc ? desc->kstat_irqs[cpu] : 0;
   }
- -#endif
   EXPORT_SYMBOL(kstat_irqs_cpu);
   
diff --combined kernel/lockdep.c

index 3673a3f44d9d445f7cdf42f2ee5592731a4c0db7,71b567f5281340c7d2ea77b6097a86e41b583f1d..81b5f33970b8b8c64e07414679a6ab5822fa44fe
--- 1/kernel/lockdep.c
--- 2/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@@ -42,6 -42,7 +42,7 @@@
   #include <linux/hash.h>
   #include <linux/ftrace.h>
   #include <linux/stringify.h>
+ #include <trace/lockdep.h>
   
   #include <asm/sections.h>
   
@@@ -433,13 -434,6 +434,6 @@@ atomic_t nr_find_usage_forwards_checks
   atomic_t nr_find_usage_forwards_recursions;
   atomic_t nr_find_usage_backwards_checks;
   atomic_t nr_find_usage_backwards_recursions;
- # define debug_atomic_inc(ptr)                atomic_inc(ptr)
- # define debug_atomic_dec(ptr)                atomic_dec(ptr)
- # define debug_atomic_read(ptr)               atomic_read(ptr)
- #else
- # define debug_atomic_inc(ptr)                do { } while (0)
- # define debug_atomic_dec(ptr)                do { } while (0)
- # define debug_atomic_read(ptr)               0
   #endif
   
   /*
@@@ -1900,9 -1894,9 +1894,9 @@@ print_irq_inversion_bug(struct task_str
                 curr->comm, task_pid_nr(curr));
         print_lock(this);
         if (forwards)
-               printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
+               printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
         else
-               printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
+               printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
         print_lock_name(other);
         printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
   
@@@ -2015,7 -2009,8 +2009,8 @@@ typedef int (*check_usage_f)(struct tas
                              enum lock_usage_bit bit, const char *name);
   
   static int
- mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit)
+ mark_lock_irq(struct task_struct *curr, struct held_lock *this,
+               enum lock_usage_bit new_bit)
   {
         int excl_bit = exclusive_bit(new_bit);
         int read = new_bit & 1;
@@@ -2043,7 -2038,7 +2038,7 @@@
          * states.
          */
         if ((!read || !dir || STRICT_READ_CHECKS) &&
-                       !usage(curr, this, excl_bit, state_name(new_bit)))
+                       !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
                 return 0;
   
         /*
@@@ -2260,7 -2255,7 +2255,7 @@@ void trace_softirqs_off(unsigned long i
                 debug_atomic_inc(&redundant_softirqs_off);
   }
   
- -void lockdep_trace_alloc(gfp_t gfp_mask)
+ +static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
   {
         struct task_struct *curr = current;
   
@@@ -2279,29 -2274,12 +2274,29 @@@
         if (!(gfp_mask & __GFP_FS))
                 return;
   
- -      if (DEBUG_LOCKS_WARN_ON(irqs_disabled()))
+ +      if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
                 return;
   
         mark_held_locks(curr, RECLAIM_FS);
   }
   
+ +static void check_flags(unsigned long flags);
+ +
+ +void lockdep_trace_alloc(gfp_t gfp_mask)
+ +{
+ +      unsigned long flags;
+ +
+ +      if (unlikely(current->lockdep_recursion))
+ +              return;
+ +
+ +      raw_local_irq_save(flags);
+ +      check_flags(flags);
+ +      current->lockdep_recursion = 1;
+ +      __lockdep_trace_alloc(gfp_mask, flags);
+ +      current->lockdep_recursion = 0;
+ +      raw_local_irq_restore(flags);
+ +}
+ +
   static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
   {
         /*
@@@ -2929,6 -2907,8 +2924,8 @@@ void lock_set_class(struct lockdep_map 
   }
   EXPORT_SYMBOL_GPL(lock_set_class);
   
+ DEFINE_TRACE(lock_acquire);
+ 
   /*
    * We are not always called with irqs disabled - do that here,
    * and also avoid lockdep recursion:
@@@ -2939,6 -2919,8 +2936,8 @@@ void lock_acquire(struct lockdep_map *l
   {
         unsigned long flags;
   
+       trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
+ 
         if (unlikely(current->lockdep_recursion))
                 return;
   
@@@ -2953,11 -2935,15 +2952,15 @@@
   }
   EXPORT_SYMBOL_GPL(lock_acquire);
   
+ DEFINE_TRACE(lock_release);
+ 
   void lock_release(struct lockdep_map *lock, int nested,
                           unsigned long ip)
   {
         unsigned long flags;
   
+       trace_lock_release(lock, nested, ip);
+ 
         if (unlikely(current->lockdep_recursion))
                 return;
   
@@@ -3106,10 -3092,14 +3109,14 @@@ found_it
         lock->ip = ip;
   }
   
+ DEFINE_TRACE(lock_contended);
+ 
   void lock_contended(struct lockdep_map *lock, unsigned long ip)
   {
         unsigned long flags;
   
+       trace_lock_contended(lock, ip);
+ 
         if (unlikely(!lock_stat))
                 return;
   
@@@ -3125,10 -3115,14 +3132,14 @@@
   }
   EXPORT_SYMBOL_GPL(lock_contended);
   
+ DEFINE_TRACE(lock_acquired);
+ 
   void lock_acquired(struct lockdep_map *lock, unsigned long ip)
   {
         unsigned long flags;
   
+       trace_lock_acquired(lock, ip);
+ 
         if (unlikely(!lock_stat))
                 return;
   
diff --combined kernel/module.c

index f77ac320d0b51d021b52ba4c48dba5680e7c4d01,7fa134e0cc24610c25f5d296b32bf5c4253f28a7..41f50605eed03e8ea0538c1ce2db52ee080f62dc
--- 1/kernel/module.c
--- 2/kernel/module.c
+++ b/kernel/module.c
@@@ -856,7 -856,7 +856,7 @@@ SYSCALL_DEFINE2(delete_module, const ch
         mutex_lock(&module_mutex);
         /* Store the name of the last unloaded module for diagnostic purposes */
         strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
- -      unregister_dynamic_debug_module(mod->name);
+ +      ddebug_remove_module(mod->name);
         free_module(mod);
   
    out:
@@@ -1861,13 -1861,19 +1861,13 @@@ static inline void add_kallsyms(struct 
   }
   #endif /* CONFIG_KALLSYMS */
   
- -static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
+ +static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
   {
- -#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
- -      unsigned int i;
- -
- -      for (i = 0; i < num; i++) {
- -              register_dynamic_debug_module(debug[i].modname,
- -                                            debug[i].type,
- -                                            debug[i].logical_modname,
- -                                            debug[i].flag_names,
- -                                            debug[i].hash, debug[i].hash2);
- -      }
- -#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+ +#ifdef CONFIG_DYNAMIC_DEBUG
+ +      if (ddebug_add_module(debug, num, debug->modname))
+ +              printk(KERN_ERR "dynamic debug error adding module: %s\n",
+ +                                      debug->modname);
+ +#endif
   }
   
   static void *module_alloc_update_bounds(unsigned long size)
@@@ -2241,13 -2247,12 +2241,13 @@@ static noinline struct module *load_mod
         add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
   
         if (!mod->taints) {
- -              struct mod_debug *debug;
+ +              struct _ddebug *debug;
                 unsigned int num_debug;
   
                 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
                                      sizeof(*debug), &num_debug);
- -              dynamic_printk_setup(debug, num_debug);
+ +              if (debug)
+ +                      dynamic_debug_setup(debug, num_debug);
         }
   
         /* sechdrs[0].sh_size is always zero */
@@@ -2766,7 -2771,7 +2766,7 @@@ int is_module_address(unsigned long add
   
   
   /* Is this a valid kernel address? */
- __notrace_funcgraph struct module *__module_text_address(unsigned long addr)
+ struct module *__module_text_address(unsigned long addr)
   {
         struct module *mod;
   
diff --combined kernel/relay.c

index 8f2179c8056ff9f9de0b2679588d86589393b508,edc0ba6d81607e225ff8083397dc04eeeffdd871..824b91ac10f1f94f3990d5db6f7437d161f0f2a4
--- 1/kernel/relay.c
--- 2/kernel/relay.c
+++ b/kernel/relay.c
@@@ -677,9 -677,7 +677,7 @@@ int relay_late_setup_files(struct rcha
          */
         for_each_online_cpu(i) {
                 if (unlikely(!chan->buf[i])) {
-                       printk(KERN_ERR "relay_late_setup_files: CPU %u "
-                                       "has no buffer, it must have!\n", i);
-                       BUG();
+                       WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
                         err = -EINVAL;
                         break;
                 }
@@@ -750,7 -748,7 +748,7 @@@ size_t relay_switch_subbuf(struct rchan
                          * from the scheduler (trying to re-grab
                          * rq->lock), so defer it.
                          */
- -                      __mod_timer(&buf->timer, jiffies + 1);
+ +                      mod_timer(&buf->timer, jiffies + 1);
         }
   
         old = buf->data;
diff --combined kernel/sched.c

index 73513f4e19df1b9683d5929c926c63fb8663e3e1,7299083e69e7a727cb64365d534477d1a5acd6bc..f01cb63d135622cd969cb3e365aad460f662741e
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -331,13 -331,6 +331,13 @@@ static DEFINE_PER_CPU(struct rt_rq, ini
    */
   static DEFINE_SPINLOCK(task_group_lock);
   
+ +#ifdef CONFIG_SMP
+ +static int root_task_group_empty(void)
+ +{
+ +      return list_empty(&root_task_group.children);
+ +}
+ +#endif
+ +
   #ifdef CONFIG_FAIR_GROUP_SCHED
   #ifdef CONFIG_USER_SCHED
   # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
@@@ -398,13 -391,6 +398,13 @@@ static inline void set_task_rq(struct t
   
   #else
   
+ +#ifdef CONFIG_SMP
+ +static int root_task_group_empty(void)
+ +{
+ +      return 1;
+ +}
+ +#endif
+ +
   static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
   static inline struct task_group *task_group(struct task_struct *p)
   {
@@@ -481,17 -467,11 +481,17 @@@ struct rt_rq 
         struct rt_prio_array active;
         unsigned long rt_nr_running;
   #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
- -      int highest_prio; /* highest queued rt task prio */
+ +      struct {
+ +              int curr; /* highest queued rt task prio */
+ +#ifdef CONFIG_SMP
+ +              int next; /* next highest */
+ +#endif
+ +      } highest_prio;
   #endif
   #ifdef CONFIG_SMP
         unsigned long rt_nr_migratory;
         int overloaded;
+ +      struct plist_head pushable_tasks;
   #endif
         int rt_throttled;
         u64 rt_time;
@@@ -569,6 -549,7 +569,6 @@@ struct rq 
         unsigned long nr_running;
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
- -      unsigned char idle_at_tick;
   #ifdef CONFIG_NO_HZ
         unsigned long last_tick_seen;
         unsigned char in_nohz_recently;
@@@ -609,7 -590,6 +609,7 @@@
         struct root_domain *rd;
         struct sched_domain *sd;
   
+ +      unsigned char idle_at_tick;
         /* For active balancing */
         int active_balance;
         int push_cpu;
@@@ -638,6 -618,9 +638,6 @@@
         /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
   
         /* sys_sched_yield() stats */
- -      unsigned int yld_exp_empty;
- -      unsigned int yld_act_empty;
- -      unsigned int yld_both_empty;
         unsigned int yld_count;
   
         /* schedule() stats */
@@@ -1200,10 -1183,10 +1200,10 @@@ static void resched_task(struct task_st
   
         assert_spin_locked(&task_rq(p)->lock);
   
- -      if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ +      if (test_tsk_need_resched(p))
                 return;
   
- -      set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+ +      set_tsk_need_resched(p);
   
         cpu = task_cpu(p);
         if (cpu == smp_processor_id())
@@@ -1259,7 -1242,7 +1259,7 @@@ void wake_up_idle_cpu(int cpu
          * lockless. The worst case is that the other CPU runs the
          * idle task through an additional NOOP schedule()
          */
- -      set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+ +      set_tsk_need_resched(rq->idle);
   
         /* NEED_RESCHED must be visible before we test polling */
         smp_mb();
@@@ -1627,42 -1610,21 +1627,42 @@@ static inline void update_shares_locked
   
   #endif
   
+ +#ifdef CONFIG_PREEMPT
+ +
   /*
- - * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ + * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ + * way at the expense of forcing extra atomic operations in all
+ + * invocations.  This assures that the double_lock is acquired using the
+ + * same underlying policy as the spinlock_t on this architecture, which
+ + * reduces latency compared to the unfair variant below.  However, it
+ + * also adds more overhead and therefore may reduce throughput.
    */
- -static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ +static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ +      __releases(this_rq->lock)
+ +      __acquires(busiest->lock)
+ +      __acquires(this_rq->lock)
+ +{
+ +      spin_unlock(&this_rq->lock);
+ +      double_rq_lock(this_rq, busiest);
+ +
+ +      return 1;
+ +}
+ +
+ +#else
+ +/*
+ + * Unfair double_lock_balance: Optimizes throughput at the expense of
+ + * latency by eliminating extra atomic operations when the locks are
+ + * already in proper order on entry.  This favors lower cpu-ids and will
+ + * grant the double lock to lower cpus over higher ids under contention,
+ + * regardless of entry order into the function.
+ + */
+ +static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(this_rq->lock)
         __acquires(busiest->lock)
         __acquires(this_rq->lock)
   {
         int ret = 0;
   
- -      if (unlikely(!irqs_disabled())) {
- -              /* printk() doesn't work good under rq->lock */
- -              spin_unlock(&this_rq->lock);
- -              BUG_ON(1);
- -      }
         if (unlikely(!spin_trylock(&busiest->lock))) {
                 if (busiest < this_rq) {
                         spin_unlock(&this_rq->lock);
@@@ -1675,22 -1637,6 +1675,22 @@@
         return ret;
   }
   
+ +#endif /* CONFIG_PREEMPT */
+ +
+ +/*
+ + * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ + */
+ +static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ +{
+ +      if (unlikely(!irqs_disabled())) {
+ +              /* printk() doesn't work well under rq->lock */
+ +              spin_unlock(&this_rq->lock);
+ +              BUG_ON(1);
+ +      }
+ +
+ +      return _double_lock_balance(this_rq, busiest);
+ +}
+ +
   static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
   {
@@@ -1759,9 -1705,6 +1759,9 @@@ static void update_avg(u64 *avg, u64 sa
   
   static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
   {
+ +      if (wakeup)
+ +              p->se.start_runtime = p->se.sum_exec_runtime;
+ +
         sched_info_queued(p);
         p->sched_class->enqueue_task(rq, p, wakeup);
         p->se.on_rq = 1;
@@@ -1769,15 -1712,10 +1769,15 @@@
   
   static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
   {
- -      if (sleep && p->se.last_wakeup) {
- -              update_avg(&p->se.avg_overlap,
- -                         p->se.sum_exec_runtime - p->se.last_wakeup);
- -              p->se.last_wakeup = 0;
+ +      if (sleep) {
+ +              if (p->se.last_wakeup) {
+ +                      update_avg(&p->se.avg_overlap,
+ +                              p->se.sum_exec_runtime - p->se.last_wakeup);
+ +                      p->se.last_wakeup = 0;
+ +              } else {
+ +                      update_avg(&p->se.avg_wakeup,
+ +                              sysctl_sched_wakeup_granularity);
+ +              }
         }
   
         sched_info_dequeued(p);
@@@ -2079,7 -2017,7 +2079,7 @@@ unsigned long wait_task_inactive(struc
                  * it must be off the runqueue _entirely_, and not
                  * preempted!
                  *
- -               * So if it wa still runnable (but just not actively
+ +               * So if it was still runnable (but just not actively
                  * running right now), it's preempted, and we should
                  * yield - it could be a while.
                  */
@@@ -2329,7 -2267,7 +2329,7 @@@ static int try_to_wake_up(struct task_s
                 sync = 0;
   
   #ifdef CONFIG_SMP
- -      if (sched_feat(LB_WAKEUP_UPDATE)) {
+ +      if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
                 struct sched_domain *sd;
   
                 this_cpu = raw_smp_processor_id();
@@@ -2407,22 -2345,6 +2407,22 @@@ out_activate
         activate_task(rq, p, 1);
         success = 1;
   
+ +      /*
+ +       * Only attribute actual wakeups done by this task.
+ +       */
+ +      if (!in_interrupt()) {
+ +              struct sched_entity *se = &current->se;
+ +              u64 sample = se->sum_exec_runtime;
+ +
+ +              if (se->last_wakeup)
+ +                      sample -= se->last_wakeup;
+ +              else
+ +                      sample -= se->start_runtime;
+ +              update_avg(&se->avg_wakeup, sample);
+ +
+ +              se->last_wakeup = se->sum_exec_runtime;
+ +      }
+ +
   out_running:
         trace_sched_wakeup(rq, p, success);
         check_preempt_curr(rq, p, sync);
@@@ -2433,6 -2355,8 +2433,6 @@@
                 p->sched_class->task_wake_up(rq, p);
   #endif
   out:
- -      current->se.last_wakeup = current->se.sum_exec_runtime;
- -
         task_rq_unlock(rq, &flags);
   
         return success;
@@@ -2462,8 -2386,6 +2462,8 @@@ static void __sched_fork(struct task_st
         p->se.prev_sum_exec_runtime     = 0;
         p->se.last_wakeup               = 0;
         p->se.avg_overlap               = 0;
+ +      p->se.start_runtime             = 0;
+ +      p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
   
   #ifdef CONFIG_SCHEDSTATS
         p->se.wait_start                = 0;
@@@ -2526,8 -2448,6 +2526,8 @@@ void sched_fork(struct task_struct *p, 
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
   #endif
+ +      plist_node_init(&p->pushable_tasks, MAX_PRIO);
+ +
         put_cpu();
   }
   
@@@ -2571,7 -2491,7 +2571,7 @@@ void wake_up_new_task(struct task_struc
   #ifdef CONFIG_PREEMPT_NOTIFIERS
   
   /**
- - * preempt_notifier_register - tell me when current is being being preempted & rescheduled
+ + * preempt_notifier_register - tell me when current is being preempted & rescheduled
    * @notifier: notifier struct to register
    */
   void preempt_notifier_register(struct preempt_notifier *notifier)
@@@ -2668,12 -2588,6 +2668,12 @@@ static void finish_task_switch(struct r
   {
         struct mm_struct *mm = rq->prev_mm;
         long prev_state;
+ +#ifdef CONFIG_SMP
+ +      int post_schedule = 0;
+ +
+ +      if (current->sched_class->needs_post_schedule)
+ +              post_schedule = current->sched_class->needs_post_schedule(rq);
+ +#endif
   
         rq->prev_mm = NULL;
   
@@@ -2692,7 -2606,7 +2692,7 @@@
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
   #ifdef CONFIG_SMP
- -      if (current->sched_class->post_schedule)
+ +      if (post_schedule)
                 current->sched_class->post_schedule(rq);
   #endif
   
@@@ -2999,7 -2913,6 +2999,7 @@@ int can_migrate_task(struct task_struc
                      struct sched_domain *sd, enum cpu_idle_type idle,
                      int *all_pinned)
   {
+ +      int tsk_cache_hot = 0;
         /*
          * We do not migrate tasks that are:
          * 1) running (obviously), or
@@@ -3023,11 -2936,10 +3023,11 @@@
          * 2) too many balance attempts have failed.
          */
   
- -      if (!task_hot(p, rq->clock, sd) ||
- -                      sd->nr_balance_failed > sd->cache_nice_tries) {
+ +      tsk_cache_hot = task_hot(p, rq->clock, sd);
+ +      if (!tsk_cache_hot ||
+ +              sd->nr_balance_failed > sd->cache_nice_tries) {
   #ifdef CONFIG_SCHEDSTATS
- -              if (task_hot(p, rq->clock, sd)) {
+ +              if (tsk_cache_hot) {
                         schedstat_inc(sd, lb_hot_gained[idle]);
                         schedstat_inc(p, se.nr_forced_migrations);
                 }
@@@ -3035,7 -2947,7 +3035,7 @@@
                 return 1;
         }
   
- -      if (task_hot(p, rq->clock, sd)) {
+ +      if (tsk_cache_hot) {
                 schedstat_inc(p, se.nr_failed_migrations_hot);
                 return 0;
         }
@@@ -3075,16 -2987,6 +3075,16 @@@ next
         pulled++;
         rem_load_move -= p->se.load.weight;
   
+ +#ifdef CONFIG_PREEMPT
+ +      /*
+ +       * NEWIDLE balancing is a source of latency, so preemptible kernels
+ +       * will stop after the first task is pulled to minimize the critical
+ +       * section.
+ +       */
+ +      if (idle == CPU_NEWLY_IDLE)
+ +              goto out;
+ +#endif
+ +
         /*
          * We only want to steal up to the prescribed amount of weighted load.
          */
@@@ -3131,15 -3033,9 +3131,15 @@@ static int move_tasks(struct rq *this_r
                                 sd, idle, all_pinned, &this_best_prio);
                 class = class->next;
   
+ +#ifdef CONFIG_PREEMPT
+ +              /*
+ +               * NEWIDLE balancing is a source of latency, so preemptible
+ +               * kernels will stop after the first task is pulled to minimize
+ +               * the critical section.
+ +               */
                 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
                         break;
- -
+ +#endif
         } while (class && max_load_move > total_load_moved);
   
         return total_load_moved > 0;
@@@ -3189,480 -3085,246 +3189,480 @@@ static int move_one_task(struct rq *thi
   
         return 0;
   }
- -
+ +/********** Helpers for find_busiest_group ************************/
   /*
- - * find_busiest_group finds and returns the busiest CPU group within the
- - * domain. It calculates and returns the amount of weighted load which
- - * should be moved to restore balance via the imbalance parameter.
+ + * sd_lb_stats - Structure to store the statistics of a sched_domain
+ + *            during load balancing.
    */
- -static struct sched_group *
- -find_busiest_group(struct sched_domain *sd, int this_cpu,
- -                 unsigned long *imbalance, enum cpu_idle_type idle,
- -                 int *sd_idle, const struct cpumask *cpus, int *balance)
- -{
- -      struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
- -      unsigned long max_load, avg_load, total_load, this_load, total_pwr;
- -      unsigned long max_pull;
- -      unsigned long busiest_load_per_task, busiest_nr_running;
- -      unsigned long this_load_per_task, this_nr_running;
- -      int load_idx, group_imb = 0;
+ +struct sd_lb_stats {
+ +      struct sched_group *busiest; /* Busiest group in this sd */
+ +      struct sched_group *this;  /* Local group in this sd */
+ +      unsigned long total_load;  /* Total load of all groups in sd */
+ +      unsigned long total_pwr;   /*   Total power of all groups in sd */
+ +      unsigned long avg_load;    /* Average load across all groups in sd */
+ +
+ +      /* Statistics of this group */
+ +      unsigned long this_load;
+ +      unsigned long this_load_per_task;
+ +      unsigned long this_nr_running;
+ +
+ +      /* Statistics of the busiest group */
+ +      unsigned long max_load;
+ +      unsigned long busiest_load_per_task;
+ +      unsigned long busiest_nr_running;
+ +
+ +      int group_imb; /* Is there imbalance in this sd */
   #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
- -      int power_savings_balance = 1;
- -      unsigned long leader_nr_running = 0, min_load_per_task = 0;
- -      unsigned long min_nr_running = ULONG_MAX;
- -      struct sched_group *group_min = NULL, *group_leader = NULL;
+ +      int power_savings_balance; /* Is powersave balance needed for this sd */
+ +      struct sched_group *group_min; /* Least loaded group in sd */
+ +      struct sched_group *group_leader; /* Group which relieves group_min */
+ +      unsigned long min_load_per_task; /* load_per_task in group_min */
+ +      unsigned long leader_nr_running; /* Nr running of group_leader */
+ +      unsigned long min_nr_running; /* Nr running of group_min */
   #endif
+ +};
+ +
+ +/*
+ + * sg_lb_stats - stats of a sched_group required for load_balancing
+ + */
+ +struct sg_lb_stats {
+ +      unsigned long avg_load; /* Avg load across the CPUs of the group */
+ +      unsigned long group_load; /* Total load over the CPUs of the group */
+ +      unsigned long sum_nr_running; /* Nr tasks running in the group */
+ +      unsigned long sum_weighted_load; /* Weighted load of group's tasks */
+ +      unsigned long group_capacity;
+ +      int group_imb; /* Is there an imbalance in the group ? */
+ +};
+ +
+ +/**
+ + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+ + * @group: The group whose first cpu is to be returned.
+ + */
+ +static inline unsigned int group_first_cpu(struct sched_group *group)
+ +{
+ +      return cpumask_first(sched_group_cpus(group));
+ +}
   
- -      max_load = this_load = total_load = total_pwr = 0;
- -      busiest_load_per_task = busiest_nr_running = 0;
- -      this_load_per_task = this_nr_running = 0;
+ +/**
+ + * get_sd_load_idx - Obtain the load index for a given sched domain.
+ + * @sd: The sched_domain whose load_idx is to be obtained.
+ + * @idle: The Idle status of the CPU for whose sd load_idx is obtained.
+ + */
+ +static inline int get_sd_load_idx(struct sched_domain *sd,
+ +                                      enum cpu_idle_type idle)
+ +{
+ +      int load_idx;
   
- -      if (idle == CPU_NOT_IDLE)
+ +      switch (idle) {
+ +      case CPU_NOT_IDLE:
                 load_idx = sd->busy_idx;
- -      else if (idle == CPU_NEWLY_IDLE)
+ +              break;
+ +
+ +      case CPU_NEWLY_IDLE:
                 load_idx = sd->newidle_idx;
- -      else
+ +              break;
+ +      default:
                 load_idx = sd->idle_idx;
+ +              break;
+ +      }
   
- -      do {
- -              unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
- -              int local_group;
- -              int i;
- -              int __group_imb = 0;
- -              unsigned int balance_cpu = -1, first_idle_cpu = 0;
- -              unsigned long sum_nr_running, sum_weighted_load;
- -              unsigned long sum_avg_load_per_task;
- -              unsigned long avg_load_per_task;
+ +      return load_idx;
+ +}
   
- -              local_group = cpumask_test_cpu(this_cpu,
- -                                             sched_group_cpus(group));
   
- -              if (local_group)
- -                      balance_cpu = cpumask_first(sched_group_cpus(group));
+ +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ +/**
+ + * init_sd_power_savings_stats - Initialize power savings statistics for
+ + * the given sched_domain, during load balancing.
+ + *
+ + * @sd: Sched domain whose power-savings statistics are to be initialized.
+ + * @sds: Variable containing the statistics for sd.
+ + * @idle: Idle status of the CPU at which we're performing load-balancing.
+ + */
+ +static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+ +      struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ +{
+ +      /*
+ +       * Busy processors will not participate in power savings
+ +       * balance.
+ +       */
+ +      if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
+ +              sds->power_savings_balance = 0;
+ +      else {
+ +              sds->power_savings_balance = 1;
+ +              sds->min_nr_running = ULONG_MAX;
+ +              sds->leader_nr_running = 0;
+ +      }
+ +}
   
- -              /* Tally up the load of all CPUs in the group */
- -              sum_weighted_load = sum_nr_running = avg_load = 0;
- -              sum_avg_load_per_task = avg_load_per_task = 0;
+ +/**
+ + * update_sd_power_savings_stats - Update the power saving stats for a
+ + * sched_domain while performing load balancing.
+ + *
+ + * @group: sched_group belonging to the sched_domain under consideration.
+ + * @sds: Variable containing the statistics of the sched_domain
+ + * @local_group: Does group contain the CPU for which we're performing
+ + *            load balancing ?
+ + * @sgs: Variable containing the statistics of the group.
+ + */
+ +static inline void update_sd_power_savings_stats(struct sched_group *group,
+ +      struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ +{
   
- -              max_cpu_load = 0;
- -              min_cpu_load = ~0UL;
+ +      if (!sds->power_savings_balance)
+ +              return;
   
- -              for_each_cpu_and(i, sched_group_cpus(group), cpus) {
- -                      struct rq *rq = cpu_rq(i);
+ +      /*
+ +       * If the local group is idle or completely loaded
+ +       * no need to do power savings balance at this domain
+ +       */
+ +      if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
+ +                              !sds->this_nr_running))
+ +              sds->power_savings_balance = 0;
   
- -                      if (*sd_idle && rq->nr_running)
- -                              *sd_idle = 0;
+ +      /*
+ +       * If a group is already running at full capacity or idle,
+ +       * don't include that group in power savings calculations
+ +       */
+ +      if (!sds->power_savings_balance ||
+ +              sgs->sum_nr_running >= sgs->group_capacity ||
+ +              !sgs->sum_nr_running)
+ +              return;
   
- -                      /* Bias balancing toward cpus of our domain */
- -                      if (local_group) {
- -                              if (idle_cpu(i) && !first_idle_cpu) {
- -                                      first_idle_cpu = 1;
- -                                      balance_cpu = i;
- -                              }
+ +      /*
+ +       * Calculate the group which has the least non-idle load.
+ +       * This is the group from where we need to pick up the load
+ +       * for saving power
+ +       */
+ +      if ((sgs->sum_nr_running < sds->min_nr_running) ||
+ +          (sgs->sum_nr_running == sds->min_nr_running &&
+ +           group_first_cpu(group) > group_first_cpu(sds->group_min))) {
+ +              sds->group_min = group;
+ +              sds->min_nr_running = sgs->sum_nr_running;
+ +              sds->min_load_per_task = sgs->sum_weighted_load /
+ +                                              sgs->sum_nr_running;
+ +      }
   
- -                              load = target_load(i, load_idx);
- -                      } else {
- -                              load = source_load(i, load_idx);
- -                              if (load > max_cpu_load)
- -                                      max_cpu_load = load;
- -                              if (min_cpu_load > load)
- -                                      min_cpu_load = load;
- -                      }
+ +      /*
+ +       * Calculate the group which is almost near its
+ +       * capacity but still has some space to pick up some load
+ +       * from other group and save more power
+ +       */
+ +      if (sgs->sum_nr_running > sgs->group_capacity - 1)
+ +              return;
   
- -                      avg_load += load;
- -                      sum_nr_running += rq->nr_running;
- -                      sum_weighted_load += weighted_cpuload(i);
+ +      if (sgs->sum_nr_running > sds->leader_nr_running ||
+ +          (sgs->sum_nr_running == sds->leader_nr_running &&
+ +           group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
+ +              sds->group_leader = group;
+ +              sds->leader_nr_running = sgs->sum_nr_running;
+ +      }
+ +}
   
- -                      sum_avg_load_per_task += cpu_avg_load_per_task(i);
- -              }
+ +/**
+ + * check_power_save_busiest_group - see if there is potential for some power-savings balance
+ + * @sds: Variable containing the statistics of the sched_domain
+ + *    under consideration.
+ + * @this_cpu: Cpu at which we're currently performing load-balancing.
+ + * @imbalance: Variable to store the imbalance.
+ + *
+ + * Description:
+ + * Check if we have potential to perform some power-savings balance.
+ + * If yes, set the busiest group to be the least loaded group in the
+ + * sched_domain, so that its CPUs can be put to idle.
+ + *
+ + * Returns 1 if there is potential to perform power-savings balance.
+ + * Else returns 0.
+ + */
+ +static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+ +                                      int this_cpu, unsigned long *imbalance)
+ +{
+ +      if (!sds->power_savings_balance)
+ +              return 0;
   
- -              /*
- -               * First idle cpu or the first cpu(busiest) in this sched group
- -               * is eligible for doing load balancing at this and above
- -               * domains. In the newly idle case, we will allow all the cpu's
- -               * to do the newly idle load balance.
- -               */
- -              if (idle != CPU_NEWLY_IDLE && local_group &&
- -                  balance_cpu != this_cpu && balance) {
- -                      *balance = 0;
- -                      goto ret;
- -              }
+ +      if (sds->this != sds->group_leader ||
+ +                      sds->group_leader == sds->group_min)
+ +              return 0;
   
- -              total_load += avg_load;
- -              total_pwr += group->__cpu_power;
+ +      *imbalance = sds->min_load_per_task;
+ +      sds->busiest = sds->group_min;
   
- -              /* Adjust by relative CPU power of the group */
- -              avg_load = sg_div_cpu_power(group,
- -                              avg_load * SCHED_LOAD_SCALE);
+ +      if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+ +              cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+ +                      group_first_cpu(sds->group_leader);
+ +      }
   
+ +      return 1;
   
- -              /*
- -               * Consider the group unbalanced when the imbalance is larger
- -               * than the average weight of two tasks.
- -               *
- -               * APZ: with cgroup the avg task weight can vary wildly and
- -               *      might not be a suitable number - should we keep a
- -               *      normalized nr_running number somewhere that negates
- -               *      the hierarchy?
- -               */
- -              avg_load_per_task = sg_div_cpu_power(group,
- -                              sum_avg_load_per_task * SCHED_LOAD_SCALE);
+ +}
+ +#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+ +static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+ +      struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ +{
+ +      return;
+ +}
   
- -              if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
- -                      __group_imb = 1;
+ +static inline void update_sd_power_savings_stats(struct sched_group *group,
+ +      struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ +{
+ +      return;
+ +}
+ +
+ +static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+ +                                      int this_cpu, unsigned long *imbalance)
+ +{
+ +      return 0;
+ +}
+ +#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+ +
+ +
+ +/**
+ + * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ + * @group: sched_group whose statistics are to be updated.
+ + * @this_cpu: Cpu for which load balance is currently performed.
+ + * @idle: Idle status of this_cpu
+ + * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ + * @sd_idle: Idle status of the sched_domain containing group.
+ + * @local_group: Does group contain this_cpu.
+ + * @cpus: Set of cpus considered for load balancing.
+ + * @balance: Should we balance.
+ + * @sgs: variable to hold the statistics for this group.
+ + */
+ +static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+ +                      enum cpu_idle_type idle, int load_idx, int *sd_idle,
+ +                      int local_group, const struct cpumask *cpus,
+ +                      int *balance, struct sg_lb_stats *sgs)
+ +{
+ +      unsigned long load, max_cpu_load, min_cpu_load;
+ +      int i;
+ +      unsigned int balance_cpu = -1, first_idle_cpu = 0;
+ +      unsigned long sum_avg_load_per_task;
+ +      unsigned long avg_load_per_task;
   
- -              group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+ +      if (local_group)
+ +              balance_cpu = group_first_cpu(group);
   
+ +      /* Tally up the load of all CPUs in the group */
+ +      sum_avg_load_per_task = avg_load_per_task = 0;
+ +      max_cpu_load = 0;
+ +      min_cpu_load = ~0UL;
+ +
+ +      for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+ +              struct rq *rq = cpu_rq(i);
+ +
+ +              if (*sd_idle && rq->nr_running)
+ +                      *sd_idle = 0;
+ +
+ +              /* Bias balancing toward cpus of our domain */
                 if (local_group) {
- -                      this_load = avg_load;
- -                      this = group;
- -                      this_nr_running = sum_nr_running;
- -                      this_load_per_task = sum_weighted_load;
- -              } else if (avg_load > max_load &&
- -                         (sum_nr_running > group_capacity || __group_imb)) {
- -                      max_load = avg_load;
- -                      busiest = group;
- -                      busiest_nr_running = sum_nr_running;
- -                      busiest_load_per_task = sum_weighted_load;
- -                      group_imb = __group_imb;
+ +                      if (idle_cpu(i) && !first_idle_cpu) {
+ +                              first_idle_cpu = 1;
+ +                              balance_cpu = i;
+ +                      }
+ +
+ +                      load = target_load(i, load_idx);
+ +              } else {
+ +                      load = source_load(i, load_idx);
+ +                      if (load > max_cpu_load)
+ +                              max_cpu_load = load;
+ +                      if (min_cpu_load > load)
+ +                              min_cpu_load = load;
                 }
   
- -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
- -              /*
- -               * Busy processors will not participate in power savings
- -               * balance.
- -               */
- -              if (idle == CPU_NOT_IDLE ||
- -                              !(sd->flags & SD_POWERSAVINGS_BALANCE))
- -                      goto group_next;
+ +              sgs->group_load += load;
+ +              sgs->sum_nr_running += rq->nr_running;
+ +              sgs->sum_weighted_load += weighted_cpuload(i);
   
- -              /*
- -               * If the local group is idle or completely loaded
- -               * no need to do power savings balance at this domain
- -               */
- -              if (local_group && (this_nr_running >= group_capacity ||
- -                                  !this_nr_running))
- -                      power_savings_balance = 0;
+ +              sum_avg_load_per_task += cpu_avg_load_per_task(i);
+ +      }
   
- -              /*
- -               * If a group is already running at full capacity or idle,
- -               * don't include that group in power savings calculations
- -               */
- -              if (!power_savings_balance || sum_nr_running >= group_capacity
- -                  || !sum_nr_running)
- -                      goto group_next;
+ +      /*
+ +       * First idle cpu or the first cpu(busiest) in this sched group
+ +       * is eligible for doing load balancing at this and above
+ +       * domains. In the newly idle case, we will allow all the cpu's
+ +       * to do the newly idle load balance.
+ +       */
+ +      if (idle != CPU_NEWLY_IDLE && local_group &&
+ +          balance_cpu != this_cpu && balance) {
+ +              *balance = 0;
+ +              return;
+ +      }
   
- -              /*
- -               * Calculate the group which has the least non-idle load.
- -               * This is the group from where we need to pick up the load
- -               * for saving power
- -               */
- -              if ((sum_nr_running < min_nr_running) ||
- -                  (sum_nr_running == min_nr_running &&
- -                   cpumask_first(sched_group_cpus(group)) >
- -                   cpumask_first(sched_group_cpus(group_min)))) {
- -                      group_min = group;
- -                      min_nr_running = sum_nr_running;
- -                      min_load_per_task = sum_weighted_load /
- -                                              sum_nr_running;
- -              }
+ +      /* Adjust by relative CPU power of the group */
+ +      sgs->avg_load = sg_div_cpu_power(group,
+ +                      sgs->group_load * SCHED_LOAD_SCALE);
   
- -              /*
- -               * Calculate the group which is almost near its
- -               * capacity but still has some space to pick up some load
- -               * from other group and save more power
- -               */
- -              if (sum_nr_running <= group_capacity - 1) {
- -                      if (sum_nr_running > leader_nr_running ||
- -                          (sum_nr_running == leader_nr_running &&
- -                           cpumask_first(sched_group_cpus(group)) <
- -                           cpumask_first(sched_group_cpus(group_leader)))) {
- -                              group_leader = group;
- -                              leader_nr_running = sum_nr_running;
- -                      }
+ +
+ +      /*
+ +       * Consider the group unbalanced when the imbalance is larger
+ +       * than the average weight of two tasks.
+ +       *
+ +       * APZ: with cgroup the avg task weight can vary wildly and
+ +       *      might not be a suitable number - should we keep a
+ +       *      normalized nr_running number somewhere that negates
+ +       *      the hierarchy?
+ +       */
+ +      avg_load_per_task = sg_div_cpu_power(group,
+ +                      sum_avg_load_per_task * SCHED_LOAD_SCALE);
+ +
+ +      if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+ +              sgs->group_imb = 1;
+ +
+ +      sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+ +
+ +}
+ +
+ +/**
+ + * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
+ + * @sd: sched_domain whose statistics are to be updated.
+ + * @this_cpu: Cpu for which load balance is currently performed.
+ + * @idle: Idle status of this_cpu
+ + * @sd_idle: Idle status of the sched_domain containing group.
+ + * @cpus: Set of cpus considered for load balancing.
+ + * @balance: Should we balance.
+ + * @sds: variable to hold the statistics for this sched_domain.
+ + */
+ +static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
+ +                      enum cpu_idle_type idle, int *sd_idle,
+ +                      const struct cpumask *cpus, int *balance,
+ +                      struct sd_lb_stats *sds)
+ +{
+ +      struct sched_group *group = sd->groups;
+ +      struct sg_lb_stats sgs;
+ +      int load_idx;
+ +
+ +      init_sd_power_savings_stats(sd, sds, idle);
+ +      load_idx = get_sd_load_idx(sd, idle);
+ +
+ +      do {
+ +              int local_group;
+ +
+ +              local_group = cpumask_test_cpu(this_cpu,
+ +                                             sched_group_cpus(group));
+ +              memset(&sgs, 0, sizeof(sgs));
+ +              update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+ +                              local_group, cpus, balance, &sgs);
+ +
+ +              if (local_group && balance && !(*balance))
+ +                      return;
+ +
+ +              sds->total_load += sgs.group_load;
+ +              sds->total_pwr += group->__cpu_power;
+ +
+ +              if (local_group) {
+ +                      sds->this_load = sgs.avg_load;
+ +                      sds->this = group;
+ +                      sds->this_nr_running = sgs.sum_nr_running;
+ +                      sds->this_load_per_task = sgs.sum_weighted_load;
+ +              } else if (sgs.avg_load > sds->max_load &&
+ +                         (sgs.sum_nr_running > sgs.group_capacity ||
+ +                              sgs.group_imb)) {
+ +                      sds->max_load = sgs.avg_load;
+ +                      sds->busiest = group;
+ +                      sds->busiest_nr_running = sgs.sum_nr_running;
+ +                      sds->busiest_load_per_task = sgs.sum_weighted_load;
+ +                      sds->group_imb = sgs.group_imb;
                 }
- -group_next:
- -#endif
+ +
+ +              update_sd_power_savings_stats(group, sds, local_group, &sgs);
                 group = group->next;
         } while (group != sd->groups);
   
- -      if (!busiest || this_load >= max_load || busiest_nr_running == 0)
- -              goto out_balanced;
- -
- -      avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
+ +}
   
- -      if (this_load >= avg_load ||
- -                      100*max_load <= sd->imbalance_pct*this_load)
- -              goto out_balanced;
+ +/**
+ + * fix_small_imbalance - Calculate the minor imbalance that exists
+ + *                    amongst the groups of a sched_domain, during
+ + *                    load balancing.
+ + * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
+ + * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+ + * @imbalance: Variable to store the imbalance.
+ + */
+ +static inline void fix_small_imbalance(struct sd_lb_stats *sds,
+ +                              int this_cpu, unsigned long *imbalance)
+ +{
+ +      unsigned long tmp, pwr_now = 0, pwr_move = 0;
+ +      unsigned int imbn = 2;
+ +
+ +      if (sds->this_nr_running) {
+ +              sds->this_load_per_task /= sds->this_nr_running;
+ +              if (sds->busiest_load_per_task >
+ +                              sds->this_load_per_task)
+ +                      imbn = 1;
+ +      } else
+ +              sds->this_load_per_task =
+ +                      cpu_avg_load_per_task(this_cpu);
   
- -      busiest_load_per_task /= busiest_nr_running;
- -      if (group_imb)
- -              busiest_load_per_task = min(busiest_load_per_task, avg_load);
+ +      if (sds->max_load - sds->this_load + sds->busiest_load_per_task >=
+ +                      sds->busiest_load_per_task * imbn) {
+ +              *imbalance = sds->busiest_load_per_task;
+ +              return;
+ +      }
   
         /*
- -       * We're trying to get all the cpus to the average_load, so we don't
- -       * want to push ourselves above the average load, nor do we wish to
- -       * reduce the max loaded cpu below the average load, as either of these
- -       * actions would just result in more rebalancing later, and ping-pong
- -       * tasks around. Thus we look for the minimum possible imbalance.
- -       * Negative imbalances (*we* are more loaded than anyone else) will
- -       * be counted as no imbalance for these purposes -- we can't fix that
- -       * by pulling tasks to us. Be careful of negative numbers as they'll
- -       * appear as very large values with unsigned longs.
+ +       * OK, we don't have enough imbalance to justify moving tasks,
+ +       * however we may be able to increase total CPU power used by
+ +       * moving them.
          */
- -      if (max_load <= busiest_load_per_task)
- -              goto out_balanced;
   
+ +      pwr_now += sds->busiest->__cpu_power *
+ +                      min(sds->busiest_load_per_task, sds->max_load);
+ +      pwr_now += sds->this->__cpu_power *
+ +                      min(sds->this_load_per_task, sds->this_load);
+ +      pwr_now /= SCHED_LOAD_SCALE;
+ +
+ +      /* Amount of load we'd subtract */
+ +      tmp = sg_div_cpu_power(sds->busiest,
+ +                      sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+ +      if (sds->max_load > tmp)
+ +              pwr_move += sds->busiest->__cpu_power *
+ +                      min(sds->busiest_load_per_task, sds->max_load - tmp);
+ +
+ +      /* Amount of load we'd add */
+ +      if (sds->max_load * sds->busiest->__cpu_power <
+ +              sds->busiest_load_per_task * SCHED_LOAD_SCALE)
+ +              tmp = sg_div_cpu_power(sds->this,
+ +                      sds->max_load * sds->busiest->__cpu_power);
+ +      else
+ +              tmp = sg_div_cpu_power(sds->this,
+ +                      sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+ +      pwr_move += sds->this->__cpu_power *
+ +                      min(sds->this_load_per_task, sds->this_load + tmp);
+ +      pwr_move /= SCHED_LOAD_SCALE;
+ +
+ +      /* Move if we gain throughput */
+ +      if (pwr_move > pwr_now)
+ +              *imbalance = sds->busiest_load_per_task;
+ +}
+ +
+ +/**
+ + * calculate_imbalance - Calculate the amount of imbalance present within the
+ + *                     groups of a given sched_domain during load balance.
+ + * @sds: statistics of the sched_domain whose imbalance is to be calculated.
+ + * @this_cpu: Cpu for which currently load balance is being performed.
+ + * @imbalance: The variable to store the imbalance.
+ + */
+ +static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
+ +              unsigned long *imbalance)
+ +{
+ +      unsigned long max_pull;
         /*
          * In the presence of smp nice balancing, certain scenarios can have
          * max load less than avg load(as we skip the groups at or below
          * its cpu_power, while calculating max_load..)
          */
- -      if (max_load < avg_load) {
+ +      if (sds->max_load < sds->avg_load) {
                 *imbalance = 0;
- -              goto small_imbalance;
+ +              return fix_small_imbalance(sds, this_cpu, imbalance);
         }
   
         /* Don't want to pull so many tasks that a group would go idle */
- -      max_pull = min(max_load - avg_load, max_load - busiest_load_per_task);
+ +      max_pull = min(sds->max_load - sds->avg_load,
+ +                      sds->max_load - sds->busiest_load_per_task);
   
         /* How much load to actually move to equalise the imbalance */
- -      *imbalance = min(max_pull * busiest->__cpu_power,
- -                              (avg_load - this_load) * this->__cpu_power)
+ +      *imbalance = min(max_pull * sds->busiest->__cpu_power,
+ +              (sds->avg_load - sds->this_load) * sds->this->__cpu_power)
                         / SCHED_LOAD_SCALE;
   
         /*
@@@ -3671,110 -3333,78 +3671,110 @@@
          * a think about bumping its value to force at least one task to be
          * moved
          */
- -      if (*imbalance < busiest_load_per_task) {
- -              unsigned long tmp, pwr_now, pwr_move;
- -              unsigned int imbn;
- -
- -small_imbalance:
- -              pwr_move = pwr_now = 0;
- -              imbn = 2;
- -              if (this_nr_running) {
- -                      this_load_per_task /= this_nr_running;
- -                      if (busiest_load_per_task > this_load_per_task)
- -                              imbn = 1;
- -              } else
- -                      this_load_per_task = cpu_avg_load_per_task(this_cpu);
+ +      if (*imbalance < sds->busiest_load_per_task)
+ +              return fix_small_imbalance(sds, this_cpu, imbalance);
   
- -              if (max_load - this_load + busiest_load_per_task >=
- -                                      busiest_load_per_task * imbn) {
- -                      *imbalance = busiest_load_per_task;
- -                      return busiest;
- -              }
+ +}
+ +/******* find_busiest_group() helpers end here *********************/
   
- -              /*
- -               * OK, we don't have enough imbalance to justify moving tasks,
- -               * however we may be able to increase total CPU power used by
- -               * moving them.
- -               */
+ +/**
+ + * find_busiest_group - Returns the busiest group within the sched_domain
+ + * if there is an imbalance. If there isn't an imbalance, and
+ + * the user has opted for power-savings, it returns a group whose
+ + * CPUs can be put to idle by rebalancing those tasks elsewhere, if
+ + * such a group exists.
+ + *
+ + * Also calculates the amount of weighted load which should be moved
+ + * to restore balance.
+ + *
+ + * @sd: The sched_domain whose busiest group is to be returned.
+ + * @this_cpu: The cpu for which load balancing is currently being performed.
+ + * @imbalance: Variable which stores amount of weighted load which should
+ + *            be moved to restore balance/put a group to idle.
+ + * @idle: The idle status of this_cpu.
+ + * @sd_idle: The idleness of sd
+ + * @cpus: The set of CPUs under consideration for load-balancing.
+ + * @balance: Pointer to a variable indicating if this_cpu
+ + *    is the appropriate cpu to perform load balancing at this_level.
+ + *
+ + * Returns:   - the busiest group if imbalance exists.
+ + *            - If no imbalance and user has opted for power-savings balance,
+ + *               return the least loaded group whose CPUs can be
+ + *               put to idle by rebalancing its tasks onto our group.
+ + */
+ +static struct sched_group *
+ +find_busiest_group(struct sched_domain *sd, int this_cpu,
+ +                 unsigned long *imbalance, enum cpu_idle_type idle,
+ +                 int *sd_idle, const struct cpumask *cpus, int *balance)
+ +{
+ +      struct sd_lb_stats sds;
   
- -              pwr_now += busiest->__cpu_power *
- -                              min(busiest_load_per_task, max_load);
- -              pwr_now += this->__cpu_power *
- -                              min(this_load_per_task, this_load);
- -              pwr_now /= SCHED_LOAD_SCALE;
- -
- -              /* Amount of load we'd subtract */
- -              tmp = sg_div_cpu_power(busiest,
- -                              busiest_load_per_task * SCHED_LOAD_SCALE);
- -              if (max_load > tmp)
- -                      pwr_move += busiest->__cpu_power *
- -                              min(busiest_load_per_task, max_load - tmp);
- -
- -              /* Amount of load we'd add */
- -              if (max_load * busiest->__cpu_power <
- -                              busiest_load_per_task * SCHED_LOAD_SCALE)
- -                      tmp = sg_div_cpu_power(this,
- -                                      max_load * busiest->__cpu_power);
- -              else
- -                      tmp = sg_div_cpu_power(this,
- -                              busiest_load_per_task * SCHED_LOAD_SCALE);
- -              pwr_move += this->__cpu_power *
- -                              min(this_load_per_task, this_load + tmp);
- -              pwr_move /= SCHED_LOAD_SCALE;
+ +      memset(&sds, 0, sizeof(sds));
   
- -              /* Move if we gain throughput */
- -              if (pwr_move > pwr_now)
- -                      *imbalance = busiest_load_per_task;
- -      }
+ +      /*
+ +       * Compute the various statistics relevant for load balancing at
+ +       * this level.
+ +       */
+ +      update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
+ +                                      balance, &sds);
+ +
+ +      /* Cases where imbalance does not exist from POV of this_cpu */
+ +      /* 1) this_cpu is not the appropriate cpu to perform load balancing
+ +       *    at this level.
+ +       * 2) There is no busy sibling group to pull from.
+ +       * 3) This group is the busiest group.
+ +       * 4) This group is more busy than the avg busyness at this
+ +       *    sched_domain.
+ +       * 5) The imbalance is within the specified limit.
+ +       * 6) Any rebalance would lead to ping-pong
+ +       */
+ +      if (balance && !(*balance))
+ +              goto ret;
   
- -      return busiest;
+ +      if (!sds.busiest || sds.busiest_nr_running == 0)
+ +              goto out_balanced;
   
- -out_balanced:
- -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
- -      if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
- -              goto ret;
+ +      if (sds.this_load >= sds.max_load)
+ +              goto out_balanced;
   
- -      if (this == group_leader && group_leader != group_min) {
- -              *imbalance = min_load_per_task;
- -              if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
- -                      cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
- -                              cpumask_first(sched_group_cpus(group_leader));
- -              }
- -              return group_min;
- -      }
- -#endif
+ +      sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
+ +
+ +      if (sds.this_load >= sds.avg_load)
+ +              goto out_balanced;
+ +
+ +      if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ +              goto out_balanced;
+ +
+ +      sds.busiest_load_per_task /= sds.busiest_nr_running;
+ +      if (sds.group_imb)
+ +              sds.busiest_load_per_task =
+ +                      min(sds.busiest_load_per_task, sds.avg_load);
+ +
+ +      /*
+ +       * We're trying to get all the cpus to the average_load, so we don't
+ +       * want to push ourselves above the average load, nor do we wish to
+ +       * reduce the max loaded cpu below the average load, as either of these
+ +       * actions would just result in more rebalancing later, and ping-pong
+ +       * tasks around. Thus we look for the minimum possible imbalance.
+ +       * Negative imbalances (*we* are more loaded than anyone else) will
+ +       * be counted as no imbalance for these purposes -- we can't fix that
+ +       * by pulling tasks to us. Be careful of negative numbers as they'll
+ +       * appear as very large values with unsigned longs.
+ +       */
+ +      if (sds.max_load <= sds.busiest_load_per_task)
+ +              goto out_balanced;
+ +
+ +      /* Looks like there is an imbalance. Compute it */
+ +      calculate_imbalance(&sds, this_cpu, imbalance);
+ +      return sds.busiest;
+ +
+ +out_balanced:
+ +      /*
+ +       * There is no obvious imbalance. But check if we can do some balancing
+ +       * to save power.
+ +       */
+ +      if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
+ +              return sds.busiest;
   ret:
         *imbalance = 0;
         return NULL;
@@@ -4427,11 -4057,6 +4427,11 @@@ static void run_rebalance_domains(struc
   #endif
   }
   
+ +static inline int on_null_domain(int cpu)
+ +{
+ +      return !rcu_dereference(cpu_rq(cpu)->sd);
+ +}
+ +
   /*
    * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
    *
@@@ -4489,9 -4114,7 +4489,9 @@@ static inline void trigger_load_balance
             cpumask_test_cpu(cpu, nohz.cpu_mask))
                 return;
   #endif
- -      if (time_after_eq(jiffies, rq->next_balance))
+ +      /* Don't need to rebalance while attached to NULL domain */
+ +      if (time_after_eq(jiffies, rq->next_balance) &&
+ +          likely(!on_null_domain(cpu)))
                 raise_softirq(SCHED_SOFTIRQ);
   }
   
@@@ -4781,10 -4404,7 +4781,7 @@@ void scheduler_tick(void
   #endif
   }
   
- #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-                               defined(CONFIG_PREEMPT_TRACER))
- 
- static inline unsigned long get_parent_ip(unsigned long addr)
+ unsigned long get_parent_ip(unsigned long addr)
   {
         if (in_lock_functions(addr)) {
                 addr = CALLER_ADDR2;
@@@ -4794,6 -4414,9 +4791,9 @@@
         return addr;
   }
   
+ #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+ 
   void __kprobes add_preempt_count(int val)
   {
   #ifdef CONFIG_DEBUG_PREEMPT
@@@ -4885,33 -4508,11 +4885,33 @@@ static inline void schedule_debug(struc
   #endif
   }
   
+ +static void put_prev_task(struct rq *rq, struct task_struct *prev)
+ +{
+ +      if (prev->state == TASK_RUNNING) {
+ +              u64 runtime = prev->se.sum_exec_runtime;
+ +
+ +              runtime -= prev->se.prev_sum_exec_runtime;
+ +              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+ +
+ +              /*
+ +               * In order to avoid avg_overlap growing stale when we are
+ +               * indeed overlapping and hence not getting put to sleep, grow
+ +               * the avg_overlap on preemption.
+ +               *
+ +               * We use the average preemption runtime because that
+ +               * correlates to the amount of cache footprint a task can
+ +               * build up.
+ +               */
+ +              update_avg(&prev->se.avg_overlap, runtime);
+ +      }
+ +      prev->sched_class->put_prev_task(rq, prev);
+ +}
+ +
   /*
    * Pick up the highest-prio task:
    */
   static inline struct task_struct *
- -pick_next_task(struct rq *rq, struct task_struct *prev)
+ +pick_next_task(struct rq *rq)
   {
         const struct sched_class *class;
         struct task_struct *p;
@@@ -4983,8 -4584,8 +4983,8 @@@ need_resched_nonpreemptible
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
   
- -      prev->sched_class->put_prev_task(rq, prev);
- -      next = pick_next_task(rq, prev);
+ +      put_prev_task(rq, prev);
+ +      next = pick_next_task(rq);
   
         if (likely(prev != next)) {
                 sched_info_switch(prev, next);
@@@ -5106,7 -4707,7 +5106,7 @@@ asmlinkage void __sched preempt_schedul
                  * between schedule and now.
                  */
                 barrier();
- -      } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+ +      } while (need_resched());
   }
   EXPORT_SYMBOL(preempt_schedule);
   
@@@ -5135,7 -4736,7 +5135,7 @@@ asmlinkage void __sched preempt_schedul
                  * between schedule and now.
                  */
                 barrier();
- -      } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+ +      } while (need_resched());
   }
   
   #endif /* CONFIG_PREEMPT */
@@@ -5196,17 -4797,11 +5196,17 @@@ void __wake_up_locked(wait_queue_head_
         __wake_up_common(q, mode, 1, 0, NULL);
   }
   
+ +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+ +{
+ +      __wake_up_common(q, mode, 1, 0, key);
+ +}
+ +
   /**
- - * __wake_up_sync - wake up threads blocked on a waitqueue.
+ + * __wake_up_sync_key - wake up threads blocked on a waitqueue.
    * @q: the waitqueue
    * @mode: which threads
    * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ + * @key: opaque value to be passed to wakeup targets
    *
    * The sync wakeup differs that the waker knows that it will schedule
    * away soon, so while the target thread will be woken up, it will not
@@@ -5215,8 -4810,8 +5215,8 @@@
    *
    * On UP it can prevent extra preemption.
    */
- -void
- -__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+ +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+ +                      int nr_exclusive, void *key)
   {
         unsigned long flags;
         int sync = 1;
@@@ -5228,18 -4823,9 +5228,18 @@@
                 sync = 0;
   
         spin_lock_irqsave(&q->lock, flags);
- -      __wake_up_common(q, mode, nr_exclusive, sync, NULL);
+ +      __wake_up_common(q, mode, nr_exclusive, sync, key);
         spin_unlock_irqrestore(&q->lock, flags);
   }
+ +EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+ +
+ +/*
+ + * __wake_up_sync - see __wake_up_sync_key()
+ + */
+ +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+ +{
+ +      __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+ +}
   EXPORT_SYMBOL_GPL(__wake_up_sync);    /* For internal use only */
   
   /**
@@@ -5624,7 -5210,7 +5624,7 @@@ SYSCALL_DEFINE1(nice, int, increment
         if (increment > 40)
                 increment = 40;
   
- -      nice = PRIO_TO_NICE(current->static_prio) + increment;
+ +      nice = TASK_NICE(current) + increment;
         if (nice < -20)
                 nice = -20;
         if (nice > 19)
@@@ -6897,7 -6483,7 +6897,7 @@@ static void migrate_dead_tasks(unsigne
                 if (!rq->nr_running)
                         break;
                 update_rq_clock(rq);
- -              next = pick_next_task(rq, rq->curr);
+ +              next = pick_next_task(rq);
                 if (!next)
                         break;
                 next->sched_class->put_prev_task(rq, next);
@@@ -8692,15 -8278,11 +8692,15 @@@ static void init_rt_rq(struct rt_rq *rt
         __set_bit(MAX_RT_PRIO, array->bitmap);
   
   #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
- -      rt_rq->highest_prio = MAX_RT_PRIO;
+ +      rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ +#ifdef CONFIG_SMP
+ +      rt_rq->highest_prio.next = MAX_RT_PRIO;
+ +#endif
   #endif
   #ifdef CONFIG_SMP
         rt_rq->rt_nr_migratory = 0;
         rt_rq->overloaded = 0;
+ +      plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
   #endif
   
         rt_rq->rt_time = 0;
@@@ -10076,7 -9658,7 +10076,7 @@@ static void cpuacct_charge(struct task_
         struct cpuacct *ca;
         int cpu;
   
- -      if (!cpuacct_subsys.active)
+ +      if (unlikely(!cpuacct_subsys.active))
                 return;
   
         cpu = task_cpu(tsk);
diff --combined kernel/sched_clock.c

index 390f33234bd007ea41da7006f252e715b6293b43,7ec82c1c61c5f13b3f031b09f5b71edb7b7b8fcd..819f17ac796efeeee1068efe3e6a27ac952d3a16
--- 1/kernel/sched_clock.c
--- 2/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -25,6 -25,7 +25,7 @@@
    * consistent between cpus (never more than 2 jiffies difference).
    */
   #include <linux/spinlock.h>
+ #include <linux/hardirq.h>
   #include <linux/module.h>
   #include <linux/percpu.h>
   #include <linux/ktime.h>
@@@ -44,6 -45,9 +45,6 @@@ static __read_mostly int sched_clock_ru
   
   #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
   __read_mostly int sched_clock_stable;
- -#else
- -static const int sched_clock_stable = 1;
- -#endif
   
   struct sched_clock_data {
         /*
@@@ -112,9 -116,14 +113,9 @@@ static u64 __update_sched_clock(struct 
         s64 delta = now - scd->tick_raw;
         u64 clock, min_clock, max_clock;
   
- -      WARN_ON_ONCE(!irqs_disabled());
- -
         if (unlikely(delta < 0))
                 delta = 0;
   
- -      if (unlikely(!sched_clock_running))
- -              return 0ull;
- -
         /*
          * scd->clock = clamp(scd->tick_gtod + delta,
          *                    max(scd->tick_gtod, scd->clock),
@@@ -154,6 -163,17 +155,17 @@@ u64 sched_clock_cpu(int cpu
                 return sched_clock();
   
         scd = cpu_sdc(cpu);
+ 
+       /*
+        * Normally this is not called in NMI context - but if it is,
+        * trying to do any locking here is totally lethal.
+        */
+       if (unlikely(in_nmi()))
+               return scd->clock;
+ 
+       if (unlikely(!sched_clock_running))
+               return 0ull;
+ 
         WARN_ON_ONCE(!irqs_disabled());
         now = sched_clock();
   
@@@ -193,20 -213,18 +205,20 @@@
         return clock;
   }
   
- -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- -
   void sched_clock_tick(void)
   {
- -      struct sched_clock_data *scd = this_scd();
+ +      struct sched_clock_data *scd;
         u64 now, now_gtod;
   
+ +      if (sched_clock_stable)
+ +              return;
+ +
         if (unlikely(!sched_clock_running))
                 return;
   
         WARN_ON_ONCE(!irqs_disabled());
   
+ +      scd = this_scd();
         now_gtod = ktime_to_ns(ktime_get());
         now = sched_clock();
   
@@@ -239,21 -257,6 +251,21 @@@ void sched_clock_idle_wakeup_event(u64 
   }
   EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
   
+ +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+ +
+ +void sched_clock_init(void)
+ +{
+ +      sched_clock_running = 1;
+ +}
+ +
+ +u64 sched_clock_cpu(int cpu)
+ +{
+ +      if (unlikely(!sched_clock_running))
+ +              return 0;
+ +
+ +      return sched_clock();
+ +}
+ +
   #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
   
   unsigned long long cpu_clock(int cpu)
diff --combined kernel/workqueue.c

index 9aedd9fd825b7c332a4a2f53124b57040b013acd,e53ee18ef43199205ca3bffebc30614ff49fedb8..3003ecad08f4fcfc48a0d36952777647ef3e387c
--- 1/kernel/workqueue.c
--- 2/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@@ -33,6 -33,7 +33,7 @@@
   #include <linux/kallsyms.h>
   #include <linux/debug_locks.h>
   #include <linux/lockdep.h>
+ #include <trace/workqueue.h>
   
   /*
    * The per-CPU workqueue (if single thread, we always use the first
@@@ -125,9 -126,13 +126,13 @@@ struct cpu_workqueue_struct *get_wq_dat
         return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
   }
   
+ DEFINE_TRACE(workqueue_insertion);
+ 
   static void insert_work(struct cpu_workqueue_struct *cwq,
                         struct work_struct *work, struct list_head *head)
   {
+       trace_workqueue_insertion(cwq->thread, work);
+ 
         set_wq_data(work, cwq);
         /*
          * Ensure that we get the right work->data if we see the
@@@ -259,6 -264,8 +264,8 @@@ int queue_delayed_work_on(int cpu, stru
   }
   EXPORT_SYMBOL_GPL(queue_delayed_work_on);
   
+ DEFINE_TRACE(workqueue_execution);
+ 
   static void run_workqueue(struct cpu_workqueue_struct *cwq)
   {
         spin_lock_irq(&cwq->lock);
@@@ -284,7 -291,7 +291,7 @@@
                  */
                 struct lockdep_map lockdep_map = work->lockdep_map;
   #endif
- 
+               trace_workqueue_execution(cwq->thread, work);
                 cwq->current_work = work;
                 list_del_init(cwq->worklist.next);
                 spin_unlock_irq(&cwq->lock);
@@@ -416,7 -423,7 +423,7 @@@ void flush_workqueue(struct workqueue_s
         might_sleep();
         lock_map_acquire(&wq->lockdep_map);
         lock_map_release(&wq->lockdep_map);
- -      for_each_cpu_mask_nr(cpu, *cpu_map)
+ +      for_each_cpu(cpu, cpu_map)
                 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
   }
   EXPORT_SYMBOL_GPL(flush_workqueue);
@@@ -547,7 -554,7 +554,7 @@@ static void wait_on_work(struct work_st
         wq = cwq->wq;
         cpu_map = wq_cpu_map(wq);
   
- -      for_each_cpu_mask_nr(cpu, *cpu_map)
+ +      for_each_cpu(cpu, cpu_map)
                 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
   }
   
@@@ -765,6 -772,8 +772,8 @@@ init_cpu_workqueue(struct workqueue_str
         return cwq;
   }
   
+ DEFINE_TRACE(workqueue_creation);
+ 
   static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
   {
         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@@ -787,6 -796,8 +796,8 @@@
                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
         cwq->thread = p;
   
+       trace_workqueue_creation(cwq->thread, cpu);
+ 
         return 0;
   }
   
@@@ -868,6 -879,8 +879,8 @@@ struct workqueue_struct *__create_workq
   }
   EXPORT_SYMBOL_GPL(__create_workqueue_key);
   
+ DEFINE_TRACE(workqueue_destruction);
+ 
   static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
   {
         /*
@@@ -891,6 -904,7 +904,7 @@@
          * checks list_empty(), and a "normal" queue_work() can't use
          * a dead CPU.
          */
+       trace_workqueue_destruction(cwq->thread);
         kthread_stop(cwq->thread);
         cwq->thread = NULL;
   }
@@@ -911,7 -925,7 +925,7 @@@ void destroy_workqueue(struct workqueue
         list_del(&wq->list);
         spin_unlock(&workqueue_lock);
   
- -      for_each_cpu_mask_nr(cpu, *cpu_map)
+ +      for_each_cpu(cpu, cpu_map)
                 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
         cpu_maps_update_done();
   
diff --combined lib/Kconfig

index 2a9c69f3448216e8ce3f6a0f5488a947dfa55060,206f36a9efb4a4d0f998068263d6f839c5422832..8ade0a7a91e09ae11e4921338d5363ee74803d05
--- 1/lib/Kconfig
--- 2/lib/Kconfig
+++ b/lib/Kconfig
@@@ -2,6 -2,9 +2,9 @@@
   # Library configuration
   #
   
+ config BINARY_PRINTF
+       def_bool n
+ 
   menu "Library routines"
   
   config BITREVERSE
@@@ -150,6 -153,12 +153,6 @@@ config TEXTSEARCH_B
   config TEXTSEARCH_FSM
         tristate
   
- -#
- -# plist support is select#ed if needed
- -#
- -config PLIST
- -      boolean
- -
   config HAS_IOMEM
         boolean
         depends on !NO_IOMEM
@@@ -182,10 -191,4 +185,10 @@@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIO
          bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
          depends on EXPERIMENTAL && BROKEN
   
+ +#
+ +# Netlink attribute parsing support is select'ed if needed
+ +#
+ +config NLATTR
+ +      bool
+ +
   endmenu
diff --combined mm/slob.c

index 7a3411524dacd555e9ce70e287a4232072391e10,596152926a8dc859bfb4e554a4db1e1e3fb64f8e..4dd6516447f2258ac44c1a2baa024c6be38194d2
--- 1/mm/slob.c
--- 2/mm/slob.c
+++ b/mm/slob.c
@@@ -65,6 -65,7 +65,7 @@@
   #include <linux/module.h>
   #include <linux/rcupdate.h>
   #include <linux/list.h>
+ #include <trace/kmemtrace.h>
   #include <asm/atomic.h>
   
   /*
@@@ -126,9 -127,9 +127,9 @@@ static LIST_HEAD(free_slob_medium)
   static LIST_HEAD(free_slob_large);
   
   /*
- - * slob_page: True for all slob pages (false for bigblock pages)
+ + * is_slob_page: True for all slob pages (false for bigblock pages)
    */
- -static inline int slob_page(struct slob_page *sp)
+ +static inline int is_slob_page(struct slob_page *sp)
   {
         return PageSlobPage((struct page *)sp);
   }
@@@ -143,11 -144,6 +144,11 @@@ static inline void clear_slob_page(stru
         __ClearPageSlobPage((struct page *)sp);
   }
   
+ +static inline struct slob_page *slob_page(const void *addr)
+ +{
+ +      return (struct slob_page *)virt_to_page(addr);
+ +}
+ +
   /*
    * slob_page_free: true for pages on free_slob_pages list.
    */
@@@ -235,7 -231,7 +236,7 @@@ static int slob_last(slob_t *s
         return !((unsigned long)slob_next(s) & ~PAGE_MASK);
   }
   
- -static void *slob_new_page(gfp_t gfp, int order, int node)
+ +static void *slob_new_pages(gfp_t gfp, int order, int node)
   {
         void *page;
   
@@@ -252,17 -248,12 +253,17 @@@
         return page_address(page);
   }
   
+ +static void slob_free_pages(void *b, int order)
+ +{
+ +      free_pages((unsigned long)b, order);
+ +}
+ +
   /*
    * Allocate a slob block within a given slob_page sp.
    */
   static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
   {
- -      slob_t *prev, *cur, *aligned = 0;
+ +      slob_t *prev, *cur, *aligned = NULL;
         int delta = 0, units = SLOB_UNITS(size);
   
         for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
@@@ -359,10 -350,10 +360,10 @@@ static void *slob_alloc(size_t size, gf
   
         /* Not enough space: must allocate a new page */
         if (!b) {
- -              b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
+ +              b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
                 if (!b)
- -                      return 0;
- -              sp = (struct slob_page *)virt_to_page(b);
+ +                      return NULL;
+ +              sp = slob_page(b);
                 set_slob_page(sp);
   
                 spin_lock_irqsave(&slob_lock, flags);
@@@ -394,7 -385,7 +395,7 @@@ static void slob_free(void *block, int 
                 return;
         BUG_ON(!size);
   
- -      sp = (struct slob_page *)virt_to_page(block);
+ +      sp = slob_page(block);
         units = SLOB_UNITS(size);
   
         spin_lock_irqsave(&slob_lock, flags);
@@@ -403,11 -394,10 +404,11 @@@
                 /* Go directly to page allocator. Do not pass slob allocator */
                 if (slob_page_free(sp))
                         clear_slob_page_free(sp);
+ +              spin_unlock_irqrestore(&slob_lock, flags);
                 clear_slob_page(sp);
                 free_slob_page(sp);
                 free_page((unsigned long)b);
- -              goto out;
+ +              return;
         }
   
         if (!slob_page_free(sp)) {
@@@ -474,29 -464,40 +475,40 @@@ void *__kmalloc_node(size_t size, gfp_
   {
         unsigned int *m;
         int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+       void *ret;
   
- -      lockdep_trace_alloc(flags);
+ +      lockdep_trace_alloc(gfp);
   
         if (size < PAGE_SIZE - align) {
                 if (!size)
                         return ZERO_SIZE_PTR;
   
                 m = slob_alloc(size + align, gfp, align, node);
+ 
                 if (!m)
                         return NULL;
                 *m = size;
-               return (void *)m + align;
+               ret = (void *)m + align;
+ 
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, size + align, gfp, node);
         } else {
-               void *ret;
+               unsigned int order = get_order(size);
   
- -              ret = slob_new_page(gfp | __GFP_COMP, order, node);
+ +              ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node);
                 if (ret) {
                         struct page *page;
                         page = virt_to_page(ret);
                         page->private = size;
                 }
-               return ret;
+ 
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, PAGE_SIZE << order, gfp, node);
         }
+ 
+       return ret;
   }
   EXPORT_SYMBOL(__kmalloc_node);
   
@@@ -507,13 -508,15 +519,15 @@@ void kfree(const void *block
         if (unlikely(ZERO_OR_NULL_PTR(block)))
                 return;
   
- -      sp = (struct slob_page *)virt_to_page(block);
- -      if (slob_page(sp)) {
+ +      sp = slob_page(block);
+ +      if (is_slob_page(sp)) {
                 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
                 unsigned int *m = (unsigned int *)(block - align);
                 slob_free(m, *m + align);
         } else
                 put_page(&sp->page);
+ 
+       kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
   }
   EXPORT_SYMBOL(kfree);
   
@@@ -526,8 -529,8 +540,8 @@@ size_t ksize(const void *block
         if (unlikely(block == ZERO_SIZE_PTR))
                 return 0;
   
- -      sp = (struct slob_page *)virt_to_page(block);
- -      if (slob_page(sp)) {
+ +      sp = slob_page(block);
+ +      if (is_slob_page(sp)) {
                 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
                 unsigned int *m = (unsigned int *)(block - align);
                 return SLOB_UNITS(*m) * SLOB_UNIT;
@@@ -583,10 -586,19 +597,19 @@@ void *kmem_cache_alloc_node(struct kmem
   {
         void *b;
   
-       if (c->size < PAGE_SIZE)
+       if (c->size < PAGE_SIZE) {
                 b = slob_alloc(c->size, flags, c->align, node);
-       else
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+                                         _RET_IP_, b, c->size,
+                                         SLOB_UNITS(c->size) * SLOB_UNIT,
+                                         flags, node);
+       } else {
- -              b = slob_new_page(flags, get_order(c->size), node);
+ +              b = slob_new_pages(flags, get_order(c->size), node);
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+                                         _RET_IP_, b, c->size,
+                                         PAGE_SIZE << get_order(c->size),
+                                         flags, node);
+       }
   
         if (c->ctor)
                 c->ctor(b);
@@@ -600,7 -612,7 +623,7 @@@ static void __kmem_cache_free(void *b, 
         if (size < PAGE_SIZE)
                 slob_free(b, size);
         else
- -              free_pages((unsigned long)b, get_order(size));
+ +              slob_free_pages(b, get_order(size));
   }
   
   static void kmem_rcu_free(struct rcu_head *head)
@@@ -622,6 -634,8 +645,8 @@@ void kmem_cache_free(struct kmem_cache 
         } else {
                 __kmem_cache_free(b, c->size);
         }
+ 
+       kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
   }
   EXPORT_SYMBOL(kmem_cache_free);
   
diff --combined mm/slub.c

index c4ea9158c9fbd0e4630062aa0098406000be45c3,816734ed8aa3c9c20666676b2cf25fa86d3eb547..7aaa121d0ea9eda5cd2acddbf3707de9536da241
--- 1/mm/slub.c
--- 2/mm/slub.c
+++ b/mm/slub.c
@@@ -16,6 -16,7 +16,7 @@@
   #include <linux/slab.h>
   #include <linux/proc_fs.h>
   #include <linux/seq_file.h>
+ #include <trace/kmemtrace.h>
   #include <linux/cpu.h>
   #include <linux/cpuset.h>
   #include <linux/mempolicy.h>
@@@ -374,8 -375,14 +375,8 @@@ static struct track *get_track(struct k
   static void set_track(struct kmem_cache *s, void *object,
                         enum track_item alloc, unsigned long addr)
   {
- -      struct track *p;
- -
- -      if (s->offset)
- -              p = object + s->offset + sizeof(void *);
- -      else
- -              p = object + s->inuse;
+ +      struct track *p = get_track(s, object, alloc);
   
- -      p += alloc;
         if (addr) {
                 p->addr = addr;
                 p->cpu = smp_processor_id();
@@@ -1329,7 -1336,7 +1330,7 @@@ static struct page *get_any_partial(str
                 n = get_node(s, zone_to_nid(zone));
   
                 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
- -                              n->nr_partial > n->min_partial) {
+ +                              n->nr_partial > s->min_partial) {
                         page = get_partial_node(n);
                         if (page)
                                 return page;
@@@ -1381,7 -1388,7 +1382,7 @@@ static void unfreeze_slab(struct kmem_c
                 slab_unlock(page);
         } else {
                 stat(c, DEACTIVATE_EMPTY);
- -              if (n->nr_partial < n->min_partial) {
+ +              if (n->nr_partial < s->min_partial) {
                         /*
                          * Adding an empty slab to the partial slabs in order
                          * to avoid page allocator overhead. This slab needs
@@@ -1618,18 -1625,46 +1619,46 @@@ static __always_inline void *slab_alloc
   
   void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
   {
-       return slab_alloc(s, gfpflags, -1, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+ 
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                            s->objsize, s->size, gfpflags);
+ 
+       return ret;
   }
   EXPORT_SYMBOL(kmem_cache_alloc);
   
+ #ifdef CONFIG_KMEMTRACE
+ void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+ {
+       return slab_alloc(s, gfpflags, -1, _RET_IP_);
+ }
+ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+ #endif
+ 
   #ifdef CONFIG_NUMA
   void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
   {
-       return slab_alloc(s, gfpflags, node, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+ 
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                                 s->objsize, s->size, gfpflags, node);
+ 
+       return ret;
   }
   EXPORT_SYMBOL(kmem_cache_alloc_node);
   #endif
   
+ #ifdef CONFIG_KMEMTRACE
+ void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                                   gfp_t gfpflags,
+                                   int node)
+ {
+       return slab_alloc(s, gfpflags, node, _RET_IP_);
+ }
+ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+ #endif
+ 
   /*
    * Slow patch handling. This may still be called frequently since objects
    * have a longer lifetime than the cpu slabs in most processing loads.
@@@ -1719,7 -1754,7 +1748,7 @@@ static __always_inline void slab_free(s
         c = get_cpu_slab(s, smp_processor_id());
         debug_check_no_locks_freed(object, c->objsize);
         if (!(s->flags & SLAB_DEBUG_OBJECTS))
- -              debug_check_no_obj_freed(object, s->objsize);
+ +              debug_check_no_obj_freed(object, c->objsize);
         if (likely(page == c->page && c->node >= 0)) {
                 object[c->offset] = c->freelist;
                 c->freelist = object;
@@@ -1737,6 -1772,8 +1766,8 @@@ void kmem_cache_free(struct kmem_cache 
         page = virt_to_head_page(x);
   
         slab_free(s, page, x, _RET_IP_);
+ 
+       kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
   }
   EXPORT_SYMBOL(kmem_cache_free);
   
@@@ -1839,7 -1876,6 +1870,7 @@@ static inline int calculate_order(int s
         int order;
         int min_objects;
         int fraction;
+ +      int max_objects;
   
         /*
          * Attempt to find best configuration for a slab. This
@@@ -1852,9 -1888,6 +1883,9 @@@
         min_objects = slub_min_objects;
         if (!min_objects)
                 min_objects = 4 * (fls(nr_cpu_ids) + 1);
+ +      max_objects = (PAGE_SIZE << slub_max_order)/size;
+ +      min_objects = min(min_objects, max_objects);
+ +
         while (min_objects > 1) {
                 fraction = 16;
                 while (fraction >= 4) {
@@@ -1864,7 -1897,7 +1895,7 @@@
                                 return order;
                         fraction /= 2;
                 }
- -              min_objects /= 2;
+ +              min_objects --;
         }
   
         /*
@@@ -1927,6 -1960,17 +1958,6 @@@ static voi
   init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
   {
         n->nr_partial = 0;
- -
- -      /*
- -       * The larger the object size is, the more pages we want on the partial
- -       * list to avoid pounding the page allocator excessively.
- -       */
- -      n->min_partial = ilog2(s->size);
- -      if (n->min_partial < MIN_PARTIAL)
- -              n->min_partial = MIN_PARTIAL;
- -      else if (n->min_partial > MAX_PARTIAL)
- -              n->min_partial = MAX_PARTIAL;
- -
         spin_lock_init(&n->list_lock);
         INIT_LIST_HEAD(&n->partial);
   #ifdef CONFIG_SLUB_DEBUG
@@@ -2169,15 -2213,6 +2200,15 @@@ static int init_kmem_cache_nodes(struc
   }
   #endif
   
+ +static void set_min_partial(struct kmem_cache *s, unsigned long min)
+ +{
+ +      if (min < MIN_PARTIAL)
+ +              min = MIN_PARTIAL;
+ +      else if (min > MAX_PARTIAL)
+ +              min = MAX_PARTIAL;
+ +      s->min_partial = min;
+ +}
+ +
   /*
    * calculate_sizes() determines the order and the distribution of data within
    * a slab object.
@@@ -2316,11 -2351,6 +2347,11 @@@ static int kmem_cache_open(struct kmem_
         if (!calculate_sizes(s, -1))
                 goto error;
   
+ +      /*
+ +       * The larger the object size is, the more pages we want on the partial
+ +       * list to avoid pounding the page allocator excessively.
+ +       */
+ +      set_min_partial(s, ilog2(s->size));
         s->refcount = 1;
   #ifdef CONFIG_NUMA
         s->remote_node_defrag_ratio = 1000;
@@@ -2659,6 -2689,7 +2690,7 @@@ static struct kmem_cache *get_slab(size
   void *__kmalloc(size_t size, gfp_t flags)
   {
         struct kmem_cache *s;
+       void *ret;
   
         if (unlikely(size > SLUB_MAX_SIZE))
                 return kmalloc_large(size, flags);
@@@ -2668,7 -2699,12 +2700,12 @@@
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
-       return slab_alloc(s, flags, -1, _RET_IP_);
+       ret = slab_alloc(s, flags, -1, _RET_IP_);
+ 
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+                            size, s->size, flags);
+ 
+       return ret;
   }
   EXPORT_SYMBOL(__kmalloc);
   
@@@ -2687,16 -2723,30 +2724,30 @@@ static void *kmalloc_large_node(size_t 
   void *__kmalloc_node(size_t size, gfp_t flags, int node)
   {
         struct kmem_cache *s;
+       void *ret;
   
-       if (unlikely(size > SLUB_MAX_SIZE))
-               return kmalloc_large_node(size, flags, node);
+       if (unlikely(size > SLUB_MAX_SIZE)) {
+               ret = kmalloc_large_node(size, flags, node);
+ 
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, PAGE_SIZE << get_order(size),
+                                         flags, node);
+ 
+               return ret;
+       }
   
         s = get_slab(size, flags);
   
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
-       return slab_alloc(s, flags, node, _RET_IP_);
+       ret = slab_alloc(s, flags, node, _RET_IP_);
+ 
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+                                 size, s->size, flags, node);
+ 
+       return ret;
   }
   EXPORT_SYMBOL(__kmalloc_node);
   #endif
@@@ -2755,6 -2805,8 +2806,8 @@@ void kfree(const void *x
                 return;
         }
         slab_free(page->slab, page, object, _RET_IP_);
+ 
+       kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
   }
   EXPORT_SYMBOL(kfree);
   
@@@ -3224,6 -3276,7 +3277,7 @@@ static struct notifier_block __cpuinitd
   void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
   {
         struct kmem_cache *s;
+       void *ret;
   
         if (unlikely(size > SLUB_MAX_SIZE))
                 return kmalloc_large(size, gfpflags);
@@@ -3233,13 -3286,20 +3287,20 @@@
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
-       return slab_alloc(s, gfpflags, -1, caller);
+       ret = slab_alloc(s, gfpflags, -1, caller);
+ 
+       /* Honor the call site pointer we received. */
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
+                            s->size, gfpflags);
+ 
+       return ret;
   }
   
   void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
                                         int node, unsigned long caller)
   {
         struct kmem_cache *s;
+       void *ret;
   
         if (unlikely(size > SLUB_MAX_SIZE))
                 return kmalloc_large_node(size, gfpflags, node);
@@@ -3249,7 -3309,13 +3310,13 @@@
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
-       return slab_alloc(s, gfpflags, node, caller);
+       ret = slab_alloc(s, gfpflags, node, caller);
+ 
+       /* Honor the call site pointer we received. */
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
+                                 size, s->size, gfpflags, node);
+ 
+       return ret;
   }
   
   #ifdef CONFIG_SLUB_DEBUG
@@@ -3838,26 -3904,6 +3905,26 @@@ static ssize_t order_show(struct kmem_c
   }
   SLAB_ATTR(order);
   
+ +static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
+ +{
+ +      return sprintf(buf, "%lu\n", s->min_partial);
+ +}
+ +
+ +static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
+ +                               size_t length)
+ +{
+ +      unsigned long min;
+ +      int err;
+ +
+ +      err = strict_strtoul(buf, 10, &min);
+ +      if (err)
+ +              return err;
+ +
+ +      set_min_partial(s, min);
+ +      return length;
+ +}
+ +SLAB_ATTR(min_partial);
+ +
   static ssize_t ctor_show(struct kmem_cache *s, char *buf)
   {
         if (s->ctor) {
@@@ -4173,7 -4219,6 +4240,7 @@@ static struct attribute *slab_attrs[] 
         &object_size_attr.attr,
         &objs_per_slab_attr.attr,
         &order_attr.attr,
+ +      &min_partial_attr.attr,
         &objects_attr.attr,
         &objects_partial_attr.attr,
         &total_objects_attr.attr,
author	Ingo Molnar <mingo@elte.hu>
	Wed, 1 Apr 2009 19:54:19 +0000 (21:54 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Wed, 1 Apr 2009 22:49:02 +0000 (00:49 +0200)
		1	2
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/sysrq.txt	patch \|	diff1 \|	diff2 \|	blob \| history
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/ia64/kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/cacheflush.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/kprobes.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/ptrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/char/sysrq.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/partitions/check.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-generic/vmlinux.lds.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/interrupt.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/kernel.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/slub_def.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/string.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/handle.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/lockdep.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/relay.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_clock.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/workqueue.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slob.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slub.c	patch \|	diff1 \|	diff2 \|	blob \| history