static cpumask_var_t __read_mostly     tracing_buffer_mask;
 
+/* Define which cpu buffers are currently read in trace_pipe */
+static cpumask_var_t                   tracing_reader_cpumask;
+
 #define for_each_tracing_cpu(cpu)      \
        for_each_cpu(cpu, tracing_buffer_mask)
 
 {
        struct ring_buffer *buffer = iter->tr->buffer;
        struct trace_entry *ent, *next = NULL;
+       int cpu_file = iter->cpu_file;
        u64 next_ts = 0, ts;
        int next_cpu = -1;
        int cpu;
 
+       /*
+        * If we are in a per_cpu trace file, don't bother by iterating over
+        * all cpu and peek directly.
+        */
+       if (cpu_file > TRACE_PIPE_ALL_CPU) {
+               if (ring_buffer_empty_cpu(buffer, cpu_file))
+                       return NULL;
+               ent = peek_next_entry(iter, cpu_file, ent_ts);
+               if (ent_cpu)
+                       *ent_cpu = cpu_file;
+
+               return ent;
+       }
+
        for_each_tracing_cpu(cpu) {
 
                if (ring_buffer_empty_cpu(buffer, cpu))
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
        struct trace_iterator *iter = m->private;
+       int cpu_file = iter->cpu_file;
        void *p = NULL;
        loff_t l = 0;
        int cpu;
 
                ftrace_disable_cpu();
 
-               for_each_tracing_cpu(cpu) {
-                       ring_buffer_iter_reset(iter->buffer_iter[cpu]);
-               }
+               if (cpu_file == TRACE_PIPE_ALL_CPU) {
+                       for_each_tracing_cpu(cpu)
+                               ring_buffer_iter_reset(iter->buffer_iter[cpu]);
+               } else
+                       ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
+
 
                ftrace_enable_cpu();
 
 static struct trace_iterator *
 __tracing_open(struct inode *inode, struct file *file, int *ret)
 {
+       long cpu_file = (long) inode->i_private;
        struct trace_iterator *iter;
        struct seq_file *m;
        int cpu;
        if (current_trace && current_trace->print_max)
                iter->tr = &max_tr;
        else
-               iter->tr = inode->i_private;
+               iter->tr = &global_trace;
        iter->trace = current_trace;
        iter->pos = -1;
+       iter->cpu_file = cpu_file;
 
        /* Notify the tracer early; before we stop tracing. */
        if (iter->trace && iter->trace->open)
        if (ring_buffer_overruns(iter->tr->buffer))
                iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+       if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
+               for_each_tracing_cpu(cpu) {
 
-       for_each_tracing_cpu(cpu) {
+                       iter->buffer_iter[cpu] =
+                               ring_buffer_read_start(iter->tr->buffer, cpu);
 
+                       if (!iter->buffer_iter[cpu])
+                               goto fail_buffer;
+               }
+       } else {
+               cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
-                       ring_buffer_read_start(iter->tr->buffer, cpu);
+                               ring_buffer_read_start(iter->tr->buffer, cpu);
 
                if (!iter->buffer_iter[cpu])
-                       goto fail_buffer;
+                       goto fail;
        }
 
        /* TODO stop tracer */
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
        }
+fail:
        mutex_unlock(&trace_types_lock);
        kfree(iter);
 
        return cnt;
 }
 
-static atomic_t tracing_reader;
-
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
+       long cpu_file = (long) inode->i_private;
        struct trace_iterator *iter;
+       int ret = 0;
 
        if (tracing_disabled)
                return -ENODEV;
 
-       /* We only allow for reader of the pipe */
-       if (atomic_inc_return(&tracing_reader) != 1) {
-               atomic_dec(&tracing_reader);
-               return -EBUSY;
+       mutex_lock(&trace_types_lock);
+
+       /* We only allow one reader per cpu */
+       if (cpu_file == TRACE_PIPE_ALL_CPU) {
+               if (!cpumask_empty(tracing_reader_cpumask)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+               cpumask_setall(tracing_reader_cpumask);
+       } else {
+               if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
+                       cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
+               else {
+                       ret = -EBUSY;
+                       goto out;
+               }
        }
 
        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
-       if (!iter)
-               return -ENOMEM;
+       if (!iter) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
                kfree(iter);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
        }
 
-       mutex_lock(&trace_types_lock);
-
        /* trace pipe does not show start of buffer */
        cpumask_setall(iter->started);
 
+       iter->cpu_file = cpu_file;
        iter->tr = &global_trace;
        iter->trace = current_trace;
        filp->private_data = iter;
 
        if (iter->trace->pipe_open)
                iter->trace->pipe_open(iter);
-       mutex_unlock(&trace_types_lock);
 
-       return 0;
+out:
+       mutex_unlock(&trace_types_lock);
+       return ret;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
        struct trace_iterator *iter = file->private_data;
 
+       mutex_lock(&trace_types_lock);
+
+       if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
+               cpumask_clear(tracing_reader_cpumask);
+       else
+               cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+
+       mutex_unlock(&trace_types_lock);
+
        free_cpumask_var(iter->started);
        kfree(iter);
-       atomic_dec(&tracing_reader);
 
        return 0;
 }
        return d_tracer;
 }
 
+static struct dentry *d_percpu;
+
+struct dentry *tracing_dentry_percpu(void)
+{
+       static int once;
+       struct dentry *d_tracer;
+
+       if (d_percpu)
+               return d_percpu;
+
+       d_tracer = tracing_init_dentry();
+
+       if (!d_tracer)
+               return NULL;
+
+       d_percpu = debugfs_create_dir("per_cpu", d_tracer);
+
+       if (!d_percpu && !once) {
+               once = 1;
+               pr_warning("Could not create debugfs directory 'per_cpu'\n");
+               return NULL;
+       }
+
+       return d_percpu;
+}
+
+static void tracing_init_debugfs_percpu(long cpu)
+{
+       struct dentry *d_percpu = tracing_dentry_percpu();
+       struct dentry *entry;
+       /* strlen(trace_pipe) + MAX(log10(cpu)) + '\0' */
+       char filename[17];
+
+       if (cpu > 999 || cpu < 0)
+               return;
+
+       /* per cpu trace_pipe */
+       sprintf(filename, "trace_pipe%ld", cpu);
+
+       entry = debugfs_create_file(filename, 0444, d_percpu,
+                               (void *) cpu, &tracing_pipe_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs '%s' entry\n", filename);
+
+       /* per cpu trace */
+       sprintf(filename, "trace%ld", cpu);
+
+       entry = debugfs_create_file(filename, 0444, d_percpu,
+                               (void *) cpu, &tracing_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs '%s' entry\n", filename);
+}
+
 #ifdef CONFIG_FTRACE_SELFTEST
 /* Let selftest have access to static functions in this file */
 #include "trace_selftest.c"
 {
        struct dentry *d_tracer;
        struct dentry *entry;
+       int cpu;
 
        d_tracer = tracing_init_dentry();
 
                pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
 
        entry = debugfs_create_file("trace", 0444, d_tracer,
-                                   &global_trace, &tracing_fops);
+                                (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
        if (!entry)
                pr_warning("Could not create debugfs 'trace' entry\n");
 
        if (!entry)
                pr_warning("Could not create debugfs 'README' entry\n");
 
-       entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
-                                   NULL, &tracing_pipe_fops);
+       entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+                       (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'trace_pipe' entry\n");
 #ifdef CONFIG_SYSPROF_TRACER
        init_tracer_sysprof_debugfs(d_tracer);
 #endif
+
+       for_each_tracing_cpu(cpu)
+               tracing_init_debugfs_percpu(cpu);
+
        return 0;
 }
 
        if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
                goto out_free_buffer_mask;
 
+       if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+               goto out_free_tracing_cpumask;
+
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
+       cpumask_clear(tracing_reader_cpumask);
 
        /* TODO: make the number of buffers hot pluggable with CPUS */
        global_trace.buffer = ring_buffer_alloc(trace_buf_size,
        ret = 0;
 
 out_free_cpumask:
+       free_cpumask_var(tracing_reader_cpumask);
+out_free_tracing_cpumask:
        free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
        free_cpumask_var(tracing_buffer_mask);