preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
time = sched_clock() << DEBUG_SHIFT;
- preempt_enable_notrace();
+ preempt_enable_no_resched_notrace();
return time;
}
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
-/*
- * This hack stolen from mm/slob.c.
- * We can store per page timing information in the page frame of the page.
- * Thanks to Peter Zijlstra for suggesting this idea.
- */
-struct buffer_page {
+struct buffer_data_page {
u64 time_stamp; /* page time stamp */
- local_t write; /* index for next write */
local_t commit; /* write commited index */
+ unsigned char data[]; /* data of buffer page */
+};
+
+struct buffer_page {
+ local_t write; /* index for next write */
unsigned read; /* index for next read */
struct list_head list; /* list of free pages */
- void *page; /* Actual data page */
+ struct buffer_data_page *page; /* Actual data page */
};
+static void rb_init_page(struct buffer_data_page *bpage)
+{
+ local_set(&bpage->commit, 0);
+}
+
/*
* Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
* this issue out.
return 0;
}
-#define BUF_PAGE_SIZE PAGE_SIZE
+#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
/*
* head_page == tail_page && head == tail then buffer is empty.
};
struct ring_buffer {
- unsigned long size;
unsigned pages;
unsigned flags;
int cpus;
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
return -1;
if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
return -1;
- list_for_each_entry_safe(page, tmp, head, list) {
+ list_for_each_entry_safe(bpage, tmp, head, list) {
if (RB_WARN_ON(cpu_buffer,
- page->list.next->prev != &page->list))
+ bpage->list.next->prev != &bpage->list))
return -1;
if (RB_WARN_ON(cpu_buffer,
- page->list.prev->next != &page->list))
+ bpage->list.prev->next != &bpage->list))
return -1;
}
unsigned nr_pages)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long addr;
LIST_HEAD(pages);
unsigned i;
for (i = 0; i < nr_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
list_splice(&pages, head);
return 0;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
return -ENOMEM;
}
rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct buffer_page *page;
+ struct buffer_page *bpage;
unsigned long addr;
int ret;
cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&cpu_buffer->pages);
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto fail_free_buffer;
- cpu_buffer->reader_page = page;
+ cpu_buffer->reader_page = bpage;
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto fail_free_reader;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
list_del_init(&cpu_buffer->reader_page->list);
free_buffer_page(cpu_buffer->reader_page);
- list_for_each_entry_safe(page, tmp, head, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
kfree(cpu_buffer);
}
static void
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
return;
p = cpu_buffer->pages.next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- free_buffer_page(page);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
return;
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *pages, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
return;
p = pages->next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- list_add_tail(&page->list, &cpu_buffer->pages);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ list_add_tail(&bpage->list, &cpu_buffer->pages);
}
rb_reset_cpu(cpu_buffer);
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned nr_pages, rm_pages, new_pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long buffer_size;
unsigned long addr;
LIST_HEAD(pages);
for_each_buffer_cpu(buffer, cpu) {
for (i = 0; i < new_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage),
cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
}
return size;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
mutex_unlock(&buffer->mutex);
return -ENOMEM;
return event->type == RINGBUF_TYPE_PADDING;
}
-static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *
+__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
+{
+ return bpage->data + index;
+}
+
+static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{
- return page->page + index;
+ return bpage->page->data + index;
}
static inline struct ring_buffer_event *
static inline unsigned rb_page_commit(struct buffer_page *bpage)
{
- return local_read(&bpage->commit);
+ return local_read(&bpage->page->commit);
}
/* Size is determined by what has been commited */
}
static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_page **page)
+ struct buffer_page **bpage)
{
- struct list_head *p = (*page)->list.next;
+ struct list_head *p = (*bpage)->list.next;
if (p == &cpu_buffer->pages)
p = p->next;
- *page = list_entry(p, struct buffer_page, list);
+ *bpage = list_entry(p, struct buffer_page, list);
}
static inline unsigned
if (RB_WARN_ON(cpu_buffer,
cpu_buffer->commit_page == cpu_buffer->tail_page))
return;
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
}
/* Now set the commit to the event's index */
- local_set(&cpu_buffer->commit_page->commit, index);
+ local_set(&cpu_buffer->commit_page->page->commit, index);
}
static inline void
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
+ again:
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
barrier();
}
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
}
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+ cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
cpu_buffer->reader_page->read = 0;
}
else
rb_inc_page(cpu_buffer, &iter->head_page);
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
}
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
{
- struct buffer_page *tail_page, *head_page, *reader_page;
+ struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
unsigned long tail, write;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct ring_buffer_event *event;
unsigned long flags;
+ commit_page = cpu_buffer->commit_page;
+ /* we just need to protect against interrupts */
+ barrier();
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
tail = write - length;
* it all the way around the buffer, bail, and warn
* about it.
*/
- if (unlikely(next_page == cpu_buffer->commit_page)) {
+ if (unlikely(next_page == commit_page)) {
WARN_ON_ONCE(1);
goto out_unlock;
}
*/
if (tail_page == cpu_buffer->tail_page) {
local_set(&next_page->write, 0);
- local_set(&next_page->commit, 0);
+ local_set(&next_page->page->commit, 0);
cpu_buffer->tail_page = next_page;
/* reread the time stamp */
*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
- cpu_buffer->tail_page->time_stamp = *ts;
+ cpu_buffer->tail_page->page->time_stamp = *ts;
}
/*
* this page's time stamp.
*/
if (!tail && rb_is_commit(cpu_buffer, event))
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
return event;
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
event->time_delta = 0;
event->array[0] = 0;
}
if (iter->head)
iter->read_stamp = cpu_buffer->read_stamp;
else
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
}
/**
cpu_buffer->reader_page->list.prev = reader->list.prev;
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
/* Make the reader page now replace the head */
reader->list.prev->next = &cpu_buffer->reader_page->list;
cpu_buffer->head_page
= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
local_set(&cpu_buffer->head_page->write, 0);
- local_set(&cpu_buffer->head_page->commit, 0);
+ local_set(&cpu_buffer->head_page->page->commit, 0);
cpu_buffer->head_page->read = 0;
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
cpu_buffer->reader_page->read = 0;
cpu_buffer->overrun = 0;
return -EINVAL;
/* At least make sure the two buffers are somewhat the same */
- if (buffer_a->size != buffer_b->size ||
- buffer_a->pages != buffer_b->pages)
+ if (buffer_a->pages != buffer_b->pages)
return -EINVAL;
cpu_buffer_a = buffer_a->buffers[cpu];
return 0;
}
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_data_page *bpage)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ __raw_spin_lock(&cpu_buffer->lock);
+ for (head = 0; head < local_read(&bpage->commit);
+ head += rb_event_length(event)) {
+
+ event = __rb_data_page_index(bpage, head);
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->entries--;
+ }
+ __raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ * The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+ unsigned long addr;
+ struct buffer_data_page *bpage;
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return NULL;
+
+ bpage = (void *)addr;
+
+ return bpage;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocate for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+ free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ * rpage = ring_buffer_alloc_page(buffer);
+ * if (!rpage)
+ * return error;
+ * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ * if (ret)
+ * process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ * The ring buffer can be used anywhere in the kernel and can not
+ * blindly call wake_up. The layer that uses the ring buffer must be
+ * responsible for that.
+ *
+ * Returns:
+ * 1 if data has been transferred
+ * 0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+ void **data_page, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!data_page)
+ return 0;
+
+ bpage = *data_page;
+ if (!bpage)
+ return 0;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ /*
+ * rb_buffer_peek will get the next ring buffer if
+ * the current reader page is empty.
+ */
+ event = rb_buffer_peek(buffer, cpu, NULL);
+ if (!event)
+ goto out;
+
+ /* check for data */
+ if (!local_read(&cpu_buffer->reader_page->page->commit))
+ goto out;
+ /*
+ * If the writer is already off of the read page, then simply
+ * switch the read page with the given page. Otherwise
+ * we need to copy the data from the reader to the writer.
+ */
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+ unsigned int read = cpu_buffer->reader_page->read;
+
+ if (full)
+ goto out;
+ /* The writer is still on the reader page, we must copy */
+ bpage = cpu_buffer->reader_page->page;
+ memcpy(bpage->data,
+ cpu_buffer->reader_page->page->data + read,
+ local_read(&bpage->commit) - read);
+
+ /* consume what was read */
+ cpu_buffer->reader_page += read;
+
+ } else {
+ /* swap the pages */
+ rb_init_page(bpage);
+ bpage = cpu_buffer->reader_page->page;
+ cpu_buffer->reader_page->page = *data_page;
+ cpu_buffer->reader_page->read = 0;
+ *data_page = bpage;
+ }
+ ret = 1;
+
+ /* update the entry counter */
+ rb_remove_entries(cpu_buffer, bpage);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+
static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)