#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
+#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
+#include <linux/math64.h>
/*
* Lock order:
/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
-#if PAGE_SHIFT <= 12
-
-/*
- * Small page size. Make sure that we do not fragment memory
- */
-#define DEFAULT_MAX_ORDER 1
-#define DEFAULT_MIN_OBJECTS 4
-
-#else
-
-/*
- * Large page machines are customarily able to handle larger
- * page orders.
- */
-#define DEFAULT_MAX_ORDER 2
-#define DEFAULT_MIN_OBJECTS 8
-
-#endif
-
/*
* Mininum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
/* Internal SLUB flags */
#define __OBJECT_POISON 0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
-#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */
-#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */
-
-/* Not all arches define cache_line_size */
-#ifndef cache_line_size
-#define cache_line_size() L1_CACHE_BYTES
-#endif
static int kmem_size = sizeof(struct kmem_cache);
enum track_item { TRACK_ALLOC, TRACK_FREE };
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
+#ifdef CONFIG_SLUB_DEBUG
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
return search == NULL;
}
-static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc)
+static void trace(struct kmem_cache *s, struct page *page, void *object,
+ int alloc)
{
if (s->flags & SLAB_TRACE) {
printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
spin_unlock(&n->list_lock);
}
-static void remove_partial(struct kmem_cache *s,
- struct page *page)
+static void remove_partial(struct kmem_cache *s, struct page *page)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
*
* Must hold list_lock.
*/
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
+static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
+ struct page *page)
{
if (slab_trylock(page)) {
list_del(&page->lru);
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
- struct zone **z;
+ struct zoneref *z;
+ struct zone *zone;
+ enum zone_type high_zoneidx = gfp_zone(flags);
struct page *page;
/*
get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;
- zonelist = &NODE_DATA(
- slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
- for (z = zonelist->zones; *z; z++) {
+ zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct kmem_cache_node *n;
- n = get_node(s, zone_to_nid(*z));
+ n = get_node(s, zone_to_nid(zone));
- if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
+ if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
n->nr_partial > MIN_PARTIAL) {
page = get_partial_node(n);
if (page)
* so that the others get filled first. That way the
* size of the partial list stays small.
*
- * kmem_cache_shrink can reclaim any empty slabs from the
- * partial list.
+ * kmem_cache_shrink can reclaim any empty slabs from
+ * the partial list.
*/
add_partial(n, page, 1);
slab_unlock(page);
c->page = new;
goto load_freelist;
}
-
- /*
- * No memory available.
- *
- * If the slab uses higher order allocs but the object is
- * smaller than a page size then we can fallback in emergencies
- * to the page allocator via kmalloc_large. The page allocator may
- * have failed to obtain a higher order page and we can try to
- * allocate a single page if the object fits into a single page.
- * That is only possible if certain conditions are met that are being
- * checked when a slab is created.
- */
- if (!(gfpflags & __GFP_NORETRY) &&
- (s->flags & __PAGE_ALLOC_FALLBACK)) {
- if (gfpflags & __GFP_WAIT)
- local_irq_enable();
- object = kmalloc_large(s->objsize, gfpflags);
- if (gfpflags & __GFP_WAIT)
- local_irq_disable();
- return object;
- }
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
debug_check_no_locks_freed(object, c->objsize);
+ if (!(s->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(object, s->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
* take the list_lock.
*/
static int slub_min_order;
-static int slub_max_order = DEFAULT_MAX_ORDER;
-static int slub_min_objects = DEFAULT_MIN_OBJECTS;
+static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
+static int slub_min_objects;
/*
* Merge control. If this is set then no merging of slab caches will occur.
* system components. Generally order 0 allocations should be preferred since
* order 0 does not cause fragmentation in the page allocator. Larger objects
* be problematic to put into order 0 slabs because there may be too much
- * unused space left. We go to a higher order if more than 1/8th of the slab
+ * unused space left. We go to a higher order if more than 1/16th of the slab
* would be wasted.
*
* In order to reach satisfactory performance we must ensure that a minimum
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
+ if (!min_objects)
+ min_objects = 4 * (fls(nr_cpu_ids) + 1);
while (min_objects > 1) {
- fraction = 8;
+ fraction = 16;
while (fraction >= 4) {
order = slab_order(size, min_objects,
slub_max_order, fraction);
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
*/
-static int calculate_sizes(struct kmem_cache *s)
+static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
unsigned long size = s->objsize;
*/
size = ALIGN(size, align);
s->size = size;
-
- if ((flags & __KMALLOC_CACHE) &&
- PAGE_SIZE / size < slub_min_objects) {
- /*
- * Kmalloc cache that would not have enough objects in
- * an order 0 page. Kmalloc slabs can fallback to
- * page allocator order 0 allocs so take a reasonably large
- * order that will allows us a good number of objects.
- */
- order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
- s->flags |= __PAGE_ALLOC_FALLBACK;
- s->allocflags |= __GFP_NOWARN;
- } else
+ if (forced_order >= 0)
+ order = forced_order;
+ else
order = calculate_order(size);
if (order < 0)
s->align = align;
s->flags = kmem_cache_flags(size, flags, name, ctor);
- if (!calculate_sizes(s))
+ if (!calculate_sizes(s, -1))
goto error;
s->refcount = 1;
down_write(&slub_lock);
if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
- flags | __KMALLOC_CACHE, NULL))
+ flags, NULL))
goto panic;
list_add(&s->list, &slab_caches);
return 0;
/*
- * We are bringing a node online. No memory is availabe yet. We must
+ * We are bringing a node online. No memory is available yet. We must
* allocate a kmem_cache_node structure in order to bring the node
* online.
*/
kmalloc_caches[0].refcount = -1;
caches++;
- hotplug_memory_notifier(slab_memory_callback, 1);
+ hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
/* Able to allocate the per node structures */
if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
return 1;
- if ((s->flags & __PAGE_ALLOC_FALLBACK))
- return 1;
-
if (s->ctor)
return 1;
return slab_alloc(s, gfpflags, node, caller);
}
-#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+#ifdef CONFIG_SLUB_DEBUG
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct page *))
{
{
return page->objects - page->inuse;
}
-#endif
-#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
len += sprintf(buf + len, "<not-available>");
if (l->sum_time != l->min_time) {
- unsigned long remainder;
-
len += sprintf(buf + len, " age=%ld/%ld/%ld",
- l->min_time,
- div_long_long_rem(l->sum_time, l->count, &remainder),
- l->max_time);
+ l->min_time,
+ (long)div_u64(l->sum_time, l->count),
+ l->max_time);
} else
len += sprintf(buf + len, " age=%ld",
l->min_time);
static int any_slab_objects(struct kmem_cache *s)
{
int node;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
- if (c && c->page)
- return 1;
- }
for_each_online_node(node) {
struct kmem_cache_node *n = get_node(s, node);
if (!n)
continue;
- if (n->nr_partial || atomic_long_read(&n->nr_slabs))
+ if (atomic_long_read(&n->total_objects))
return 1;
}
return 0;
}
SLAB_ATTR_RO(objs_per_slab);
+static ssize_t order_store(struct kmem_cache *s,
+ const char *buf, size_t length)
+{
+ unsigned long order;
+ int err;
+
+ err = strict_strtoul(buf, 10, &order);
+ if (err)
+ return err;
+
+ if (order > slub_max_order || order < slub_min_order)
+ return -EINVAL;
+
+ calculate_sizes(s, order);
+ return length;
+}
+
static ssize_t order_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", oo_order(s->oo));
}
-SLAB_ATTR_RO(order);
+SLAB_ATTR(order);
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
s->flags &= ~SLAB_RED_ZONE;
if (buf[0] == '1')
s->flags |= SLAB_RED_ZONE;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(red_zone);
s->flags &= ~SLAB_POISON;
if (buf[0] == '1')
s->flags |= SLAB_POISON;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(poison);
s->flags &= ~SLAB_STORE_USER;
if (buf[0] == '1')
s->flags |= SLAB_STORE_USER;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(store_user);
static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
const char *buf, size_t length)
{
- int n = simple_strtoul(buf, NULL, 10);
+ unsigned long ratio;
+ int err;
+
+ err = strict_strtoul(buf, 10, &ratio);
+ if (err)
+ return err;
+
+ if (ratio < 100)
+ s->remote_node_defrag_ratio = ratio * 10;
- if (n < 100)
- s->remote_node_defrag_ratio = n * 10;
return length;
}
SLAB_ATTR(remote_node_defrag_ratio);
*/
#ifdef CONFIG_SLABINFO
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
- size_t count, loff_t *ppos)
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
{
return -EINVAL;
}