/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
-/*
- * Currently fastpath is not supported if preemption is enabled.
- */
-#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
-#define SLUB_FASTPATH
-#endif
-
-#if PAGE_SHIFT <= 12
-
-/*
- * Small page size. Make sure that we do not fragment memory
- */
-#define DEFAULT_MAX_ORDER 1
-#define DEFAULT_MIN_OBJECTS 4
-
-#else
-
-/*
- * Large page machines are customarily able to handle larger
- * page orders.
- */
-#define DEFAULT_MAX_ORDER 2
-#define DEFAULT_MIN_OBJECTS 8
-
-#endif
-
/*
* Mininum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
#endif
}
-/*
- * The end pointer in a slab is special. It points to the first object in the
- * slab but has bit 0 set to mark it.
- *
- * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
- * in the mapping set.
- */
-static inline int is_end(void *addr)
-{
- return (unsigned long)addr & PAGE_MAPPING_ANON;
-}
-
-void *slab_address(struct page *page)
-{
- return page->end - PAGE_MAPPING_ANON;
-}
-
+/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
{
void *base;
- if (object == page->end)
+ if (!object)
return 1;
- base = slab_address(page);
- if (object < base || object >= base + s->objects * s->size ||
+ base = page_address(page);
+ if (object < base || object >= base + page->objects * s->size ||
(object - base) % s->size) {
return 0;
}
}
/* Loop over all objects in a slab */
-#define for_each_object(__p, __s, __addr) \
- for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
+#define for_each_object(__p, __s, __addr, __objects) \
+ for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
__p += (__s)->size)
/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
- for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
- __p))
+ for (__p = (__free); __p; __p = get_freepointer((__s), __p))
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
return (p - addr) / s->size;
}
+static inline struct kmem_cache_order_objects oo_make(int order,
+ unsigned long size)
+{
+ struct kmem_cache_order_objects x = {
+ (order << 16) + (PAGE_SIZE << order) / size
+ };
+
+ return x;
+}
+
+static inline int oo_order(struct kmem_cache_order_objects x)
+{
+ return x.x >> 16;
+}
+
+static inline int oo_objects(struct kmem_cache_order_objects x)
+{
+ return x.x & ((1 << 16) - 1);
+}
+
#ifdef CONFIG_SLUB_DEBUG
/*
* Debug settings:
static void print_page_info(struct page *page)
{
- printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
- page, page->inuse, page->freelist, page->flags);
+ printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
+ page, page->objects, page->inuse, page->freelist, page->flags);
}
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
- u8 *addr = slab_address(page);
+ u8 *addr = page_address(page);
print_tracking(s, p);
static void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason)
{
- slab_bug(s, reason);
+ slab_bug(s, "%s", reason);
print_trailer(s, page, object);
}
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- slab_bug(s, fmt);
+ slab_bug(s, "%s", buf);
print_page_info(page);
dump_stack();
}
* A. Free pointer (if we cannot overwrite object on free)
* B. Tracking data for SLAB_STORE_USER
* C. Padding to reach required alignment boundary or at mininum
- * one word if debuggin is on to be able to detect writes
+ * one word if debugging is on to be able to detect writes
* before the word boundary.
*
* Padding is done using 0x5a (POISON_INUSE)
p + off, POISON_INUSE, s->size - off);
}
+/* Check the pad bytes at the end of a slab page */
static int slab_pad_check(struct kmem_cache *s, struct page *page)
{
u8 *start;
if (!(s->flags & SLAB_POISON))
return 1;
- start = slab_address(page);
- end = start + (PAGE_SIZE << s->order);
- length = s->objects * s->size;
- remainder = end - (start + length);
+ start = page_address(page);
+ length = (PAGE_SIZE << compound_order(page));
+ end = start + length;
+ remainder = length % s->size;
if (!remainder)
return 1;
- fault = check_bytes(start + length, POISON_INUSE, remainder);
+ fault = check_bytes(end - remainder, POISON_INUSE, remainder);
if (!fault)
return 1;
while (end > fault && end[-1] == POISON_INUSE)
end--;
slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
- print_section("Padding", start, length);
+ print_section("Padding", end - remainder, remainder);
restore_bytes(s, "slab padding", POISON_INUSE, start, end);
return 0;
endobject, red, s->inuse - s->objsize))
return 0;
} else {
- if ((s->flags & SLAB_POISON) && s->objsize < s->inuse)
- check_bytes_and_report(s, page, p, "Alignment padding", endobject,
- POISON_INUSE, s->inuse - s->objsize);
+ if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
+ check_bytes_and_report(s, page, p, "Alignment padding",
+ endobject, POISON_INUSE, s->inuse - s->objsize);
+ }
}
if (s->flags & SLAB_POISON) {
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
- set_freepointer(s, p, page->end);
+ set_freepointer(s, p, NULL);
return 0;
}
return 1;
static int check_slab(struct kmem_cache *s, struct page *page)
{
+ int maxobj;
+
VM_BUG_ON(!irqs_disabled());
if (!PageSlab(page)) {
slab_err(s, page, "Not a valid slab page");
return 0;
}
- if (page->inuse > s->objects) {
+
+ maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
+ if (page->objects > maxobj) {
+ slab_err(s, page, "objects %u > max %u",
+ s->name, page->objects, maxobj);
+ return 0;
+ }
+ if (page->inuse > page->objects) {
slab_err(s, page, "inuse %u > max %u",
- s->name, page->inuse, s->objects);
+ s->name, page->inuse, page->objects);
return 0;
}
/* Slab_pad_check fixes things up after itself */
int nr = 0;
void *fp = page->freelist;
void *object = NULL;
+ unsigned long max_objects;
- while (fp != page->end && nr <= s->objects) {
+ while (fp && nr <= page->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (object) {
object_err(s, page, object,
"Freechain corrupt");
- set_freepointer(s, object, page->end);
+ set_freepointer(s, object, NULL);
break;
} else {
slab_err(s, page, "Freepointer corrupt");
- page->freelist = page->end;
- page->inuse = s->objects;
+ page->freelist = NULL;
+ page->inuse = page->objects;
slab_fix(s, "Freelist cleared");
return 0;
}
nr++;
}
- if (page->inuse != s->objects - nr) {
+ max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
+ if (max_objects > 65535)
+ max_objects = 65535;
+
+ if (page->objects != max_objects) {
+ slab_err(s, page, "Wrong number of objects. Found %d but "
+ "should be %d", page->objects, max_objects);
+ page->objects = max_objects;
+ slab_fix(s, "Number of objects adjusted.");
+ }
+ if (page->inuse != page->objects - nr) {
slab_err(s, page, "Wrong object count. Counter is %d but "
- "counted were %d", page->inuse, s->objects - nr);
- page->inuse = s->objects - nr;
+ "counted were %d", page->inuse, page->objects - nr);
+ page->inuse = page->objects - nr;
slab_fix(s, "Object count adjusted.");
}
return search == NULL;
spin_unlock(&n->list_lock);
}
+/* Tracking of the number of slabs for debugging purposes */
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ return atomic_long_read(&n->nr_slabs);
+}
+
+static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ /*
+ * May be called early in order to allocate a slab for the
+ * kmem_cache_node structure. Solve the chicken-egg
+ * dilemma by deferring the increment of the count during
+ * bootstrap (see early_kmem_cache_node_alloc).
+ */
+ if (!NUMA_BUILD || n) {
+ atomic_long_inc(&n->nr_slabs);
+ atomic_long_add(objects, &n->total_objects);
+ }
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ atomic_long_dec(&n->nr_slabs);
+ atomic_long_sub(objects, &n->total_objects);
+}
+
+/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
{
if (!check_slab(s, page))
goto bad;
- if (object && !on_freelist(s, page, object)) {
+ if (!on_freelist(s, page, object)) {
object_err(s, page, object, "Object already allocated");
goto bad;
}
goto bad;
}
- if (object && !check_object(s, page, object, 0))
+ if (!check_object(s, page, object, 0))
goto bad;
/* Success perform special debug activities for allocs */
* as used avoids touching the remaining objects.
*/
slab_fix(s, "Marking all objects used");
- page->inuse = s->objects;
- page->freelist = page->end;
+ page->inuse = page->objects;
+ page->freelist = NULL;
}
return 0;
}
return 0;
if (unlikely(s != page->slab)) {
- if (!PageSlab(page))
+ if (!PageSlab(page)) {
slab_err(s, page, "Attempt to free object(0x%p) "
"outside of slab", object);
- else
- if (!page->slab) {
+ } else if (!page->slab) {
printk(KERN_ERR
"SLUB <none>: no slab for object 0x%p.\n",
object);
}
/* Special debug activities for freeing objects */
- if (!SlabFrozen(page) && page->freelist == page->end)
+ if (!SlabFrozen(page) && !page->freelist)
remove_full(s, page);
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
void (*ctor)(struct kmem_cache *, void *))
{
/*
- * The page->offset field is only 16 bit wide. This is an offset
- * in units of words from the beginning of an object. If the slab
- * size is bigger then we cannot move the free pointer behind the
- * object anymore.
- *
- * On 32 bit platforms the limit is 256k. On 64bit platforms
- * the limit is 512k.
- *
- * Debugging or ctor may create a need to move the free
- * pointer. Fail if this happens.
+ * Enable debugging if selected on the kernel commandline.
*/
- if (objsize >= 65535 * sizeof(void *)) {
- BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON |
- SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
- BUG_ON(ctor);
- } else {
- /*
- * Enable debugging if selected on the kernel commandline.
- */
- if (slub_debug && (!slub_debug_slabs ||
- strncmp(slub_debug_slabs, name,
- strlen(slub_debug_slabs)) == 0))
- flags |= slub_debug;
- }
+ if (slub_debug && (!slub_debug_slabs ||
+ strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
+ flags |= slub_debug;
return flags;
}
return flags;
}
#define slub_debug 0
+
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+ { return 0; }
+static inline void inc_slabs_node(struct kmem_cache *s, int node,
+ int objects) {}
+static inline void dec_slabs_node(struct kmem_cache *s, int node,
+ int objects) {}
#endif
+
/*
* Slab allocation and freeing
*/
-static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+static inline struct page *alloc_slab_page(gfp_t flags, int node,
+ struct kmem_cache_order_objects oo)
{
- struct page *page;
- int pages = 1 << s->order;
-
- if (s->order)
- flags |= __GFP_COMP;
-
- if (s->flags & SLAB_CACHE_DMA)
- flags |= SLUB_DMA;
-
- if (s->flags & SLAB_RECLAIM_ACCOUNT)
- flags |= __GFP_RECLAIMABLE;
+ int order = oo_order(oo);
if (node == -1)
- page = alloc_pages(flags, s->order);
+ return alloc_pages(flags, order);
else
- page = alloc_pages_node(node, flags, s->order);
+ return alloc_pages_node(node, flags, order);
+}
- if (!page)
- return NULL;
+static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+{
+ struct page *page;
+ struct kmem_cache_order_objects oo = s->oo;
+ flags |= s->allocflags;
+
+ page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
+ oo);
+ if (unlikely(!page)) {
+ oo = s->min;
+ /*
+ * Allocation may have failed due to fragmentation.
+ * Try a lower order alloc if possible
+ */
+ page = alloc_slab_page(flags, node, oo);
+ if (!page)
+ return NULL;
+
+ stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
+ }
+ page->objects = oo_objects(oo);
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
- pages);
+ 1 << oo_order(oo));
return page;
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
- struct kmem_cache_node *n;
void *start;
void *last;
void *p;
if (!page)
goto out;
- n = get_node(s, page_to_nid(page));
- if (n)
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(s, page_to_nid(page), page->objects);
page->slab = s;
page->flags |= 1 << PG_slab;
if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
SetSlabDebug(page);
start = page_address(page);
- page->end = start + 1;
if (unlikely(s->flags & SLAB_POISON))
- memset(start, POISON_INUSE, PAGE_SIZE << s->order);
+ memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
last = start;
- for_each_object(p, s, start) {
+ for_each_object(p, s, start, page->objects) {
setup_object(s, page, last);
set_freepointer(s, last, p);
last = p;
}
setup_object(s, page, last);
- set_freepointer(s, last, page->end);
+ set_freepointer(s, last, NULL);
page->freelist = start;
page->inuse = 0;
static void __free_slab(struct kmem_cache *s, struct page *page)
{
- int pages = 1 << s->order;
+ int order = compound_order(page);
+ int pages = 1 << order;
if (unlikely(SlabDebug(page))) {
void *p;
slab_pad_check(s, page);
- for_each_object(p, s, slab_address(page))
+ for_each_object(p, s, page_address(page),
+ page->objects)
check_object(s, page, p, 0);
ClearSlabDebug(page);
}
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
- page->mapping = NULL;
- __free_pages(page, s->order);
+ __ClearPageSlab(page);
+ reset_page_mapcount(page);
+ __free_pages(page, order);
}
static void rcu_free_slab(struct rcu_head *h)
static void discard_slab(struct kmem_cache *s, struct page *page)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- atomic_long_dec(&n->nr_slabs);
- reset_page_mapcount(page);
- __ClearPageSlab(page);
+ dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
}
static __always_inline void slab_unlock(struct page *page)
{
- bit_spin_unlock(PG_locked, &page->flags);
+ __bit_spin_unlock(PG_locked, &page->flags);
}
static __always_inline int slab_trylock(struct page *page)
* may return off node objects because partial slabs are obtained
* from other nodes and filled up.
*
- * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
+ * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
* defrag_ratio = 1000) then every (well almost) allocation will
* first attempt to defrag slab caches on other nodes. This means
* scanning over all nodes to look for partial slabs which may be
get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;
- zonelist = &NODE_DATA(slab_node(current->mempolicy))
- ->node_zonelists[gfp_zone(flags)];
+ zonelist = &NODE_DATA(
+ slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
for (z = zonelist->zones; *z; z++) {
struct kmem_cache_node *n;
ClearSlabFrozen(page);
if (page->inuse) {
- if (page->freelist != page->end) {
+ if (page->freelist) {
add_partial(n, page, tail);
stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
} else {
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
* to come after the other slabs with objects in
- * order to fill them up. That way the size of the
- * partial list stays small. kmem_cache_shrink can
- * reclaim empty slabs from the partial list.
+ * so that the others get filled first. That way the
+ * size of the partial list stays small.
+ *
+ * kmem_cache_shrink can reclaim any empty slabs from the
+ * partial list.
*/
add_partial(n, page, 1);
slab_unlock(page);
struct page *page = c->page;
int tail = 1;
- if (c->freelist)
+ if (page->freelist)
stat(c, DEACTIVATE_REMOTE_FREES);
/*
- * Merge cpu freelist into freelist. Typically we get here
+ * Merge cpu freelist into slab freelist. Typically we get here
* because both freelists are empty. So this is unlikely
* to occur.
- *
- * We need to use _is_end here because deactivate slab may
- * be called for a debug slab. Then c->freelist may contain
- * a dummy pointer.
*/
- while (unlikely(!is_end(c->freelist))) {
+ while (unlikely(c->freelist)) {
void **object;
tail = 0; /* Hot objects. Put the slab first */
/*
* Flush cpu slab.
+ *
* Called from IPI handler with interrupts disabled.
*/
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
* rest of the freelist to the lockless freelist.
*
* And if we were unable to get a new slab from the partial slab lists then
- * we need to allocate a new slab. This is slowest path since we may sleep.
+ * we need to allocate a new slab. This is the slowest path since it involves
+ * a call to the page allocator and the setup of a new slab.
*/
static void *__slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
{
void **object;
struct page *new;
-#ifdef SLUB_FASTPATH
- unsigned long flags;
- local_irq_save(flags);
-#endif
+ /* We handle __GFP_ZERO in the caller */
+ gfpflags &= ~__GFP_ZERO;
+
if (!c->page)
goto new_slab;
slab_lock(c->page);
if (unlikely(!node_match(c, node)))
goto another_slab;
+
stat(c, ALLOC_REFILL);
+
load_freelist:
object = c->page->freelist;
- if (unlikely(object == c->page->end))
+ if (unlikely(!object))
goto another_slab;
if (unlikely(SlabDebug(c->page)))
goto debug;
- object = c->page->freelist;
c->freelist = object[c->offset];
- c->page->inuse = s->objects;
- c->page->freelist = c->page->end;
+ c->page->inuse = c->page->objects;
+ c->page->freelist = NULL;
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
stat(c, ALLOC_SLOWPATH);
-out:
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
return object;
another_slab:
c->page = new;
goto load_freelist;
}
- object = NULL;
- goto out;
+ return NULL;
debug:
- object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
goto another_slab;
{
void **object;
struct kmem_cache_cpu *c;
-
-/*
- * The SLUB_FASTPATH path is provisional and is currently disabled if the
- * kernel is compiled with preemption or if the arch does not support
- * fast cmpxchg operations. There are a couple of coming changes that will
- * simplify matters and allow preemption. Ultimately we may end up making
- * SLUB_FASTPATH the default.
- *
- * 1. The introduction of the per cpu allocator will avoid array lookups
- * through get_cpu_slab(). A special register can be used instead.
- *
- * 2. The introduction of per cpu atomic operations (cpu_ops) means that
- * we can realize the logic here entirely with per cpu atomics. The
- * per cpu atomic ops will take care of the preemption issues.
- */
-
-#ifdef SLUB_FASTPATH
- c = get_cpu_slab(s, raw_smp_processor_id());
- do {
- object = c->freelist;
- if (unlikely(is_end(object) || !node_match(c, node))) {
- object = __slab_alloc(s, gfpflags, node, addr, c);
- break;
- }
- stat(c, ALLOC_FASTPATH);
- } while (cmpxchg_local(&c->freelist, object, object[c->offset])
- != object);
-#else
unsigned long flags;
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
- if (unlikely(is_end(c->freelist) || !node_match(c, node)))
+ if (unlikely(!c->freelist || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
stat(c, ALLOC_FASTPATH);
}
local_irq_restore(flags);
-#endif
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, c->objsize);
void **object = (void *)x;
struct kmem_cache_cpu *c;
-#ifdef SLUB_FASTPATH
- unsigned long flags;
-
- local_irq_save(flags);
-#endif
c = get_cpu_slab(s, raw_smp_processor_id());
stat(c, FREE_SLOWPATH);
slab_lock(page);
if (unlikely(SlabDebug(page)))
goto debug;
+
checks_ok:
prior = object[offset] = page->freelist;
page->freelist = object;
goto slab_empty;
/*
- * Objects left in the slab. If it
- * was not on the partial list before
+ * Objects left in the slab. If it was not on the partial list before
* then add it.
*/
- if (unlikely(prior == page->end)) {
+ if (unlikely(!prior)) {
add_partial(get_node(s, page_to_nid(page)), page, 1);
stat(c, FREE_ADD_PARTIAL);
}
out_unlock:
slab_unlock(page);
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
return;
slab_empty:
- if (prior != page->end) {
+ if (prior) {
/*
* Slab still on the partial list.
*/
}
slab_unlock(page);
stat(c, FREE_SLAB);
-#ifdef SLUB_FASTPATH
- local_irq_restore(flags);
-#endif
discard_slab(s, page);
return;
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
-
-#ifdef SLUB_FASTPATH
- void **freelist;
-
- c = get_cpu_slab(s, raw_smp_processor_id());
- debug_check_no_locks_freed(object, s->objsize);
- do {
- freelist = c->freelist;
- barrier();
- /*
- * If the compiler would reorder the retrieval of c->page to
- * come before c->freelist then an interrupt could
- * change the cpu slab before we retrieve c->freelist. We
- * could be matching on a page no longer active and put the
- * object onto the freelist of the wrong slab.
- *
- * On the other hand: If we already have the freelist pointer
- * then any change of cpu_slab will cause the cmpxchg to fail
- * since the freelist pointers are unique per slab.
- */
- if (unlikely(page != c->page || c->node < 0)) {
- __slab_free(s, page, x, addr, c->offset);
- break;
- }
- object[c->offset] = freelist;
- stat(c, FREE_FASTPATH);
- } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
-#else
unsigned long flags;
local_irq_save(flags);
- debug_check_no_locks_freed(object, s->objsize);
c = get_cpu_slab(s, smp_processor_id());
+ debug_check_no_locks_freed(object, c->objsize);
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
__slab_free(s, page, x, addr, c->offset);
local_irq_restore(flags);
-#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
* take the list_lock.
*/
static int slub_min_order;
-static int slub_max_order = DEFAULT_MAX_ORDER;
-static int slub_min_objects = DEFAULT_MIN_OBJECTS;
+static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
+static int slub_min_objects;
/*
* Merge control. If this is set then no merging of slab caches will occur.
* system components. Generally order 0 allocations should be preferred since
* order 0 does not cause fragmentation in the page allocator. Larger objects
* be problematic to put into order 0 slabs because there may be too much
- * unused space left. We go to a higher order if more than 1/8th of the slab
+ * unused space left. We go to a higher order if more than 1/16th of the slab
* would be wasted.
*
* In order to reach satisfactory performance we must ensure that a minimum
int rem;
int min_order = slub_min_order;
+ if ((PAGE_SIZE << min_order) / size > 65535)
+ return get_order(size * 65535) - 1;
+
for (order = max(min_order,
fls(min_objects * size - 1) - PAGE_SHIFT);
order <= max_order; order++) {
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
+ if (!min_objects)
+ min_objects = 4 * (fls(nr_cpu_ids) + 1);
while (min_objects > 1) {
- fraction = 8;
+ fraction = 16;
while (fraction >= 4) {
order = slab_order(size, min_objects,
slub_max_order, fraction);
unsigned long align, unsigned long size)
{
/*
- * If the user wants hardware cache aligned objects then
- * follow that suggestion if the object is sufficiently
- * large.
+ * If the user wants hardware cache aligned objects then follow that
+ * suggestion if the object is sufficiently large.
*
- * The hardware cache alignment cannot override the
- * specified alignment though. If that is greater
- * then use it.
+ * The hardware cache alignment cannot override the specified
+ * alignment though. If that is greater then use it.
*/
- if ((flags & SLAB_HWCACHE_ALIGN) &&
- size > cache_line_size() / 2)
- return max_t(unsigned long, align, cache_line_size());
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ unsigned long ralign = cache_line_size();
+ while (size <= ralign / 2)
+ ralign /= 2;
+ align = max(align, ralign);
+ }
if (align < ARCH_SLAB_MINALIGN)
- return ARCH_SLAB_MINALIGN;
+ align = ARCH_SLAB_MINALIGN;
return ALIGN(align, sizeof(void *));
}
struct kmem_cache_cpu *c)
{
c->page = NULL;
- c->freelist = (void *)PAGE_MAPPING_ANON;
+ c->freelist = NULL;
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
+#ifdef CONFIG_SLUB_STATS
+ memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
+#endif
}
static void init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- atomic_long_set(&n->nr_slabs, 0);
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_set(&n->nr_slabs, 0);
INIT_LIST_HEAD(&n->full);
#endif
}
init_tracking(kmalloc_caches, n);
#endif
init_kmem_cache_node(n);
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(kmalloc_caches, node, page->objects);
+
/*
* lockdep requires consistent irq usage for each lock
* so even though there cannot be a race this early in
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
*/
-static int calculate_sizes(struct kmem_cache *s)
+static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
unsigned long size = s->objsize;
unsigned long align = s->align;
+ int order;
+
+ /*
+ * Round up object size to the next word boundary. We can only
+ * place the free pointer at word boundaries and this determines
+ * the possible location of the free pointer.
+ */
+ size = ALIGN(size, sizeof(void *));
+#ifdef CONFIG_SLUB_DEBUG
/*
* Determine if we can poison the object itself. If the user of
* the slab may touch the object after free or before allocation
else
s->flags &= ~__OBJECT_POISON;
- /*
- * Round up object size to the next word boundary. We can only
- * place the free pointer at word boundaries and this determines
- * the possible location of the free pointer.
- */
- size = ALIGN(size, sizeof(void *));
-#ifdef CONFIG_SLUB_DEBUG
/*
* If we are Redzoning then check if there is some space between the
* end of the object and the free pointer. If not then add an
*/
size = ALIGN(size, align);
s->size = size;
+ if (forced_order >= 0)
+ order = forced_order;
+ else
+ order = calculate_order(size);
- s->order = calculate_order(size);
- if (s->order < 0)
+ if (order < 0)
return 0;
+ s->allocflags = 0;
+ if (order)
+ s->allocflags |= __GFP_COMP;
+
+ if (s->flags & SLAB_CACHE_DMA)
+ s->allocflags |= SLUB_DMA;
+
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
+ s->allocflags |= __GFP_RECLAIMABLE;
+
/*
* Determine the number of objects per slab
*/
- s->objects = (PAGE_SIZE << s->order) / size;
+ s->oo = oo_make(order, size);
+ s->min = oo_make(get_order(size), size);
+ if (oo_objects(s->oo) > oo_objects(s->max))
+ s->max = s->oo;
- return !!s->objects;
+ return !!oo_objects(s->oo);
}
s->align = align;
s->flags = kmem_cache_flags(size, flags, name, ctor);
- if (!calculate_sizes(s))
+ if (!calculate_sizes(s, -1))
goto error;
s->refcount = 1;
if (flags & SLAB_PANIC)
panic("Cannot create slab %s size=%lu realsize=%u "
"order=%u offset=%u flags=%lx\n",
- s->name, (unsigned long)size, s->size, s->order,
+ s->name, (unsigned long)size, s->size, oo_order(s->oo),
s->offset, flags);
return 0;
}
/*
* We could also check if the object is on the slabs freelist.
* But this would be too expensive and it seems that the main
- * purpose of kmem_ptr_valid is to check if the object belongs
+ * purpose of kmem_ptr_valid() is to check if the object belongs
* to a certain slab.
*/
return 1;
}
EXPORT_SYMBOL(kmem_cache_name);
+static void list_slab_objects(struct kmem_cache *s, struct page *page,
+ const char *text)
+{
+#ifdef CONFIG_SLUB_DEBUG
+ void *addr = page_address(page);
+ void *p;
+ DECLARE_BITMAP(map, page->objects);
+
+ bitmap_zero(map, page->objects);
+ slab_err(s, page, "%s", text);
+ slab_lock(page);
+ for_each_free_object(p, s, page->freelist)
+ set_bit(slab_index(p, s, addr), map);
+
+ for_each_object(p, s, addr, page->objects) {
+
+ if (!test_bit(slab_index(p, s, addr), map)) {
+ printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
+ p, p - addr);
+ print_tracking(s, p);
+ }
+ }
+ slab_unlock(page);
+#endif
+}
+
/*
- * Attempt to free all slabs on a node. Return the number of slabs we
- * were unable to free.
+ * Attempt to free all partial slabs on a node.
*/
-static int free_list(struct kmem_cache *s, struct kmem_cache_node *n,
- struct list_head *list)
+static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
- int slabs_inuse = 0;
unsigned long flags;
struct page *page, *h;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry_safe(page, h, list, lru)
+ list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
list_del(&page->lru);
discard_slab(s, page);
- } else
- slabs_inuse++;
+ n->nr_partial--;
+ } else {
+ list_slab_objects(s, page,
+ "Objects remaining on kmem_cache_close()");
+ }
+ }
spin_unlock_irqrestore(&n->list_lock, flags);
- return slabs_inuse;
}
/*
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
- n->nr_partial -= free_list(s, n, &n->partial);
- if (atomic_long_read(&n->nr_slabs))
+ free_partial(s, n);
+ if (n->nr_partial || slabs_node(s, node))
return 1;
}
free_kmem_cache_nodes(s);
if (!s->refcount) {
list_del(&s->list);
up_write(&slub_lock);
- if (kmem_cache_close(s))
- WARN_ON(1);
+ if (kmem_cache_close(s)) {
+ printk(KERN_ERR "SLUB %s: %s called for cache that "
+ "still has objects.\n", s->name, __func__);
+ dump_stack();
+ }
sysfs_slab_remove(s);
} else
up_write(&slub_lock);
* Kmalloc subsystem
*******************************************************************/
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
-#endif
-
static int __init setup_slub_min_order(char *str)
{
get_option(&str, &slub_min_order);
down_write(&slub_lock);
if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
- flags, NULL))
+ flags, NULL))
goto panic;
list_add(&s->list, &slab_caches);
}
#ifdef CONFIG_ZONE_DMA
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
static void sysfs_add_func(struct work_struct *w)
{
goto unlock_out;
realsize = kmalloc_caches[index].objsize;
- text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize),
+ text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
+ (unsigned int)realsize);
s = kmalloc(kmem_size, flags & ~SLUB_DMA);
if (!s || !text || !kmem_cache_open(s, flags, text,
{
struct kmem_cache *s;
- if (unlikely(size > PAGE_SIZE / 2))
- return (void *)__get_free_pages(flags | __GFP_COMP,
- get_order(size));
+ if (unlikely(size > PAGE_SIZE))
+ return kmalloc_large(size, flags);
s = get_slab(size, flags);
}
EXPORT_SYMBOL(__kmalloc);
+static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+{
+ struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
+ get_order(size));
+
+ if (page)
+ return page_address(page);
+ else
+ return NULL;
+}
+
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
- if (unlikely(size > PAGE_SIZE / 2))
- return (void *)__get_free_pages(flags | __GFP_COMP,
- get_order(size));
+ if (unlikely(size > PAGE_SIZE))
+ return kmalloc_large_node(size, flags, node);
s = get_slab(size, flags);
struct page *page;
struct kmem_cache *s;
- BUG_ON(!object);
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
page = virt_to_head_page(object);
- BUG_ON(!page);
if (unlikely(!PageSlab(page)))
return PAGE_SIZE << compound_order(page);
s = page->slab;
- BUG_ON(!s);
+#ifdef CONFIG_SLUB_DEBUG
/*
* Debugging requires use of the padding between object
* and whatever may come after it.
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->objsize;
+#endif
/*
* If we have the need to store the freelist pointer
* back there or track user information then we can
*/
if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
return s->inuse;
-
/*
* Else we can use all the padding etc for the allocation
*/
}
EXPORT_SYMBOL(kfree);
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += page->inuse;
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
-}
-
/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
* the remaining slabs by the number of items in use. The slabs with the
struct kmem_cache_node *n;
struct page *page;
struct page *t;
+ int objects = oo_objects(s->max);
struct list_head *slabs_by_inuse =
- kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+ kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
unsigned long flags;
if (!slabs_by_inuse)
if (!n->nr_partial)
continue;
- for (i = 0; i < s->objects; i++)
+ for (i = 0; i < objects; i++)
INIT_LIST_HEAD(slabs_by_inuse + i);
spin_lock_irqsave(&n->list_lock, flags);
* Rebuild the partial list with the slabs filled up most
* first and the least used slabs at the end.
*/
- for (i = s->objects - 1; i >= 0; i--)
+ for (i = objects - 1; i >= 0; i--)
list_splice(slabs_by_inuse + i, n->partial.prev);
spin_unlock_irqrestore(&n->list_lock, flags);
* and offline_pages() function shoudn't call this
* callback. So, we must fail.
*/
- BUG_ON(atomic_long_read(&n->nr_slabs));
+ BUG_ON(slabs_node(s, offline_node));
s->node[offline_node] = NULL;
kmem_cache_free(kmalloc_caches, n);
caches++;
}
- for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+ for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
create_kmalloc_cache(&kmalloc_caches[i],
"kmalloc", 1 << i, GFP_KERNEL);
caches++;
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
- * mips it seems. The standard arches will not generate any code here.
+ * MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
- for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+ for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
kmalloc_caches[i]. name =
kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
kmem_size = sizeof(struct kmem_cache);
#endif
-
- printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
+ printk(KERN_INFO
+ "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
caches, cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects,
*/
for_each_online_cpu(cpu)
get_cpu_slab(s, cpu)->objsize = s->objsize;
+
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
return s;
}
+
s = kmalloc(kmem_size, GFP_KERNEL);
if (s) {
if (kmem_cache_open(s, GFP_KERNEL, name,
}
static struct notifier_block __cpuinitdata slab_notifier = {
- &slab_cpuup_callback, NULL, 0
+ .notifier_call = slab_cpuup_callback
};
#endif
{
struct kmem_cache *s;
- if (unlikely(size > PAGE_SIZE / 2))
- return (void *)__get_free_pages(gfpflags | __GFP_COMP,
- get_order(size));
+ if (unlikely(size > PAGE_SIZE))
+ return kmalloc_large(size, gfpflags);
+
s = get_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
{
struct kmem_cache *s;
- if (unlikely(size > PAGE_SIZE / 2))
- return (void *)__get_free_pages(gfpflags | __GFP_COMP,
- get_order(size));
+ if (unlikely(size > PAGE_SIZE))
+ return kmalloc_large_node(size, gfpflags, node);
+
s = get_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return slab_alloc(s, gfpflags, node, caller);
}
+#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+static unsigned long count_partial(struct kmem_cache_node *n,
+ int (*get_count)(struct page *))
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += get_count(page);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
+static int count_inuse(struct page *page)
+{
+ return page->inuse;
+}
+
+static int count_total(struct page *page)
+{
+ return page->objects;
+}
+
+static int count_free(struct page *page)
+{
+ return page->objects - page->inuse;
+}
+#endif
+
#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
void *p;
- void *addr = slab_address(page);
+ void *addr = page_address(page);
if (!check_slab(s, page) ||
!on_freelist(s, page, NULL))
return 0;
/* Now we know that a valid freelist exists */
- bitmap_zero(map, s->objects);
+ bitmap_zero(map, page->objects);
for_each_free_object(p, s, page->freelist) {
set_bit(slab_index(p, s, addr), map);
return 0;
}
- for_each_object(p, s, addr)
+ for_each_object(p, s, addr, page->objects)
if (!test_bit(slab_index(p, s, addr), map))
if (!check_object(s, page, p, 1))
return 0;
{
int node;
unsigned long count = 0;
- unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) *
+ unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
if (!map)
p = kzalloc(32, GFP_KERNEL);
p[32 + sizeof(void *)] = 0x34;
printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
- " 0x34 -> -0x%p\n", p);
- printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
+ " 0x34 -> -0x%p\n", p);
+ printk(KERN_ERR
+ "If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches + 5);
p = kzalloc(64, GFP_KERNEL);
*p = 0x56;
printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
p);
- printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
+ printk(KERN_ERR
+ "If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches + 6);
printk(KERN_ERR "\nB. Corruption after free\n");
p = kzalloc(256, GFP_KERNEL);
kfree(p);
p[50] = 0x9a;
- printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
+ printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
+ p);
validate_slab_cache(kmalloc_caches + 8);
p = kzalloc(512, GFP_KERNEL);
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc)
{
- void *addr = slab_address(page);
- DECLARE_BITMAP(map, s->objects);
+ void *addr = page_address(page);
+ DECLARE_BITMAP(map, page->objects);
void *p;
- bitmap_zero(map, s->objects);
+ bitmap_zero(map, page->objects);
for_each_free_object(p, s, page->freelist)
set_bit(slab_index(p, s, addr), map);
- for_each_object(p, s, addr)
+ for_each_object(p, s, addr, page->objects)
if (!test_bit(slab_index(p, s, addr), map))
add_location(t, s, get_track(s, p, alloc));
}
}
enum slab_stat_type {
- SL_FULL,
- SL_PARTIAL,
- SL_CPU,
- SL_OBJECTS
+ SL_ALL, /* All slabs */
+ SL_PARTIAL, /* Only partially allocated slabs */
+ SL_CPU, /* Only slabs used for cpu caches */
+ SL_OBJECTS, /* Determine allocated objects not slabs */
+ SL_TOTAL /* Determine object capacity not slabs */
};
-#define SO_FULL (1 << SL_FULL)
+#define SO_ALL (1 << SL_ALL)
#define SO_PARTIAL (1 << SL_PARTIAL)
#define SO_CPU (1 << SL_CPU)
#define SO_OBJECTS (1 << SL_OBJECTS)
+#define SO_TOTAL (1 << SL_TOTAL)
-static unsigned long slab_objects(struct kmem_cache *s,
- char *buf, unsigned long flags)
+static ssize_t show_slab_objects(struct kmem_cache *s,
+ char *buf, unsigned long flags)
{
unsigned long total = 0;
- int cpu;
int node;
int x;
unsigned long *nodes;
unsigned long *per_cpu;
nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
per_cpu = nodes + nr_node_ids;
- for_each_possible_cpu(cpu) {
- struct page *page;
- struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+ if (flags & SO_CPU) {
+ int cpu;
- if (!c)
- continue;
+ for_each_possible_cpu(cpu) {
+ struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
- page = c->page;
- node = c->node;
- if (node < 0)
- continue;
- if (page) {
- if (flags & SO_CPU) {
- if (flags & SO_OBJECTS)
- x = page->inuse;
+ if (!c || c->node < 0)
+ continue;
+
+ if (c->page) {
+ if (flags & SO_TOTAL)
+ x = c->page->objects;
+ else if (flags & SO_OBJECTS)
+ x = c->page->inuse;
else
x = 1;
+
total += x;
- nodes[node] += x;
+ nodes[c->node] += x;
}
- per_cpu[node]++;
+ per_cpu[c->node]++;
}
}
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ if (flags & SO_ALL) {
+ for_each_node_state(node, N_NORMAL_MEMORY) {
+ struct kmem_cache_node *n = get_node(s, node);
+
+ if (flags & SO_TOTAL)
+ x = atomic_long_read(&n->total_objects);
+ else if (flags & SO_OBJECTS)
+ x = atomic_long_read(&n->total_objects) -
+ count_partial(n, count_free);
- if (flags & SO_PARTIAL) {
- if (flags & SO_OBJECTS)
- x = count_partial(n);
else
- x = n->nr_partial;
+ x = atomic_long_read(&n->nr_slabs);
total += x;
nodes[node] += x;
}
- if (flags & SO_FULL) {
- int full_slabs = atomic_long_read(&n->nr_slabs)
- - per_cpu[node]
- - n->nr_partial;
+ } else if (flags & SO_PARTIAL) {
+ for_each_node_state(node, N_NORMAL_MEMORY) {
+ struct kmem_cache_node *n = get_node(s, node);
- if (flags & SO_OBJECTS)
- x = full_slabs * s->objects;
+ if (flags & SO_TOTAL)
+ x = count_partial(n, count_total);
+ else if (flags & SO_OBJECTS)
+ x = count_partial(n, count_inuse);
else
- x = full_slabs;
+ x = n->nr_partial;
total += x;
nodes[node] += x;
}
}
-
x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
for_each_node_state(node, N_NORMAL_MEMORY)
static int any_slab_objects(struct kmem_cache *s)
{
int node;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
- if (c && c->page)
- return 1;
- }
for_each_online_node(node) {
struct kmem_cache_node *n = get_node(s, node);
if (!n)
continue;
- if (n->nr_partial || atomic_long_read(&n->nr_slabs))
+ if (atomic_read(&n->total_objects))
return 1;
}
return 0;
static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", s->objects);
+ return sprintf(buf, "%d\n", oo_objects(s->oo));
}
SLAB_ATTR_RO(objs_per_slab);
+static ssize_t order_store(struct kmem_cache *s,
+ const char *buf, size_t length)
+{
+ int order = simple_strtoul(buf, NULL, 10);
+
+ if (order > slub_max_order || order < slub_min_order)
+ return -EINVAL;
+
+ calculate_sizes(s, order);
+ return length;
+}
+
static ssize_t order_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", s->order);
+ return sprintf(buf, "%d\n", oo_order(s->oo));
}
-SLAB_ATTR_RO(order);
+SLAB_ATTR(order);
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
+ return show_slab_objects(s, buf, SO_ALL);
}
SLAB_ATTR_RO(slabs);
static ssize_t partial_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_PARTIAL);
+ return show_slab_objects(s, buf, SO_PARTIAL);
}
SLAB_ATTR_RO(partial);
static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_CPU);
+ return show_slab_objects(s, buf, SO_CPU);
}
SLAB_ATTR_RO(cpu_slabs);
static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
- return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
+ return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
}
SLAB_ATTR_RO(objects);
+static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
+{
+ return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
+}
+SLAB_ATTR_RO(objects_partial);
+
+static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
+{
+ return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
+}
+SLAB_ATTR_RO(total_objects);
+
static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
s->flags &= ~SLAB_RED_ZONE;
if (buf[0] == '1')
s->flags |= SLAB_RED_ZONE;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(red_zone);
s->flags &= ~SLAB_POISON;
if (buf[0] == '1')
s->flags |= SLAB_POISON;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(poison);
s->flags &= ~SLAB_STORE_USER;
if (buf[0] == '1')
s->flags |= SLAB_STORE_USER;
- calculate_sizes(s);
+ calculate_sizes(s, -1);
return length;
}
SLAB_ATTR(store_user);
#endif
#ifdef CONFIG_SLUB_STATS
-
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
unsigned long sum = 0;
len = sprintf(buf, "%lu", sum);
+#ifdef CONFIG_SMP
for_each_online_cpu(cpu) {
if (data[cpu] && len < PAGE_SIZE - 20)
- len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+ len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
}
+#endif
kfree(data);
return len + sprintf(buf + len, "\n");
}
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
-
+STAT_ATTR(ORDER_FALLBACK, order_fallback);
#endif
static struct attribute *slab_attrs[] = {
&objs_per_slab_attr.attr,
&order_attr.attr,
&objects_attr.attr,
+ &objects_partial_attr.attr,
+ &total_objects_attr.attr,
&slabs_attr.attr,
&partial_attr.attr,
&cpu_slabs_attr.attr,
&deactivate_to_head_attr.attr,
&deactivate_to_tail_attr.attr,
&deactivate_remote_frees_attr.attr,
+ &order_fallback_attr.attr,
#endif
NULL
};
#define ID_STR_LENGTH 64
/* Create a unique string id for a slab cache:
- * format
- * :[flags-]size:[memory address of kmemcache]
+ *
+ * Format :[flags-]size
*/
static char *create_unique_id(struct kmem_cache *s)
{
unsigned long nr_partials = 0;
unsigned long nr_slabs = 0;
unsigned long nr_inuse = 0;
- unsigned long nr_objs;
+ unsigned long nr_objs = 0;
+ unsigned long nr_free = 0;
struct kmem_cache *s;
int node;
nr_partials += n->nr_partial;
nr_slabs += atomic_long_read(&n->nr_slabs);
- nr_inuse += count_partial(n);
+ nr_objs += atomic_long_read(&n->total_objects);
+ nr_free += count_partial(n, count_free);
}
- nr_objs = nr_slabs * s->objects;
- nr_inuse += (nr_slabs - nr_partials) * s->objects;
+ nr_inuse = nr_objs - nr_free;
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
- nr_objs, s->size, s->objects, (1 << s->order));
+ nr_objs, s->size, oo_objects(s->oo),
+ (1 << oo_order(s->oo)));
seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
0UL);