#endif
}
+/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
{
* A. Free pointer (if we cannot overwrite object on free)
* B. Tracking data for SLAB_STORE_USER
* C. Padding to reach required alignment boundary or at minimum
- * one word if debuggin is on to be able to detect writes
+ * one word if debugging is on to be able to detect writes
* before the word boundary.
*
* Padding is done using 0x5a (POISON_INUSE)
* may return off node objects because partial slabs are obtained
* from other nodes and filled up.
*
- * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
+ * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
* defrag_ratio = 1000) then every (well almost) allocation will
* first attempt to defrag slab caches on other nodes. This means
* scanning over all nodes to look for partial slabs which may be
* Adding an empty slab to the partial slabs in order
* to avoid page allocator overhead. This slab needs
* to come after the other slabs with objects in
- * order to fill them up. That way the size of the
- * partial list stays small. kmem_cache_shrink can
- * reclaim empty slabs from the partial list.
+ * so that the others get filled first. That way the
+ * size of the partial list stays small.
+ *
+ * kmem_cache_shrink can reclaim any empty slabs from the
+ * partial list.
*/
add_partial(n, page, 1);
slab_unlock(page);
struct page *page = c->page;
int tail = 1;
- if (c->freelist)
+ if (page->freelist)
stat(c, DEACTIVATE_REMOTE_FREES);
/*
- * Merge cpu freelist into freelist. Typically we get here
+ * Merge cpu freelist into slab freelist. Typically we get here
* because both freelists are empty. So this is unlikely
* to occur.
*/
/*
* Flush cpu slab.
+ *
* Called from IPI handler with interrupts disabled.
*/
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
* rest of the freelist to the lockless freelist.
*
* And if we were unable to get a new slab from the partial slab lists then
- * we need to allocate a new slab. This is slowest path since we may sleep.
+ * we need to allocate a new slab. This is the slowest path since it involves
+ * a call to the page allocator and the setup of a new slab.
*/
static void *__slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
void **object;
struct page *new;
+ /* We handle __GFP_ZERO in the caller */
+ gfpflags &= ~__GFP_ZERO;
+
if (!c->page)
goto new_slab;
slab_lock(c->page);
if (unlikely(!node_match(c, node)))
goto another_slab;
+
stat(c, ALLOC_REFILL);
+
load_freelist:
object = c->page->freelist;
if (unlikely(!object))
if (unlikely(SlabDebug(c->page)))
goto debug;
- object = c->page->freelist;
c->freelist = object[c->offset];
c->page->inuse = s->objects;
c->page->freelist = NULL;
* That is only possible if certain conditions are met that are being
* checked when a slab is created.
*/
- if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
- return kmalloc_large(s->objsize, gfpflags);
-
+ if (!(gfpflags & __GFP_NORETRY) &&
+ (s->flags & __PAGE_ALLOC_FALLBACK)) {
+ if (gfpflags & __GFP_WAIT)
+ local_irq_enable();
+ object = kmalloc_large(s->objsize, gfpflags);
+ if (gfpflags & __GFP_WAIT)
+ local_irq_disable();
+ return object;
+ }
return NULL;
debug:
- object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
goto another_slab;
if (unlikely(SlabDebug(page)))
goto debug;
+
checks_ok:
prior = object[offset] = page->freelist;
page->freelist = object;
goto slab_empty;
/*
- * Objects left in the slab. If it
- * was not on the partial list before
+ * Objects left in the slab. If it was not on the partial list before
* then add it.
*/
if (unlikely(!prior)) {
unsigned long align, unsigned long size)
{
/*
- * If the user wants hardware cache aligned objects then
- * follow that suggestion if the object is sufficiently
- * large.
+ * If the user wants hardware cache aligned objects then follow that
+ * suggestion if the object is sufficiently large.
*
- * The hardware cache alignment cannot override the
- * specified alignment though. If that is greater
- * then use it.
+ * The hardware cache alignment cannot override the specified
+ * alignment though. If that is greater then use it.
*/
- if ((flags & SLAB_HWCACHE_ALIGN) &&
- size > cache_line_size() / 2)
- return max_t(unsigned long, align, cache_line_size());
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ unsigned long ralign = cache_line_size();
+ while (size <= ralign / 2)
+ ralign /= 2;
+ align = max(align, ralign);
+ }
if (align < ARCH_SLAB_MINALIGN)
- return ARCH_SLAB_MINALIGN;
+ align = ARCH_SLAB_MINALIGN;
return ALIGN(align, sizeof(void *));
}
#endif
init_kmem_cache_node(n);
atomic_long_inc(&n->nr_slabs);
+
/*
* lockdep requires consistent irq usage for each lock
* so even though there cannot be a race this early in
unsigned long size = s->objsize;
unsigned long align = s->align;
+ /*
+ * Round up object size to the next word boundary. We can only
+ * place the free pointer at word boundaries and this determines
+ * the possible location of the free pointer.
+ */
+ size = ALIGN(size, sizeof(void *));
+
+#ifdef CONFIG_SLUB_DEBUG
/*
* Determine if we can poison the object itself. If the user of
* the slab may touch the object after free or before allocation
else
s->flags &= ~__OBJECT_POISON;
- /*
- * Round up object size to the next word boundary. We can only
- * place the free pointer at word boundaries and this determines
- * the possible location of the free pointer.
- */
- size = ALIGN(size, sizeof(void *));
-#ifdef CONFIG_SLUB_DEBUG
/*
* If we are Redzoning then check if there is some space between the
* end of the object and the free pointer. If not then add an
/*
* We could also check if the object is on the slabs freelist.
* But this would be too expensive and it seems that the main
- * purpose of kmem_ptr_valid is to check if the object belongs
+ * purpose of kmem_ptr_valid() is to check if the object belongs
* to a certain slab.
*/
return 1;
}
EXPORT_SYMBOL(__kmalloc);
+static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+{ /* Serve the request via alloc_pages_node() on @node; NULL on failure. */
+ struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
+ get_order(size)); /* __GFP_COMP is always added to the caller's flags */
+
+ if (page)
+ return page_address(page); /* hand back the address of the page */
+ else
+ return NULL; /* alloc_pages_node() returned no page */
+}
+
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, flags);
+ return kmalloc_large_node(size, flags, node);
s = get_slab(size, flags);
}
EXPORT_SYMBOL(kfree);
+#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLABINFO)
static unsigned long count_partial(struct kmem_cache_node *n)
{
unsigned long flags;
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
+#endif
/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
- * mips it seems. The standard arches will not generate any code here.
+ * MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
kmem_size = sizeof(struct kmem_cache);
#endif
-
printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
*/
for_each_online_cpu(cpu)
get_cpu_slab(s, cpu)->objsize = s->objsize;
+
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
return s;
}
+
s = kmalloc(kmem_size, GFP_KERNEL);
if (s) {
if (kmem_cache_open(s, GFP_KERNEL, name,
struct kmem_cache *s;
if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, gfpflags);
+ return kmalloc_large_node(size, gfpflags, node);
s = get_slab(size, gfpflags);
#define SO_CPU (1 << SL_CPU)
#define SO_OBJECTS (1 << SL_OBJECTS)
-static unsigned long show_slab_objects(struct kmem_cache *s,
- char *buf, unsigned long flags)
+static ssize_t show_slab_objects(struct kmem_cache *s,
+ char *buf, unsigned long flags)
{
unsigned long total = 0;
int cpu;
unsigned long *per_cpu;
nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
per_cpu = nodes + nr_node_ids;
for_each_possible_cpu(cpu) {
#endif
#ifdef CONFIG_SLUB_STATS
-
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
unsigned long sum = 0;
#define ID_STR_LENGTH 64
/* Create a unique string id for a slab cache:
- * format
- * :[flags-]size:[memory address of kmemcache]
+ *
+ * Format :[flags-]size
*/
static char *create_unique_id(struct kmem_cache *s)
{