X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=mm%2Fslub.c;h=a96e11c77fd9595deec57b445888fde3b9bf4671;hb=b773ad73690b5f34eee0c76f4273ac6fcbd88f82;hp=3f056677fa8fea56ef3a678cc8e9d8a8d27b2371;hpb=acf63867ae06ef95eea7bf445ded2f05528a81b1;p=linux-2.6-omap-h63xx.git diff --git a/mm/slub.c b/mm/slub.c index 3f056677fa8..a96e11c77fd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -204,6 +204,8 @@ static inline void ClearSlabDebug(struct page *page) /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ +#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */ +#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */ /* Not all arches define cache_line_size */ #ifndef cache_line_size @@ -243,6 +245,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); static void sysfs_slab_remove(struct kmem_cache *); + #else static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) @@ -251,8 +254,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) { kfree(s); } + #endif +static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) +{ +#ifdef CONFIG_SLUB_STATS + c->stat[si]++; +#endif +} + /******************************************************************** * Core slab cache functions *******************************************************************/ @@ -280,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } +/* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { @@ -608,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * A. Free pointer (if we cannot overwrite object on free) * B. Tracking data for SLAB_STORE_USER * C. Padding to reach required alignment boundary or at mininum - * one word if debuggin is on to be able to detect writes + * one word if debugging is on to be able to detect writes * before the word boundary. * * Padding is done using 0x5a (POISON_INUSE) @@ -685,9 +697,10 @@ static int check_object(struct kmem_cache *s, struct page *page, endobject, red, s->inuse - s->objsize)) return 0; } else { - if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) - check_bytes_and_report(s, page, p, "Alignment padding", endobject, - POISON_INUSE, s->inuse - s->objsize); + if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { + check_bytes_and_report(s, page, p, "Alignment padding", + endobject, POISON_INUSE, s->inuse - s->objsize); + } } if (s->flags & SLAB_POISON) { @@ -840,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, if (!check_slab(s, page)) goto bad; - if (object && !on_freelist(s, page, object)) { + if (!on_freelist(s, page, object)) { object_err(s, page, object, "Object already allocated"); goto bad; } @@ -850,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, goto bad; } - if (object && !check_object(s, page, object, 0)) + if (!check_object(s, page, object, 0)) goto bad; /* Success perform special debug activities for allocs */ @@ -894,11 +907,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, return 0; if (unlikely(s != page->slab)) { - if (!PageSlab(page)) + if (!PageSlab(page)) { slab_err(s, page, "Attempt to free object(0x%p) " "outside of slab", object); - else - if (!page->slab) { + } else if (!page->slab) { printk(KERN_ERR "SLUB : no slab for object 0x%p.\n", object); @@ -986,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize, void (*ctor)(struct kmem_cache *, void *)) { /* - * The page->offset field is only 16 bit wide. This is an offset - * in units of words from the beginning of an object. If the slab - * size is bigger then we cannot move the free pointer behind the - * object anymore. - * - * On 32 bit platforms the limit is 256k. On 64bit platforms - * the limit is 512k. - * - * Debugging or ctor may create a need to move the free - * pointer. Fail if this happens. + * Enable debugging if selected on the kernel commandline. */ - if (objsize >= 65535 * sizeof(void *)) { - BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | - SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); - BUG_ON(ctor); - } else { - /* - * Enable debugging if selected on the kernel commandline. - */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, - strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; - } + if (slub_debug && (!slub_debug_slabs || + strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) + flags |= slub_debug; return flags; } @@ -1044,14 +1037,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) struct page *page; int pages = 1 << s->order; - if (s->order) - flags |= __GFP_COMP; - - if (s->flags & SLAB_CACHE_DMA) - flags |= SLUB_DMA; - - if (s->flags & SLAB_RECLAIM_ACCOUNT) - flags |= __GFP_RECLAIMABLE; + flags |= s->allocflags; if (node == -1) page = alloc_pages(flags, s->order); @@ -1183,7 +1169,7 @@ static __always_inline void slab_lock(struct page *page) static __always_inline void slab_unlock(struct page *page) { - bit_spin_unlock(PG_locked, &page->flags); + __bit_spin_unlock(PG_locked, &page->flags); } static __always_inline int slab_trylock(struct page *page) @@ -1283,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) * may return off node objects because partial slabs are obtained * from other nodes and filled up. * - * If /sys/slab/xx/defrag_ratio is set to 100 (which makes + * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes * defrag_ratio = 1000) then every (well almost) allocation will * first attempt to defrag slab caches on other nodes. This means * scanning over all nodes to look for partial slabs which may be @@ -1294,8 +1280,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) get_cycles() % 1024 > s->remote_node_defrag_ratio) return NULL; - zonelist = &NODE_DATA(slab_node(current->mempolicy)) - ->node_zonelists[gfp_zone(flags)]; + zonelist = &NODE_DATA( + slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)]; for (z = zonelist->zones; *z; z++) { struct kmem_cache_node *n; @@ -1337,30 +1323,38 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); ClearSlabFrozen(page); if (page->inuse) { - if (page->freelist) + if (page->freelist) { add_partial(n, page, tail); - else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) - add_full(n, page); + stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); + } else { + stat(c, DEACTIVATE_FULL); + if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) + add_full(n, page); + } slab_unlock(page); - } else { + stat(c, DEACTIVATE_EMPTY); if (n->nr_partial < MIN_PARTIAL) { /* * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs * to come after the other slabs with objects in - * order to fill them up. That way the size of the - * partial list stays small. kmem_cache_shrink can - * reclaim empty slabs from the partial list. + * so that the others get filled first. That way the + * size of the partial list stays small. + * + * kmem_cache_shrink can reclaim any empty slabs from the + * partial list. */ add_partial(n, page, 1); slab_unlock(page); } else { slab_unlock(page); + stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); discard_slab(s, page); } } @@ -1373,8 +1367,11 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { struct page *page = c->page; int tail = 1; + + if (page->freelist) + stat(c, DEACTIVATE_REMOTE_FREES); /* - * Merge cpu freelist into freelist. Typically we get here + * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. */ @@ -1398,12 +1395,14 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { + stat(c, CPUSLAB_FLUSH); slab_lock(c->page); deactivate_slab(s, c); } /* * Flush cpu slab. + * * Called from IPI handler with interrupts disabled. */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) @@ -1462,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) * rest of the freelist to the lockless freelist. * * And if we were unable to get a new slab from the partial slab lists then - * we need to allocate a new slab. This is slowest path since we may sleep. + * we need to allocate a new slab. This is the slowest path since it involves + * a call to the page allocator and the setup of a new slab. */ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) @@ -1476,6 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s, slab_lock(c->page); if (unlikely(!node_match(c, node))) goto another_slab; + + stat(c, ALLOC_REFILL); + load_freelist: object = c->page->freelist; if (unlikely(!object)) @@ -1483,12 +1486,13 @@ load_freelist: if (unlikely(SlabDebug(c->page))) goto debug; - object = c->page->freelist; c->freelist = object[c->offset]; c->page->inuse = s->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); +unlock_out: slab_unlock(c->page); + stat(c, ALLOC_SLOWPATH); return object; another_slab: @@ -1498,6 +1502,7 @@ new_slab: new = get_partial(s, gfpflags, node); if (new) { c->page = new; + stat(c, ALLOC_FROM_PARTIAL); goto load_freelist; } @@ -1511,6 +1516,7 @@ new_slab: if (new) { c = get_cpu_slab(s, smp_processor_id()); + stat(c, ALLOC_SLAB); if (c->page) flush_slab(s, c); slab_lock(new); @@ -1518,17 +1524,30 @@ new_slab: c->page = new; goto load_freelist; } + + /* + * No memory available. + * + * If the slab uses higher order allocs but the object is + * smaller than a page size then we can fallback in emergencies + * to the page allocator via kmalloc_large. The page allocator may + * have failed to obtain a higher order page and we can try to + * allocate a single page if the object fits into a single page. + * That is only possible if certain conditions are met that are being + * checked when a slab is created. + */ + if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK)) + return kmalloc_large(s->objsize, gfpflags); + return NULL; debug: - object = c->page->freelist; if (!alloc_debug_processing(s, c->page, object, addr)) goto another_slab; c->page->inuse++; c->page->freelist = object[c->offset]; c->node = -1; - slab_unlock(c->page); - return object; + goto unlock_out; } /* @@ -1545,8 +1564,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr) { void **object; - unsigned long flags; struct kmem_cache_cpu *c; + unsigned long flags; local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); @@ -1557,6 +1576,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, else { object = c->freelist; c->freelist = object[c->offset]; + stat(c, ALLOC_FASTPATH); } local_irq_restore(flags); @@ -1593,42 +1613,51 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; + struct kmem_cache_cpu *c; + c = get_cpu_slab(s, raw_smp_processor_id()); + stat(c, FREE_SLOWPATH); slab_lock(page); if (unlikely(SlabDebug(page))) goto debug; + checks_ok: prior = object[offset] = page->freelist; page->freelist = object; page->inuse--; - if (unlikely(SlabFrozen(page))) + if (unlikely(SlabFrozen(page))) { + stat(c, FREE_FROZEN); goto out_unlock; + } if (unlikely(!page->inuse)) goto slab_empty; /* - * Objects left in the slab. If it - * was not on the partial list before + * Objects left in the slab. If it was not on the partial list before * then add it. */ - if (unlikely(!prior)) + if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); + stat(c, FREE_ADD_PARTIAL); + } out_unlock: slab_unlock(page); return; slab_empty: - if (prior) + if (prior) { /* * Slab still on the partial list. */ remove_partial(s, page); - + stat(c, FREE_REMOVE_PARTIAL); + } slab_unlock(page); + stat(c, FREE_SLAB); discard_slab(s, page); return; @@ -1653,15 +1682,16 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, void *x, void *addr) { void **object = (void *)x; - unsigned long flags; struct kmem_cache_cpu *c; + unsigned long flags; local_irq_save(flags); - debug_check_no_locks_freed(object, s->objsize); c = get_cpu_slab(s, smp_processor_id()); + debug_check_no_locks_freed(object, c->objsize); if (likely(page == c->page && c->node >= 0)) { object[c->offset] = c->freelist; c->freelist = object; + stat(c, FREE_FASTPATH); } else __slab_free(s, page, x, addr, c->offset); @@ -1820,13 +1850,11 @@ static unsigned long calculate_alignment(unsigned long flags, unsigned long align, unsigned long size) { /* - * If the user wants hardware cache aligned objects then - * follow that suggestion if the object is sufficiently - * large. + * If the user wants hardware cache aligned objects then follow that + * suggestion if the object is sufficiently large. * - * The hardware cache alignment cannot override the - * specified alignment though. If that is greater - * then use it. + * The hardware cache alignment cannot override the specified + * alignment though. If that is greater then use it. */ if ((flags & SLAB_HWCACHE_ALIGN) && size > cache_line_size() / 2) @@ -2024,6 +2052,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, #endif init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); + /* * lockdep requires consistent irq usage for each lock * so even though there cannot be a race this early in @@ -2104,6 +2133,14 @@ static int calculate_sizes(struct kmem_cache *s) unsigned long size = s->objsize; unsigned long align = s->align; + /* + * Round up object size to the next word boundary. We can only + * place the free pointer at word boundaries and this determines + * the possible location of the free pointer. + */ + size = ALIGN(size, sizeof(void *)); + +#ifdef CONFIG_SLUB_DEBUG /* * Determine if we can poison the object itself. If the user of * the slab may touch the object after free or before allocation @@ -2115,14 +2152,7 @@ static int calculate_sizes(struct kmem_cache *s) else s->flags &= ~__OBJECT_POISON; - /* - * Round up object size to the next word boundary. We can only - * place the free pointer at word boundaries and this determines - * the possible location of the free pointer. - */ - size = ALIGN(size, sizeof(void *)); -#ifdef CONFIG_SLUB_DEBUG /* * If we are Redzoning then check if there is some space between the * end of the object and the free pointer. If not then add an @@ -2186,10 +2216,33 @@ static int calculate_sizes(struct kmem_cache *s) size = ALIGN(size, align); s->size = size; - s->order = calculate_order(size); + if ((flags & __KMALLOC_CACHE) && + PAGE_SIZE / size < slub_min_objects) { + /* + * Kmalloc cache that would not have enough objects in + * an order 0 page. Kmalloc slabs can fallback to + * page allocator order 0 allocs so take a reasonably large + * order that will allows us a good number of objects. + */ + s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER); + s->flags |= __PAGE_ALLOC_FALLBACK; + s->allocflags |= __GFP_NOWARN; + } else + s->order = calculate_order(size); + if (s->order < 0) return 0; + s->allocflags = 0; + if (s->order) + s->allocflags |= __GFP_COMP; + + if (s->flags & SLAB_CACHE_DMA) + s->allocflags |= SLUB_DMA; + + if (s->flags & SLAB_RECLAIM_ACCOUNT) + s->allocflags |= __GFP_RECLAIMABLE; + /* * Determine the number of objects per slab */ @@ -2252,7 +2305,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) /* * We could also check if the object is on the slabs freelist. * But this would be too expensive and it seems that the main - * purpose of kmem_ptr_valid is to check if the object belongs + * purpose of kmem_ptr_valid() is to check if the object belongs * to a certain slab. */ return 1; @@ -2341,11 +2394,11 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned; +struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT]; +static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; #endif static int __init setup_slub_min_order(char *str) @@ -2393,7 +2446,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, down_write(&slub_lock); if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, - flags, NULL)) + flags | __KMALLOC_CACHE, NULL)) goto panic; list_add(&s->list, &slab_caches); @@ -2446,7 +2499,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) goto unlock_out; realsize = kmalloc_caches[index].objsize; - text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize), + text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", + (unsigned int)realsize); s = kmalloc(kmem_size, flags & ~SLUB_DMA); if (!s || !text || !kmem_cache_open(s, flags, text, @@ -2526,9 +2580,8 @@ void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) - return (void *)__get_free_pages(flags | __GFP_COMP, - get_order(size)); + if (unlikely(size > PAGE_SIZE)) + return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -2539,14 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); +static void *kmalloc_large_node(size_t size, gfp_t flags, int node) +{ + struct page *page = alloc_pages_node(node, flags | __GFP_COMP, + get_order(size)); + + if (page) + return page_address(page); + else + return NULL; +} + #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) - return (void *)__get_free_pages(flags | __GFP_COMP, - get_order(size)); + if (unlikely(size > PAGE_SIZE)) + return kmalloc_large_node(size, flags, node); s = get_slab(size, flags); @@ -2563,19 +2626,17 @@ size_t ksize(const void *object) struct page *page; struct kmem_cache *s; - BUG_ON(!object); if (unlikely(object == ZERO_SIZE_PTR)) return 0; page = virt_to_head_page(object); - BUG_ON(!page); if (unlikely(!PageSlab(page))) return PAGE_SIZE << compound_order(page); s = page->slab; - BUG_ON(!s); +#ifdef CONFIG_SLUB_DEBUG /* * Debugging requires use of the padding between object * and whatever may come after it. @@ -2583,6 +2644,7 @@ size_t ksize(const void *object) if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) return s->objsize; +#endif /* * If we have the need to store the freelist pointer * back there or track user information then we can @@ -2590,7 +2652,6 @@ size_t ksize(const void *object) */ if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) return s->inuse; - /* * Else we can use all the padding etc for the allocation */ @@ -2601,6 +2662,7 @@ EXPORT_SYMBOL(ksize); void kfree(const void *x) { struct page *page; + void *object = (void *)x; if (unlikely(ZERO_OR_NULL_PTR(x))) return; @@ -2610,7 +2672,7 @@ void kfree(const void *x) put_page(page); return; } - slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); + slab_free(page->slab, page, object, __builtin_return_address(0)); } EXPORT_SYMBOL(kfree); @@ -2856,7 +2918,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); caches++; @@ -2866,7 +2928,7 @@ void __init kmem_cache_init(void) /* * Patch up the size_index table if we have strange large alignment * requirements for the kmalloc array. This is only the case for - * mips it seems. The standard arches will not generate any code here. + * MIPS it seems. The standard arches will not generate any code here. * * Largest permitted alignment is 256 bytes due to the way we * handle the index determination for the smaller caches. @@ -2883,7 +2945,7 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) + for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) kmalloc_caches[i]. name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); @@ -2895,8 +2957,8 @@ void __init kmem_cache_init(void) kmem_size = sizeof(struct kmem_cache); #endif - - printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," + printk(KERN_INFO + "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", caches, cache_line_size(), slub_min_order, slub_max_order, slub_min_objects, @@ -2911,6 +2973,9 @@ static int slab_unmergeable(struct kmem_cache *s) if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) return 1; + if ((s->flags & __PAGE_ALLOC_FALLBACK)) + return 1; + if (s->ctor) return 1; @@ -2988,12 +3053,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, */ for_each_online_cpu(cpu) get_cpu_slab(s, cpu)->objsize = s->objsize; + s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); + if (sysfs_slab_alias(s, name)) goto err; return s; } + s = kmalloc(kmem_size, GFP_KERNEL); if (s) { if (kmem_cache_open(s, GFP_KERNEL, name, @@ -3063,7 +3131,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, } static struct notifier_block __cpuinitdata slab_notifier = { - &slab_cpuup_callback, NULL, 0 + .notifier_call = slab_cpuup_callback }; #endif @@ -3072,9 +3140,9 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) - return (void *)__get_free_pages(gfpflags | __GFP_COMP, - get_order(size)); + if (unlikely(size > PAGE_SIZE)) + return kmalloc_large(size, gfpflags); + s = get_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) @@ -3088,9 +3156,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) - return (void *)__get_free_pages(gfpflags | __GFP_COMP, - get_order(size)); + if (unlikely(size > PAGE_SIZE)) + return kmalloc_large_node(size, gfpflags, node); + s = get_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) @@ -3221,8 +3289,9 @@ static void resiliency_test(void) p = kzalloc(32, GFP_KERNEL); p[32 + sizeof(void *)] = 0x34; printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" - " 0x34 -> -0x%p\n", p); - printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); + " 0x34 -> -0x%p\n", p); + printk(KERN_ERR + "If allocated object is overwritten then not detectable\n\n"); validate_slab_cache(kmalloc_caches + 5); p = kzalloc(64, GFP_KERNEL); @@ -3230,7 +3299,8 @@ static void resiliency_test(void) *p = 0x56; printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", p); - printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); + printk(KERN_ERR + "If allocated object is overwritten then not detectable\n\n"); validate_slab_cache(kmalloc_caches + 6); printk(KERN_ERR "\nB. Corruption after free\n"); @@ -3243,7 +3313,8 @@ static void resiliency_test(void) p = kzalloc(256, GFP_KERNEL); kfree(p); p[50] = 0x9a; - printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); + printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", + p); validate_slab_cache(kmalloc_caches + 8); p = kzalloc(512, GFP_KERNEL); @@ -3493,8 +3564,8 @@ enum slab_stat_type { #define SO_CPU (1 << SL_CPU) #define SO_OBJECTS (1 << SL_OBJECTS) -static unsigned long slab_objects(struct kmem_cache *s, - char *buf, unsigned long flags) +static ssize_t show_slab_objects(struct kmem_cache *s, + char *buf, unsigned long flags) { unsigned long total = 0; int cpu; @@ -3504,6 +3575,8 @@ static unsigned long slab_objects(struct kmem_cache *s, unsigned long *per_cpu; nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); + if (!nodes) + return -ENOMEM; per_cpu = nodes + nr_node_ids; for_each_possible_cpu(cpu) { @@ -3656,25 +3729,25 @@ SLAB_ATTR_RO(aliases); static ssize_t slabs_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); + return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); } SLAB_ATTR_RO(slabs); static ssize_t partial_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_PARTIAL); + return show_slab_objects(s, buf, SO_PARTIAL); } SLAB_ATTR_RO(partial); static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_CPU); + return show_slab_objects(s, buf, SO_CPU); } SLAB_ATTR_RO(cpu_slabs); static ssize_t objects_show(struct kmem_cache *s, char *buf) { - return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); + return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); } SLAB_ATTR_RO(objects); @@ -3872,6 +3945,61 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, SLAB_ATTR(remote_node_defrag_ratio); #endif +#ifdef CONFIG_SLUB_STATS +static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) +{ + unsigned long sum = 0; + int cpu; + int len; + int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); + + if (!data) + return -ENOMEM; + + for_each_online_cpu(cpu) { + unsigned x = get_cpu_slab(s, cpu)->stat[si]; + + data[cpu] = x; + sum += x; + } + + len = sprintf(buf, "%lu", sum); + + for_each_online_cpu(cpu) { + if (data[cpu] && len < PAGE_SIZE - 20) + len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]); + } + kfree(data); + return len + sprintf(buf + len, "\n"); +} + +#define STAT_ATTR(si, text) \ +static ssize_t text##_show(struct kmem_cache *s, char *buf) \ +{ \ + return show_stat(s, buf, si); \ +} \ +SLAB_ATTR_RO(text); \ + +STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); +STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); +STAT_ATTR(FREE_FASTPATH, free_fastpath); +STAT_ATTR(FREE_SLOWPATH, free_slowpath); +STAT_ATTR(FREE_FROZEN, free_frozen); +STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); +STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); +STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); +STAT_ATTR(ALLOC_SLAB, alloc_slab); +STAT_ATTR(ALLOC_REFILL, alloc_refill); +STAT_ATTR(FREE_SLAB, free_slab); +STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); +STAT_ATTR(DEACTIVATE_FULL, deactivate_full); +STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); +STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); +STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); +STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); + +#endif + static struct attribute *slab_attrs[] = { &slab_size_attr.attr, &object_size_attr.attr, @@ -3901,6 +4029,25 @@ static struct attribute *slab_attrs[] = { #endif #ifdef CONFIG_NUMA &remote_node_defrag_ratio_attr.attr, +#endif +#ifdef CONFIG_SLUB_STATS + &alloc_fastpath_attr.attr, + &alloc_slowpath_attr.attr, + &free_fastpath_attr.attr, + &free_slowpath_attr.attr, + &free_frozen_attr.attr, + &free_add_partial_attr.attr, + &free_remove_partial_attr.attr, + &alloc_from_partial_attr.attr, + &alloc_slab_attr.attr, + &alloc_refill_attr.attr, + &free_slab_attr.attr, + &cpuslab_flush_attr.attr, + &deactivate_full_attr.attr, + &deactivate_empty_attr.attr, + &deactivate_to_head_attr.attr, + &deactivate_to_tail_attr.attr, + &deactivate_remote_frees_attr.attr, #endif NULL }; @@ -3982,8 +4129,8 @@ static struct kset *slab_kset; #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: - * format - * :[flags-]size:[memory address of kmemcache] + * + * Format :[flags-]size */ static char *create_unique_id(struct kmem_cache *s) {