#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/sysfs.h>
-#include <asm/io.h>
+
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/io.h>
struct list_head regions;
};
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
{
struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
if (!resv_map)
return resv_map;
}
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
{
struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
if (!(vma->vm_flags & VM_SHARED))
return (struct resv_map *)(get_vma_private_data(vma) &
~HPAGE_RESV_MASK);
- return 0;
+ return NULL;
}
static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
huge_page_order(h));
if (page) {
if (arch_prepare_hugepage(page)) {
- __free_pages(page, HUGETLB_PAGE_ORDER);
+ __free_pages(page, huge_page_order(h));
return NULL;
}
prep_new_huge_page(h, page, nid);
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
+ if (page && arch_prepare_hugepage(page)) {
+ __free_pages(page, huge_page_order(h));
+ return NULL;
+ }
+
spin_lock(&hugetlb_lock);
if (page) {
/*
{
struct hstate *h = &default_hstate;
return sprintf(buf,
- "HugePages_Total: %5lu\n"
- "HugePages_Free: %5lu\n"
- "HugePages_Rsvd: %5lu\n"
- "HugePages_Surp: %5lu\n"
- "Hugepagesize: %5lu kB\n",
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %8lu kB\n",
h->nr_huge_pages,
h->free_huge_pages,
h->resv_huge_pages,
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-int unmap_ref_private(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct page *page,
- unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
struct vm_area_struct *iter_vma;
struct address_space *mapping;
lock_page(page);
}
+ /*
+ * If we are going to COW a private mapping later, we examine the
+ * pending reservations for this page now. This will ensure that
+ * any allocations necessary to record that reservation occur outside
+ * the spinlock.
+ */
+ if (write_access && !(vma->vm_flags & VM_SHARED))
+ if (vma_needs_reservation(h, vma, address) < 0) {
+ ret = VM_FAULT_OOM;
+ goto backout_unlocked;
+ }
+
spin_lock(&mm->page_table_lock);
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
backout:
spin_unlock(&mm->page_table_lock);
+backout_unlocked:
unlock_page(page);
put_page(page);
goto out;
pte_t *ptep;
pte_t entry;
int ret;
+ struct page *pagecache_page = NULL;
static DEFINE_MUTEX(hugetlb_instantiation_mutex);
struct hstate *h = hstate_vma(vma);
entry = huge_ptep_get(ptep);
if (huge_pte_none(entry)) {
ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
- mutex_unlock(&hugetlb_instantiation_mutex);
- return ret;
+ goto out_mutex;
}
ret = 0;
+ /*
+ * If we are going to COW the mapping later, we examine the pending
+ * reservations for this page now. This will ensure that any
+ * allocations necessary to record that reservation occur outside the
+ * spinlock. For private mappings, we also lookup the pagecache
+ * page now as it is used to determine if a reservation has been
+ * consumed.
+ */
+ if (write_access && !pte_write(entry)) {
+ if (vma_needs_reservation(h, vma, address) < 0) {
+ ret = VM_FAULT_OOM;
+ goto out_mutex;
+ }
+
+ if (!(vma->vm_flags & VM_SHARED))
+ pagecache_page = hugetlbfs_pagecache_page(h,
+ vma, address);
+ }
+
spin_lock(&mm->page_table_lock);
/* Check for a racing update before calling hugetlb_cow */
- if (likely(pte_same(entry, huge_ptep_get(ptep))))
- if (write_access && !pte_write(entry)) {
- struct page *page;
- page = hugetlbfs_pagecache_page(h, vma, address);
- ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
- if (page) {
- unlock_page(page);
- put_page(page);
- }
+ if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
+ goto out_page_table_lock;
+
+
+ if (write_access) {
+ if (!pte_write(entry)) {
+ ret = hugetlb_cow(mm, vma, address, ptep, entry,
+ pagecache_page);
+ goto out_page_table_lock;
}
+ entry = pte_mkdirty(entry);
+ }
+ entry = pte_mkyoung(entry);
+ if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access))
+ update_mmu_cache(vma, address, entry);
+
+out_page_table_lock:
spin_unlock(&mm->page_table_lock);
+
+ if (pagecache_page) {
+ unlock_page(pagecache_page);
+ put_page(pagecache_page);
+ }
+
+out_mutex:
mutex_unlock(&hugetlb_instantiation_mutex);
return ret;
return NULL;
}
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+ if (!ptep || write || shared)
+ return 0;
+ else
+ return huge_pte_none(huge_ptep_get(ptep));
+}
+
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i,
unsigned long vaddr = *position;
int remainder = *length;
struct hstate *h = hstate_vma(vma);
+ int zeropage_ok = 0;
+ int shared = vma->vm_flags & VM_SHARED;
spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
* first, for the page indexing below to work.
*/
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+ if (huge_zeropage_ok(pte, write, shared))
+ zeropage_ok = 1;
- if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+ if (!pte ||
+ (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
(write && !pte_write(huge_ptep_get(pte)))) {
int ret;
page = pte_page(huge_ptep_get(pte));
same_page:
if (pages) {
- get_page(page);
- pages[i] = page + pfn_offset;
+ if (zeropage_ok)
+ pages[i] = ZERO_PAGE(0);
+ else
+ pages[i] = page + pfn_offset;
+ get_page(pages[i]);
}
if (vmas)