for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
offset = (addr & ~PAGEMAP_WALK_MASK) >>
PAGE_SHIFT;
thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
}
spin_unlock(&walk->mm->page_table_lock);
/* the huge pmd covered this whole range */
return err;
}

if (pmd_trans_unstable(pmd))
return 0;
for (; addr != end; addr += PAGE_SIZE) {
/* check to see if we've left 'vma' behind
 * and need a new, higher one */
if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
/* check that 'vma' actually covers this address,
* and that it isn't a huge page vma */
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
}
cond_resched();
return err;
}
#ifdef CONFIG_HUGETLB_PAGE
static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
pte_t pte, int offset)
{
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
| PM_STATUS2(pm->v2, 0) | PM_PRESENT);
else
*pme = make_pme(PM_NOT_PRESENT(pm->v2));
}

/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
int err = 0;
pagemap_entry_t pme;

for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
}
cond_resched();
return err;
}
#endif /* HUGETLB_PAGE */
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
* For each page in the address space, this file contains one 64-bit entry
* consisting of the following:
*
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
* Bits 55-60 page shift (page size = 1<<page shift)
* Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
*
* If the page is not present but in swap, then the PFN contains an
* encoding of the swap file number and the page's offset into the
* swap. Unmapped pages return a null PFN. This allows determining
* precisely which pages are mapped (or in swap) and comparing mapped
* pages between processes.
*
* Efficient users of this interface will use /proc/pid/maps to
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
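/*
 * Illustrative userspace sketch (not part of this file; the helper name
 * read_pagemap_entry is hypothetical): one way a consumer could fetch
 * the 64-bit entry for a single virtual address, relying only on the
 * layout documented above and on pread() to skip straight to the
 * entry's offset in the file.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int read_pagemap_entry(pid_t pid, unsigned long vaddr,
 *				      uint64_t *entry)
 *	{
 *		char path[64];
 *		long psize = sysconf(_SC_PAGESIZE);
 *		off_t off = (off_t)(vaddr / psize) * sizeof(*entry);
 *		int fd;
 *		ssize_t n;
 *
 *		snprintf(path, sizeof(path), "/proc/%d/pagemap", (int)pid);
 *		fd = open(path, O_RDONLY);
 *		if (fd < 0)
 *			return -1;
 *		n = pread(fd, entry, sizeof(*entry), off);
 *		close(fd);
 *		return n == (ssize_t)sizeof(*entry) ? 0 : -1;
 *	}
 *
 * Bit 63 of the returned entry then reports "present", bit 62 "swapped",
 * and bits 0-54 carry the PFN (or the swap type and offset), exactly as
 * described in the comment above.
 */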
static ssize_t pagemap_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task = get_proc_task(file_inode(file));
struct mm_struct *mm;
struct pagemapread pm;
int ret = -ESRCH;
struct mm_walk pagemap_walk = {};
unsigned long src;
unsigned long svpfn;
unsigned long start_vaddr;
unsigned long end_vaddr;
int copied = 0;

if (!task)
goto out;
ret = -EINVAL;
/* file position must be aligned */
if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
goto out_task;

ret = 0;
if (!count)
goto out_task;
pm.v2 = soft_dirty_cleared;
pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
goto out_task;

mm = mm_access(task, PTRACE_MODE_READ);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_free;
pagemap_walk.pmd_entry = pagemap_pte_range;
pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
#endif
pagemap_walk.mm = mm;
pagemap_walk.private = &pm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
start_vaddr = svpfn << PAGE_SHIFT;
end_vaddr = TASK_SIZE_OF(task);
/* watch out for wraparound */
if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
start_vaddr = end_vaddr;
/*
* The odds are that this will stop walking way
* before end_vaddr, because the length of the
* user buffer is tracked in "pm", and the walk
* will stop when we hit the end of the buffer.
*/
ret = 0;
while (count && (start_vaddr < end_vaddr)) {
int len;
unsigned long end;
pm.pos = 0;
end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
down_read(&mm->mmap_sem);
ret = walk_page_range(start_vaddr, end, &pagemap_walk);
up_read(&mm->mmap_sem);
start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_mm;
}
copied += len;
buf += len;
count -= len;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
ret = copied;
out_mm:
mmput(mm);
out_free:
kfree(pm.buffer);
out_task:
put_task_struct(task);
out:
return ret;
}
static int pagemap_open(struct inode *inode, struct file *file)
{
pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
"to stop being page-shift some time soon. See the "
"linux/Documentation/vm/pagemap.txt for details.\n");
return 0;
}
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
.open = pagemap_open,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
struct numa_maps {
struct vm_area_struct *vma;
unsigned long pages;
unsigned long anon;
unsigned long active;
unsigned long writeback;
unsigned long mapcount_max;
unsigned long dirty;
unsigned long swapcache;
unsigned long node[MAX_NUMNODES];
};
struct numa_maps_private {
struct proc_maps_private proc_maps;
struct numa_maps md;
};
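/* Accumulate per-page counters for @nr_pages pages backed by @page. */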
static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages)
{
int count = page_mapcount(page);
md->pages += nr_pages;
if (pte_dirty || PageDirty(page))
md->dirty += nr_pages;
if (PageSwapCache(page))
md->swapcache += nr_pages;
if (PageActive(page) || PageUnevictable(page))
md->active += nr_pages;
if (PageWriteback(page))
md->writeback += nr_pages;
if (PageAnon(page))
md->anon += nr_pages;
if (count > md->mapcount_max)
md->mapcount_max = count;
md->node[page_to_nid(page)] += nr_pages;
}

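/*
 * Return the page behind @pte if it should be counted for NUMA stats,
 * or NULL when it is not present, not a normal page, reserved, or on a
 * node without memory.
 */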
static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
unsigned long addr)
{
struct page *page;
int nid;
if (!pte_present(pte))
return NULL;
page = vm_normal_page(vma, addr, pte);
if (!page)
return NULL;
if (PageReserved(page))
return NULL;
nid = page_to_nid(page);
if (!node_isset(nid, node_states[N_MEMORY]))
return NULL;
return page;
}
static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct numa_maps *md;
spinlock_t *ptl;
pte_t *orig_pte;
pte_t *pte;
md = walk->private;
if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
page = can_gather_numa_stats(huge_pte, md->vma, addr);
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(&walk->mm->page_table_lock);
return 0;
}

if (pmd_trans_unstable(pmd))
return 0;
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
do {
struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
if (!page)
continue;
gather_stats(page, md, pte_dirty(*pte), 1);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(orig_pte, ptl);
return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
struct numa_maps *md;
struct page *page;
if (pte_none(*pte))
return 0;
page = pte_page(*pte);
if (!page)
return 0;
md = walk->private;
gather_stats(page, md, pte_dirty(*pte), 1);
return 0;
}
#else
static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
return 0;
}
#endif
/*
* Display pages allocated per node and memory policy via /proc.
*/
static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
struct numa_maps_private *numa_priv = m->private;
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
struct vm_area_struct *vma = v;
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
struct task_struct *task = proc_priv->task;
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {};
struct mempolicy *pol;
int n;
char buffer[50];
if (!mm)
return 0;
/* Ensure we start with an empty set of numa_maps statistics. */
memset(md, 0, sizeof(*md));
md->vma = vma;
walk.hugetlb_entry = gather_hugetbl_stats;
walk.pmd_entry = gather_pte_stats;
walk.private = md;
walk.mm = mm;
pol = get_vma_policy(task, vma, vma->vm_start);
mpol_to_str(buffer, sizeof(buffer), pol);
mpol_cond_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
if (file) {
seq_printf(m, " file=");
seq_path(m, &file->f_path, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
pid_t tid = vm_is_stack(task, vma, is_pid);
if (tid != 0) {
/*
* Thread stack in /proc/PID/task/TID/maps or
* the main process stack.
*/
if (!is_pid || (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack))
seq_printf(m, " stack");
else
seq_printf(m, " stack:%d", tid);
}
}

if (is_vm_hugetlb_page(vma))
seq_printf(m, " huge");
walk_page_range(vma->vm_start, vma->vm_end, &walk);
if (!md->pages)
goto out;
if (md->anon)
seq_printf(m, " anon=%lu", md->anon);
if (md->dirty)
seq_printf(m, " dirty=%lu", md->dirty);
if (md->pages != md->anon && md->pages != md->dirty)
seq_printf(m, " mapped=%lu", md->pages);
if (md->mapcount_max > 1)
seq_printf(m, " mapmax=%lu", md->mapcount_max);
if (md->swapcache)
seq_printf(m, " swapcache=%lu", md->swapcache);
if (md->active < md->pages && !is_vm_hugetlb_page(vma))
seq_printf(m, " active=%lu", md->active);
if (md->writeback)
seq_printf(m, " writeback=%lu", md->writeback);
for_each_node_state(n, N_MEMORY)
if (md->node[n])
seq_printf(m, " N%d=%lu", n, md->node[n]);
out:
seq_putc(m, '\n');
if (m->count < m->size)
m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
return 0;
}

static int show_pid_numa_map(struct seq_file *m, void *v)
{
return show_numa_map(m, v, 1);
}
static int show_tid_numa_map(struct seq_file *m, void *v)
{
return show_numa_map(m, v, 0);
}
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_pid_numa_map,
};
static const struct seq_operations proc_tid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_tid_numa_map,
};
static int numa_maps_open(struct inode *inode, struct file *file,
const struct seq_operations *ops)
{
struct numa_maps_private *priv;
int ret = -ENOMEM;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (priv) {
priv->proc_maps.pid = proc_pid(inode);
ret = seq_open(file, ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = priv;
} else {
kfree(priv);
}
}
return ret;
}

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
}
static int tid_numa_maps_open(struct inode *inode, struct file *file)
{
return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
}
const struct file_operations proc_pid_numa_maps_operations = {
.open = pid_numa_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
const struct file_operations proc_tid_numa_maps_operations = {
.open = tid_numa_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};

#endif /* CONFIG_NUMA */