/* fs/proc/task_mmu.c */
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma;
	struct pagemapread *pm = walk->private;
	pte_t *pte;
	int err = 0;
	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));

	/* find the first VMA at or above 'addr' */
	vma = find_vma(walk->mm, addr);
	if (pmd_trans_huge_lock(pmd, vma) == 1) {
		int pmd_flags2;

		pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
		for (; addr != end; addr += PAGE_SIZE) {
			unsigned long offset;

			offset = (addr & ~PAGEMAP_WALK_MASK) >>
					PAGE_SHIFT;
			thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				break;
		}
		spin_unlock(&walk->mm->page_table_lock);
		return err;
	}
    
    	if (pmd_trans_unstable(pmd))
    		return 0;
    
    	for (; addr != end; addr += PAGE_SIZE) {
    
    
		/* check to see if we've left 'vma' behind
		 * and need a new, higher one */
		if (vma && (addr >= vma->vm_end)) {
			vma = find_vma(walk->mm, addr);
			pme = make_pme(PM_NOT_PRESENT(pm->v2));
		}

    		/* check that 'vma' actually covers this address,
    		 * and that it isn't a huge page vma */
    		if (vma && (vma->vm_start <= addr) &&
    		    !is_vm_hugetlb_page(vma)) {
    			pte = pte_offset_map(pmd, addr);
    
    			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
    
    			/* unmap before userspace copy */
    			pte_unmap(pte);
    		}
    
    		err = add_to_pagemap(addr, &pme, pm);
    
    		if (err)
    			return err;
    	}
    
    	cond_resched();
    
    	return err;
    }
    
    
static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
					pte_t pte, int offset)
{
	if (pte_present(pte))
		*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
				| PM_STATUS2(pm->v2, 0) | PM_PRESENT);
	else
		*pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
    
/* This function walks within one hugetlb entry in a single call */
    static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
    				 unsigned long addr, unsigned long end,
    				 struct mm_walk *walk)
    
    {
	struct pagemapread *pm = walk->private;
	int err = 0;
	pagemap_entry_t pme;
    
    
    	for (; addr != end; addr += PAGE_SIZE) {
    
    		int offset = (addr & ~hmask) >> PAGE_SHIFT;
    
    		huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
    
    		err = add_to_pagemap(addr, &pme, pm);
    
    		if (err)
    			return err;
    	}
    
    	cond_resched();
    
    	return err;
    }
    
/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bits 55-60 page shift (page size = 1<<page shift)
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
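/*
 * A minimal userspace sketch of the "efficient user" described above
 * (illustrative only, not part of this file; the variable names and the use
 * of /proc/self/pagemap are assumptions): seek to entry_index * 8, where
 * entry_index = vaddr / page_size, read one 64-bit entry, then decode it
 * using the bit layout documented above.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long psz = sysconf(_SC_PAGESIZE);
 *		int target = 42;			// any mapped variable will do
 *		uint64_t entry;
 *		off_t off = ((uintptr_t)&target / psz) * sizeof(entry);
 *		int fd = open("/proc/self/pagemap", O_RDONLY);
 *
 *		if (fd < 0 || pread(fd, &entry, sizeof(entry), off) != sizeof(entry))
 *			return 1;
 *		if (entry & (1ULL << 63))		// bit 63: page present
 *			printf("pfn 0x%llx, page shift %d\n",
 *			       (unsigned long long)(entry & ((1ULL << 55) - 1)),
 *			       (int)((entry >> 55) & 0x3f));
 *		else if (entry & (1ULL << 62))		// bit 62: page swapped
 *			printf("swap offset 0x%llx\n",
 *			       (unsigned long long)((entry >> 5) & ((1ULL << 50) - 1)));
 *		else
 *			printf("not present\n");
 *		close(fd);
 *		return 0;
 *	}
 */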
    static ssize_t pagemap_read(struct file *file, char __user *buf,
    			    size_t count, loff_t *ppos)
    {
    
    	struct task_struct *task = get_proc_task(file_inode(file));
    
    	struct mm_struct *mm;
    	struct pagemapread pm;
    	int ret = -ESRCH;
    
    	struct mm_walk pagemap_walk = {};
    
    	unsigned long src;
    	unsigned long svpfn;
    	unsigned long start_vaddr;
	unsigned long end_vaddr;
	int copied = 0;
    
    
    	if (!task)
    		goto out;
    
    	ret = -EINVAL;
    	/* file position must be aligned */
    
    	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
    
    		goto out_task;
    
    	pm.v2 = soft_dirty_cleared;
    
    	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
    	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
    
	ret = -ENOMEM;
	if (!pm.buffer)
		goto out_task;
    
    	mm = mm_access(task, PTRACE_MODE_READ);
    
    	ret = PTR_ERR(mm);
    	if (!mm || IS_ERR(mm))
    		goto out_free;
    
    	pagemap_walk.pmd_entry = pagemap_pte_range;
    	pagemap_walk.pte_hole = pagemap_pte_hole;
    
    	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
    
    	pagemap_walk.mm = mm;
    	pagemap_walk.private = &pm;
    
    	src = *ppos;
    	svpfn = src / PM_ENTRY_BYTES;
    	start_vaddr = svpfn << PAGE_SHIFT;
    	end_vaddr = TASK_SIZE_OF(task);
    
    	/* watch out for wraparound */
    	if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
    		start_vaddr = end_vaddr;
    
    	/*
    	 * The odds are that this will stop walking way
    	 * before end_vaddr, because the length of the
    	 * user buffer is tracked in "pm", and the walk
    	 * will stop when we hit the end of the buffer.
    	 */
    
    	ret = 0;
    	while (count && (start_vaddr < end_vaddr)) {
    		int len;
    		unsigned long end;
    
    		pm.pos = 0;
    
    		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
    
    		/* overflow ? */
    		if (end < start_vaddr || end > end_vaddr)
    			end = end_vaddr;
    		down_read(&mm->mmap_sem);
    		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
    		up_read(&mm->mmap_sem);
    		start_vaddr = end;
    
    		len = min(count, PM_ENTRY_BYTES * pm.pos);
    
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_mm;
		}
		copied += len;
		buf += len;
		count -= len;
	}
    	*ppos += copied;
    	if (!ret || ret == PM_END_OF_BUFFER)
    		ret = copied;
    
    
    out_mm:
    	mmput(mm);
    
    out_free:
    	kfree(pm.buffer);
    
    out_task:
    	put_task_struct(task);
    out:
    	return ret;
    }
    
    
    static int pagemap_open(struct inode *inode, struct file *file)
    {
    	pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
    			"to stop being page-shift some time soon. See the "
    			"linux/Documentation/vm/pagemap.txt for details.\n");
    	return 0;
    }
    
    
    const struct file_operations proc_pagemap_operations = {
    	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
	.open		= pagemap_open,
};

#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA
    
    struct numa_maps {
    	struct vm_area_struct *vma;
    	unsigned long pages;
    	unsigned long anon;
    	unsigned long active;
    	unsigned long writeback;
    	unsigned long mapcount_max;
    	unsigned long dirty;
    	unsigned long swapcache;
    	unsigned long node[MAX_NUMNODES];
    };
    
    
    struct numa_maps_private {
    	struct proc_maps_private proc_maps;
    	struct numa_maps md;
    };
    
    
    static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
    			unsigned long nr_pages)
    
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

    static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
    		unsigned long addr)
    {
    	struct page *page;
    	int nid;
    
    	if (!pte_present(pte))
    		return NULL;
    
    	page = vm_normal_page(vma, addr, pte);
    	if (!page)
    		return NULL;
    
    	if (PageReserved(page))
    		return NULL;
    
    	nid = page_to_nid(page);
    
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

    static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
    		unsigned long end, struct mm_walk *walk)
    {
    	struct numa_maps *md;
    	spinlock_t *ptl;
    	pte_t *orig_pte;
    	pte_t *pte;
    
    	md = walk->private;
    
    
    	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
    		pte_t huge_pte = *(pte_t *)pmd;
    		struct page *page;
    
    		page = can_gather_numa_stats(huge_pte, md->vma, addr);
    		if (page)
    			gather_stats(page, md, pte_dirty(huge_pte),
    				     HPAGE_PMD_SIZE/PAGE_SIZE);
    
		spin_unlock(&walk->mm->page_table_lock);
		return 0;
	}

    	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
    	do {
    
		struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);
    
    
    	} while (pte++, addr += PAGE_SIZE, addr != end);
    	pte_unmap_unlock(orig_pte, ptl);
    	return 0;
    }
    #ifdef CONFIG_HUGETLB_PAGE
    static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
    		unsigned long addr, unsigned long end, struct mm_walk *walk)
    {
    	struct numa_maps *md;
    	struct page *page;
    
    	if (pte_none(*pte))
    		return 0;
    
    	page = pte_page(*pte);
    	if (!page)
    		return 0;
    
    	md = walk->private;
    
    	gather_stats(page, md, pte_dirty(*pte), 1);
    
    	return 0;
    }
    
    #else
    static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
    		unsigned long addr, unsigned long end, struct mm_walk *walk)
    {
    	return 0;
    }
    #endif
    
    /*
     * Display pages allocated per node and memory policy via /proc.
     */
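
/*
 * Illustrative example (not captured from a real system) of what a
 * /proc/<pid>/numa_maps line can look like, assembled from the seq_printf()
 * calls in show_numa_map() below; only non-zero counters are emitted:
 *
 *	00400000 default file=/bin/cat mapped=2 mapmax=4 N0=2
 *	7fff3c0de000 default stack anon=3 dirty=3 active=1 N0=3
 */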
    
static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
    	struct numa_maps_private *numa_priv = m->private;
    	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
    
    	struct vm_area_struct *vma = v;
    
    	struct numa_maps *md = &numa_priv->md;
    
    	struct file *file = vma->vm_file;
    
    	struct task_struct *task = proc_priv->task;
    
    	struct mm_struct *mm = vma->vm_mm;
    	struct mm_walk walk = {};
    	struct mempolicy *pol;
    	int n;
    	char buffer[50];
    
    	if (!mm)
    		return 0;
    
    
    	/* Ensure we start with an empty set of numa_maps statistics. */
    	memset(md, 0, sizeof(*md));
    
    
    	md->vma = vma;
    
    	walk.hugetlb_entry = gather_hugetbl_stats;
    	walk.pmd_entry = gather_pte_stats;
    	walk.private = md;
    	walk.mm = mm;
    
    
    	pol = get_vma_policy(task, vma, vma->vm_start);
    
    	mpol_to_str(buffer, sizeof(buffer), pol);
    
    	mpol_cond_put(pol);
    
    	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
    
    	if (file) {
    		seq_printf(m, " file=");
    		seq_path(m, &file->f_path, "\n\t= ");
    	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_printf(m, " heap");
	} else {
		pid_t tid = vm_is_stack(task, vma, is_pid);
    
    		if (tid != 0) {
    			/*
    			 * Thread stack in /proc/PID/task/TID/maps or
    			 * the main process stack.
    			 */
    			if (!is_pid || (vma->vm_start <= mm->start_stack &&
    			    vma->vm_end >= mm->start_stack))
    				seq_printf(m, " stack");
    			else
    				seq_printf(m, " stack:%d", tid);
		}
	}

    	if (is_vm_hugetlb_page(vma))
    		seq_printf(m, " huge");
    
    
    	walk_page_range(vma->vm_start, vma->vm_end, &walk);
    
    	if (!md->pages)
    		goto out;
    
    	if (md->anon)
    		seq_printf(m, " anon=%lu", md->anon);
    
    	if (md->dirty)
    		seq_printf(m, " dirty=%lu", md->dirty);
    
    	if (md->pages != md->anon && md->pages != md->dirty)
    		seq_printf(m, " mapped=%lu", md->pages);
    
    	if (md->mapcount_max > 1)
    		seq_printf(m, " mapmax=%lu", md->mapcount_max);
    
    	if (md->swapcache)
    		seq_printf(m, " swapcache=%lu", md->swapcache);
    
    	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
    		seq_printf(m, " active=%lu", md->active);
    
    	if (md->writeback)
    		seq_printf(m, " writeback=%lu", md->writeback);
    
    
    	for_each_node_state(n, N_MEMORY)
    
    		if (md->node[n])
    			seq_printf(m, " N%d=%lu", n, md->node[n]);
    out:
    	seq_putc(m, '\n');
    
	if (m->count < m->size)
		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
	return 0;
}

    static int show_pid_numa_map(struct seq_file *m, void *v)
    {
    	return show_numa_map(m, v, 1);
    }
    
    static int show_tid_numa_map(struct seq_file *m, void *v)
    {
    	return show_numa_map(m, v, 0);
    }
    
    
    static const struct seq_operations proc_pid_numa_maps_op = {
    
    	.start  = m_start,
    	.next   = m_next,
    	.stop   = m_stop,
	.show   = show_pid_numa_map,
};

    static const struct seq_operations proc_tid_numa_maps_op = {
    	.start  = m_start,
    	.next   = m_next,
    	.stop   = m_stop,
    	.show   = show_tid_numa_map,
    };
    
    static int numa_maps_open(struct inode *inode, struct file *file,
			  const struct seq_operations *ops)
{
    	struct numa_maps_private *priv;
    	int ret = -ENOMEM;
    	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
    	if (priv) {
		priv->proc_maps.pid = proc_pid(inode);
		ret = seq_open(file, ops);
		if (!ret) {
    			struct seq_file *m = file->private_data;
    			m->private = priv;
    		} else {
    			kfree(priv);
    		}
    	}
	return ret;
}

    static int pid_numa_maps_open(struct inode *inode, struct file *file)
    {
    	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
    }
    
    static int tid_numa_maps_open(struct inode *inode, struct file *file)
    {
    	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
    }
    
    const struct file_operations proc_pid_numa_maps_operations = {
    	.open		= pid_numa_maps_open,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    	.release	= seq_release_private,
    };
    
    const struct file_operations proc_tid_numa_maps_operations = {
    	.open		= tid_numa_maps_open,
    
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    
	.release	= seq_release_private,
};

    #endif /* CONFIG_NUMA */