
From: Hugh Dickins <hugh@veritas.com>

Fixes bugzilla #2219

fork's dup_mmap leaves child mm_rb as copied from parent mm while doing all
the copy_page_ranges, and then calls build_mmap_rb without holding
page_table_lock.

try_to_unmap_one's find_vma (holding page_table_lock not mmap_sem) coming
on another cpu may cause mm mayhem.  It may leave the child's mmap_cache
pointing to a vma of the parent mm.

When the parent exits and the child faults, quite what happens rather
depends on what junk then inhabits vm_page_prot, which gets set in the page
table, with page_add_rmap adding the ptep, but junk pte likely to fail the
tests for page_remove_rmap.

Eventually the child exits, the page table is freed and try_to_unmap_one
oopses on null ptep_to_mm (but in a kernel with rss limiting, usually
page_referenced hits the null ptep_to_mm first).

This took me days and days to unravel!  Big thanks to Matthieu for
reporting it with a good test case.


---

 include/linux/mm.h |    3 ++-
 kernel/fork.c      |   13 +++++++++++--
 mm/mmap.c          |   20 ++------------------
 3 files changed, 15 insertions(+), 21 deletions(-)

diff -puN include/linux/mm.h~vma-corruption-fix include/linux/mm.h
--- 25/include/linux/mm.h~vma-corruption-fix	2004-03-06 18:37:39.000000000 -0800
+++ 25-akpm/include/linux/mm.h	2004-03-06 18:37:40.000000000 -0800
@@ -530,7 +530,8 @@ extern void si_meminfo_node(struct sysin
 
 /* mmap.c */
 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
-extern void build_mmap_rb(struct mm_struct *);
+extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
+	struct rb_node **, struct rb_node *);
 extern void exit_mmap(struct mm_struct *);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
diff -puN kernel/fork.c~vma-corruption-fix kernel/fork.c
--- 25/kernel/fork.c~vma-corruption-fix	2004-03-06 18:37:39.000000000 -0800
+++ 25-akpm/kernel/fork.c	2004-03-06 18:37:40.000000000 -0800
@@ -265,6 +265,7 @@ static struct task_struct *dup_task_stru
 static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 {
 	struct vm_area_struct * mpnt, *tmp, **pprev;
+	struct rb_node **rb_link, *rb_parent;
 	int retval;
 	unsigned long charge = 0;
 
@@ -277,6 +278,9 @@ static inline int dup_mmap(struct mm_str
 	mm->map_count = 0;
 	mm->rss = 0;
 	cpus_clear(mm->cpu_vm_mask);
+	mm->mm_rb = RB_ROOT;
+	rb_link = &mm->mm_rb.rb_node;
+	rb_parent = NULL;
 	pprev = &mm->mmap;
 
 	/*
@@ -324,11 +328,17 @@ static inline int dup_mmap(struct mm_str
 
 		/*
 		 * Link in the new vma and copy the page table entries:
-		 * link in first so that swapoff can see swap entries.
+		 * link in first so that swapoff can see swap entries,
+		 * and try_to_unmap_one's find_vma find the new vma.
 		 */
 		spin_lock(&mm->page_table_lock);
 		*pprev = tmp;
 		pprev = &tmp->vm_next;
+
+		__vma_link_rb(mm, tmp, rb_link, rb_parent);
+		rb_link = &tmp->vm_rb.rb_right;
+		rb_parent = &tmp->vm_rb;
+
 		mm->map_count++;
 		retval = copy_page_range(mm, current->mm, tmp);
 		spin_unlock(&mm->page_table_lock);
@@ -340,7 +350,6 @@ static inline int dup_mmap(struct mm_str
 			goto fail;
 	}
 	retval = 0;
-	build_mmap_rb(mm);
 
 out:
 	flush_tlb_mm(current->mm);
diff -puN mm/mmap.c~vma-corruption-fix mm/mmap.c
--- 25/mm/mmap.c~vma-corruption-fix	2004-03-06 18:37:40.000000000 -0800
+++ 25-akpm/mm/mmap.c	2004-03-06 18:37:40.000000000 -0800
@@ -222,8 +222,8 @@ __vma_link_list(struct mm_struct *mm, st
 	}
 }
 
-static void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct rb_node **rb_link, struct rb_node *rb_parent)
+void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
 	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
@@ -1404,22 +1404,6 @@ out:
 
 EXPORT_SYMBOL(do_brk);
 
-/* Build the RB tree corresponding to the VMA list. */
-void build_mmap_rb(struct mm_struct * mm)
-{
-	struct vm_area_struct * vma;
-	struct rb_node ** rb_link, * rb_parent;
-
-	mm->mm_rb = RB_ROOT;
-	rb_link = &mm->mm_rb.rb_node;
-	rb_parent = NULL;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		__vma_link_rb(mm, vma, rb_link, rb_parent);
-		rb_parent = &vma->vm_rb;
-		rb_link = &rb_parent->rb_right;
-	}
-}
-
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {

_
