On Thu 28-09-17 18:21:18, Dan Williams wrote:
> VM_MIXEDMAP is used by dax to tell mm paths like vm_normal_page() that
> the memory page it is dealing with is not typical memory from the linear
> map. The get_user_pages_fast() path, since it does not resolve the vma,
> is already using {pte,pmd}_devmap() as a stand-in for VM_MIXEDMAP, so we
> use that as a VM_MIXEDMAP replacement in some locations. In the cases
> where there is no pte to consult we fall back to using vma_is_dax() to
> detect the VM_MIXEDMAP special case.
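
For reference, vma_is_dax() is just this trivial helper from
include/linux/dax.h (quoting from memory, so treat the exact guards as
approximate):

static inline bool vma_is_dax(struct vm_area_struct *vma)
{
        return vma->vm_file && IS_DAX(vma->vm_file->f_inode);
}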
Well, I somewhat dislike the vma_is_dax() checks sprinkled around. That
seems rather error-prone (it is easy to forget about it when adding a new
check somewhere). Can we possibly also create a helper vma_is_special() (or
some other name) which would do ((vma->vm_flags & VM_SPECIAL) ||
vma_is_dax(vma) || is_vm_hugetlb_page(vma)) and then use it in all those
places?
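Something like this completely untested sketch (the name is of course up
for debate):

static inline bool vma_is_special(struct vm_area_struct *vma)
{
        return (vma->vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
                is_vm_hugetlb_page(vma);
}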
Honza
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 15dd7415f7b3..787dfe4f3d44 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2358,6 +2358,9 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
>                          VM_HUGETLB | VM_MIXEDMAP))
>                 return 0;               /* just ignore the advice */
>
> +       if (vma_is_dax(vma))
> +               return 0;
> +
>  #ifdef VM_SAO
>         if (*vm_flags & VM_SAO)
>                 return 0;
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 21261ff0466f..40344d43e565 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -95,7 +95,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
>                 new_flags |= VM_DONTDUMP;
>                 break;
>         case MADV_DODUMP:
> -               if (new_flags & VM_SPECIAL) {
> +               if (vma_is_dax(vma) || (new_flags & VM_SPECIAL)) {
>                         error = -EINVAL;
>                         goto out;
>                 }
> diff --git a/mm/memory.c b/mm/memory.c
> index ec4e15494901..771acaf54fe6 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -830,6 +830,8 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>                         return vma->vm_ops->find_special_page(vma, addr);
>                 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
>                         return NULL;
> +               if (pte_devmap(pte))
> +                       return NULL;
>                 if (is_zero_pfn(pfn))
>                         return NULL;
> @@ -917,6 +919,8 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>                 }
>         }
>
> +       if (pmd_devmap(pmd))
> +               return NULL;
>         if (is_zero_pfn(pfn))
>                 return NULL;
>         if (unlikely(pfn > highest_memmap_pfn))
> @@ -1227,7 +1231,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>          * efficient than faulting.
>          */
>         if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) &&
> -                       !vma->anon_vma)
> +                       !vma->anon_vma && !vma_is_dax(vma))
>                 return 0;
>
>         if (is_vm_hugetlb_page(vma))
> @@ -1896,12 +1900,24 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
>  }
>  EXPORT_SYMBOL(vm_insert_pfn_prot);
>
> +static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
> +{
> +       /* these checks mirror the abort conditions in vm_normal_page */
> +       if (vma->vm_flags & VM_MIXEDMAP)
> +               return true;
> +       if (pfn_t_devmap(pfn))
> +               return true;
> +       if (is_zero_pfn(pfn_t_to_pfn(pfn)))
> +               return true;
> +       return false;
> +}
> +
>  static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
>                         pfn_t pfn, bool mkwrite)
>  {
>         pgprot_t pgprot = vma->vm_page_prot;
>
> -       BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
> +       BUG_ON(!vm_mixed_ok(vma, pfn));
>
>         if (addr < vma->vm_start || addr >= vma->vm_end)
>                 return -EFAULT;
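
Aside, for readers of the archive: the pfn_t_devmap() case above is what
lets dax vmas drop VM_MIXEDMAP without tripping the relaxed BUG_ON(). The
dax fault path inserts its pages roughly like this untested fragment
(phys/vmf stand in for whatever the caller has; the PFN_DEV | PFN_MAP
flags are what make pfn_t_devmap() return true):

        /* dax-style insertion: the pfn carries PFN_DEV|PFN_MAP, so
         * vm_mixed_ok() passes even though the vma lacks VM_MIXEDMAP */
        pfn_t pfn = phys_to_pfn_t(phys, PFN_DEV | PFN_MAP);
        int ret = vm_insert_mixed(vma, vmf->address, pfn);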
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 6954c1435833..179a84a311f6 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2927,7 +2927,8 @@ int migrate_vma(const struct migrate_vma_ops *ops,
>         /* Sanity check the arguments */
>         start &= PAGE_MASK;
>         end &= PAGE_MASK;
> -       if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL))
> +       if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
> +                       vma_is_dax(vma))
>                 return -EINVAL;
>         if (start < vma->vm_start || start >= vma->vm_end)
>                 return -EINVAL;
> diff --git a/mm/mlock.c b/mm/mlock.c
> index dfc6f1912176..4d009350893f 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -520,7 +520,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
>         vm_flags_t old_flags = vma->vm_flags;
>
>         if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
> -           is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
> +           is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
> +           vma_is_dax(vma))
>                 /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
>                 goto out;
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 680506faceae..2f3971a051c6 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1723,7 +1723,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>         vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
>         if (vm_flags & VM_LOCKED) {
>                 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
> -                                       vma == get_gate_vma(current->mm)))
> +                                       vma == get_gate_vma(current->mm) ||
> +                                       vma_is_dax(vma)))
>                         mm->locked_vm += (len >> PAGE_SHIFT);
>                 else
>                         vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
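
With a vma_is_special() helper along the lines sketched above, this check
(and the mlock/migrate ones) could collapse to something like the
untested:

        if (!(vma_is_special(vma) || vma == get_gate_vma(current->mm)))
                mm->locked_vm += (len >> PAGE_SHIFT);
        else
                vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

which would be much harder to get out of sync as new special cases
accumulate.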
--
Jan Kara <jack@suse.com>
SUSE Labs, CR