tree:
https://android.googlesource.com/kernel/common android12-5.10
head: 70c9301d9c4be76630901189cc7376eb22115a3b
commit: 6971350406a366f4565bdc6b17c08ddd577f27f3 [13859/13862] ANDROID: fix mmu_notifier
race caused by not taking mmap_lock during SPF
config: x86_64-randconfig-r023-20211216
(
https://download.01.org/0day-ci/archive/20211216/202112161751.Fj2RIlo4-lk...)
compiler: clang version 14.0.0 (
https://github.com/llvm/llvm-project
dd245bab9fbb364faa1581e4f92ba3119a872fba)
reproduce (this is a W=1 build):
wget
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O
~/bin/make.cross
chmod +x ~/bin/make.cross
git remote add android-common
https://android.googlesource.com/kernel/common
git fetch --no-tags android-common android12-5.10
git checkout 6971350406a366f4565bdc6b17c08ddd577f27f3
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir
ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add the following tag, as appropriate:
Reported-by: kernel test robot <lkp(a)intel.com>
All errors (new ones prefixed by >>):
> mm/memory.c:4724:9: error: implicit declaration of function
'mmu_notifier_trylock' [-Werror,-Wimplicit-function-declaration]
if (!mmu_notifier_trylock(vmf->vma->vm_mm)) {
^
1 error generated.
vim +/mmu_notifier_trylock +4724 mm/memory.c
4616
4617 /*
4618 * These routines also need to handle stuff like marking pages dirty
4619 * and/or accessed for architectures that don't do it in hardware (most
4620 * RISC architectures). The early dirtying is also good on the i386.
4621 *
4622 * There is also a hook called "update_mmu_cache()" that architectures
4623 * with external mmu caches can use to update those (ie the Sparc or
4624 * PowerPC hashed page tables that act as extended TLBs).
4625 *
4626 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4627 * concurrent faults).
4628 *
4629 * The mmap_lock may have been released depending on flags and our return value.
4630 * See filemap_fault() and __lock_page_or_retry().
4631 */
/*
 * handle_pte_fault() - last, PTE-level stage of the fault walk for
 * @vmf->address. Dispatches to do_anonymous_page()/do_fault() when no
 * PTE exists yet, do_swap_page() for non-present entries, do_numa_page()
 * for prot-none NUMA hinting faults, and do_wp_page() for write faults
 * on a non-writable PTE; otherwise it just marks the PTE young/dirty.
 * Returns 0 on success, VM_FAULT_RETRY when a speculative (SPF) fault
 * must be retried under mmap_lock, or whatever VM_FAULT_* code the
 * sub-handler returns.
 */
4632 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
4633 {
4634 pte_t entry;
4635 vm_fault_t ret = 0;
4636
4637 if (unlikely(pmd_none(*vmf->pmd))) {
4638 /*
4639 * In the case of the speculative page fault handler we abort
4640 * the speculative path immediately as the pmd is probably
4641 * in the way to be converted in a huge one. We will try
4642 * again holding the mmap_sem (which implies that the collapse
4643 * operation is done).
4644 */
4645 if (vmf->flags & FAULT_FLAG_SPECULATIVE)
4646 return VM_FAULT_RETRY;
4647 /*
4648 * Leave __pte_alloc() until later: because vm_ops->fault may
4649 * want to allocate huge page, and if we expose page table
4650 * for an instant, it will be difficult to retract from
4651 * concurrent faults and from rmap lookups.
4652 */
4653 vmf->pte = NULL;
4654 } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
4655 /*
4656 * If a huge pmd materialized under us just retry later. Use
4657 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
4658 * of pmd_trans_huge() to ensure the pmd didn't become
4659 * pmd_trans_huge under us and then back to pmd_none, as a
4660 * result of MADV_DONTNEED running immediately after a huge pmd
4661 * fault in a different thread of this mm, in turn leading to a
4662 * misleading pmd_trans_huge() retval. All we have to ensure is
4663 * that it is a regular pmd that we can walk with
4664 * pte_offset_map() and we can do that through an atomic read
4665 * in C, which is what pmd_trans_unstable() provides.
4666 */
4667 if (pmd_devmap_trans_unstable(vmf->pmd))
4668 return 0;
4669 /*
4670 * A regular pmd is established and it can't morph into a huge
4671 * pmd from under us anymore at this point because we hold the
4672 * mmap_lock read mode and khugepaged takes it in write mode.
4673 * So now it's safe to run pte_offset_map().
4674 * This is not applicable to the speculative page fault handler
4675 * but in that case, the pte is fetched earlier in
4676 * handle_speculative_fault().
4677 */
4678 vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
4679 vmf->orig_pte = *vmf->pte;
4680
4681 /*
4682 * some architectures can have larger ptes than wordsize,
4683 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and
4684 * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic
4685 * accesses. The code below just needs a consistent view
4686 * for the ifs and we later double check anyway with the
4687 * ptl lock held. So here a barrier will do.
4688 */
4689 barrier();
4690 if (pte_none(vmf->orig_pte)) {
4691 pte_unmap(vmf->pte);
4692 vmf->pte = NULL;
4693 }
4694 }
4695
4696 if (!vmf->pte) {
4697 if (vma_is_anonymous(vmf->vma))
4698 return do_anonymous_page(vmf);
4699 else if ((vmf->flags & FAULT_FLAG_SPECULATIVE) &&
4700 !vmf_allows_speculation(vmf))
4701 return VM_FAULT_RETRY;
4702 else
4703 return do_fault(vmf);
4704 }
4705
4706 if (!pte_present(vmf->orig_pte))
4707 return do_swap_page(vmf);
4708
4709 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
4710 return do_numa_page(vmf);
4711
4712 if (!pte_spinlock(vmf))
4713 return VM_FAULT_RETRY;
4714 entry = vmf->orig_pte;
4715 if (unlikely(!pte_same(*vmf->pte, entry))) {
4716 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
4717 goto unlock;
4718 }
4719 if (vmf->flags & FAULT_FLAG_WRITE) {
4720 if (!pte_write(entry)) {
4721 if (!(vmf->flags & FAULT_FLAG_SPECULATIVE))
4722 return do_wp_page(vmf);
4723
/*
 * NOTE(review): mmu_notifier_trylock() is the call the robot flags
 * above as an implicit function declaration — in this randconfig no
 * declaration of it is visible to mm/memory.c. Presumably the patch
 * that introduced it only declares it under a mmu-notifier config
 * option and is missing a stub for the disabled case — confirm
 * against include/linux/mmu_notifier.h; the same applies to
 * mmu_notifier_unlock() below.
 */
4724 if (!mmu_notifier_trylock(vmf->vma->vm_mm)) {
4725 ret = VM_FAULT_RETRY;
4726 goto unlock;
4727 }
4728
4729 ret = do_wp_page(vmf);
4730 mmu_notifier_unlock(vmf->vma->vm_mm);
4731 return ret;
4732 }
4733 entry = pte_mkdirty(entry);
4734 }
4735 entry = pte_mkyoung(entry);
4736 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
4737 vmf->flags & FAULT_FLAG_WRITE)) {
4738 update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
4739 } else {
4740 /* Skip spurious TLB flush for retried page fault */
4741 if (vmf->flags & FAULT_FLAG_TRIED)
4742 goto unlock;
4743 if (vmf->flags & FAULT_FLAG_SPECULATIVE)
4744 ret = VM_FAULT_RETRY;
4745 /*
4746 * This is needed only for protection faults but the arch code
4747 * is not yet telling us if this is a protection fault or not.
4748 * This still avoids useless tlb flushes for .text page faults
4749 * with threads.
4750 */
4751 if (vmf->flags & FAULT_FLAG_WRITE)
4752 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address);
4753 }
4754 unlock:
4755 pte_unmap_unlock(vmf->pte, vmf->ptl);
4756 return ret;
4757 }
4758
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org