tree:
https://android.googlesource.com/kernel/common android13-5.10
head: 729a79f366e5e7376ee64b955958124c7dc28073
commit: 729a79f366e5e7376ee64b955958124c7dc28073 [2/2] ANDROID: fix mmu_notifier race
caused by not taking mmap_lock during SPF
config: arm-randconfig-r002-20211216
(https://download.01.org/0day-ci/archive/20211216/202112162150.bjWRx19B-lk...)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project dd245bab9fbb364faa1581e4f92ba3119a872fba)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
git remote add android-common https://android.googlesource.com/kernel/common
git fetch --no-tags android-common android13-5.10
git checkout 729a79f366e5e7376ee64b955958124c7dc28073
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir
ARCH=arm SHELL=/bin/bash
If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
> mm/memory.c:4703:9: error: implicit declaration of function 'mmu_notifier_trylock' [-Werror,-Wimplicit-function-declaration]
if (!mmu_notifier_trylock(vmf->vma->vm_mm)) {
^
1 error generated.
Kconfig warnings: (for reference only)
WARNING: unmet direct dependencies detected for QCOM_SCM
Depends on (ARM || ARM64) && HAVE_ARM_SMCCC
Selected by
- ARM_QCOM_SPM_CPUIDLE && CPU_IDLE && (ARM || ARM64) &&
(ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
vim +/mmu_notifier_trylock +4703 mm/memory.c
4595
4596 /*
4597 * These routines also need to handle stuff like marking pages dirty
4598 * and/or accessed for architectures that don't do it in hardware (most
4599 * RISC architectures). The early dirtying is also good on the i386.
4600 *
4601 * There is also a hook called "update_mmu_cache()" that architectures
4602 * with external mmu caches can use to update those (ie the Sparc or
4603 * PowerPC hashed page tables that act as extended TLBs).
4604 *
4605 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4606 * concurrent faults).
4607 *
4608 * The mmap_lock may have been released depending on flags and our return value.
4609 * See filemap_fault() and __lock_page_or_retry().
4610 */
4611 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
4612 {
 /*
  * Final, PTE-level stage of fault handling: dispatch to
  * do_anonymous_page()/do_fault() (no pte yet), do_swap_page()
  * (non-present pte), do_numa_page() (protnone), or do_wp_page()
  * (write to a non-writable pte); otherwise just fix up the
  * access/dirty bits. ret stays 0 unless the speculative path
  * (FAULT_FLAG_SPECULATIVE) has to bail out with VM_FAULT_RETRY.
  */
4613 pte_t entry;
4614 vm_fault_t ret = 0;
4615
4616 if (unlikely(pmd_none(*vmf->pmd))) {
4617 /*
4618 * In the case of the speculative page fault handler we abort
4619 * the speculative path immediately as the pmd is probably
4620 * in the way to be converted in a huge one. We will try
4621 * again holding the mmap_sem (which implies that the collapse
4622 * operation is done).
4623 */
4624 if (vmf->flags & FAULT_FLAG_SPECULATIVE)
4625 return VM_FAULT_RETRY;
4626 /*
4627 * Leave __pte_alloc() until later: because vm_ops->fault may
4628 * want to allocate huge page, and if we expose page table
4629 * for an instant, it will be difficult to retract from
4630 * concurrent faults and from rmap lookups.
4631 */
4632 vmf->pte = NULL;
4633 } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
4634 /*
4635 * If a huge pmd materialized under us just retry later. Use
4636 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
4637 * of pmd_trans_huge() to ensure the pmd didn't become
4638 * pmd_trans_huge under us and then back to pmd_none, as a
4639 * result of MADV_DONTNEED running immediately after a huge pmd
4640 * fault in a different thread of this mm, in turn leading to a
4641 * misleading pmd_trans_huge() retval. All we have to ensure is
4642 * that it is a regular pmd that we can walk with
4643 * pte_offset_map() and we can do that through an atomic read
4644 * in C, which is what pmd_trans_unstable() provides.
4645 */
4646 if (pmd_devmap_trans_unstable(vmf->pmd))
4647 return 0;
4648 /*
4649 * A regular pmd is established and it can't morph into a huge
4650 * pmd from under us anymore at this point because we hold the
4651 * mmap_lock read mode and khugepaged takes it in write mode.
4652 * So now it's safe to run pte_offset_map().
4653 * This is not applicable to the speculative page fault handler
4654 * but in that case, the pte is fetched earlier in
4655 * handle_speculative_fault().
4656 */
4657 vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
4658 vmf->orig_pte = *vmf->pte;
4659
4660 /*
4661 * some architectures can have larger ptes than wordsize,
4662 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and
4663 * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic
4664 * accesses. The code below just needs a consistent view
4665 * for the ifs and we later double check anyway with the
4666 * ptl lock held. So here a barrier will do.
4667 */
4668 barrier();
4669 if (pte_none(vmf->orig_pte)) {
4670 pte_unmap(vmf->pte);
4671 vmf->pte = NULL;
4672 }
4673 }
4674
 /* No pte: anonymous vs file-backed dispatch. File-backed faults on the
  * speculative path are only allowed when vmf_allows_speculation() says so. */
4675 if (!vmf->pte) {
4676 if (vma_is_anonymous(vmf->vma))
4677 return do_anonymous_page(vmf);
4678 else if ((vmf->flags & FAULT_FLAG_SPECULATIVE) &&
4679 !vmf_allows_speculation(vmf))
4680 return VM_FAULT_RETRY;
4681 else
4682 return do_fault(vmf);
4683 }
4684
4685 if (!pte_present(vmf->orig_pte))
4686 return do_swap_page(vmf);
4687
4688 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
4689 return do_numa_page(vmf);
4690
 /* NOTE(review): pte_spinlock() failure is turned into a retry —
  * presumably it can only fail on the speculative path; confirm
  * against the SPF pte_spinlock() definition. */
4691 if (!pte_spinlock(vmf))
4692 return VM_FAULT_RETRY;
4693 entry = vmf->orig_pte;
 /* pte changed while we were unlocked: another thread handled it. */
4694 if (unlikely(!pte_same(*vmf->pte, entry))) {
4695 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
4696 goto unlock;
4697 }
4698 if (vmf->flags & FAULT_FLAG_WRITE) {
4699 if (!pte_write(entry)) {
 /* Non-speculative write-protect fault: mmap_lock is held,
  * go straight to COW. */
4700 if (!(vmf->flags & FAULT_FLAG_SPECULATIVE))
4701 return do_wp_page(vmf);
4702
 /*
  * NOTE(review): mmu_notifier_trylock()/mmu_notifier_unlock()
  * are the symbols the build log above flags as implicitly
  * declared — presumably <linux/mmu_notifier.h> provides them
  * only under CONFIG_MMU_NOTIFIER (or the SPF config) and is
  * missing no-op stubs for other configs; the fix belongs in
  * that header, not here. TODO confirm against mmu_notifier.h.
  * The speculative COW takes this lock so do_wp_page() cannot
  * race with mmu notifier invalidation without mmap_lock held.
  */
4703 if (!mmu_notifier_trylock(vmf->vma->vm_mm)) {
4704 ret = VM_FAULT_RETRY;
4705 goto unlock;
4706 }
4707
4708 ret = do_wp_page(vmf);
4709 mmu_notifier_unlock(vmf->vma->vm_mm);
4710 return ret;
4711 }
4712 entry = pte_mkdirty(entry);
4713 }
4714 entry = pte_mkyoung(entry);
4715 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
4716 vmf->flags & FAULT_FLAG_WRITE)) {
4717 update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
4718 } else {
4719 /* Skip spurious TLB flush for retried page fault */
4720 if (vmf->flags & FAULT_FLAG_TRIED)
4721 goto unlock;
 /* Access bits did not change on the speculative path: retry
  * under mmap_lock rather than trust a possibly-stale pte. */
4722 if (vmf->flags & FAULT_FLAG_SPECULATIVE)
4723 ret = VM_FAULT_RETRY;
4724 /*
4725 * This is needed only for protection faults but the arch code
4726 * is not yet telling us if this is a protection fault or not.
4727 * This still avoids useless tlb flushes for .text page faults
4728 * with threads.
4729 */
4730 if (vmf->flags & FAULT_FLAG_WRITE)
4731 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address);
4732 }
4733 unlock:
4734 pte_unmap_unlock(vmf->pte, vmf->ptl);
4735 return ret;
4736 }
4737
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org