
Commit b3fd8e8

Nadav Amit authored and Ingo Molnar committed
x86/alternatives: Use temporary mm for text poking
text_poke() can potentially compromise security as it sets temporary
PTEs in the fixmap. These PTEs might be used to rewrite the kernel
code from other cores accidentally or maliciously, if an attacker
gains the ability to write onto kernel memory.

Moreover, since remote TLBs are not flushed after the temporary PTEs
are removed, the time-window in which the code is writable is not
limited if the fixmap PTEs - maliciously or accidentally - are cached
in the TLB.

To address these potential security hazards, use a temporary mm for
patching the code.

Finally, text_poke() is also not conservative enough when mapping pages,
as it always tries to map 2 pages, even when a single one is sufficient.
So try to be more conservative, and do not map more than needed.

Signed-off-by: Nadav Amit <[email protected]>
Signed-off-by: Rick Edgecombe <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
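The diff below relies on the temp_mm_state_t helpers, which are introduced by a companion patch in this series and are not part of this diff. As a reading aid, here is a minimal sketch of what use_temporary_mm()/unuse_temporary_mm() do, inferred from how they are used in __text_poke(); the names match the diff, but the bodies here are an assumption:

typedef struct {
	struct mm_struct *prev;
} temp_mm_state_t;

/*
 * Sketch only: switch this CPU to the temporary mm. Must run with IRQs
 * disabled so the task cannot be migrated or preempted mid-switch.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();
	temp_state.prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);
	return temp_state;
}

/* Sketch only: restore the mm that was loaded before use_temporary_mm(). */
static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.prev, current);
}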
1 parent 4fc1970 commit b3fd8e8

3 files changed: +86 -26

arch/x86/include/asm/fixmap.h (-2)

@@ -103,8 +103,6 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 #ifdef CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif

arch/x86/kernel/alternative.c (+86 -22)

@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
+#include <linux/mmu_context.h>
 #include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
@@ -684,41 +685,104 @@ __ro_after_init unsigned long poking_addr;

 static void *__text_poke(void *addr, const void *opcode, size_t len)
 {
+	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
+	struct page *pages[2] = {NULL};
+	temp_mm_state_t prev;
 	unsigned long flags;
-	char *vaddr;
-	struct page *pages[2];
-	int i;
+	pte_t pte, *ptep;
+	spinlock_t *ptl;
+	pgprot_t pgprot;

 	/*
-	 * While boot memory allocator is runnig we cannot use struct
-	 * pages as they are not yet initialized.
+	 * While boot memory allocator is running we cannot use struct pages as
+	 * they are not yet initialized. There is no way to recover.
 	 */
 	BUG_ON(!after_bootmem);

 	if (!core_kernel_text((unsigned long)addr)) {
 		pages[0] = vmalloc_to_page(addr);
-		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
 		pages[0] = virt_to_page(addr);
 		WARN_ON(!PageReserved(pages[0]));
-		pages[1] = virt_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
-	BUG_ON(!pages[0]);
+	/*
+	 * If something went wrong, crash and burn since recovery paths are not
+	 * implemented.
+	 */
+	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
 	local_irq_save(flags);
-	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
-	if (pages[1])
-		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
-	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
-	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-	clear_fixmap(FIX_TEXT_POKE0);
-	if (pages[1])
-		clear_fixmap(FIX_TEXT_POKE1);
-	local_flush_tlb();
-	sync_core();
-	/* Could also do a CLFLUSH here to speed up CPU recovery; but
-	   that causes hangs on some VIA CPUs. */
-	for (i = 0; i < len; i++)
-		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
+
+	/*
+	 * Map the page without the global bit, as TLB flushing is done with
+	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
+	 */
+	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
+
+	/*
+	 * The lock is not really needed, but this allows to avoid open-coding.
+	 */
+	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+
+	/*
+	 * This must not fail; preallocated in poking_init().
+	 */
+	VM_BUG_ON(!ptep);
+
+	pte = mk_pte(pages[0], pgprot);
+	set_pte_at(poking_mm, poking_addr, ptep, pte);
+
+	if (cross_page_boundary) {
+		pte = mk_pte(pages[1], pgprot);
+		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+	}
+
+	/*
+	 * Loading the temporary mm behaves as a compiler barrier, which
+	 * guarantees that the PTE will be set at the time memcpy() is done.
+	 */
+	prev = use_temporary_mm(poking_mm);
+
+	kasan_disable_current();
+	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+	kasan_enable_current();
+
+	/*
+	 * Ensure that the PTE is only cleared after the instructions of memcpy
+	 * were issued by using a compiler barrier.
+	 */
+	barrier();
+
+	pte_clear(poking_mm, poking_addr, ptep);
+	if (cross_page_boundary)
+		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+	/*
+	 * Loading the previous page-table hierarchy requires a serializing
+	 * instruction that already allows the core to see the updated version.
+	 * Xen-PV is assumed to serialize execution in a similar manner.
+	 */
+	unuse_temporary_mm(prev);
+
+	/*
+	 * Flushing the TLB might involve IPIs, which would require enabled
+	 * IRQs, but not if the mm is not used, as it is in this point.
+	 */
+	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
+			   PAGE_SHIFT, false);
+
+	/*
+	 * If the text does not match what we just wrote then something is
+	 * fundamentally screwy; there's nothing we can really do about that.
+	 */
+	BUG_ON(memcmp(addr, opcode, len));
+
+	pte_unmap_unlock(ptep, ptl);
 	local_irq_restore(flags);
 	return addr;
 }
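The VM_BUG_ON(!ptep) above assumes that poking_mm and poking_addr were set up earlier; that is done in the parent commit 4fc1970, which preallocates the page tables so that get_locked_pte() cannot fail in the atomic patching path. A condensed sketch of that initialization (the KASLR-based randomization of poking_addr is omitted here for brevity):

void __init poking_init(void)
{
	spinlock_t *ptl;
	pte_t *ptep;

	/* A dedicated mm that is never loaded outside of patching. */
	poking_mm = copy_init_mm();
	BUG_ON(!poking_mm);

	/* Sketch: the real code randomizes this within the user address range. */
	poking_addr = TASK_UNMAPPED_BASE;

	/*
	 * Walk (and thereby allocate) the page tables for poking_addr now;
	 * later, patching may run in an atomic context where allocation
	 * could fail.
	 */
	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
	BUG_ON(!ptep);
	pte_unmap_unlock(ptep, ptl);
}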

arch/x86/xen/mmu_pv.c (-2)

@@ -2318,8 +2318,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #elif defined(CONFIG_X86_VSYSCALL_EMULATION)
 	case VSYSCALL_PAGE:
 #endif
-	case FIX_TEXT_POKE0:
-	case FIX_TEXT_POKE1:
 		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
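For context, __text_poke() is the backend of text_poke(), which callers use to patch live kernel text. A hypothetical caller sketch (patch_site is illustrative and not part of this diff; text_poke() must be called with text_mutex held):

static void example_install_int3(void *patch_site)
{
	static const u8 int3_insn = 0xcc;	/* INT3 opcode */

	mutex_lock(&text_mutex);
	text_poke(patch_site, &int3_insn, sizeof(int3_insn));
	mutex_unlock(&text_mutex);
}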
