 #include <linux/slab.h>
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
+#include <linux/mmu_context.h>
 #include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
@@ -684,41 +685,104 @@ __ro_after_init unsigned long poking_addr;
 
 static void *__text_poke(void *addr, const void *opcode, size_t len)
 {
+	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
+	struct page *pages[2] = {NULL};
+	temp_mm_state_t prev;
 	unsigned long flags;
-	char *vaddr;
-	struct page *pages[2];
-	int i;
+	pte_t pte, *ptep;
+	spinlock_t *ptl;
+	pgprot_t pgprot;
 
 	/*
-	 * While boot memory allocator is runnig we cannot use struct
-	 * pages as they are not yet initialized.
+	 * While boot memory allocator is running we cannot use struct pages as
+	 * they are not yet initialized. There is no way to recover.
 	 */
 	BUG_ON(!after_bootmem);
 
 	if (!core_kernel_text((unsigned long)addr)) {
 		pages[0] = vmalloc_to_page(addr);
-		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
 		pages[0] = virt_to_page(addr);
 		WARN_ON(!PageReserved(pages[0]));
-		pages[1] = virt_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
-	BUG_ON(!pages[0]);
+	/*
+	 * If something went wrong, crash and burn since recovery paths are not
+	 * implemented.
+	 */
+	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
 	local_irq_save(flags);
-	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
-	if (pages[1])
-		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
-	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
-	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-	clear_fixmap(FIX_TEXT_POKE0);
-	if (pages[1])
-		clear_fixmap(FIX_TEXT_POKE1);
-	local_flush_tlb();
-	sync_core();
-	/* Could also do a CLFLUSH here to speed up CPU recovery; but
-	   that causes hangs on some VIA CPUs. */
-	for (i = 0; i < len; i++)
-		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
+
+	/*
+	 * Map the page without the global bit, as TLB flushing is done with
+	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
+	 */
+	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
+
+	/*
+	 * The lock is not really needed, but this allows to avoid open-coding.
+	 */
+	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+
+	/*
+	 * This must not fail; preallocated in poking_init().
+	 */
+	VM_BUG_ON(!ptep);
+
+	pte = mk_pte(pages[0], pgprot);
+	set_pte_at(poking_mm, poking_addr, ptep, pte);
+
+	if (cross_page_boundary) {
+		pte = mk_pte(pages[1], pgprot);
+		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+	}
+
+	/*
+	 * Loading the temporary mm behaves as a compiler barrier, which
+	 * guarantees that the PTE will be set at the time memcpy() is done.
+	 */
+	prev = use_temporary_mm(poking_mm);
+
+	kasan_disable_current();
+	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+	kasan_enable_current();
+
+	/*
+	 * Ensure that the PTE is only cleared after the instructions of memcpy
+	 * were issued by using a compiler barrier.
+	 */
+	barrier();
+
+	pte_clear(poking_mm, poking_addr, ptep);
+	if (cross_page_boundary)
+		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+	/*
+	 * Loading the previous page-table hierarchy requires a serializing
+	 * instruction that already allows the core to see the updated version.
+	 * Xen-PV is assumed to serialize execution in a similar manner.
+	 */
+	unuse_temporary_mm(prev);
+
+	/*
+	 * Flushing the TLB might involve IPIs, which would require enabled
+	 * IRQs, but not if the mm is not used, as it is in this point.
+	 */
+	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
+			   PAGE_SHIFT, false);
+
+	/*
+	 * If the text does not match what we just wrote then something is
+	 * fundamentally screwy; there's nothing we can really do about that.
	 */
+	BUG_ON(memcmp(addr, opcode, len));
+
+	pte_unmap_unlock(ptep, ptl);
 	local_irq_restore(flags);
 	return addr;
 }
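
Not shown in this hunk: the rewritten __text_poke() leans on use_temporary_mm()/unuse_temporary_mm(), which load poking_mm on the local CPU only for the duration of the memcpy(), so the writable alias of the kernel text is never visible to other cores or to userspace. As a rough, non-authoritative sketch of how helpers of that shape can be built on x86 (assuming switch_mm_irqs_off() and the cpu_tlbstate per-CPU state; the actual definitions come from the companion mmu_context patch and may differ in detail):

/*
 * Illustrative sketch only -- not the upstream definitions. Switches the
 * local CPU to a temporary mm and back, remembering the previously loaded
 * mm. Callers must have IRQs disabled, as __text_poke() does.
 */
typedef struct {
	struct mm_struct *mm;	/* mm that was loaded before the switch */
} temp_mm_state_t;

static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();

	/* Remember the currently loaded mm, then switch to the temporary one. */
	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();

	/* Restore the previous mm; the CR3 reload is a serializing write. */
	switch_mm_irqs_off(NULL, prev_state.mm, current);
}

Because the poking PTEs exist only in poking_mm, and poking_mm is loaded only on the patching CPU with IRQs off, no other CPU can race with the temporary writable mapping of the text page.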
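
The VM_BUG_ON(!ptep) path above works only because poking_init() walks the page tables for poking_addr once at boot, so __text_poke() never has to allocate page-table pages while IRQs are disabled. A minimal sketch of that preallocation step, assuming poking_mm and poking_addr have already been set up (the upstream poking_init() additionally randomizes poking_addr, which is omitted here):

/*
 * Illustrative sketch only: pre-fault the page-table levels for
 * poking_addr in poking_mm. get_locked_pte() allocates any missing
 * intermediate tables; the PTE itself is left clear and is only
 * populated temporarily by __text_poke().
 */
void __init poking_init(void)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
	BUG_ON(!ptep);
	pte_unmap_unlock(ptep, ptl);
}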