 */

 #include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/cpuhotplug.h>
 #include <linux/uaccess.h>
 #include <linux/jump_label.h>

+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
@@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 }

 #ifdef CONFIG_STRICT_KERNEL_RWX
+
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
+static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
+static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);

 static int map_patch_area(void *addr, unsigned long text_poke_addr);
 static void unmap_patch_area(unsigned long addr);

+static bool mm_patch_enabled(void)
+{
+	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+	struct mm_struct *orig_mm = current->active_mm;
+
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+	WARN_ON(!mm_is_thread_local(temp_mm));
+
+	suspend_breakpoints();
+	return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+			       struct mm_struct *orig_mm)
+{
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(temp_mm, orig_mm, current);
+	restore_breakpoints();
+}
+
 static int text_area_cpu_up(unsigned int cpu)
 {
 	struct vm_struct *area;
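The comment above start_using_temp_mm() leaves the hwsync and the context synchronising instruction (CSI) to the caller. A minimal caller sketch, hypothetical and not part of this patch, assuming IRQs are already disabled and mirroring the ptesync/isync pairing used by __do_patch_instruction_mm() further down:

static void temp_mm_usage_sketch(struct mm_struct *patching_mm)
{
	struct mm_struct *orig_mm;

	mb();		/* hwsync: order storage accesses done under the old context */
	isync();	/* CSI before the translation context changes */
	orig_mm = start_using_temp_mm(patching_mm);

	/* accesses through the temporary mm's mappings go here */

	mb();		/* hwsync: order the accesses made under the temp mm */
	isync();	/* CSI; switch_mm_irqs_off() issues another isync afterwards */
	stop_using_temp_mm(patching_mm, orig_mm);
}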
@@ -79,14 +127,86 @@ static int text_area_cpu_down(unsigned int cpu)
 	return 0;
 }

+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+	struct mmu_gather tlb;
+
+	tlb_gather_mmu(&tlb, mm);
+	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+	mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+	struct mm_struct *mm;
+	unsigned long addr;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	mm = mm_alloc();
+	if (WARN_ON(!mm))
+		goto fail_no_mm;
+
+	/*
+	 * Choose a random page-aligned address from the interval
+	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
+	 */
+	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+	/*
+	 * PTE allocation uses GFP_KERNEL which means we need to
+	 * pre-allocate the PTE here because we cannot do the
+	 * allocation during patching when IRQs are disabled.
+	 *
+	 * Using get_locked_pte() to avoid open coding, the lock
+	 * is unnecessary.
+	 */
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto fail_no_pte;
+	pte_unmap_unlock(pte, ptl);
+
+	this_cpu_write(cpu_patching_mm, mm);
+	this_cpu_write(cpu_patching_addr, addr);
+	this_cpu_write(cpu_patching_pte, pte);
+
+	return 0;
+
+fail_no_pte:
+	put_patching_mm(mm, addr);
+fail_no_mm:
+	return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+	put_patching_mm(this_cpu_read(cpu_patching_mm),
+			this_cpu_read(cpu_patching_addr));
+
+	this_cpu_write(cpu_patching_mm, NULL);
+	this_cpu_write(cpu_patching_addr, 0);
+	this_cpu_write(cpu_patching_pte, NULL);
+
+	return 0;
+}
+
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);

 void __init poking_init(void)
 {
-	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
-				    "powerpc/text_poke:online",
-				    text_area_cpu_up,
-				    text_area_cpu_down);
+	int ret;
+
+	if (mm_patch_enabled())
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke_mm:online",
+					text_area_cpu_up_mm,
+					text_area_cpu_down_mm);
+	else
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke:online",
+					text_area_cpu_up,
+					text_area_cpu_down);

 	/* cpuhp_setup_state returns >= 0 on success */
 	if (WARN_ON(ret < 0))
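On the random address chosen in text_area_cpu_up_mm() above: the multiplier falls in [1, DEFAULT_MAP_WINDOW / PAGE_SIZE - 2], so the poke page never covers the zero page and ends at or below DEFAULT_MAP_WINDOW - PAGE_SIZE. A throwaway self-check sketch (hypothetical helper, not in the patch) makes those bounds explicit:

/* Hypothetical sanity check for the address picked above; not in the patch. */
static void check_patching_addr(unsigned long addr)
{
	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
	WARN_ON(addr < PAGE_SIZE);	/* never maps the zero page */
	WARN_ON(addr + PAGE_SIZE > DEFAULT_MAP_WINDOW - PAGE_SIZE);
}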
@@ -148,6 +268,50 @@ static void unmap_patch_area(unsigned long addr)
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }

+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+	int err;
+	u32 *patch_addr;
+	unsigned long text_poke_addr;
+	pte_t *pte;
+	unsigned long pfn = get_patch_pfn(addr);
+	struct mm_struct *patching_mm;
+	struct mm_struct *orig_mm;
+
+	patching_mm = __this_cpu_read(cpu_patching_mm);
+	pte = __this_cpu_read(cpu_patching_pte);
+	text_poke_addr = __this_cpu_read(cpu_patching_addr);
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+	/* order PTE update before use, also serves as the hwsync */
+	asm volatile("ptesync": : :"memory");
+
+	/* order context switch after arbitrary prior code */
+	isync();
+
+	orig_mm = start_using_temp_mm(patching_mm);
+
+	err = __patch_instruction(addr, instr, patch_addr);
+
+	/* hwsync performed by __patch_instruction (sync) if successful */
+	if (err)
+		mb(); /* sync */
+
+	/* context synchronisation performed by __patch_instruction (isync or exception) */
+	stop_using_temp_mm(patching_mm, orig_mm);
+
+	pte_clear(patching_mm, text_poke_addr, pte);
+	/*
+	 * ptesync to order PTE update before TLB invalidation done
+	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+	 */
+	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+	return err;
+}
+
 static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
 	int err;
@@ -187,7 +351,10 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 		return raw_patch_instruction(addr, instr);

 	local_irq_save(flags);
-	err = __do_patch_instruction(addr, instr);
+	if (mm_patch_enabled())
+		err = __do_patch_instruction_mm(addr, instr);
+	else
+		err = __do_patch_instruction(addr, instr);
 	local_irq_restore(flags);

 	return err;
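The mm-based path is selected inside do_patch_instruction(), so existing callers are unaffected. Purely for illustration (a hypothetical wrapper, not part of the patch), a call site still looks like this:

/* Illustrative only: callers keep using patch_instruction() as before. */
static int nop_out(u32 *addr)
{
	/* patch_instruction() picks the vmalloc- or temp-mm-based path internally */
	return patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
}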