Skip to content

Commit dc8fdf5

Browse files
authored
gh-106581: Split CALL_PY_EXACT_ARGS into uops (python#107760)
* Split `CALL_PY_EXACT_ARGS` into uops. This is only the first step toward supporting `CALL` in Tier 2. The next step involves tracing into the called code object and back. After that we'll have to handle the remaining `CALL` specializations. Finally we'll have to deal with `KW_NAMES`. Note: this moves the assignment to `frame->return_offset` so that it sits directly in front of `DISPATCH_INLINED()`, which makes it easier to move that assignment into `_PUSH_FRAME` later.
1 parent 665a439 commit dc8fdf5

14 files changed

+412
-116
lines changed

Include/internal/pycore_opcode_metadata.h

+43-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_misc.py

+17
Original file line numberDiff line numberDiff line change
@@ -2618,6 +2618,23 @@ def testfunc(it):
26182618
with self.assertRaises(StopIteration):
26192619
next(it)
26202620

2621+
def test_call_py_exact_args(self):
2622+
def testfunc(n):
2623+
def dummy(x):
2624+
return x+1
2625+
for i in range(n):
2626+
dummy(i)
2627+
2628+
opt = _testinternalcapi.get_uop_optimizer()
2629+
with temporary_optimizer(opt):
2630+
testfunc(10)
2631+
2632+
ex = get_first_executor(testfunc)
2633+
self.assertIsNotNone(ex)
2634+
uops = {opname for opname, _, _ in ex}
2635+
self.assertIn("_PUSH_FRAME", uops)
2636+
2637+
26212638

26222639
if __name__ == "__main__":
26232640
unittest.main()

Python/abstract_interp_cases.c.h

+28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/bytecodes.c

+66-16
Original file line numberDiff line numberDiff line change
@@ -956,13 +956,13 @@ dummy_func(
956956
{
957957
PyGenObject *gen = (PyGenObject *)receiver;
958958
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
959-
frame->return_offset = oparg;
960959
STACK_SHRINK(1);
961960
_PyFrame_StackPush(gen_frame, v);
962961
gen->gi_frame_state = FRAME_EXECUTING;
963962
gen->gi_exc_state.previous_item = tstate->exc_info;
964963
tstate->exc_info = &gen->gi_exc_state;
965964
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
965+
frame->return_offset = oparg;
966966
DISPATCH_INLINED(gen_frame);
967967
}
968968
if (Py_IsNone(v) && PyIter_Check(receiver)) {
@@ -995,13 +995,13 @@ dummy_func(
995995
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
996996
STAT_INC(SEND, hit);
997997
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
998-
frame->return_offset = oparg;
999998
STACK_SHRINK(1);
1000999
_PyFrame_StackPush(gen_frame, v);
10011000
gen->gi_frame_state = FRAME_EXECUTING;
10021001
gen->gi_exc_state.previous_item = tstate->exc_info;
10031002
tstate->exc_info = &gen->gi_exc_state;
10041003
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
1004+
frame->return_offset = oparg;
10051005
DISPATCH_INLINED(gen_frame);
10061006
}
10071007

@@ -2587,14 +2587,14 @@ dummy_func(
25872587
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
25882588
STAT_INC(FOR_ITER, hit);
25892589
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
2590-
frame->return_offset = oparg;
25912590
_PyFrame_StackPush(gen_frame, Py_None);
25922591
gen->gi_frame_state = FRAME_EXECUTING;
25932592
gen->gi_exc_state.previous_item = tstate->exc_info;
25942593
tstate->exc_info = &gen->gi_exc_state;
25952594
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
25962595
assert(next_instr[oparg].op.code == END_FOR ||
25972596
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
2597+
frame->return_offset = oparg;
25982598
DISPATCH_INLINED(gen_frame);
25992599
}
26002600

@@ -2949,32 +2949,72 @@ dummy_func(
29492949
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
29502950
}
29512951

2952-
inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
2953-
ASSERT_KWNAMES_IS_NULL();
2952+
op(_CHECK_PEP_523, (--)) {
29542953
DEOPT_IF(tstate->interp->eval_frame, CALL);
2955-
int argcount = oparg;
2956-
if (self_or_null != NULL) {
2957-
args--;
2958-
argcount++;
2959-
}
2954+
}
2955+
2956+
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
2957+
ASSERT_KWNAMES_IS_NULL();
29602958
DEOPT_IF(!PyFunction_Check(callable), CALL);
29612959
PyFunctionObject *func = (PyFunctionObject *)callable;
29622960
DEOPT_IF(func->func_version != func_version, CALL);
29632961
PyCodeObject *code = (PyCodeObject *)func->func_code;
2964-
DEOPT_IF(code->co_argcount != argcount, CALL);
2962+
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
2963+
}
2964+
2965+
op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
2966+
PyFunctionObject *func = (PyFunctionObject *)callable;
2967+
PyCodeObject *code = (PyCodeObject *)func->func_code;
29652968
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
2969+
}
2970+
2971+
op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
2972+
int argcount = oparg;
2973+
if (self_or_null != NULL) {
2974+
args--;
2975+
argcount++;
2976+
}
29662977
STAT_INC(CALL, hit);
2967-
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
2978+
PyFunctionObject *func = (PyFunctionObject *)callable;
2979+
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
29682980
for (int i = 0; i < argcount; i++) {
29692981
new_frame->localsplus[i] = args[i];
29702982
}
2971-
// Manipulate stack directly since we leave using DISPATCH_INLINED().
2972-
STACK_SHRINK(oparg + 2);
2973-
SKIP_OVER(INLINE_CACHE_ENTRIES_CALL);
2983+
}
2984+
2985+
// The 'unused' output effect represents the return value
2986+
// (which will be pushed when the frame returns).
2987+
// It is needed so CALL_PY_EXACT_ARGS matches its family.
2988+
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
2989+
// Write it out explicitly because it's subtly different.
2990+
// Eventually this should be the only occurrence of this code.
29742991
frame->return_offset = 0;
2975-
DISPATCH_INLINED(new_frame);
2992+
assert(tstate->interp->eval_frame == NULL);
2993+
_PyFrame_SetStackPointer(frame, stack_pointer);
2994+
new_frame->previous = frame;
2995+
CALL_STAT_INC(inlined_py_calls);
2996+
#if TIER_ONE
2997+
frame = cframe.current_frame = new_frame;
2998+
goto start_frame;
2999+
#endif
3000+
#if TIER_TWO
3001+
frame = tstate->cframe->current_frame = new_frame;
3002+
ERROR_IF(_Py_EnterRecursivePy(tstate), exit_unwind);
3003+
stack_pointer = _PyFrame_GetStackPointer(frame);
3004+
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
3005+
#endif
29763006
}
29773007

3008+
macro(CALL_PY_EXACT_ARGS) =
3009+
unused/1 + // Skip over the counter
3010+
_CHECK_PEP_523 +
3011+
_CHECK_FUNCTION_EXACT_ARGS +
3012+
_CHECK_STACK_SPACE +
3013+
_INIT_CALL_PY_EXACT_ARGS +
3014+
SAVE_IP + // Tier 2 only; special-cased oparg
3015+
SAVE_CURRENT_IP + // Sets frame->prev_instr
3016+
_PUSH_FRAME;
3017+
29783018
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
29793019
ASSERT_KWNAMES_IS_NULL();
29803020
DEOPT_IF(tstate->interp->eval_frame, CALL);
@@ -3735,6 +3775,16 @@ dummy_func(
37353775
frame->prev_instr = ip_offset + oparg;
37363776
}
37373777

3778+
op(SAVE_CURRENT_IP, (--)) {
3779+
#if TIER_ONE
3780+
frame->prev_instr = next_instr - 1;
3781+
#endif
3782+
#if TIER_TWO
3783+
// Relies on a preceding SAVE_IP
3784+
frame->prev_instr--;
3785+
#endif
3786+
}
3787+
37383788
op(EXIT_TRACE, (--)) {
37393789
frame->prev_instr--; // Back up to just before destination
37403790
_PyFrame_SetStackPointer(frame, stack_pointer);

Python/ceval.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -602,11 +602,6 @@ int _Py_CheckRecursiveCallPy(
602602
return 0;
603603
}
604604

605-
static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
606-
return (tstate->py_recursion_remaining-- <= 0) &&
607-
_Py_CheckRecursiveCallPy(tstate);
608-
}
609-
610605

611606
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
612607
tstate->py_recursion_remaining++;
@@ -770,6 +765,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
770765
#endif
771766
{
772767

768+
#define TIER_ONE 1
773769
#include "generated_cases.c.h"
774770

775771
/* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c,

Python/ceval_macros.h

+5
Original file line numberDiff line numberDiff line change
@@ -364,3 +364,8 @@ static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
364364
#else
365365
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
366366
#endif
367+
368+
static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
369+
return (tstate->py_recursion_remaining-- <= 0) &&
370+
_Py_CheckRecursiveCallPy(tstate);
371+
}

Python/executor.c

+2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
8181
OBJECT_STAT_INC(optimization_uops_executed);
8282
switch (opcode) {
8383

84+
#define TIER_TWO 2
8485
#include "executor_cases.c.h"
8586

8687
default:
@@ -106,6 +107,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
106107
pop_2_error:
107108
STACK_SHRINK(1);
108109
pop_1_error:
110+
pop_1_exit_unwind:
109111
STACK_SHRINK(1);
110112
error:
111113
// On ERROR_IF we return NULL as the frame.

0 commit comments

Comments
 (0)