From 4aeaa5a18178dec35284578954327015665fa4e4 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Thu, 16 Jun 2022 23:18:19 +0530 Subject: [PATCH 01/13] add CosmoImporter entry to sys.meta_path CosmoImporter.find_spec is a function similar to the find_spec methods found in _bootstrap.py and _bootstrap_external.py. It checks for built-in modules, frozen modules, and modules that may be within the zip store of the APE. For looking within the zip store, it performs up to two additional stat calls, but this is balanced by the fact that no more stat calls need to occur when loading a pyc file from the zip store. CosmoImporter calls small functions written within _bootstrap.py to create the correct ModuleSpec objects. This is done because the ModuleSpec.__init__ requires origin and is_package to be specified as kwargs, which is not easy to do from within C. --- .../python/Lib/importlib/_bootstrap.py | 16 ++ third_party/python/Python/import.c | 158 +++++++++++++++++- 2 files changed, 166 insertions(+), 8 deletions(-) diff --git a/third_party/python/Lib/importlib/_bootstrap.py b/third_party/python/Lib/importlib/_bootstrap.py index 7ff1415a2d1..b894e35a915 100644 --- a/third_party/python/Lib/importlib/_bootstrap.py +++ b/third_party/python/Lib/importlib/_bootstrap.py @@ -1033,6 +1033,21 @@ def _builtin_from_name(name): raise ImportError('no built-in module named ' + name) return _load_unlocked(spec) +def _get_builtin_spec(name): + # called from CosmoImporter in import.c + return ModuleSpec(name, BuiltinImporter, origin="built-in", is_package=False) + +def _get_frozen_spec(name, is_package): + # called from CosmoImporter in import.c + return ModuleSpec(name, FrozenImporter, origin="frozen", is_package=is_package) + +def _get_zipstore_spec(name, loader, origin, is_package): + # called from CosmoImporter in import.c + spec = ModuleSpec(name, loader, origin=origin, is_package=is_package) + spec.has_location = True + if is_package: + 
spec.submodule_search_locations = [origin.rpartition("/")[0]] + return spec def _setup(sys_module, _imp_module): """Setup importlib by importing needed built-in modules and injecting them @@ -1072,6 +1087,7 @@ def _install(sys_module, _imp_module): """Install importlib as the implementation of import.""" _setup(sys_module, _imp_module) + sys.meta_path.append(_imp_module.CosmoImporter) sys.meta_path.append(BuiltinImporter) sys.meta_path.append(FrozenImporter) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index e62c24aa85a..0f9036a3600 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -2295,7 +2295,7 @@ static PyObject *_imp_source_from_cache(PyObject *module, PyObject *arg) { if (!PyArg_Parse(PyOS_FSPath(arg), "z#:source_from_cache", &path, &pathlen)) return NULL; if (!path || !endswith(path, ".pyc")) { - PyErr_Format(PyExc_ValueError, "%s does not end in .pyc", path); + return NULL; } path[pathlen - 1] = '\0'; @@ -2309,10 +2309,12 @@ static PyObject *_imp_source_from_cache(PyObject *module, PyObject *arg) { PyDoc_STRVAR(_imp_source_from_cache_doc, "given a .pyc filename, return .py"); typedef struct { - PyObject_HEAD char *name; + PyObject_HEAD + char *name; char *path; Py_ssize_t namelen; Py_ssize_t pathlen; + Py_ssize_t present; } SourcelessFileLoader; static PyTypeObject SourcelessFileLoaderType; @@ -2327,6 +2329,7 @@ static SourcelessFileLoader *SFLObject_new(PyObject *cls, PyObject *args, obj->path = NULL; obj->namelen = 0; obj->pathlen = 0; + obj->present = 0; return obj; } @@ -2366,6 +2369,7 @@ static int SFLObject_init(SourcelessFileLoader *self, PyObject *args, // TODO: should this be via PyMem_RawMalloc? 
self->name = strndup(name, namelen); self->path = strndup(path, pathlen); + self->present = 0; } return result; } @@ -2420,7 +2424,8 @@ static PyObject *SFLObject_get_code(SourcelessFileLoader *self, PyObject *arg) { self->name, name); goto exit; } - if (stat(self->path, &stinfo) || !(fp = fopen(self->path, "rb"))) { + self->present = self->present || !stat(self->path, &stinfo); + if (!self->present || !(fp = fopen(self->path, "rb"))) { PyErr_Format(PyExc_ImportError, "%s does not exist\n", self->path); goto exit; } @@ -2598,10 +2603,8 @@ static PyMethodDef SFLObject_methods[] = { }; static PyTypeObject SourcelessFileLoaderType = { - /* The ob_type field must be initialized in the module init function - * to be portable to Windows without using C++. */ - PyVarObject_HEAD_INIT(NULL, 0).tp_name = - "_imp.SourcelessFileLoader", /*tp_name*/ + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "_imp.SourcelessFileLoader", /*tp_name*/ .tp_basicsize = sizeof(SourcelessFileLoader), /*tp_basicsize*/ .tp_dealloc = (destructor)SFLObject_dealloc, /*tp_dealloc*/ .tp_hash = (hashfunc)SFLObject_hash, /*tp_hash*/ @@ -2612,6 +2615,140 @@ static PyTypeObject SourcelessFileLoaderType = { .tp_new = (newfunc)SFLObject_new, /*tp_new*/ }; +typedef struct { + PyObject_HEAD +} CosmoImporter; + +static PyTypeObject CosmoImporterType; +#define CosmoImporterCheck(o) (Py_TYPE(o) == &CosmoImporterType) + +static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, + Py_ssize_t nargs, PyObject *kwargs) { + static const char *const _keywords[] = {"fullname", "path", "target", NULL}; + static _PyArg_Parser _parser = {"U|OO", _keywords, 0}; + _Py_IDENTIFIER(_get_builtin_spec); + _Py_IDENTIFIER(_get_frozen_spec); + _Py_IDENTIFIER(_get_zipstore_spec); + + PyObject *fullname = NULL; + PyObject *path = NULL; + /* path is a LIST! it contains strings similar to those in sys.path, + * ie folders that are likely to contain a particular file. 
+ * during startup the expected scenario is checking the ZIP store + * of the APE, so we ignore path and let these slower cases to + * handled by the importer objects already provided by Python. */ + PyObject *target = NULL; + PyInterpreterState *interp = PyThreadState_GET()->interp; + + const struct _frozen *p = NULL; + + static const char basepath[] = "/zip/.python/"; + const char *cname = NULL; + Py_ssize_t cnamelen = 0; + + char *newpath = NULL; + Py_ssize_t newpathsize = 0; + Py_ssize_t newpathlen = 0; + Py_ssize_t i = 0; + + SourcelessFileLoader *loader = NULL; + PyObject *origin = NULL; + long is_package = 0; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwargs, &_parser, &fullname, + &path, &target)) { + return NULL; + } + + if (fullname == NULL) { + PyErr_SetString(PyExc_ImportError, "fullname not provided\n"); + return NULL; + } + + if ((!path || path == Py_None)) { + /* we do some of BuiltinImporter's work */ + if (is_builtin(fullname) == 1) { + return _PyObject_CallMethodIdObjArgs( + interp->importlib, &PyId__get_builtin_spec, fullname, NULL); + } + /* we do some of FrozenImporter's work */ + else if ((p = find_frozen(fullname)) != NULL) { + return _PyObject_CallMethodIdObjArgs(interp->importlib, + &PyId__get_frozen_spec, fullname, + PyBool_FromLong(p->size < 0), NULL); + } + } + + if (!PyArg_Parse(fullname, "z#:find_spec", &cname, &cnamelen)) return 0; + /* before checking within the zip store, + * we can check cname here to skip any values + * of cname that we know for sure won't be there, + * because worst case is two failed stat calls here + */ + + newpathsize = sizeof(basepath) + cnamelen + sizeof("/__init__.pyc") + 1; + newpath = _gc(malloc(newpathsize)); + bzero(newpath, newpathsize); + /* performing a memccpy sequence equivalent to: + * snprintf(newpath, newpathsize, "/zip/.python/%s.pyc", cname); */ + memccpy(newpath, basepath, '\0', newpathsize); + memccpy(newpath + sizeof(basepath) - 1, cname, '\0', + newpathsize - sizeof(basepath)); + 
memccpy(newpath + sizeof(basepath) + cnamelen - 1, ".pyc", '\0', + newpathsize - (sizeof(basepath) + cnamelen)); + + /* if cname part of newpath has '.' (e.g. encodings.utf_8) convert them to '/' + */ + for (i = sizeof(basepath); i < sizeof(basepath) + cnamelen - 1; i++) { + if (newpath[i] == '.') newpath[i] = '/'; + } + + if (stat(newpath, &stinfo)) { + memccpy(newpath + sizeof(basepath) + cnamelen - 1, "/__init__.pyc", '\0', + newpathsize); + is_package = 1; + } + + /* if is_package is 0, that means the above stat call succeeded */ + if (!is_package || !stat(newpath, &stinfo)) { + newpathlen = strlen(newpath); + loader = SFLObject_new(NULL, NULL, NULL); + origin = PyUnicode_FromStringAndSize(newpath, newpathlen); + if (loader == NULL || origin == NULL) { + return NULL; + } + loader->name = strdup(cname); + loader->namelen = cnamelen; + loader->path = strdup(newpath); + loader->pathlen = newpathlen; + loader->present = 1; /* this means we avoid atleast one stat call (the one + in SFLObject_get_code) */ + return _PyObject_CallMethodIdObjArgs(interp->importlib, + &PyId__get_zipstore_spec, fullname, + (PyObject *)loader, (PyObject *)origin, + PyBool_FromLong(is_package), NULL); + } + + Py_RETURN_NONE; +} + +static PyMethodDef CosmoImporter_methods[] = { + {"find_spec", (PyCFunction)CosmoImporter_find_spec, + METH_FASTCALL | METH_KEYWORDS | METH_CLASS, PyDoc_STR("")}, + {NULL, NULL} // sentinel +}; + +static PyTypeObject CosmoImporterType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "_imp.CosmoImporter", /* tp_name */ + .tp_dealloc = 0, + .tp_basicsize = sizeof(CosmoImporter), /* tp_basicsize */ + .tp_flags = Py_TPFLAGS_DEFAULT, /* tp_flags */ + .tp_methods = CosmoImporter_methods, /* tp_methods */ + .tp_init = 0, + .tp_new = 0, +}; + PyDoc_STRVAR(doc_imp, "(Extremely) low-level import machinery bits as used by importlib and imp."); @@ -2673,7 +2810,12 @@ PyInit_imp(void) if (PyType_Ready(&SourcelessFileLoaderType) < 0) goto failure; - 
PyModule_AddObject(m, "SourcelessFileLoader", (PyObject*)&SourcelessFileLoaderType); + if (PyModule_AddObject(m, "SourcelessFileLoader", (PyObject*)&SourcelessFileLoaderType) < 0) + goto failure; + if (PyType_Ready(&CosmoImporterType) < 0) + goto failure; + if (PyModule_AddObject(m, "CosmoImporter", (PyObject*)&CosmoImporterType) < 0) + goto failure; return m; failure: From 49074cfdbf497ee27489a8699d65734d7fd65d3d Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Fri, 17 Jun 2022 01:11:05 +0530 Subject: [PATCH 02/13] update test_cmd_line to ignore CosmoImporter test_cmd_line did not consider that sys.meta_path would have a different starting entry, and so what would happen in the isolated mode test is that CosmoImporter would pick up uuid.pyc from the zip store, and claim no errors in both cases. Now the test removes CosmoImporter from sys.meta_path and so the expected behavior occurs again. --- third_party/python/Lib/test/test_cmd_line.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/third_party/python/Lib/test/test_cmd_line.py b/third_party/python/Lib/test/test_cmd_line.py index 71901fab12f..83c0e5d8720 100644 --- a/third_party/python/Lib/test/test_cmd_line.py +++ b/third_party/python/Lib/test/test_cmd_line.py @@ -483,6 +483,10 @@ def test_isolatedmode(self): with open(fake, "w") as f: f.write("raise RuntimeError('isolated mode test')\n") with open(main, "w") as f: + f.write("import sys\n") + f.write("import _imp\n") + f.write("if sys.meta_path[0] == _imp.CosmoImporter:\n") + f.write("\tsys.meta_path.pop(0)\n") f.write("import uuid\n") f.write("print('ok')\n") self.assertRaises(subprocess.CalledProcessError, From 318e32cbc058e31ec509668c0f56853c9926f8de Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Fri, 17 Jun 2022 01:37:46 +0530 Subject: [PATCH 03/13] make test_atexit pass in MODE= --- third_party/python/Python/import.c | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 0f9036a3600..626c5d04451 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -2816,7 +2816,10 @@ PyInit_imp(void) goto failure; if (PyModule_AddObject(m, "CosmoImporter", (PyObject*)&CosmoImporterType) < 0) goto failure; - + /* test_atexit segfaults without the below incref, but + * I'm not supposed to Py_INCREF a static PyTypeObject, so + * what's going on? */ + Py_INCREF(&CosmoImporterType); return m; failure: Py_XDECREF(m); From c48009a3843f593a2432d726081e9329fef6004d Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Sat, 18 Jun 2022 02:30:35 +0530 Subject: [PATCH 04/13] use lookup table + binary search for module names - malloc+qsort table entries for frozen modules - malloc+qsort table entries for builtin modules - static+qsort table entries for zip cdir modules used in startup - use bsearch instead of linear search for module names - use zip cdir table entries to avoid wasting stat calls - SourcelessFileLoader can't rely on stat when loading code --- third_party/python/Python/import.c | 176 ++++++++++++++++++++++++----- third_party/python/python.mk | 1 + 2 files changed, 150 insertions(+), 27 deletions(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 626c5d04451..0b5fbd30711 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -4,11 +4,13 @@ │ Python 3 │ │ https://docs.python.org/3/license.html │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/alg/alg.h" #include "libc/bits/bits.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/calls/struct/stat.macros.h" #include "libc/fmt/conv.h" +#include "libc/macros.internal.h" #include "libc/runtime/gc.h" #include "libc/x/x.h" #include "libc/sysv/consts/o.h" @@ -93,10 +95,70 
@@ module _imp #include "third_party/python/Python/clinic/import.inc" /* Initialize things */ +typedef struct { + const char *name; + union { + struct _inittab *tab; + struct _frozen *frz; + struct { + int inside_zip; /* if true, this module is in ZIP store */ + int is_package; /* if true, this module is loaded via __init__.pyc */ + /* these are single-bit values, so we can have some more + * caching-related values here to avoid syscalls + */ + }; + }; +} initentry; + +typedef struct { + size_t n; + initentry *entries; +} Lookup; + +static Lookup Builtins_Lookup = {.n = 0, .entries = NULL}; +static Lookup Frozens_Lookup = {.n = 0, .entries = NULL}; +static initentry ZipEntries[] = { + /* the below imports are attempted during startup */ + {"_bootlocale", {.inside_zip = 1, .is_package = 0}}, + {"_collections_abc", {.inside_zip = 1, .is_package = 0}}, + {"_sitebuiltins", {.inside_zip = 1, .is_package = 0}}, + {"_weakrefset", {.inside_zip = 1, .is_package = 0}}, + {"abc", {.inside_zip = 1, .is_package = 0}}, + {"codecs", {.inside_zip = 1, .is_package = 0}}, + {"encodings", {.inside_zip = 1, .is_package = 1}}, + {"encodings.aliases", {.inside_zip = 1, .is_package = 0}}, + {"encodings.latin_1", {.inside_zip = 1, .is_package = 0}}, + {"encodings.utf_8", {.inside_zip = 1, .is_package = 0}}, + {"genericpath", {.inside_zip = 1, .is_package = 0}}, + {"io", {.inside_zip = 1, .is_package = 0}}, + {"io._WindowsConsoleIO", {.inside_zip = 0, .is_package = 0}}, + {"ntpath", {.inside_zip = 1, .is_package = 0}}, + {"os", {.inside_zip = 1, .is_package = 0}}, + {"posix._getfullpathname", {.inside_zip = 0, .is_package = 0}}, + {"posix._isdir", {.inside_zip = 0, .is_package = 0}}, + {"posixpath", {.inside_zip = 1, .is_package = 0}}, + {"readline", {.inside_zip = 0, .is_package = 0}}, + {"site", {.inside_zip = 1, .is_package = 0}}, + {"sitecustomize", {.inside_zip = 0, .is_package = 0}}, + {"stat", {.inside_zip = 1, .is_package = 0}}, + {"usercustomize", {.inside_zip = 0, .is_package = 
0}}, +}; +static Lookup ZipCdir_Lookup = { + .n = ARRAYLEN(ZipEntries), + .entries = ZipEntries, +}; + +static int cmp_initentry(const void *_x, const void *_y) { + const initentry *x = _x; + const initentry *y = _y; + return strcmp(x->name, y->name); +} void _PyImport_Init(void) { + size_t i, n; + PyInterpreterState *interp = PyThreadState_Get()->interp; initstr = PyUnicode_InternFromString("__init__"); if (initstr == NULL) @@ -104,6 +166,25 @@ _PyImport_Init(void) interp->builtins_copy = PyDict_Copy(interp->builtins); if (interp->builtins_copy == NULL) Py_FatalError("Can't backup builtins dict"); + + for(n=0; PyImport_Inittab[n].name; n++); + Builtins_Lookup.n = n; + Builtins_Lookup.entries = malloc(sizeof(initentry) * n); + for(i=0; i < n; i++) { + Builtins_Lookup.entries[i].name = PyImport_Inittab[i].name; + Builtins_Lookup.entries[i].tab = &(PyImport_Inittab[i]); + } + qsort(Builtins_Lookup.entries, Builtins_Lookup.n, sizeof(initentry), cmp_initentry); + + for(n=0; PyImport_FrozenModules[n].name; n++); + Frozens_Lookup.n = n; + Frozens_Lookup.entries = malloc(sizeof(initentry) * n); + for(i=0; itab->initfunc == NULL) return -1; + return 1; } return 0; } @@ -1095,6 +1187,8 @@ _imp_create_builtin(PyObject *module, PyObject *spec) /*[clinic end generated code: output=ace7ff22271e6f39 input=37f966f890384e47]*/ { struct _inittab *p; + initentry key; + initentry *res; PyObject *name; char *namestr; PyObject *mod; @@ -1104,12 +1198,14 @@ _imp_create_builtin(PyObject *module, PyObject *spec) return NULL; } + /* all builtins are static */ + /* mod = _PyImport_FindExtensionObject(name, name); if (mod || PyErr_Occurred()) { Py_DECREF(name); Py_XINCREF(mod); return mod; - } + } */ namestr = PyUnicode_AsUTF8(name); if (namestr == NULL) { @@ -1117,7 +1213,12 @@ _imp_create_builtin(PyObject *module, PyObject *spec) return NULL; } - for (p = PyImport_Inittab; p->name != NULL; p++) { + key.name = namestr; + key.tab = NULL; + res = bsearch(&key, Builtins_Lookup.entries, 
Builtins_Lookup.n, sizeof(initentry), cmp_initentry); + + if (res != NULL) { + p = res->tab; PyModuleDef *def; if (_PyUnicode_EqualToASCIIString(name, p->name)) { if (p->initfunc == NULL) { @@ -1161,18 +1262,20 @@ _imp_create_builtin(PyObject *module, PyObject *spec) static const struct _frozen * find_frozen(PyObject *name) { - const struct _frozen *p; + initentry key; + initentry *res; if (name == NULL) return NULL; - for (p = PyImport_FrozenModules; ; p++) { - if (p->name == NULL) - return NULL; - if (_PyUnicode_EqualToASCIIString(name, p->name)) - break; + key.name = PyUnicode_AsUTF8(name); + key.frz = NULL; + + res = bsearch(&key, Frozens_Lookup.entries, Frozens_Lookup.n, sizeof(initentry), cmp_initentry); + if (res && res->frz->name != NULL) { + return res->frz; } - return p; + return NULL; } static PyObject * @@ -2295,7 +2398,7 @@ static PyObject *_imp_source_from_cache(PyObject *module, PyObject *arg) { if (!PyArg_Parse(PyOS_FSPath(arg), "z#:source_from_cache", &path, &pathlen)) return NULL; if (!path || !endswith(path, ".pyc")) { - + PyErr_Format(PyExc_ValueError, "%s does not end in .pyc", path); return NULL; } path[pathlen - 1] = '\0'; @@ -2444,20 +2547,24 @@ static PyObject *SFLObject_get_code(SourcelessFileLoader *self, PyObject *arg) { "reached EOF while reading timestamp in %s\n", name); goto exit; } - if (headerlen < 12 || stinfo.st_size <= headerlen) { + if (headerlen < 12) { PyErr_Format(PyExc_ImportError, "reached EOF while size of source in %s\n", name); goto exit; } // return _compile_bytecode(bytes_data, name=fullname, bytecode_path=path) + /* since we don't have the stat call sometimes, we need + * a different way to load the remaining bytes into file + */ + /* rawlen = stinfo.st_size - headerlen; rawbuf = PyMem_RawMalloc(rawlen); if (rawlen != fread(rawbuf, sizeof(char), rawlen, fp)) { PyErr_Format(PyExc_ImportError, "reached EOF while size of source in %s\n", name); goto exit; - } - if (!(res = PyMarshal_ReadObjectFromString(rawbuf, 
rawlen))) goto exit; + }*/ + if (!(res = PyMarshal_ReadObjectFromFile(fp))) goto exit; exit: if (rawbuf) PyMem_RawFree(rawbuf); if (fp) fclose(fp); @@ -2603,7 +2710,7 @@ static PyMethodDef SFLObject_methods[] = { }; static PyTypeObject SourcelessFileLoaderType = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_imp.SourcelessFileLoader", /*tp_name*/ .tp_basicsize = sizeof(SourcelessFileLoader), /*tp_basicsize*/ .tp_dealloc = (destructor)SFLObject_dealloc, /*tp_dealloc*/ @@ -2653,7 +2760,12 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, SourcelessFileLoader *loader = NULL; PyObject *origin = NULL; - long is_package = 0; + int inside_zip = 0; + int is_package = 0; + int is_available = 0; + + initentry key; + initentry *res; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwargs, &_parser, &fullname, &path, &target)) { @@ -2685,6 +2797,15 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, * of cname that we know for sure won't be there, * because worst case is two failed stat calls here */ + key.name = cname; + key.tab = NULL; + res = bsearch(&key, ZipCdir_Lookup.entries, ZipCdir_Lookup.n, sizeof(initentry), cmp_initentry); + if (res) { + if (!res->inside_zip) + Py_RETURN_NONE; + inside_zip = res->inside_zip; + is_package = res->is_package; + } newpathsize = sizeof(basepath) + cnamelen + sizeof("/__init__.pyc") + 1; newpath = _gc(malloc(newpathsize)); @@ -2703,14 +2824,15 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, if (newpath[i] == '.') newpath[i] = '/'; } - if (stat(newpath, &stinfo)) { + is_available = inside_zip || !stat(newpath, &stinfo); + if (is_package || !is_available) { memccpy(newpath + sizeof(basepath) + cnamelen - 1, "/__init__.pyc", '\0', newpathsize); is_package = 1; } - /* if is_package is 0, that means the above stat call succeeded */ - if (!is_package || !stat(newpath, &stinfo)) { + is_available = is_available || !stat(newpath, 
&stinfo); + if (is_available) { newpathlen = strlen(newpath); loader = SFLObject_new(NULL, NULL, NULL); origin = PyUnicode_FromStringAndSize(newpath, newpathlen); @@ -2739,7 +2861,7 @@ static PyMethodDef CosmoImporter_methods[] = { }; static PyTypeObject CosmoImporterType = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) + PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_imp.CosmoImporter", /* tp_name */ .tp_dealloc = 0, .tp_basicsize = sizeof(CosmoImporter), /* tp_basicsize */ diff --git a/third_party/python/python.mk b/third_party/python/python.mk index 757e054fbf9..4e1adf81b76 100644 --- a/third_party/python/python.mk +++ b/third_party/python/python.mk @@ -441,6 +441,7 @@ THIRD_PARTY_PYTHON_STAGE1_A_SRCS = \ THIRD_PARTY_PYTHON_STAGE1_A_DIRECTDEPS = \ DSP_SCALE \ + LIBC_ALG \ LIBC_BITS \ LIBC_CALLS \ LIBC_FMT \ From 113aa300f2ba3c245247a1dc6e1c033d8f94fc72 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Sat, 18 Jun 2022 02:38:46 +0530 Subject: [PATCH 05/13] move some globals back to locals - the identifiers were used only in 1 function each - stinfo risks getting mixed in between different imports - stinfo was carrying over values from previous calls --- third_party/python/Python/import.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 0b5fbd30711..2a02c68d247 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -84,9 +84,6 @@ static PyObject *extensions = NULL; static PyObject *initstr = NULL; -static struct stat stinfo; -_Py_IDENTIFIER(__builtins__); -_Py_IDENTIFIER(_load_module_shim); /*[clinic input] module _imp [clinic start generated code]*/ @@ -897,6 +894,7 @@ PyImport_ExecCodeModuleWithPathnames(const char *name, PyObject *co, const char *pathname, const char *cpathname) { + struct stat stinfo; PyObject *m = NULL; PyObject *nameobj, *pathobj = NULL, *cpathobj = NULL, *external= 
NULL; @@ -2160,6 +2158,7 @@ dump buffer /*[clinic end generated code: output=da39a3ee5e6b4b0d input=524ce2e021e4eba6]*/ static PyObject *_check_path_mode(const char *path, uint32_t mode) { + struct stat stinfo; if (stat(path, &stinfo)) Py_RETURN_FALSE; if ((stinfo.st_mode & S_IFMT) == mode) Py_RETURN_TRUE; Py_RETURN_FALSE; @@ -2195,6 +2194,7 @@ static PyObject *_imp_path_isdir(PyObject *module, PyObject *arg) { PyDoc_STRVAR(_imp_path_isdir_doc, "check if path is dir"); static PyObject *_imp_calc_mode(PyObject *module, PyObject *arg) { + struct stat stinfo; Py_ssize_t n; const char *path; if (!PyArg_Parse(arg, "s#:_calc_mode", &path, &n)) return 0; @@ -2204,6 +2204,7 @@ static PyObject *_imp_calc_mode(PyObject *module, PyObject *arg) { PyDoc_STRVAR(_imp_calc_mode_doc, "return stat.st_mode of path"); static PyObject *_imp_calc_mtime_and_size(PyObject *module, PyObject *arg) { + struct stat stinfo; Py_ssize_t n; const char *path; if (!PyArg_Parse(arg, "z#:_calc_mtime_and_size", &path, &n)) return 0; @@ -2393,6 +2394,7 @@ static PyObject *_imp_cache_from_source(PyObject *module, PyObject **args, Py_ss PyDoc_STRVAR(_imp_cache_from_source_doc, "given a .py filename, return .pyc"); static PyObject *_imp_source_from_cache(PyObject *module, PyObject *arg) { + struct stat stinfo; char *path = NULL; Py_ssize_t pathlen = 0; if (!PyArg_Parse(PyOS_FSPath(arg), "z#:source_from_cache", &path, &pathlen)) @@ -2508,6 +2510,7 @@ static PyObject *SFLObject_get_source(SourcelessFileLoader *self, } static PyObject *SFLObject_get_code(SourcelessFileLoader *self, PyObject *arg) { + struct stat stinfo; char bytecode_header[12] = {0}; int32_t magic = 0; size_t headerlen; @@ -2572,6 +2575,7 @@ static PyObject *SFLObject_get_code(SourcelessFileLoader *self, PyObject *arg) { } static PyObject *SFLObject_get_data(SourcelessFileLoader *self, PyObject *arg) { + struct stat stinfo; char *name = NULL; char *data = NULL; size_t datalen = 0; @@ -2603,6 +2607,7 @@ static PyObject 
*SFLObject_get_filename(SourcelessFileLoader *self, static PyObject *SFLObject_load_module(SourcelessFileLoader *self, PyObject **args, Py_ssize_t nargs) { + _Py_IDENTIFIER(_load_module_shim); char *name = NULL; PyObject *bootstrap = NULL; PyObject *fullname = NULL; @@ -2635,6 +2640,7 @@ static PyObject *SFLObject_create_module(SourcelessFileLoader *self, static PyObject *SFLObject_exec_module(SourcelessFileLoader *self, PyObject *arg) { + _Py_IDENTIFIER(__builtins__); PyObject *module = NULL; PyObject *name = NULL; PyObject *code = NULL; @@ -2766,6 +2772,7 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, initentry key; initentry *res; + struct stat stinfo; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwargs, &_parser, &fullname, &path, &target)) { From f7e9253e85923cb7a032a6e939ff179c45c48384 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Sat, 18 Jun 2022 03:23:11 +0530 Subject: [PATCH 06/13] make test_atexit pass in MODE= _testcapi.run_in_subinterp makes a questionable PyImport_Cleanup(), which we work around by having a separate cleanup function for the lookup tables. also added a separate initialization function for the lookup tables for symmetry. some commented out code in the previous commit messed with GC, it was made visible again. 
--- third_party/python/Include/import.h | 2 + third_party/python/Python/finalize.c | 1 + third_party/python/Python/import.c | 67 +++++++++++++------------ third_party/python/Python/pylifecycle.c | 1 + 4 files changed, 39 insertions(+), 32 deletions(-) diff --git a/third_party/python/Include/import.h b/third_party/python/Include/import.h index 5e844880aa8..a2509e1a5a8 100644 --- a/third_party/python/Include/import.h +++ b/third_party/python/Include/import.h @@ -73,6 +73,8 @@ PyObject * PyImport_GetImporter(PyObject *path); PyObject * PyImport_Import(PyObject *name); PyObject * PyImport_ReloadModule(PyObject *m); void PyImport_Cleanup(void); +void _PyImportLookupTables_Init(void); +void _PyImportLookupTables_Cleanup(void); #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 int PyImport_ImportFrozenModuleObject( PyObject *name diff --git a/third_party/python/Python/finalize.c b/third_party/python/Python/finalize.c index b68b910c077..81d45ad9948 100644 --- a/third_party/python/Python/finalize.c +++ b/third_party/python/Python/finalize.c @@ -96,6 +96,7 @@ Py_FinalizeEx(void) #endif /* Destroy all modules */ PyImport_Cleanup(); + _PyImportLookupTables_Cleanup(); /* Flush sys.stdout and sys.stderr (again, in case more was printed) */ if (_Py_FlushStdFiles() < 0) { diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 2a02c68d247..edddd51f0f8 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -154,8 +154,6 @@ static int cmp_initentry(const void *_x, const void *_y) { void _PyImport_Init(void) { - size_t i, n; - PyInterpreterState *interp = PyThreadState_Get()->interp; initstr = PyUnicode_InternFromString("__init__"); if (initstr == NULL) @@ -163,25 +161,42 @@ _PyImport_Init(void) interp->builtins_copy = PyDict_Copy(interp->builtins); if (interp->builtins_copy == NULL) Py_FatalError("Can't backup builtins dict"); +} - for(n=0; PyImport_Inittab[n].name; n++); - Builtins_Lookup.n = n; - 
Builtins_Lookup.entries = malloc(sizeof(initentry) * n); - for(i=0; i < n; i++) { - Builtins_Lookup.entries[i].name = PyImport_Inittab[i].name; - Builtins_Lookup.entries[i].tab = &(PyImport_Inittab[i]); +void _PyImportLookupTables_Init(void) { + size_t i, n; + if (Builtins_Lookup.entries == NULL) { + for(n=0; PyImport_Inittab[n].name; n++); + Builtins_Lookup.n = n; + Builtins_Lookup.entries = malloc(sizeof(initentry) * n); + for(i=0; i < n; i++) { + Builtins_Lookup.entries[i].name = PyImport_Inittab[i].name; + Builtins_Lookup.entries[i].tab = &(PyImport_Inittab[i]); + } + qsort(Builtins_Lookup.entries, Builtins_Lookup.n, sizeof(initentry), cmp_initentry); + } + if (Frozens_Lookup.entries == NULL) { + for(n=0; PyImport_FrozenModules[n].name; n++); + Frozens_Lookup.n = n; + Frozens_Lookup.entries = malloc(sizeof(initentry) * n); + for(i=0; i Date: Sat, 18 Jun 2022 04:17:22 +0530 Subject: [PATCH 07/13] removed BUILTIN_MODULE_NAMES from _bootstrap.py --- third_party/python/Lib/importlib/_bootstrap.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/third_party/python/Lib/importlib/_bootstrap.py b/third_party/python/Lib/importlib/_bootstrap.py index b894e35a915..7c484fb8c24 100644 --- a/third_party/python/Lib/importlib/_bootstrap.py +++ b/third_party/python/Lib/importlib/_bootstrap.py @@ -226,7 +226,7 @@ def _verbose_message(message, *args, verbosity=1): def _requires_builtin(fxn): """Decorator to verify the named module is built-in.""" def _requires_builtin_wrapper(self, fullname): - if fullname not in BUILTIN_MODULE_NAMES: + if not _imp.is_builtin(fullname): raise ImportError('{!r} is not a built-in module'.format(fullname), name=fullname) return fxn(self, fullname) @@ -631,7 +631,7 @@ def module_repr(module): def find_spec(cls, fullname, path=None, target=None): if path is not None: return None - if fullname in BUILTIN_MODULE_NAMES: + if _imp.is_builtin(fullname): return spec_from_loader(fullname, cls, origin='built-in') else: return 
None @@ -651,7 +651,7 @@ def find_module(cls, fullname, path=None): @classmethod def create_module(self, spec): """Create a built-in module""" - if spec.name not in BUILTIN_MODULE_NAMES: + if not _imp.is_builtin(spec.name): raise ImportError('{!r} is not a built-in module'.format(spec.name), name=spec.name) return _call_with_frames_removed(_imp.create_builtin, spec) @@ -871,7 +871,7 @@ def _find_and_load_unlocked(name, import_): msg = (_ERR_MSG + '; {!r} is not a package').format(name, parent) raise ModuleNotFoundError(msg, name=name) from None spec = _find_spec(name, path) - if spec is None and name in BUILTIN_MODULE_NAMES: + if spec is None and _imp.is_builtin(name): # If this module is a C extension, the interpreter # expects it to be a shared object located in path, # and returns spec is None because it was not found. @@ -1057,16 +1057,15 @@ def _setup(sys_module, _imp_module): modules, those two modules must be explicitly passed in. """ - global _imp, sys, BUILTIN_MODULE_NAMES + global _imp, sys _imp = _imp_module sys = sys_module - BUILTIN_MODULE_NAMES = frozenset(sys.builtin_module_names) # Set up the spec for existing builtin/frozen modules. module_type = type(sys) for name, module in sys.modules.items(): if isinstance(module, module_type): - if name in BUILTIN_MODULE_NAMES: + if _imp.is_builtin(name): loader = BuiltinImporter elif _imp.is_frozen(name): loader = FrozenImporter From a4c925e7cd874d88e1e70b7510b697a7fd17c0b2 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Mon, 20 Jun 2022 02:18:24 +0530 Subject: [PATCH 08/13] shave off stat syscalls by rearranging imports the hardest import method format to speed up is the "from x import y", because I have no idea whether y is a module or not. If y is a module, "from x import y" behaves similarly to the format "import x.y; y = x.y", which means I can bypass some checks by doing the import and equality separately. 
If y is a function, "from x import y" is not equivalent to the above format, and the import will trigger a ModuleNotFoundError. Unfortunately I can't check for or propagate such errors during the default APE startup without adding a whole bunch of checks, so I'd prefer to avoid these kinds of imports unless absolutely necessary. in this PR I avoid three such function imports in io.py and ntpath.py. The fix is to import the module wholesale (which happens underneath anyway) and then just check for the attribute - "import x; y = x.y", essentially similar to how it would be if y was a module. --- third_party/python/Lib/io.py | 6 ++---- third_party/python/Lib/ntpath.py | 15 ++++++++------- third_party/python/Python/import.c | 6 ++---- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/third_party/python/Lib/io.py b/third_party/python/Lib/io.py index 968ee5073df..fbd0fe7d294 100644 --- a/third_party/python/Lib/io.py +++ b/third_party/python/Lib/io.py @@ -92,8 +92,6 @@ class TextIOBase(_io._TextIOBase, IOBase): del klass try: - from _io import _WindowsConsoleIO -except ImportError: + RawIOBase.register(_io._WindowsConsoleIO) +except AttributeError: pass -else: - RawIOBase.register(_WindowsConsoleIO) diff --git a/third_party/python/Lib/ntpath.py b/third_party/python/Lib/ntpath.py index 406f0441aaa..bcf83f09c7a 100644 --- a/third_party/python/Lib/ntpath.py +++ b/third_party/python/Lib/ntpath.py @@ -20,6 +20,7 @@ import os import sys import stat +import posix import genericpath from genericpath import * @@ -276,8 +277,8 @@ def lexists(path): # common case: drive letter roots. The alternative which uses GetVolumePathName # fails if the drive letter is the result of a SUBST. 
try: - from posix import _getvolumepathname -except ImportError: + _getvolumepathname = posix._getvolumepathname +except AttributeError: _getvolumepathname = None def ismount(path): """Test whether a path is a mount point (a drive root, the root of a @@ -534,9 +535,9 @@ def _abspath_fallback(path): # Return an absolute path. try: - from posix import _getfullpathname + _getfullpathname = posix._getfullpathname -except ImportError: # not running on Windows - mock up something sensible +except AttributeError: # not running on Windows - mock up something sensible abspath = _abspath_fallback else: # use native Windows method on Windows @@ -664,7 +665,7 @@ def commonpath(paths): # GetFinalPathNameByHandle is available starting with Windows 6.0. # Windows XP and non-Windows OS'es will mock _getfinalpathname. if sys.getwindowsversion()[:2] >= (6, 0): - from posix import _getfinalpathname + _getfinalpathname = posix._getfinalpathname else: raise ImportError except (AttributeError, ImportError, OSError): @@ -681,7 +682,7 @@ def _getfinalpathname(f): # attribute to tell whether or not the path is a directory. # This is overkill on Windows - just pass the path to GetFileAttributes # and check the attribute from there. - from posix import _isdir as isdir -except ImportError: + isdir = posix._isdir +except AttributeError: # Use genericpath.isdir as imported above. 
pass diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index edddd51f0f8..8519083e3d1 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -128,11 +128,8 @@ static initentry ZipEntries[] = { {"encodings.utf_8", {.inside_zip = 1, .is_package = 0}}, {"genericpath", {.inside_zip = 1, .is_package = 0}}, {"io", {.inside_zip = 1, .is_package = 0}}, - {"io._WindowsConsoleIO", {.inside_zip = 0, .is_package = 0}}, {"ntpath", {.inside_zip = 1, .is_package = 0}}, {"os", {.inside_zip = 1, .is_package = 0}}, - {"posix._getfullpathname", {.inside_zip = 0, .is_package = 0}}, - {"posix._isdir", {.inside_zip = 0, .is_package = 0}}, {"posixpath", {.inside_zip = 1, .is_package = 0}}, {"readline", {.inside_zip = 0, .is_package = 0}}, {"site", {.inside_zip = 1, .is_package = 0}}, @@ -2811,8 +2808,9 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, key.tab = NULL; res = bsearch(&key, ZipCdir_Lookup.entries, ZipCdir_Lookup.n, sizeof(initentry), cmp_initentry); if (res) { - if (!res->inside_zip) + if (!res->inside_zip) { Py_RETURN_NONE; + } inside_zip = res->inside_zip; is_package = res->is_package; } From 216a21bcf68c2f112147ca6f01584a7ecd8fc229 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Mon, 20 Jun 2022 15:07:25 +0530 Subject: [PATCH 09/13] remove ZipCdir_Lookup table and entries it only provides a marginal boost over the existing setup, plus reasoning about package existence became difficult (what if the user deletes one of the files that have an entry in the table?) 
--- third_party/python/Python/import.c | 49 +----------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 8519083e3d1..2d7abab2c38 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -97,13 +97,6 @@ typedef struct { union { struct _inittab *tab; struct _frozen *frz; - struct { - int inside_zip; /* if true, this module is in ZIP store */ - int is_package; /* if true, this module is loaded via __init__.pyc */ - /* these are single-bit values, so we can have some more - * caching-related values here to avoid syscalls - */ - }; }; } initentry; @@ -114,33 +107,6 @@ typedef struct { static Lookup Builtins_Lookup = {.n = 0, .entries = NULL}; static Lookup Frozens_Lookup = {.n = 0, .entries = NULL}; -static initentry ZipEntries[] = { - /* the below imports are attempted during startup */ - {"_bootlocale", {.inside_zip = 1, .is_package = 0}}, - {"_collections_abc", {.inside_zip = 1, .is_package = 0}}, - {"_sitebuiltins", {.inside_zip = 1, .is_package = 0}}, - {"_weakrefset", {.inside_zip = 1, .is_package = 0}}, - {"abc", {.inside_zip = 1, .is_package = 0}}, - {"codecs", {.inside_zip = 1, .is_package = 0}}, - {"encodings", {.inside_zip = 1, .is_package = 1}}, - {"encodings.aliases", {.inside_zip = 1, .is_package = 0}}, - {"encodings.latin_1", {.inside_zip = 1, .is_package = 0}}, - {"encodings.utf_8", {.inside_zip = 1, .is_package = 0}}, - {"genericpath", {.inside_zip = 1, .is_package = 0}}, - {"io", {.inside_zip = 1, .is_package = 0}}, - {"ntpath", {.inside_zip = 1, .is_package = 0}}, - {"os", {.inside_zip = 1, .is_package = 0}}, - {"posixpath", {.inside_zip = 1, .is_package = 0}}, - {"readline", {.inside_zip = 0, .is_package = 0}}, - {"site", {.inside_zip = 1, .is_package = 0}}, - {"sitecustomize", {.inside_zip = 0, .is_package = 0}}, - {"stat", {.inside_zip = 1, .is_package = 0}}, - {"usercustomize", {.inside_zip = 0, 
.is_package = 0}}, -}; -static Lookup ZipCdir_Lookup = { - .n = ARRAYLEN(ZipEntries), - .entries = ZipEntries, -}; static int cmp_initentry(const void *_x, const void *_y) { const initentry *x = _x; @@ -182,7 +148,6 @@ void _PyImportLookupTables_Init(void) { } qsort(Frozens_Lookup.entries, Frozens_Lookup.n, sizeof(initentry), cmp_initentry); } - qsort(ZipCdir_Lookup.entries, ZipCdir_Lookup.n, sizeof(initentry), cmp_initentry); } void _PyImportLookupTables_Cleanup(void) { @@ -2770,8 +2735,6 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, int is_package = 0; int is_available = 0; - initentry key; - initentry *res; struct stat stinfo; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwargs, &_parser, &fullname, @@ -2804,16 +2767,6 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, * of cname that we know for sure won't be there, * because worst case is two failed stat calls here */ - key.name = cname; - key.tab = NULL; - res = bsearch(&key, ZipCdir_Lookup.entries, ZipCdir_Lookup.n, sizeof(initentry), cmp_initentry); - if (res) { - if (!res->inside_zip) { - Py_RETURN_NONE; - } - inside_zip = res->inside_zip; - is_package = res->is_package; - } newpathsize = sizeof(basepath) + cnamelen + sizeof("/__init__.pyc") + 1; newpath = _gc(malloc(newpathsize)); @@ -2836,10 +2789,10 @@ static PyObject *CosmoImporter_find_spec(PyObject *cls, PyObject **args, if (is_package || !is_available) { memccpy(newpath + sizeof(basepath) + cnamelen - 1, "/__init__.pyc", '\0', newpathsize); + is_available = is_available || !stat(newpath, &stinfo); is_package = 1; } - is_available = is_available || !stat(newpath, &stinfo); if (is_available) { newpathlen = strlen(newpath); loader = SFLObject_new(NULL, NULL, NULL); From 1beede466d077d0242ad458856fde4b936e89f07 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Mon, 20 Jun 2022 15:09:03 +0530 Subject: [PATCH 10/13] comment out some lines in _bootstrap - 
CosmoImporter calls the other two Importers' checks internally - ExtensionFileLoader will not be called because the build is static --- third_party/python/Lib/importlib/_bootstrap.py | 4 ++-- third_party/python/Lib/importlib/_bootstrap_external.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/python/Lib/importlib/_bootstrap.py b/third_party/python/Lib/importlib/_bootstrap.py index 7c484fb8c24..9ae8032dbbf 100644 --- a/third_party/python/Lib/importlib/_bootstrap.py +++ b/third_party/python/Lib/importlib/_bootstrap.py @@ -1087,8 +1087,8 @@ def _install(sys_module, _imp_module): _setup(sys_module, _imp_module) sys.meta_path.append(_imp_module.CosmoImporter) - sys.meta_path.append(BuiltinImporter) - sys.meta_path.append(FrozenImporter) + # sys.meta_path.append(BuiltinImporter) + # sys.meta_path.append(FrozenImporter) global _bootstrap_external import _frozen_importlib_external diff --git a/third_party/python/Lib/importlib/_bootstrap_external.py b/third_party/python/Lib/importlib/_bootstrap_external.py index 43fca240a4d..54713dd829a 100644 --- a/third_party/python/Lib/importlib/_bootstrap_external.py +++ b/third_party/python/Lib/importlib/_bootstrap_external.py @@ -1346,10 +1346,10 @@ def _get_supported_file_loaders(): Each item is a tuple (loader, suffixes). 
""" - extensions = ExtensionFileLoader, _imp.extension_suffixes() + # extensions = ExtensionFileLoader, _imp.extension_suffixes() source = SourceFileLoader, SOURCE_SUFFIXES bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES - return [bytecode, extensions, source] + return [bytecode, source] #, extensions] def _setup(_bootstrap_module): """Setup the path-based importers for importlib by importing needed From 7d5dbe292535fc68e118e5f57cb5b42a797542a9 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Tue, 21 Jun 2022 08:07:44 +0530 Subject: [PATCH 11/13] use _write_atomic even if file exists and add an error message if failure --- third_party/python/Python/import.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 2d7abab2c38..4b31a13cd84 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -2214,17 +2214,24 @@ static PyObject *_imp_write_atomic(PyObject *module, PyObject **args, Py_buffer data = {NULL, NULL}; uint32_t mode = 0666; int fd; + int failure = 0; if (!_PyArg_ParseStack(args, nargs, "s#y*|I:_write_atomic", &path, &n, &data, &mode)) - return 0; + goto end; mode &= 0666; - if ((fd = open(path, O_EXCL | O_CREAT | O_WRONLY, mode)) == -1 || - write(fd, data.buf, data.len) == -1) { - PyErr_Format(PyExc_OSError, ""); - if (data.obj) PyBuffer_Release(&data); - return 0; + if ((fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, mode)) == -1) { + failure = 1; + PyErr_Format(PyExc_OSError, "failed to create file: %s\n", path); + goto end; + } + if (write(fd, data.buf, data.len) == -1) { + failure = 1; + PyErr_Format(PyExc_OSError, "failed to write to file: %s\n", path); + goto end; } +end: if (data.obj) PyBuffer_Release(&data); + if (failure) return 0; Py_RETURN_NONE; } PyDoc_STRVAR(_imp_write_atomic_doc, "atomic write to a file"); From f242ef4a0913b63e32ae3e33045a3efd02202d29 Mon Sep 
17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Tue, 21 Jun 2022 08:26:19 +0530 Subject: [PATCH 12/13] revert file loader order back to source-first In #248 we changed _bootstrap_external to check for bytecode first, and then check for source files when loading in the import process. Now that we have our own meta_path entry, this change can be undone. --- third_party/python/Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/python/Lib/importlib/_bootstrap_external.py b/third_party/python/Lib/importlib/_bootstrap_external.py index 54713dd829a..f8ab20b5448 100644 --- a/third_party/python/Lib/importlib/_bootstrap_external.py +++ b/third_party/python/Lib/importlib/_bootstrap_external.py @@ -1349,7 +1349,7 @@ def _get_supported_file_loaders(): # extensions = ExtensionFileLoader, _imp.extension_suffixes() source = SourceFileLoader, SOURCE_SUFFIXES bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES - return [bytecode, source] #, extensions] + return [source, bytecode] #, extensions] def _setup(_bootstrap_module): """Setup the path-based importers for importlib by importing needed From e44528ee86abf053b59de71905a54169e2824764 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Tue, 21 Jun 2022 09:45:37 +0530 Subject: [PATCH 13/13] skip unnecessary strcmp in create_builtin --- third_party/python/Python/import.c | 52 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/third_party/python/Python/import.c b/third_party/python/Python/import.c index 4b31a13cd84..c89a13109fb 100644 --- a/third_party/python/Python/import.c +++ b/third_party/python/Python/import.c @@ -95,8 +95,8 @@ module _imp typedef struct { const char *name; union { - struct _inittab *tab; - struct _frozen *frz; + const struct _inittab *tab; + const struct _frozen *frz; }; } initentry; @@ -1183,36 +1183,34 @@ _imp_create_builtin(PyObject 
*module, PyObject *spec) if (res != NULL) { p = res->tab; PyModuleDef *def; - if (_PyUnicode_EqualToASCIIString(name, p->name)) { - if (p->initfunc == NULL) { - /* Cannot re-init internal module ("sys" or "builtins") */ - mod = PyImport_AddModule(namestr); - Py_DECREF(name); - return mod; - } - mod = (*p->initfunc)(); - if (mod == NULL) { + if (p->initfunc == NULL) { + /* Cannot re-init internal module ("sys" or "builtins") */ + mod = PyImport_AddModule(namestr); + Py_DECREF(name); + return mod; + } + mod = (*p->initfunc)(); + if (mod == NULL) { + Py_DECREF(name); + return NULL; + } + if (PyObject_TypeCheck(mod, &PyModuleDef_Type)) { + Py_DECREF(name); + return PyModule_FromDefAndSpec((PyModuleDef*)mod, spec); + } else { + /* Remember pointer to module init function. */ + def = PyModule_GetDef(mod); + if (def == NULL) { Py_DECREF(name); return NULL; } - if (PyObject_TypeCheck(mod, &PyModuleDef_Type)) { + def->m_base.m_init = p->initfunc; + if (_PyImport_FixupExtensionObject(mod, name, name) < 0) { Py_DECREF(name); - return PyModule_FromDefAndSpec((PyModuleDef*)mod, spec); - } else { - /* Remember pointer to module init function. */ - def = PyModule_GetDef(mod); - if (def == NULL) { - Py_DECREF(name); - return NULL; - } - def->m_base.m_init = p->initfunc; - if (_PyImport_FixupExtensionObject(mod, name, name) < 0) { - Py_DECREF(name); - return NULL; - } - Py_DECREF(name); - return mod; + return NULL; } + Py_DECREF(name); + return mod; } } Py_DECREF(name);