From 032c0c20bac10c4bf6103939322e7fec568daccc Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Mon, 8 Jul 2024 15:46:13 +0200
Subject: [PATCH 1/2] update flake.lock

---
 flake.lock | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/flake.lock b/flake.lock
index 5c3fb5a..ba99434 100644
--- a/flake.lock
+++ b/flake.lock
@@ -5,11 +5,11 @@
         "systems": "systems"
       },
       "locked": {
-        "lastModified": 1705309234,
-        "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
         "owner": "numtide",
         "repo": "flake-utils",
-        "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
         "type": "github"
       },
       "original": {
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1708407374,
-        "narHash": "sha256-EECzarm+uqnNDCwaGg/ppXCO11qibZ1iigORShkkDf0=",
+        "lastModified": 1720368505,
+        "narHash": "sha256-5r0pInVo5d6Enti0YwUSQK4TebITypB42bWy5su3MrQ=",
         "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "f33dd27a47ebdf11dc8a5eb05e7c8fbdaf89e73f",
+        "rev": "ab82a9612aa45284d4adf69ee81871a389669a9e",
         "type": "github"
       },
       "original": {

From 3b76483b82025e4c0c944cd3cd023bab3a097f8b Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Mon, 8 Jul 2024 15:46:26 +0200
Subject: [PATCH 2/2] fix python >= 3.12 unicode handling

PyUnicode_AsUnicode, PyUnicode_FromUnicode and PyUnicode_GetSize were
removed in Python 3.12, so the shim now converts through wchar_t using
PyUnicode_AsWideChar / PyUnicode_FromWideChar and PyUnicode_GET_LENGTH.

hscpython_PyUnicode_AsUnicode returns a malloc()ed, null-terminated copy.
The buffer is sized in wchar_t units (the length PyUnicode_AsWideChar
reports is an element count, not a byte count) and NULL is returned on
failure.  fromUnicode raises an ErrorCall on NULL and frees the buffer
once its contents have been copied into a Haskell String.
---
 cbits/hscpython-shim.c        | 25 ++++++++++++++++-----
 cbits/hscpython-shim.h        |  4 ++--
 lib/CPython/Types/Unicode.chs | 45 +++++++++++++++-----------------
 3 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/cbits/hscpython-shim.c b/cbits/hscpython-shim.c
index 194d98d..f351792 100644
--- a/cbits/hscpython-shim.c
+++ b/cbits/hscpython-shim.c
@@ -150,13 +150,26 @@ PyObject *hscpython_Py_False()
 
 /* Unicode */
 Py_ssize_t hscpython_PyUnicode_GetSize(PyObject *o)
-{ return PyUnicode_GetSize(o); }
-
-Py_UNICODE *hscpython_PyUnicode_AsUnicode(PyObject *o)
-{ return PyUnicode_AsUnicode(o); }
+{ return PyUnicode_GET_LENGTH(o); }
+
+/* Returns a malloc()ed, null-terminated wide-character copy of o that the
+ * caller must free(), or NULL (with a Python exception set) on failure. */
+wchar_t *hscpython_PyUnicode_AsUnicode(PyObject *o)
+{ wchar_t *wstr;
+  Py_ssize_t actual_size;
+  /* With a NULL buffer this yields the required length in wchar_t units,
+   * including the terminating null, or -1 on error. */
+  actual_size = PyUnicode_AsWideChar(o, NULL, 0);
+  if (actual_size < 0) return NULL;
+  wstr = malloc((size_t)actual_size * sizeof(wchar_t));
+  if (wstr == NULL) { PyErr_NoMemory(); return NULL; }
+  if (PyUnicode_AsWideChar(o, wstr, actual_size) < 0)
+  { free(wstr); return NULL; }
+  return wstr;
+}
 
-PyObject *hscpython_PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
-{ return PyUnicode_FromUnicode(u, size); }
+PyObject *hscpython_PyUnicode_FromUnicode(const wchar_t *u, Py_ssize_t size)
+{ return PyUnicode_FromWideChar(u, size); }
 
 PyObject *hscpython_PyUnicode_FromEncodedObject(PyObject *o, const char *enc, const char *err)
 { return PyUnicode_FromEncodedObject(o, enc, err); }
diff --git a/cbits/hscpython-shim.h b/cbits/hscpython-shim.h
index c6989cc..13c7160 100644
--- a/cbits/hscpython-shim.h
+++ b/cbits/hscpython-shim.h
@@ -54,8 +54,8 @@ PyObject *hscpython_Py_False();
 
 /* Unicode */
 Py_ssize_t hscpython_PyUnicode_GetSize(PyObject *);
-Py_UNICODE *hscpython_PyUnicode_AsUnicode(PyObject *);
-PyObject *hscpython_PyUnicode_FromUnicode(Py_UNICODE *, Py_ssize_t);
+wchar_t *hscpython_PyUnicode_AsUnicode(PyObject *);
+PyObject *hscpython_PyUnicode_FromUnicode(const wchar_t *, Py_ssize_t);
 PyObject *hscpython_PyUnicode_FromEncodedObject(PyObject *, const char *, const char *);
 PyObject *hscpython_PyUnicode_AsEncodedString(PyObject *, const char *, const char *);
 PyObject *hscpython_PyUnicode_FromObject(PyObject *);
diff --git a/lib/CPython/Types/Unicode.chs b/lib/CPython/Types/Unicode.chs
index c103a5e..1de6757 100644
--- a/lib/CPython/Types/Unicode.chs
+++ b/lib/CPython/Types/Unicode.chs
@@ -51,13 +51,10 @@ module CPython.Types.Unicode
 import Prelude hiding (length)
 import Control.Exception (ErrorCall (..), throwIO)
 import qualified Data.Text as T
-
-#ifdef Py_UNICODE_WIDE
-import Data.Char (chr, ord)
-#else
-import qualified Data.Text.Foreign as TF
-#endif
-
+import Foreign.C.String
+import Foreign.C.Types
+import Foreign.Marshal.Alloc (free)
+import Foreign.Ptr (nullPtr)
 import CPython.Internal
 import CPython.Types.Bytes (Bytes)
 
@@ -86,34 +83,30 @@ withErrors errors = withCString $ case errors of
 {# fun pure unsafe hscpython_PyUnicode_Type as unicodeType
 	{} -> `Type' peekStaticObject* #}
 
+
 toUnicode :: T.Text -> IO Unicode
-toUnicode str = withBuffer toPython >>= stealObject where
-	toPython ptr len = let
-		len' = fromIntegral len
-		ptr' = castPtr ptr
-		in {# call hscpython_PyUnicode_FromUnicode #} ptr' len'
-#ifdef Py_UNICODE_WIDE
-	ords = map (fromIntegral . ord) (T.unpack str) :: [CUInt]
-	withBuffer = withArrayLen ords . flip
-#else
-	withBuffer = TF.useAsPtr str
-#endif
+toUnicode txt = withCWStringLen (T.unpack txt) $ \(wstr, sz) -> do
+  obj <- {# call hscpython_PyUnicode_FromUnicode #} (castPtr wstr) (fromIntegral sz)
+  stealObject obj
+
 
 fromUnicode :: Unicode -> IO T.Text
 fromUnicode obj = withObject obj $ \ptr -> do
-	buffer <- {# call hscpython_PyUnicode_AsUnicode #} ptr
-	size <- {# call hscpython_PyUnicode_GetSize #} ptr
-#ifdef Py_UNICODE_WIDE
-	raw <- peekArray (fromIntegral size) buffer
-	return . T.pack $ map (chr . fromIntegral) raw
-#else
-	TF.fromPtr (castPtr buffer) (fromIntegral size)
-#endif
+  wstrPtr <- {# call hscpython_PyUnicode_AsUnicode #} ptr
+  if wstrPtr == nullPtr
+    then throwIO (ErrorCall "fromUnicode: PyUnicode_AsWideChar failed")
+    else do
+      -- peekCWString copies the characters out, so the buffer malloc()ed
+      -- by the C shim can be released immediately afterwards.
+      wstr <- peekCWString . castPtr $ wstrPtr
+      free wstrPtr
+      return . T.pack $ wstr
 
 {# fun hscpython_PyUnicode_GetSize as length
 	{ withObject* `Unicode'
 	} -> `Integer' checkIntReturn* #}
 
+
 -- | Coerce an encoded object /obj/ to an Unicode object.
 --
 -- 'Bytes' and other char buffer compatible objects are decoded according to