From 433f064d0b005ea064e3cfbd5ed4984b249e5d92 Mon Sep 17 00:00:00 2001 From: Torsten Kilias Date: Thu, 1 Jun 2023 11:27:18 +0200 Subject: [PATCH] #793: Added support for Pandas 2 pyarrow dtype columns for emitting data from Python UDFs (#357) * Update pandas to 2.0.2 in compatible template flavors * #796: Fixed silent data corruption when emitting dataframes with float16 dtype columns from Python UDFs * Replace asscalar with item in test dataframe.py, because asscalar was removed * Added handleEmitPyFloat to also support float pyarrow dtype columns with NAN and object-dtype columns with float * Refactored and split Pandas Tests * Added tests for more dtypes to Pandas Tests --- .../python/python3/python_ext_dataframe.cc | 275 +++++++++---- .../flavor_base/testconfig | 2 +- .../packages/python3_pip_packages | 5 +- .../flavor_base/testconfig | 2 +- .../language_deps/packages/conda_packages | 4 +- .../flavor_base/testconfig | 2 +- .../language_deps/packages/conda_packages | 4 +- .../flavor_base/testconfig | 2 +- .../test/{python3 => pandas}/all/dataframe.py | 44 +-- .../tests/test/pandas/all/emit_dtypes.py | 369 ++++++++++++++++++ .../tests/test/pandas/pandas2/pandas.py | 361 +++++++++++++++++ 11 files changed, 957 insertions(+), 113 deletions(-) rename test_container/tests/test/{python3 => pandas}/all/dataframe.py (96%) create mode 100755 test_container/tests/test/pandas/all/emit_dtypes.py create mode 100755 test_container/tests/test/pandas/pandas2/pandas.py diff --git a/exaudfclient/base/python/python3/python_ext_dataframe.cc b/exaudfclient/base/python/python3/python_ext_dataframe.cc index 4cd94e378..36f0fffdb 100644 --- a/exaudfclient/base/python/python3/python_ext_dataframe.cc +++ b/exaudfclient/base/python/python3/python_ext_dataframe.cc @@ -1,4 +1,5 @@ #include "exaudflib/swig/swig_common.h" +#include "debug_message.h" #include @@ -16,6 +17,7 @@ #include #include #include +#include extern "C" { @@ -25,9 +27,11 @@ extern "C" { #define PY_DATE (NPY_USERDEF+4) 
#define PY_NONETYPE (NPY_USERDEF+5) #define PY_BOOL (NPY_USERDEF+6) +#define PY_FLOAT (NPY_USERDEF+7) +#define PY_TIMESTAMP (NPY_USERDEF+8) + +std::map pandasDTypeStrToNumpyCTypeMap { -std::map typeMap { - {"bool", NPY_BOOL}, {"int", NPY_INT32}, {"intc", NPY_INT32}, {"intp", NPY_INT64}, @@ -35,22 +39,71 @@ std::map typeMap { {"int16", NPY_INT16}, {"int32", NPY_INT32}, {"int64", NPY_INT64}, + {"int8[pyarrow]", NPY_OBJECT}, + {"int16[pyarrow]", NPY_OBJECT}, + {"int32[pyarrow]", NPY_OBJECT}, + {"int64[pyarrow]", NPY_OBJECT}, + {"uint8", NPY_UINT8}, {"uint16", NPY_UINT16}, {"uint32", NPY_UINT32}, {"uint64", NPY_UINT64}, - {"float", NPY_FLOAT64}, - {"float16", NPY_FLOAT16}, + {"uint8[pyarrow]", NPY_OBJECT}, + {"uint16[pyarrow]", NPY_OBJECT}, + {"uint32[pyarrow]", NPY_OBJECT}, + {"uint64[pyarrow]", NPY_OBJECT}, + {"float32", NPY_FLOAT32}, {"float64", NPY_FLOAT64}, + {"float", NPY_FLOAT32}, + {"double", NPY_FLOAT64}, + {"float32[pyarrow]", NPY_OBJECT}, + {"float64[pyarrow]", NPY_OBJECT}, + {"float[pyarrow]", NPY_OBJECT}, + {"double[pyarrow]", NPY_OBJECT}, + // We let numpy convert float16 to float (32 bit) and then use the C conversion from float to double, because a proper conversion from float16 to double in C is very complicated. 
+ {"float16", NPY_FLOAT32}, + {"halffloat", NPY_FLOAT32}, + {"float16[pyarrow]", NPY_OBJECT}, + {"halffloat[pyarrow]", NPY_OBJECT}, + + {"string[pyarrow]", NPY_OBJECT}, + {"string[python]", NPY_OBJECT}, + {"string", NPY_OBJECT}, + + {"bool[pyarrow]", NPY_OBJECT}, + {"boolean", NPY_OBJECT}, + {"bool", NPY_BOOL}, + + {"datetime64[ns]", NPY_DATETIME}, + {"timestamp[ns, tz=UTC][pyarrow]", NPY_OBJECT}, + + {"object", NPY_OBJECT}, + + {"py_NAType", PY_NONETYPE}, + {"py_NoneType", PY_NONETYPE}, + {"py_bool", PY_BOOL}, {"py_int", PY_INT}, + {"py_float", PY_FLOAT}, {"py_decimal.Decimal", PY_DECIMAL}, {"py_str", PY_STR}, {"py_datetime.date", PY_DATE}, - {"datetime64[ns]", NPY_DATETIME}, - {"object", NPY_OBJECT}, - {"py_NoneType", PY_NONETYPE}, - {"py_bool", PY_BOOL} + {"py_Timestamp", PY_TIMESTAMP} +}; + +std::map numpyCTypeToNumpyDTypeStrMap { + {NPY_BOOL, "bool"}, + {NPY_INT8, "int8"}, + {NPY_INT16, "int16"}, + {NPY_INT32, "int32"}, + {NPY_INT64, "int64"}, + {NPY_UINT8, "uint8"}, + {NPY_UINT16, "uint16"}, + {NPY_UINT32, "uint32"}, + {NPY_UINT64, "uint64"}, + // We don't list NPY_FLOAT16 here, because we let numpy convert float16 to float (32 bit) and then use the C conversion from float to double, because a proper conversion from float16 to double in C is very complicated. 
+ {NPY_FLOAT32, "float32"}, + {NPY_FLOAT64, "float64"}, }; std::map emitTypeMap { @@ -390,17 +443,39 @@ inline void getColumnSetMethods(std::vector& colInfo, std::vector>& colTypes){ PyPtr numpyTypeIter(PyObject_GetIter(numpyTypes)); for (PyPtr numpyType(PyIter_Next(numpyTypeIter.get())); numpyType.get(); numpyType.reset(PyIter_Next(numpyTypeIter.get()))) { const char *typeName = PyUnicode_AsUTF8(numpyType.get()); - std::map::iterator it = typeMap.find(typeName); - if (it != typeMap.end()) { + std::map::iterator it = pandasDTypeStrToNumpyCTypeMap.find(typeName); + if (it != pandasDTypeStrToNumpyCTypeMap.end()) { colTypes.push_back(*it); + } else if(isArrowDecimal128(typeName)){ + colTypes.push_back({typeName, NPY_OBJECT}); } else if(isNumpyDatetime64(typeName)){ std::stringstream ss; ss << "F-UDF-CL-SL-PYTHON-1138: emit: unsupported datetime type: " << typeName << @@ -417,15 +492,23 @@ inline void getColumnTypeInfo(PyObject *numpyTypes, std::vectorob_type; const char* p = type->tp_name; PyObject* objectsRepresentation = PyObject_Repr(obj); const char* s = PyUnicode_AsUTF8(objectsRepresentation); - throw std::runtime_error(error_code+": "+std::string(s)+" "+std::string(p)); + DBG_STREAM_MSG(std::cerr, error_code << ": " << std::string(s) << " " << std::string(p)); +} + +inline const PyPtr& getPandasNA(){ + static const PyPtr pdNA(PyObject_GetAttrString(pandasModule.get(), "NA")); + return pdNA; +} + +inline bool isNoneOrNA(PyObject* pyVal){ + const PyPtr& pdNA = getPandasNA(); + return pyVal == Py_None || pyVal == pdNA.get(); } -#endif inline void getColumnArrays(PyObject *colArray, int numCols, int numRows, std::vector>& colTypes, std::vector& columnArrays){ @@ -448,14 +531,16 @@ inline void getColumnArrays(PyObject *colArray, int numCols, int numRows, throw std::runtime_error(ss.str().c_str()); } - // Get type of first non-None item in list + // Get type of first non-None, non-NA item in list PyObject *pyVal = PyList_GetItem(pyList.get(), 0); 
checkPyObjectIsNull(pyVal,"F-UDF-CL-SL-PYTHON-1126"); std::string pyTypeName(std::string("py_") + Py_TYPE(pyVal)->tp_name); - for (int r = 1; r < numRows && pyVal == Py_None; r++) { + bool pyValIsNoneOrNA = isNoneOrNA(pyVal); + for (int r = 1; r < numRows && pyValIsNoneOrNA; r++) { pyVal = PyList_GetItem(pyList.get(), r); + pyValIsNoneOrNA = isNoneOrNA(pyVal); checkPyObjectIsNull(pyVal,"F-UDF-CL-SL-PYTHON-1127"); - if (pyVal != Py_None) { + if (!pyValIsNoneOrNA) { pyTypeName = std::string("py_") + Py_TYPE(pyVal)->tp_name; break; } @@ -463,8 +548,8 @@ inline void getColumnArrays(PyObject *colArray, int numCols, int numRows, // Update type in column type info std::map::iterator userDefIt; - userDefIt = typeMap.find(pyTypeName); - if (userDefIt != typeMap.end()) { + userDefIt = pandasDTypeStrToNumpyCTypeMap.find(pyTypeName); + if (userDefIt != pandasDTypeStrToNumpyCTypeMap.end()) { colTypes[c] = *userDefIt; } else { // TODO accept pandas.Timestamp values @@ -493,9 +578,9 @@ inline void getColumnArrays(PyObject *colArray, int numCols, int numRows, PyPtr asType (PyObject_GetAttrString(array.get(), "astype")); PyPtr keywordArgs(PyDict_New()); PyDict_SetItemString(keywordArgs.get(), "copy", Py_False); - PyPtr funcArgs(Py_BuildValue("(s)", colTypes[c].first.c_str())); + const std::string numpyDTypeStr = numpyCTypeToNumpyDTypeStrMap.at(colTypes[c].second); + PyPtr funcArgs(Py_BuildValue("(s)", numpyDTypeStr.c_str())); PyPtr scalarArr(PyObject_Call(asType.get(), funcArgs.get(), keywordArgs.get())); - columnArrays.push_back(std::move(scalarArr)); } } @@ -724,43 +809,6 @@ inline void handleEmitNpyFloat32( pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyValue.get(), NULL)); } -inline void handleEmitNpyFloat16( - int c, int r, - std::vector& columnArrays, - std::vector>& pyColSetMethods, - std::vector& colInfo, - std::vector>& colTypes, - PyObject *resultHandler, - PyPtr& pyValue, - PyPtr& pyResult, - 
PyPtr& pySetNullMethodName){ - double value = static_cast(*((uint16_t*)(PyArray_GETPTR1((PyArrayObject*)(columnArrays[c].get()), r)))); - if (npy_isnan(value)) { - pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); - return; - } - switch (colInfo[c].type) { - case SWIGVMContainers::INT64: - case SWIGVMContainers::INT32: - pyValue.reset(PyLong_FromLong(static_cast(value))); - break; - case SWIGVMContainers::NUMERIC: - pyValue.reset(PyUnicode_FromString(std::to_string(value).c_str())); - break; - case SWIGVMContainers::DOUBLE: - pyValue.reset(PyFloat_FromDouble(value)); - break; - default: - { - std::stringstream ss; - ss << "F-UDF-CL-SL-PYTHON-1064: emit column " << c << " of type " << emitTypeMap.at(colInfo[c].type) << " but data given have type " << colTypes[c].first; - throw std::runtime_error(ss.str().c_str()); - } - } - checkPyPtrIsNull(pyValue); - pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyValue.get(), NULL)); -} - inline void handleEmitNpyBool( int c, int r, std::vector& columnArrays, @@ -810,7 +858,7 @@ inline void handleEmitPyBool( PyPtr& pySetNullMethodName){ PyPtr pyBool(PyList_GetItem(columnArrays[c].get(), r)); checkPyPtrIsNull(pyBool); - if (pyBool.get() == Py_None) { + if (isNoneOrNA(pyBool.get())) { pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); return; } @@ -851,7 +899,7 @@ inline void handleEmitPyInt( PyPtr& pySetNullMethodName){ PyPtr pyInt(PyList_GetItem(columnArrays[c].get(), r)); checkPyPtrIsNull(pyInt); - if (pyInt.get() == Py_None) { + if (isNoneOrNA(pyInt.get())) { pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); return; } @@ -882,6 +930,54 @@ inline void handleEmitPyInt( pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, 
pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyValue.get(), NULL)); } +inline void handleEmitPyFloat( + int c, int r, + std::vector& columnArrays, + std::vector>& pyColSetMethods, + std::vector& colInfo, + std::vector>& colTypes, + PyObject *resultHandler, + PyPtr& pyValue, + PyPtr& pyResult, + PyPtr& pySetNullMethodName){ + PyPtr pyFloat(PyList_GetItem(columnArrays[c].get(), r)); + checkPyPtrIsNull(pyFloat); + if (isNoneOrNA(pyFloat.get())) { + pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); + return; + } + + switch (colInfo[c].type) { + case SWIGVMContainers::INT64: + case SWIGVMContainers::INT32: + { + double value = PyFloat_AsDouble(pyFloat.get()); + if (value < 0 && PyErr_Occurred()) + throw std::runtime_error("F-UDF-CL-SL-PYTHON-1139: emit() PY_FLOAT: PyFloat_AsDouble error"); + if (npy_isnan(value)) { + pyResult.reset( + PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); + return; + } + pyValue.reset(PyLong_FromLong(static_cast(value))); + break; + } + case SWIGVMContainers::NUMERIC: + pyValue.reset(PyObject_Str(pyFloat.get())); + break; + case SWIGVMContainers::DOUBLE: + pyValue.reset(pyFloat.release()); + break; + default: + { + std::stringstream ss; + ss << "F-UDF-CL-SL-PYTHON-1140: emit column " << c << " of type " << emitTypeMap.at(colInfo[c].type) << " but data given have type " << colTypes[c].first; + throw std::runtime_error(ss.str().c_str()); + } + } + pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyValue.get(), NULL)); +} + inline void handleEmitPyDecimal( int c, int r, std::vector& columnArrays, @@ -896,7 +992,7 @@ inline void handleEmitPyDecimal( PyPtr& pyFloatMethodName ){ PyPtr pyDecimal(PyList_GetItem(columnArrays[c].get(), r)); - if (pyDecimal.get() == Py_None) { + if (isNoneOrNA(pyDecimal.get())) { 
pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); return; } @@ -940,7 +1036,7 @@ inline void handleEmitPyStr( PyPtr& pySetNullMethodName){ PyPtr pyString(PyList_GetItem(columnArrays[c].get(), r)); - if (pyString.get() == Py_None) { + if (isNoneOrNA(pyString.get())) { pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); return; } @@ -980,7 +1076,7 @@ inline void handleEmitPyDate( PyPtr& pySetNullMethodName, PyPtr& pyIsoformatMethodName){ PyPtr pyDate(PyList_GetItem(columnArrays[c].get(), r)); - if (pyDate.get() == Py_None) { + if (isNoneOrNA(pyDate.get())) { pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); return; } @@ -1000,6 +1096,42 @@ inline void handleEmitPyDate( } } } +inline void handleEmitPyTimestamp( + int c, int r, + std::vector& columnArrays, + std::vector>& pyColSetMethods, + std::vector& colInfo, + std::vector>& colTypes, + PyObject *resultHandler, + PyPtr& pyValue, + PyPtr& pyResult, + PyPtr& pySetNullMethodName){ + PyPtr pyTimestamp(PyList_GetItem(columnArrays[c].get(), r)); + if (isNoneOrNA(pyTimestamp.get())) { + pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pySetNullMethodName.get(), pyColSetMethods[c].first.get(), NULL)); + return; + } + + switch (colInfo[c].type) { + case SWIGVMContainers::TIMESTAMP: + { + // We call here pandas.Timestamp.tz_localize(None), because we need to remove the timezone from the timestamp. + // Exasol doesn't support timezones, and if we don't remove the timezone, pandas.Timestamp.isoformat will add + // it to the generated string. 
+ pyTimestamp.reset(PyObject_CallMethod(pyTimestamp.get(), "tz_localize", "z", NULL)); + PyPtr pyIsoDatetime(PyObject_CallMethod(pyTimestamp.get(), "isoformat", "s", " ")); + pyResult.reset(PyObject_CallMethodObjArgs( + resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyIsoDatetime.get(), NULL)); + break; + } + default: + { + std::stringstream ss; + ss << "F-UDF-CL-SL-PYTHON-1141: emit column " << c << " of type " << emitTypeMap.at(colInfo[c].type) << " but data given have type " << colTypes[c].first; + throw std::runtime_error(ss.str().c_str()); + } + } +} inline void handleEmitNpyDateTime( @@ -1146,12 +1278,6 @@ void emit(PyObject *resultHandler, std::vector& colInfo, PyObject *d handleEmitNpyFloat32(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, pySetNullMethodName); break; } - case NPY_FLOAT16: - { - handleEmitNpyFloat16(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, pySetNullMethodName); - break; - } - case NPY_BOOL: { handleEmitNpyBool(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, pySetNullMethodName); @@ -1167,6 +1293,11 @@ void emit(PyObject *resultHandler, std::vector& colInfo, PyObject *d handleEmitPyInt(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, pySetNullMethodName); break; } + case PY_FLOAT: + { + handleEmitPyFloat(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, pySetNullMethodName); + break; + } case PY_DECIMAL: { handleEmitPyDecimal(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, @@ -1184,6 +1315,12 @@ void emit(PyObject *resultHandler, std::vector& colInfo, PyObject *d pySetNullMethodName, pyIsoformatMethodName); break; } + case PY_TIMESTAMP: + { + handleEmitPyTimestamp(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, + 
pySetNullMethodName); + break; + } case NPY_DATETIME: { handleEmitNpyDateTime(c, r, columnArrays, pyColSetMethods, colInfo, colTypes, resultHandler, pyValue, pyResult, diff --git a/flavors/python-3.7-minimal-EXASOL-6.2.0/flavor_base/testconfig b/flavors/python-3.7-minimal-EXASOL-6.2.0/flavor_base/testconfig index 7c6eefa41..8688dd718 100644 --- a/flavors/python-3.7-minimal-EXASOL-6.2.0/flavor_base/testconfig +++ b/flavors/python-3.7-minimal-EXASOL-6.2.0/flavor_base/testconfig @@ -1,2 +1,2 @@ generic_language_tests=python3 -test_folders=python3/all +test_folders=python3/all pandas/all diff --git a/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/language_deps/packages/python3_pip_packages b/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/language_deps/packages/python3_pip_packages index f0269d53c..827abf566 100644 --- a/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/language_deps/packages/python3_pip_packages +++ b/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/language_deps/packages/python3_pip_packages @@ -1,2 +1,3 @@ -pandas|1.3.4 -numpy|1.21.3 +pandas|2.0.2 +numpy|1.24.3 +pyarrow|12.0.0 diff --git a/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/testconfig b/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/testconfig index 7c6eefa41..d1bdb8113 100644 --- a/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/testconfig +++ b/flavors/python-3.8-minimal-EXASOL-6.2.0/flavor_base/testconfig @@ -1,2 +1,2 @@ generic_language_tests=python3 -test_folders=python3/all +test_folders=python3/all pandas/all pandas/pandas2 diff --git a/flavors/template-Exasol-all-python-3.8-conda/flavor_base/language_deps/packages/conda_packages b/flavors/template-Exasol-all-python-3.8-conda/flavor_base/language_deps/packages/conda_packages index 4ba668b64..465ae1c0a 100644 --- a/flavors/template-Exasol-all-python-3.8-conda/flavor_base/language_deps/packages/conda_packages +++ 
b/flavors/template-Exasol-all-python-3.8-conda/flavor_base/language_deps/packages/conda_packages @@ -1,6 +1,6 @@ python|3.8.13 -numpy|1.22.3 -pandas|1.4.2 +numpy|1.24.3 +pandas|2.0.2 libblas|3.9.0=15_linux64_mkl mamba|1.3.1 ld_impl_linux-64|2.36.1 diff --git a/flavors/template-Exasol-all-python-3.8-conda/flavor_base/testconfig b/flavors/template-Exasol-all-python-3.8-conda/flavor_base/testconfig index 7c6eefa41..d1bdb8113 100644 --- a/flavors/template-Exasol-all-python-3.8-conda/flavor_base/testconfig +++ b/flavors/template-Exasol-all-python-3.8-conda/flavor_base/testconfig @@ -1,2 +1,2 @@ generic_language_tests=python3 -test_folders=python3/all +test_folders=python3/all pandas/all pandas/pandas2 diff --git a/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/language_deps/packages/conda_packages b/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/language_deps/packages/conda_packages index 4ba668b64..465ae1c0a 100644 --- a/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/language_deps/packages/conda_packages +++ b/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/language_deps/packages/conda_packages @@ -1,6 +1,6 @@ python|3.8.13 -numpy|1.22.3 -pandas|1.4.2 +numpy|1.24.3 +pandas|2.0.2 libblas|3.9.0=15_linux64_mkl mamba|1.3.1 ld_impl_linux-64|2.36.1 diff --git a/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/testconfig b/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/testconfig index 7c6eefa41..d1bdb8113 100644 --- a/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/testconfig +++ b/flavors/template-Exasol-all-python-3.8-cuda-conda/flavor_base/testconfig @@ -1,2 +1,2 @@ generic_language_tests=python3 -test_folders=python3/all +test_folders=python3/all pandas/all pandas/pandas2 diff --git a/test_container/tests/test/python3/all/dataframe.py b/test_container/tests/test/pandas/all/dataframe.py similarity index 96% rename from test_container/tests/test/python3/all/dataframe.py 
rename to test_container/tests/test/pandas/all/dataframe.py index bba8d2cf2..58c9fd738 100755 --- a/test_container/tests/test/python3/all/dataframe.py +++ b/test_container/tests/test/pandas/all/dataframe.py @@ -5,10 +5,14 @@ from datetime import datetime from exasol_python_test_framework import udf +from exasol_python_test_framework.exatest.testcase import useData +from exasol_python_test_framework.udf.udf_debug import UdfDebugger +from typing import List, Tuple, Union class PandasDataFrame(udf.TestCase): def setUp(self): + self.maxDiff=None self.query('CREATE SCHEMA FN2', ignore_errors=True) self.query('OPEN SCHEMA FN2', ignore_errors=True) @@ -159,7 +163,7 @@ def test_dataframe_scalar_returns(self): def run(ctx): df = ctx.get_dataframe() - return np.asscalar(df.iloc[0, 0] + df.iloc[0, 1]) + return (df.iloc[0, 0] + df.iloc[0, 1]).item() / ''' % (self.col_defs_str)) self.query(udf_sql) @@ -217,7 +221,7 @@ def test_dataframe_scalar_emits_unique(self): def run(ctx): df = ctx.get_dataframe() - ctx.emit(np.asscalar(df.C0)) + ctx.emit(df.C0.item()) / ''') print(udf_sql) @@ -236,7 +240,7 @@ def test_dataframe_scalar_emits_all_unique(self): def run(ctx): df = ctx.get_dataframe(num_rows="all") - ctx.emit(np.asscalar(df.C0)) + ctx.emit(df.C0.item()) / ''') print(udf_sql) @@ -331,7 +335,7 @@ def test_dataframe_set_returns(self): def run(ctx): df = ctx.get_dataframe(num_rows="all") - return np.asscalar(df.iloc[:, 0].sum()) + return df.iloc[:, 0].sum().item() / ''' % (self.col_defs_str)) print(udf_sql) @@ -477,7 +481,7 @@ def run(ctx): df = ctx.get_dataframe(num_rows=1) if df is None: break - ctx.emit(np.asscalar(df.C0)) + ctx.emit(df.C0.item()) / ''') print(udf_sql) @@ -500,7 +504,7 @@ def run(ctx): if df is None: break for i in range(df.shape[0]): - ctx.emit(np.asscalar(df.iloc[i, 0])) + ctx.emit(df.iloc[i, 0].item()) / ''') print(udf_sql) @@ -901,33 +905,6 @@ def run(ctx): (234,) ], rows) - def test_dataframe_set_emits_double_pyfloat_only_todo(self): - import datetime - 
udf_sql = udf.fixindent(''' - CREATE OR REPLACE PYTHON3 SET SCRIPT foo(sec int) EMITS (ts double) AS - - def run(ctx): - import pandas as pd - import numpy as np - import datetime - - c1=np.empty(shape=(2),dtype=np.object_) - - c1[:]=234.5 - - df=pd.DataFrame({0:c1}) - - ctx.emit(df) - / - ''') - print(udf_sql) - self.query(udf_sql) - select_sql = 'SELECT foo(1)' - print(select_sql) - #TODO implement support - with self.assertRaisesRegex(Exception, 'F-UDF-CL-SL-PYTHON-1056'): - rows = self.query(select_sql) - def test_dataframe_set_emits_double_npfloat32_only(self): import datetime udf_sql = udf.fixindent(''' @@ -1015,7 +992,6 @@ def run(ctx): print(select_sql) rows = self.query(select_sql) - if __name__ == '__main__': udf.main() diff --git a/test_container/tests/test/pandas/all/emit_dtypes.py b/test_container/tests/test/pandas/all/emit_dtypes.py new file mode 100755 index 000000000..774e0bc57 --- /dev/null +++ b/test_container/tests/test/pandas/all/emit_dtypes.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 + +from decimal import Decimal +from datetime import date +from datetime import datetime + +from exasol_python_test_framework import udf +from exasol_python_test_framework.exatest.testcase import useData +from exasol_python_test_framework.udf.udf_debug import UdfDebugger +from typing import List, Tuple, Union + + +class PandasDataFrameEmitDTypes(udf.TestCase): + def setUp(self): + self.maxDiff=None + + self.query(f'CREATE SCHEMA {self.__class__.__name__}', ignore_errors=True) + self.query(f'OPEN SCHEMA {self.__class__.__name__}', ignore_errors=True) + + int_dataframe_value_str = "[[1, 2],[3, 4]]" + int_expected_rows = [(1, 2, None),(3, 4, None)] + int_to_float_expected_rows = [(1.0, 2.0, None),(3.0, 4.0, None)] + + float16_dataframe_value_str = 'np.array([[1.1, 2.1],[3.1, 4.1]], dtype="float16")' + float_dataframe_value_str = "[[1.1, 2.1],[3.1, 4.1]]" + float_expected_rows = [(1.1, 2.1, None),(3.1, 4.1, None)] + + str_dataframe_value_str = "[['a','b'],['c','d']]" 
+ str_expected_rows = [('a','b',None),('c','d',None)] + + bool_dataframe_value_str = "[[True,False],[True,False]]" + bool_expected_rows = [(True,False,None),(True,False,None)] + + decimal_dataframe_value_str = "[[Decimal('1.1'),Decimal('2.1')],[Decimal('3.1'),Decimal('4.1')]]" + decimal_expected_rows = [(Decimal('1.1'),Decimal('2.1'),None),(Decimal('3.1'),Decimal('4.1'),None)] + int_to_decimal_expected_rows = [(Decimal('1'),Decimal('2'),None),(Decimal('3'),Decimal('4'),None)] + + timestamp_dataframe_value_str = '[[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),' \ + +'pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))],' \ + +'[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),' \ + +'pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))]]' + datetime_dataframe_value_str = '[[datetime(2020, 7, 27, 14, 22, 33, 673251),' \ + +'datetime(2020, 7, 27, 14, 22, 33, 673251)],' \ + +'[datetime(2020, 7, 27, 14, 22, 33, 673251),' \ + +'datetime(2020, 7, 27, 14, 22, 33, 673251)]]' + datetime_expected_rows = [(datetime(2020, 7, 27, 14, 22, 33, 673000),datetime(2020, 7, 27, 14, 22, 33, 673000),None), + (datetime(2020, 7, 27, 14, 22, 33, 673000),datetime(2020, 7, 27, 14, 22, 33, 673000),None)] + date_dataframe_value_str = '[[date(2020, 7, 27),' \ + +'date(2020, 7, 27)],' \ + +'[date(2020, 7, 27),' \ + +'date(2020, 7, 27)]]' + date_expected_rows = [(date(2020, 7, 27),date(2020, 7, 27),None), + (date(2020, 7, 27),date(2020, 7, 27),None)] + + mixed_int_dataframe_value_str = "[[1, None],[None, 4]]" + mixed_int_expected_rows = [(1, None, None),(None, 4, None)] + mixed_int_to_float_expected_rows = [(1.0, None, None),(None, 4.0, None)] + + mixed_float16_dataframe_value_str = 'np.array([[1.1, None],[None, 4.1]], dtype="float16")' + mixed_float_dataframe_value_str = "[[1.1, None],[None, 4.1]]" + mixed_float_expected_rows = [(1.1, None, None),(None, 4.1, None)] + + mixed_str_dataframe_value_str = "[['a',None],[None,'d']]" + mixed_str_expected_rows = 
[('a',None,None),(None,'d',None)] + + mixed_bool_dataframe_value_str = "[[True,None],[None,False]]" + mixed_bool_expected_rows = [(True,None,None),(None,False,None)] + mixed_bool_expected_rows_bool_ = [(True, False, None),(False, False, None)] + + mixed_decimal_dataframe_value_str = "[[Decimal('1.1'),None],[None,Decimal('4.1')]]" + mixed_decimal_expected_rows = [(Decimal('1.1'),None,None),(None,Decimal('4.1'),None)] + mixed_int_to_decimal_expected_rows = [(Decimal('1'),None,None),(None,Decimal('4'),None)] + + mixed_timestamp_dataframe_value_str = '[[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),None],' \ + +'[None,pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))]]' + mixed_datetime_dataframe_value_str = '[[datetime(2020, 7, 27, 14, 22, 33, 673251),None],' \ + +'[None,datetime(2020, 7, 27, 14, 22, 33, 673251)]]' + mixed_datetime_expected_rows = [(datetime(2020, 7, 27, 14, 22, 33, 673000),None,None), + (None,datetime(2020, 7, 27, 14, 22, 33, 673000),None)] + mixed_date_dataframe_value_str = '[[date(2020, 7, 27),None],' \ + +'[None,date(2020, 7, 27)]]' + mixed_date_expected_rows = [(date(2020, 7, 27),None,None), + (None,date(2020, 7, 27),None)] + + none_dataframe_value_str = "[[None, None],[None, None]]" + none_expected_rows = [(None, None, None),(None, None, None)] + none_expected_rows_bool_ = [(False, False, None),(False, False, None)] + + nan_dataframe_value_str = "[[np.nan, np.nan],[np.nan, np.nan]]" + nan_expected_rows = [(None, None, None),(None, None, None)] + nan_expected_rows_bool_ = [(True, True, None),(True, True, None)] + + + + types = [ + # Full columns without None or NaN / Int + + ("uint8", "integer", int_dataframe_value_str, int_expected_rows, False), + ("uint16", "integer", int_dataframe_value_str, int_expected_rows, False), + ("uint32", "integer", int_dataframe_value_str, int_expected_rows, False), + ("uint64", "integer", int_dataframe_value_str, int_expected_rows, False), + ("int8", "integer", int_dataframe_value_str, 
int_expected_rows, False), + ("int16", "integer", int_dataframe_value_str, int_expected_rows, False), + ("int32", "integer", int_dataframe_value_str, int_expected_rows, False), + ("int64", "integer", int_dataframe_value_str, int_expected_rows, False), + ("object", "integer", int_dataframe_value_str, int_expected_rows, False), + + # Full columns without None or NaN / Float + + ("float16", "double", float16_dataframe_value_str, float_expected_rows, True), + ("float32", "double", float_dataframe_value_str, float_expected_rows, True), + ("float64", "double", float_dataframe_value_str, float_expected_rows, False), + ("float", "double", float_dataframe_value_str, float_expected_rows, False), + ("double", "double", float_dataframe_value_str, float_expected_rows, False), + ("object", "double", float_dataframe_value_str, float_expected_rows, False), + + # Full columns without None or NaN / Int to Float + + ("uint8", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("uint16", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("uint32", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("uint64", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("int8", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("int16", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("int32", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("int64", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("object", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + + # Full columns without None or NaN / Float to Int + + ("float16", "integer", float16_dataframe_value_str, int_expected_rows, False), + ("float32", "integer", float_dataframe_value_str, int_expected_rows, False), + ("float64", "integer", float_dataframe_value_str, int_expected_rows, False), + ("float", "integer", 
float_dataframe_value_str, int_expected_rows, False), + ("double", "integer", float_dataframe_value_str, int_expected_rows, False), + ("object", "integer", float_dataframe_value_str, int_expected_rows, False), + + # Full columns without None or NaN / Int to Decimal + + ("uint8", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("uint16", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("uint32", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("uint64", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("int8", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("int16", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("int32", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("int64", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("object", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + + # Full columns without None or NaN / Float to Decimal + + ("float16", "DECIMAL(10,5)", float16_dataframe_value_str, decimal_expected_rows, True), + ("float32", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, True), + ("float64", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, True), + ("float", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, True), + ("double", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, True), + ("object", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, True), + + # Full columns without None or NaN / Decimal + + ("object", "DECIMAL(10,5)", decimal_dataframe_value_str, decimal_expected_rows, False), + + # Full columns without None or NaN / String + + ("string", "VARCHAR(2000000)", str_dataframe_value_str, str_expected_rows, False), + ("object", "VARCHAR(2000000)", 
str_dataframe_value_str, str_expected_rows, False), + + # Full columns without None or NaN / Boolean + + ("bool_", "boolean", bool_dataframe_value_str, bool_expected_rows, False), + ("boolean", "boolean", bool_dataframe_value_str, bool_expected_rows, False), + ("object", "boolean", bool_dataframe_value_str, bool_expected_rows, False), + + # Full columns without None or NaN / Date and Time + + ("datetime64[ns]", "timestamp", timestamp_dataframe_value_str, datetime_expected_rows, False), + ("object", "timestamp", timestamp_dataframe_value_str, datetime_expected_rows, False), + ("object", "timestamp", datetime_dataframe_value_str, ".*F-UDF-CL-SL-PYTHON-1056.*unexpected python type: py_datetime.datetime.*", False), + ("object", "timestamp", date_dataframe_value_str, ".*F-UDF-CL-SL-PYTHON-1071: emit column 0 of type TIMESTAMP but data given have type py_datetime.date.*", False), + ("object", "DATE", date_dataframe_value_str, date_expected_rows, False), + + # Mixed columns with values and None / Int + + #(u)int-dtypes don't support None or np.nan + + ("object", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + + # Mixed columns with values and None / Float + + ("float16", "double", mixed_float16_dataframe_value_str, mixed_float_expected_rows, True), + ("float32", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, True), + ("float64", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + ("float", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + ("double", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + ("object", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + + # Mixed columns with values and None / Float to Int + ("float16", "integer", mixed_float16_dataframe_value_str, mixed_int_expected_rows, False), + ("float32", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("float64", 
"integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("float", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("double", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("object", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + + # Mixed columns with values and None / Int to Decimal + + ("object", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + + # Mixed columns with values and None / Float to Decimal + + ("float16", "DECIMAL(10,5)", mixed_float16_dataframe_value_str, mixed_decimal_expected_rows, True), + ("float32", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, True), + ("float64", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, True), + ("float", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, True), + ("double", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, True), + ("object", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, True), + + # Mixed columns with values and None / Decimal + + ("object", "DECIMAL(10,5)", mixed_decimal_dataframe_value_str, mixed_decimal_expected_rows, False), + + # Mixed columns with values and None / String + + ("string", "VARCHAR(2000000)", mixed_str_dataframe_value_str, mixed_str_expected_rows, False), + ("object", "VARCHAR(2000000)", mixed_str_dataframe_value_str, mixed_str_expected_rows, False), + + # Mixed columns with values and None / Boolean + + ("bool_", "boolean", mixed_bool_dataframe_value_str, mixed_bool_expected_rows_bool_, False), + ("boolean", "boolean", mixed_bool_dataframe_value_str, mixed_bool_expected_rows, False), + ("object", "boolean", mixed_bool_dataframe_value_str, mixed_bool_expected_rows, False), + + # Mixed columns with values and None / Data and time + + ("datetime64[ns]", "timestamp", 
mixed_timestamp_dataframe_value_str, mixed_datetime_expected_rows, False), + ("object", "timestamp", mixed_timestamp_dataframe_value_str, mixed_datetime_expected_rows, False), + ("object", "DATE", mixed_date_dataframe_value_str, mixed_date_expected_rows, False), + + # None + + ("object", "integer", none_dataframe_value_str, none_expected_rows, False), + + ("float16", "double", none_dataframe_value_str, none_expected_rows, False), + ("float32", "double", none_dataframe_value_str, none_expected_rows, False), + ("float64", "double", none_dataframe_value_str, none_expected_rows, False), + ("float", "double", none_dataframe_value_str, none_expected_rows, False), + ("double", "double", none_dataframe_value_str, none_expected_rows, False), + ("object", "double", none_dataframe_value_str, none_expected_rows, False), + + ("float16", "integer", none_dataframe_value_str, none_expected_rows, False), + ("float32", "integer", none_dataframe_value_str, none_expected_rows, False), + ("float64", "integer", none_dataframe_value_str, none_expected_rows, False), + ("float", "integer", none_dataframe_value_str, none_expected_rows, False), + ("double", "integer", none_dataframe_value_str, none_expected_rows, False), + + ("float16", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + ("float32", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + ("float64", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + ("float", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + ("double", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + + ("object", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + + ("string", "VARCHAR(2000000)", none_dataframe_value_str, none_expected_rows, False), + ("object", "VARCHAR(2000000)", none_dataframe_value_str, none_expected_rows, False), + + ("bool_", "boolean", none_dataframe_value_str, none_expected_rows_bool_, False), + ("boolean", 
"boolean", none_dataframe_value_str, none_expected_rows, False), + ("object", "boolean", none_dataframe_value_str, none_expected_rows, False), + + ("datetime64[ns]", "timestamp", none_dataframe_value_str, none_expected_rows, False), + ("object", "timestamp", none_dataframe_value_str, none_expected_rows, False), + ("object", "DATE", none_dataframe_value_str, none_expected_rows, False), + + # NaN + + ("object", "integer", nan_dataframe_value_str, nan_expected_rows, False), + + ("float16", "double", nan_dataframe_value_str, nan_expected_rows, False), + ("float32", "double", nan_dataframe_value_str, nan_expected_rows, False), + ("float64", "double", nan_dataframe_value_str, nan_expected_rows, False), + ("float", "double", nan_dataframe_value_str, nan_expected_rows, False), + ("double", "double", nan_dataframe_value_str, nan_expected_rows, False), + ("object", "double", nan_dataframe_value_str, nan_expected_rows, False), + + ("float16", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("float32", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("float64", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("float", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("double", "integer", nan_dataframe_value_str, nan_expected_rows, False), + + ("float16", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + ("float32", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + ("float64", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + ("float", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + ("double", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + + #("object", "DECIMAL(10,5)", nan_dataframe_value_str, None, False), # Fails with VM error: [22018] invalid character value for cast; Value: 'nan' + + ("string", "VARCHAR(2000000)", nan_dataframe_value_str, nan_expected_rows, False), + ("object", "VARCHAR(2000000)", 
nan_dataframe_value_str, ".*PYTHON-1068: emit column 0 of type STRING but data given have type py_float.*", False), + + ("bool_", "boolean", nan_dataframe_value_str, nan_expected_rows_bool_, False), + ("boolean", "boolean", nan_dataframe_value_str, nan_expected_rows, False), + ("object", "boolean", nan_dataframe_value_str, ".*F-UDF-CL-SL-PYTHON-1068: emit column 0 of type BOOLEAN but data given have type py_float.*", False), + + ("datetime64[ns]", "timestamp", nan_dataframe_value_str, nan_expected_rows, False), + ("object", "timestamp", nan_dataframe_value_str, ".*F-UDF-CL-SL-PYTHON-1068: emit column 0 of type TIMESTAMP but data given have type py_float.*", False), + ("object", "DATE", nan_dataframe_value_str, ".*F-UDF-CL-SL-PYTHON-1068: emit column 0 of type DATE but data given have type py_float.*", False), + + ] + + @useData(types) + def test_dtype_emit(self, dtype:str, sql_type:str, dataframe_value_str:str, expected_result:Union[str,List[Tuple]], use_almost_equal:bool): + sql=udf.fixindent(f''' + CREATE OR REPLACE PYTHON3 SET SCRIPT test_dtype_emit(i integer) + EMITS (o1 {sql_type}, o2 {sql_type}, traceback varchar(2000000)) AS + + def run(ctx): + try: + from decimal import Decimal + import pandas as pd + import numpy as np + from datetime import datetime, date + df = pd.DataFrame({dataframe_value_str}, dtype="{dtype}") + df["traceback"]=None + ctx.emit(df) + except: + import traceback + ctx.emit(None,None,traceback.format_exc()) + / + ''') + print(sql) + self.query(sql) + with UdfDebugger(test_case=self): + rows = self.query('''SELECT test_dtype_emit(0)''') + if isinstance(expected_result,str): + self.assertRegex(rows[0][2], expected_result) + else: + if use_almost_equal: + self.assertRowsAlmostEqual(expected_result, rows, places=1) + else: + self.assertRowsEqual(expected_result, rows) + + def isValueAlmostEqual(self, left, right, places): + if isinstance(left, (float, Decimal)) and isinstance(right, (float, Decimal)): + return round(left, places) == 
round(right, places) + else: + return left == right + + def isRowAlmostEqual(self, left, right, places): + if len(left) != len(right): + return False + all_values_almost_equal = all(self.isValueAlmostEqual(lvalue, rvalue, places) + for lvalue, rvalue in zip(left, right)) + return all_values_almost_equal + + def assertRowsAlmostEqual(self, left, right, places): + lrows = [tuple(x) for x in left] + rrows = [tuple(x) for x in right] + if len(lrows) != len(rrows): + raise AssertionError(f'{lrows} and {rrows} have different number of rows.') + all_rows_almost_equal = all(self.isRowAlmostEqual(lrow, rrow, places) for lrow, rrow in zip(lrows, rrows)) + if not all_rows_almost_equal: + raise AssertionError(f'{lrows} and {rrows} are not almost equal.') + +if __name__ == '__main__': + udf.main() + diff --git a/test_container/tests/test/pandas/pandas2/pandas.py b/test_container/tests/test/pandas/pandas2/pandas.py new file mode 100755 index 000000000..802edd422 --- /dev/null +++ b/test_container/tests/test/pandas/pandas2/pandas.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 + +from decimal import Decimal +from datetime import date +from datetime import datetime + +from exasol_python_test_framework import udf +from exasol_python_test_framework.exatest.testcase import useData +from exasol_python_test_framework.udf.udf_debug import UdfDebugger +from typing import List, Tuple, Union + +class Pandas2Test(udf.TestCase): + def setUp(self): + self.query('create schema pandas2test', ignore_errors=True) + self.maxDiff=None + + def test_pandas2_version(self): + sql=udf.fixindent(''' + CREATE OR REPLACE PYTHON3 SET SCRIPT pandas2test.test_pandas2_version(i integer) EMITS (o VARCHAR(100)) AS + + def run(ctx): + import pandas as pd + ctx.emit(pd.__version__) + / + ''') + print(sql) + self.query(sql) + rows = self.query('''SELECT pandas2test.test_pandas2_version(0)''') + version_parts = rows[0][0].split(".") + self.assertEqual("2",version_parts[0]) + + + int_dataframe_value_str = "[[1, 2],[3, 
4]]" + int_expected_rows = [(1, 2, None),(3, 4, None)] + int_to_float_expected_rows = [(1.0, 2.0, None),(3.0, 4.0, None)] + + float16_dataframe_value_str = 'np.array([[1.1, 2.1],[3.1, 4.1]], dtype="float16")' + float_dataframe_value_str = "[[1.1, 2.1],[3.1, 4.1]]" + float_expected_rows = [(1.1, 2.1, None),(3.1, 4.1, None)] + + str_dataframe_value_str = "[['a','b'],['c','d']]" + str_expected_rows = [('a','b',None),('c','d',None)] + + bool_dataframe_value_str = "[[True,False],[True,False]]" + bool_expected_rows = [(True,False,None),(True,False,None)] + + decimal_dataframe_value_str = "[[Decimal('1.1'),Decimal('2.1')],[Decimal('3.1'),Decimal('4.1')]]" + decimal_expected_rows = [(Decimal('1.1'),Decimal('2.1'),None),(Decimal('3.1'),Decimal('4.1'),None)] + int_to_decimal_expected_rows = [(Decimal('1'),Decimal('2'),None),(Decimal('3'),Decimal('4'),None)] + + timestamp_dataframe_value_str = '[[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),' \ + +'pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))],' \ + +'[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),' \ + +'pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))]]' + datetime_dataframe_value_str = '[[datetime(2020, 7, 27, 14, 22, 33, 673251),' \ + +'datetime(2020, 7, 27, 14, 22, 33, 673251)],' \ + +'[datetime(2020, 7, 27, 14, 22, 33, 673251),' \ + +'datetime(2020, 7, 27, 14, 22, 33, 673251)]]' + datetime_expected_rows = [(datetime(2020, 7, 27, 14, 22, 33, 673000),datetime(2020, 7, 27, 14, 22, 33, 673000),None), + (datetime(2020, 7, 27, 14, 22, 33, 673000),datetime(2020, 7, 27, 14, 22, 33, 673000),None)] + date_dataframe_value_str = '[[date(2020, 7, 27),' \ + +'date(2020, 7, 27)],' \ + +'[date(2020, 7, 27),' \ + +'date(2020, 7, 27)]]' + date_expected_rows = [(date(2020, 7, 27),date(2020, 7, 27),None), + (date(2020, 7, 27),date(2020, 7, 27),None)] + + mixed_int_dataframe_value_str = "[[1, None],[None, 4]]" + mixed_int_expected_rows = [(1, None, None),(None, 4, None)] + 
mixed_int_to_float_expected_rows = [(1.0, None, None),(None, 4.0, None)] + + mixed_float16_dataframe_value_str = 'np.array([[1.1, None],[None, 4.1]], dtype="float16")' + mixed_float_dataframe_value_str = "[[1.1, None],[None, 4.1]]" + mixed_float_expected_rows = [(1.1, None, None),(None, 4.1, None)] + + mixed_str_dataframe_value_str = "[['a',None],[None,'d']]" + mixed_str_expected_rows = [('a',None,None),(None,'d',None)] + + mixed_bool_dataframe_value_str = "[[True,None],[None,False]]" + mixed_bool_expected_rows = [(True,None,None),(None,False,None)] + + mixed_decimal_dataframe_value_str = "[[Decimal('1.1'),None],[None,Decimal('4.1')]]" + mixed_decimal_expected_rows = [(Decimal('1.1'),None,None),(None,Decimal('4.1'),None)] + mixed_int_to_decimal_expected_rows = [(Decimal('1'),None,None),(None,Decimal('4'),None)] + + mixed_timestamp_dataframe_value_str = '[[pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251)),None],' \ + +'[None,pd.Timestamp(datetime(2020, 7, 27, 14, 22, 33, 673251))]]' + mixed_datetime_dataframe_value_str = '[[datetime(2020, 7, 27, 14, 22, 33, 673251),None],' \ + +'[None,datetime(2020, 7, 27, 14, 22, 33, 673251)]]' + mixed_datetime_expected_rows = [(datetime(2020, 7, 27, 14, 22, 33, 673000),None,None), + (None,datetime(2020, 7, 27, 14, 22, 33, 673000),None)] + mixed_date_dataframe_value_str = '[[date(2020, 7, 27),None],' \ + +'[None,date(2020, 7, 27)]]' + mixed_date_expected_rows = [(date(2020, 7, 27),None,None), + (None,date(2020, 7, 27),None)] + + none_dataframe_value_str = "[[None, None],[None, None]]" + none_expected_rows = [(None, None, None),(None, None, None)] + + nan_dataframe_value_str = "[[np.nan, np.nan],[np.nan, np.nan]]" + nan_expected_rows = [(None, None, None),(None, None, None)] + + types = [ + # Full columns without None or NaN / Int + + ("dtype='uint8[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='uint16[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + 
("dtype='uint32[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='uint64[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='int8[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='int16[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='int32[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + ("dtype='int64[pyarrow]'", "integer", int_dataframe_value_str, int_expected_rows, False), + + # Full columns without None or NaN / Float + + ("dtype='float16[pyarrow]'", "double", float16_dataframe_value_str, float_expected_rows, True), + ("dtype='float32[pyarrow]'", "double", float_dataframe_value_str, float_expected_rows, True), + ("dtype='float64[pyarrow]'", "double", float_dataframe_value_str, float_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "double", float16_dataframe_value_str, float_expected_rows, True), + ("dtype='float[pyarrow]'", "double", float_dataframe_value_str, float_expected_rows, True), + ("dtype='double[pyarrow]'", "double", float_dataframe_value_str, float_expected_rows, False), + + # Full columns without None or NaN / Decimal + + ("dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))", "DECIMAL(10,5)", decimal_dataframe_value_str, decimal_expected_rows, False), + # Full columns without None or NaN / Int to Decimal + + ("dtype='uint8[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='uint16[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='uint32[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='uint64[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='int8[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='int16[pyarrow]'", 
"DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='int32[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + ("dtype='int64[pyarrow]'", "DECIMAL(10,5)", int_dataframe_value_str, int_to_decimal_expected_rows, False), + + # Full columns without None or NaN / Float to Decimal + + ("dtype='float16[pyarrow]'", "DECIMAL(10,5)", float16_dataframe_value_str, decimal_expected_rows, True), + ("dtype='float32[pyarrow]'", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, False), + ("dtype='float64[pyarrow]'", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "DECIMAL(10,5)", float16_dataframe_value_str, decimal_expected_rows, True), + ("dtype='float[pyarrow]'", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, False), + ("dtype='double[pyarrow]'", "DECIMAL(10,5)", float_dataframe_value_str, decimal_expected_rows, False), + + # Full columns without None or NaN / Int To Double + + ("dtype='uint8[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='uint16[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='uint32[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='uint64[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='int8[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='int16[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='int32[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + ("dtype='int64[pyarrow]'", "double", int_dataframe_value_str, int_to_float_expected_rows, False), + + # Full columns without None or NaN / Float to Int + + ("dtype='float16[pyarrow]'", "integer", float16_dataframe_value_str, int_expected_rows, 
False), + ("dtype='float32[pyarrow]'", "integer", float_dataframe_value_str, int_expected_rows, False), + ("dtype='float64[pyarrow]'", "integer", float_dataframe_value_str, int_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "integer", float16_dataframe_value_str, int_expected_rows, False), + ("dtype='float[pyarrow]'", "integer", float_dataframe_value_str, int_expected_rows, False), + ("dtype='double[pyarrow]'", "integer", float_dataframe_value_str, int_expected_rows, False), + + # Full columns without None or NaN / String + + ("dtype='string[pyarrow]'", "VARCHAR(2000000)", str_dataframe_value_str, str_expected_rows, False), + + # Full columns without None or NaN / Boolean + + ("dtype='bool[pyarrow]'", "boolean", bool_dataframe_value_str, bool_expected_rows, False), + + # Full columns without None or NaN / Date and time + + ("dtype=pd.ArrowDtype(pa.timestamp('ns','UTC'))", "timestamp", datetime_dataframe_value_str, datetime_expected_rows, False), + #df = pd.DataFrame([[datetime.date(2012,1,1),None],[None,None]], dtype=pd.ArrowDtype(pa.date32())) can't be created at the moment, because it fails with "AttributeError: 'ArrowDtype' object has no attribute 'tz'" and pa.date32() doesn't accept a timezone + #df = pd.DataFrame([[datetime.date(2012,1,1),None],[None,None]], dtype=pd.ArrowDtype(pa.date64())) can't be created at the moment, because it fails with "AttributeError: 'ArrowDtype' object has no attribute 'tz'" and pa.date64() doesn't accept a timezone + + # Mixed columns with values and None / Int + + ("dtype='uint8[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='uint16[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='uint32[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='uint64[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='int8[pyarrow]'", "integer",
mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='int16[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='int32[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='int64[pyarrow]'", "integer", mixed_int_dataframe_value_str, mixed_int_expected_rows, False), + + # Mixed columns with values and None / Float + + ("dtype='float16[pyarrow]'", "double", mixed_float16_dataframe_value_str, mixed_float_expected_rows, True), + ("dtype='float32[pyarrow]'", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, True), + ("dtype='float64[pyarrow]'", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "double", mixed_float16_dataframe_value_str, mixed_float_expected_rows, True), + ("dtype='float[pyarrow]'", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, True), + ("dtype='double[pyarrow]'", "double", mixed_float_dataframe_value_str, mixed_float_expected_rows, False), + + # Mixed columns with values and None / Decimal + + ("dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))", "DECIMAL(10,5)", mixed_decimal_dataframe_value_str, mixed_decimal_expected_rows, False), + # Mixed columns with values and None / Int to Decimal + + ("dtype='uint8[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='uint16[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='uint32[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='uint64[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='int8[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='int16[pyarrow]'", "DECIMAL(10,5)", 
mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='int32[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + ("dtype='int64[pyarrow]'", "DECIMAL(10,5)", mixed_int_dataframe_value_str, mixed_int_to_decimal_expected_rows, False), + + # Mixed columns with values and None / Float to Decimal + + ("dtype='float16[pyarrow]'", "DECIMAL(10,5)", mixed_float16_dataframe_value_str, mixed_decimal_expected_rows, True), + ("dtype='float32[pyarrow]'", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, False), + ("dtype='float64[pyarrow]'", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "DECIMAL(10,5)", mixed_float16_dataframe_value_str, mixed_decimal_expected_rows, True), + ("dtype='float[pyarrow]'", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, False), + ("dtype='double[pyarrow]'", "DECIMAL(10,5)", mixed_float_dataframe_value_str, mixed_decimal_expected_rows, False), + + # Mixed columns with values and None / Int To Double + + ("dtype='uint8[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='uint16[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='uint32[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='uint64[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='int8[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='int16[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='int32[pyarrow]'", "double", mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + ("dtype='int64[pyarrow]'", "double", 
mixed_int_dataframe_value_str, mixed_int_to_float_expected_rows, False), + + # Mixed columns with values and None / Float to Int + + ("dtype='float16[pyarrow]'", "integer", mixed_float16_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='float32[pyarrow]'", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='float64[pyarrow]'", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "integer", mixed_float16_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='float[pyarrow]'", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + ("dtype='double[pyarrow]'", "integer", mixed_float_dataframe_value_str, mixed_int_expected_rows, False), + + # Mixed columns with values and None / String + + ("dtype='string[pyarrow]'", "VARCHAR(2000000)", mixed_str_dataframe_value_str, mixed_str_expected_rows, False), + + # Mixed columns with values and None / Boolean + + ("dtype='bool[pyarrow]'", "boolean", mixed_bool_dataframe_value_str, mixed_bool_expected_rows, False), + + # Mixed columns with values and None / Date and time + + ("dtype=pd.ArrowDtype(pa.timestamp('ns','UTC'))", "timestamp", mixed_datetime_dataframe_value_str, mixed_datetime_expected_rows, False), + + # None + + ("dtype='uint8[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='uint16[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='uint32[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='uint64[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='int8[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='int16[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + ("dtype='int32[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + 
("dtype='int64[pyarrow]'", "integer", none_dataframe_value_str, none_expected_rows, False), + + ("dtype='float16[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + ("dtype='float32[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + ("dtype='float64[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + ("dtype='float[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + ("dtype='double[pyarrow]'", "float", none_dataframe_value_str, none_expected_rows, False), + + ("dtype='string[pyarrow]'", "VARCHAR(2000000)", none_dataframe_value_str, none_expected_rows, False), + + ("dtype='bool[pyarrow]'", "boolean", none_dataframe_value_str, none_expected_rows, False), + + ("dtype=pd.ArrowDtype(pa.timestamp('ns','UTC'))", "timestamp", none_dataframe_value_str, none_expected_rows, False), + ("dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))", "DECIMAL(10,5)", none_dataframe_value_str, none_expected_rows, False), + + # NaN + + ("dtype='uint8[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='uint16[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='uint32[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='uint64[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='int8[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='int16[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='int32[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='int64[pyarrow]'", "integer", nan_dataframe_value_str, nan_expected_rows, False), + + ("dtype='float16[pyarrow]'", "float", nan_dataframe_value_str, ".*pyarrow.lib.ArrowNotImplementedError: Unsupported cast from double to 
halffloat using function cast_half_float.*", False), + ("dtype='float32[pyarrow]'", "float", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='float64[pyarrow]'", "float", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='halffloat[pyarrow]'", "float", nan_dataframe_value_str, ".*pyarrow.lib.ArrowNotImplementedError: Unsupported cast from double to halffloat using function cast_half_float.*", False), + ("dtype='float[pyarrow]'", "float", nan_dataframe_value_str, nan_expected_rows, False), + ("dtype='double[pyarrow]'", "float", nan_dataframe_value_str, nan_expected_rows, False), + + ("dtype='string[pyarrow]'", "VARCHAR(2000000)", nan_dataframe_value_str, nan_expected_rows, False), + + ("dtype='bool[pyarrow]'", "boolean", nan_dataframe_value_str, nan_expected_rows, False), + + #("dtype=pd.ArrowDtype(pa.timestamp('ns','UTC'))", "timestamp", nan_dataframe_value_str, nan_expected_rows, False), # DataFrame creation fails with: pyarrow.lib.ArrowNotImplementedError: Unsupported cast from double to timestamp using function cast_timestamp + ("dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))", "DECIMAL(10,5)", nan_dataframe_value_str, nan_expected_rows, False), + ] + + @useData(types) + def test_dtype_emit(self, dtype_definition:str, sql_type:str, dataframe_value_str:str, expected_result:Union[str,List[Tuple]], use_almost_equal:bool): + sql=udf.fixindent(f''' + CREATE OR REPLACE PYTHON3 SET SCRIPT test_dtype_emit(i integer) + EMITS (o1 {sql_type}, o2 {sql_type}, traceback varchar(2000000)) AS + + def run(ctx): + try: + from decimal import Decimal + import pandas as pd + import numpy as np + import pyarrow as pa + from datetime import datetime, date + {dtype_definition} + df = pd.DataFrame({dataframe_value_str}, dtype=dtype) + df["traceback"]=None + ctx.emit(df) + except: + import traceback + ctx.emit(None,None,traceback.format_exc()) + / + ''') + print(sql) + self.query(sql) + with UdfDebugger(test_case=self): + rows = self.query('''SELECT
test_dtype_emit(0)''') + if isinstance(expected_result,str): + self.assertRegex(rows[0][2], expected_result) + else: + if use_almost_equal: + self.assertRowsAlmostEqual(expected_result, rows, places=1) + else: + self.assertRowsEqual(expected_result, rows) + + def isValueAlmostEqual(self, left, right, places): + if isinstance(left, (float, Decimal)) and isinstance(right, (float, Decimal)): + return round(left, places) == round(right, places) + else: + return left == right + + def isRowAlmostEqual(self, left, right, places): + if len(left) != len(right): + return False + all_values_almost_equal = all(self.isValueAlmostEqual(lvalue, rvalue, places) + for lvalue, rvalue in zip(left, right)) + return all_values_almost_equal + + def assertRowsAlmostEqual(self, left, right, places): + lrows = [tuple(x) for x in left] + rrows = [tuple(x) for x in right] + if len(lrows) != len(rrows): + raise AssertionError(f'{lrows} and {rrows} have different number of rows.') + all_rows_almost_equal = all(self.isRowAlmostEqual(lrow, rrow, places) for lrow, rrow in zip(lrows, rrows)) + if not all_rows_almost_equal: + raise AssertionError(f'{lrows} and {rrows} are not almost equal.') + +if __name__ == '__main__': + udf.main()