From 11c87219978ffb9b7ab80417d6832cc7d52695da Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 13 Aug 2025 09:59:23 +0530 Subject: [PATCH 01/12] lob support in execute --- mssql_python/cursor.py | 43 +++++++- mssql_python/pybind/CMakeLists.txt | 2 +- mssql_python/pybind/ddbc_bindings.cpp | 151 +++++++++++++++++--------- mssql_python/pybind/ddbc_bindings.h | 7 ++ 4 files changed, 146 insertions(+), 57 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index a6f5bb64d..097c25a02 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -233,10 +233,11 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_DEFAULT.value, 1, 0, + False, ) if isinstance(param, bool): - return ddbc_sql_const.SQL_BIT.value, ddbc_sql_const.SQL_C_BIT.value, 1, 0 + return ddbc_sql_const.SQL_BIT.value, ddbc_sql_const.SQL_C_BIT.value, 1, 0, False if isinstance(param, int): if 0 <= param <= 255: @@ -245,6 +246,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TINYINT.value, 3, 0, + False, ) if -32768 <= param <= 32767: return ( @@ -252,6 +254,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_SHORT.value, 5, 0, + False, ) if -2147483648 <= param <= 2147483647: return ( @@ -259,12 +262,14 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_LONG.value, 10, 0, + False, ) return ( ddbc_sql_const.SQL_BIGINT.value, ddbc_sql_const.SQL_C_SBIGINT.value, 19, 0, + False, ) if isinstance(param, float): @@ -273,6 +278,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_DOUBLE.value, 15, 0, + False, ) if isinstance(param, decimal.Decimal): @@ -284,6 +290,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_NUMERIC.value, parameters_list[i].precision, parameters_list[i].scale, + False, ) if isinstance(param, str): @@ -297,6 +304,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_WCHAR.value, len(param), 0, + False, ) # Attempt to parse as date, datetime, datetime2, timestamp, smalldatetime or time @@ -309,6 +317,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_DATE.value, 10, 0, + False, ) if self._parse_datetime(param): parameters_list[i] = self._parse_datetime(param) @@ -317,6 +326,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_TIMESTAMP.value, 26, 6, + False, ) if self._parse_time(param): parameters_list[i] = self._parse_time(param) @@ -325,6 +335,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_TIME.value, 8, 0, + False, ) # String mapping logic here @@ -338,12 +349,14 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_WCHAR.value, utf16_len, 0, + True, ) return ( ddbc_sql_const.SQL_LONGVARCHAR.value, ddbc_sql_const.SQL_C_CHAR.value, len(param), 0, + True, ) if is_unicode: # Short Unicode strings utf16_len = len(param.encode("utf-16-le")) // 2 @@ -352,12 +365,14 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_WCHAR.value, utf16_len, 0, + False, ) return ( ddbc_sql_const.SQL_VARCHAR.value, ddbc_sql_const.SQL_C_CHAR.value, len(param), 0, + False, ) if isinstance(param, bytes): @@ -367,12 +382,14 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_BINARY.value, len(param), 0, + True, ) return ( ddbc_sql_const.SQL_BINARY.value, ddbc_sql_const.SQL_C_BINARY.value, len(param), 0, + False, ) if isinstance(param, bytearray): @@ -382,13 +399,25 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_BINARY.value, len(param), 0, + True, ) return ( ddbc_sql_const.SQL_BINARY.value, ddbc_sql_const.SQL_C_BINARY.value, len(param), 0, + False, ) + + # if isinstance(param, (bytes, bytearray)): + # is_large = len(param) > 8000 + # return ( + # ddbc_sql_const.SQL_VARBINARY.value if is_large else ddbc_sql_const.SQL_BINARY.value, + # ddbc_sql_const.SQL_C_BINARY.value, + # len(param), + # 0, + # is_large, + # ) if isinstance(param, datetime.datetime): return ( @@ -396,6 +425,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_TIMESTAMP.value, 26, 6, + False, ) if isinstance(param, datetime.date): @@ -404,6 +434,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_DATE.value, 10, 0, + False, ) if isinstance(param, datetime.time): @@ -412,6 +443,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_TYPE_TIME.value, 8, 0, + False, ) return ( @@ -419,6 +451,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_CHAR.value, len(str(param)), 0, + False, ) def _initialize_cursor(self) -> None: @@ -495,7 +528,7 @@ def _create_parameter_types_list(self, parameter, param_info, parameters_list, i paraminfo. """ paraminfo = param_info() - sql_type, c_type, column_size, decimal_digits = self._map_sql_type( + sql_type, c_type, column_size, decimal_digits, is_dae = self._map_sql_type( parameter, parameters_list, i ) paraminfo.paramCType = c_type @@ -503,6 +536,12 @@ def _create_parameter_types_list(self, parameter, param_info, parameters_list, i paraminfo.inputOutputType = ddbc_sql_const.SQL_PARAM_INPUT.value paraminfo.columnSize = column_size paraminfo.decimalDigits = decimal_digits + paraminfo.isDAE = is_dae + + if is_dae: + paraminfo.strLenOrInd = -1 # Tells ODBC this is streamed data + paraminfo.dataPtr = parameter # Will be converted to py::object* in C++ + return paraminfo def _initialize_description(self): diff --git a/mssql_python/pybind/CMakeLists.txt b/mssql_python/pybind/CMakeLists.txt index 489dfd459..8f58b31c9 100644 --- a/mssql_python/pybind/CMakeLists.txt +++ b/mssql_python/pybind/CMakeLists.txt @@ -272,7 +272,7 @@ target_compile_definitions(ddbc_bindings PRIVATE # Add warning level flags for MSVC if(MSVC) - target_compile_options(ddbc_bindings PRIVATE /W4 /WX) + target_compile_options(ddbc_bindings PRIVATE /W4 ) endif() # Add macOS-specific string conversion fix diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index d0a20dbd7..a07d1c65c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -45,7 +45,10 @@ struct ParamInfo { SQLSMALLINT decimalDigits; // TODO: Reuse python buffer for large data using Python buffer protocol // Stores pointer to the python object that holds parameter value - // py::object* dataPtr; + + SQLLEN strLenOrInd = 0; // Required for DAE + bool isDAE = false; // Indicates if we need to stream via SQLPutData + py::object dataPtr; }; // Mirrors the SQL_NUMERIC_STRUCT. But redefined to replace val char array @@ -134,6 +137,10 @@ SQLFreeStmtFunc SQLFreeStmt_ptr = nullptr; // Diagnostic APIs SQLGetDiagRecFunc SQLGetDiagRec_ptr = nullptr; + +// DAE APIs +SQLParamDataFunc SQLParamData_ptr = nullptr; +SQLPutDataFunc SQLPutData_ptr = nullptr; SQLTablesFunc SQLTables_ptr = nullptr; namespace { @@ -245,57 +252,41 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, !py::isinstance(param)) { ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex)); } - std::wstring* strParam = - AllocateParamBuffer(paramBuffers, param.cast()); - if (strParam->size() > 4096 /* TODO: Fix max length */) { - ThrowStdException( - "Streaming parameters is not yet supported. Parameter size" - " must be less than 8192 bytes"); - } - - // Log detailed parameter information - LOG("SQL_C_WCHAR Parameter[{}]: Length={}, Content='{}'", - paramIndex, - strParam->size(), - (strParam->size() <= 100 - ? WideToUTF8(std::wstring(strParam->begin(), strParam->end())) - : WideToUTF8(std::wstring(strParam->begin(), strParam->begin() + 100)) + "...")); - - // Log each character's code point for debugging - if (strParam->size() <= 20) { + + if (paramInfo.isDAE) { + // deferred execution + LOG("Parameter[{}] is marked for DAE streaming", paramIndex); + dataPtr = const_cast(reinterpret_cast(¶mInfos[paramIndex])); + strLenOrIndPtr = AllocateParamBuffer(paramBuffers); + *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0); + bufferLength = 0; // Not used + } else { + // Normal small-string case + std::wstring* strParam = + AllocateParamBuffer(paramBuffers, param.cast()); + LOG("SQL_C_WCHAR Parameter[{}]: Length={}, Content='{}'", + paramIndex, + strParam->size(), + (strParam->size() <= 100 + ? WideToUTF8(std::wstring(strParam->begin(), strParam->end())) + : WideToUTF8(std::wstring(strParam->begin(), strParam->begin() + 100)) + "...")); + + #if defined(__APPLE__) || defined(__linux__) + std::vector* sqlwcharBuffer = + AllocateParamBuffer>(paramBuffers); + sqlwcharBuffer->resize(strParam->size() + 1, 0); for (size_t i = 0; i < strParam->size(); i++) { - unsigned char ch = static_cast((*strParam)[i]); - LOG(" char[{}] = {} ({})", i, static_cast(ch), DescribeChar(ch)); + (*sqlwcharBuffer)[i] = static_cast((*strParam)[i]); } + dataPtr = sqlwcharBuffer->data(); + bufferLength = (strParam->size() + 1) * sizeof(SQLWCHAR); + #else + dataPtr = const_cast(static_cast(strParam->c_str())); + bufferLength = (strParam->size() + 1) * sizeof(wchar_t); + #endif + strLenOrIndPtr = AllocateParamBuffer(paramBuffers); + *strLenOrIndPtr = SQL_NTS; } -#if defined(__APPLE__) || defined(__linux__) - // On macOS/Linux, we need special handling for wide characters - // Create a properly encoded SQLWCHAR buffer for the parameter - std::vector* sqlwcharBuffer = - AllocateParamBuffer>(paramBuffers); - - // Reserve space and convert from wstring to SQLWCHAR array - std::vector utf16 = WStringToSQLWCHAR(*strParam); - if (utf16.size() < strParam->size()) { - LOG("Warning: UTF-16 encoding shrank string? input={} output={}", - strParam->size(), utf16.size()); - } - if (utf16.size() > strParam->size() * 2 + 1) { - LOG("Warning: UTF-16 expansion unusually large: input={} output={}", - strParam->size(), utf16.size()); - } - *sqlwcharBuffer = std::move(utf16); - // Use the SQLWCHAR buffer instead of the wstring directly - dataPtr = sqlwcharBuffer->data(); - bufferLength = sqlwcharBuffer->size() * sizeof(SQLWCHAR); - LOG("macOS: Created SQLWCHAR buffer for parameter with size: {} bytes", bufferLength); -#else - // On Windows, wchar_t and SQLWCHAR are the same size, so direct cast works - dataPtr = const_cast(static_cast(strParam->c_str())); - bufferLength = (strParam->size() + 1 /* null terminator */) * sizeof(wchar_t); -#endif - strLenOrIndPtr = AllocateParamBuffer(paramBuffers); - *strLenOrIndPtr = SQL_NTS; break; } case SQL_C_BIT: { @@ -791,6 +782,9 @@ DriverHandle LoadDriverOrThrowException() { SQLFreeStmt_ptr = GetFunctionPointer(handle, "SQLFreeStmt"); SQLGetDiagRec_ptr = GetFunctionPointer(handle, "SQLGetDiagRecW"); + + SQLParamData_ptr = GetFunctionPointer(handle, "SQLParamData"); + SQLPutData_ptr = GetFunctionPointer(handle, "SQLPutData"); SQLTables_ptr = GetFunctionPointer(handle, "SQLTablesW"); bool success = @@ -802,7 +796,8 @@ DriverHandle LoadDriverOrThrowException() { SQLGetData_ptr && SQLNumResultCols_ptr && SQLBindCol_ptr && SQLDescribeCol_ptr && SQLMoreResults_ptr && SQLColAttribute_ptr && SQLEndTran_ptr && SQLDisconnect_ptr && SQLFreeHandle_ptr && - SQLFreeStmt_ptr && SQLGetDiagRec_ptr && SQLTables_ptr; + SQLFreeStmt_ptr && SQLGetDiagRec_ptr && SQLParamData_ptr && + SQLPutData_ptr && SQLTables_ptr; if (!success) { ThrowStdException("Failed to load required function pointers from driver."); @@ -1176,16 +1171,61 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, } rc = SQLExecute_ptr(hStmt); + if (rc == SQL_NEED_DATA) { + LOG("Beginning SQLParamData/SQLPutData loop for DAE."); + SQLPOINTER paramToken = nullptr; + while ((rc = SQLParamData_ptr(hStmt, ¶mToken)) == SQL_NEED_DATA) { + // Find the paramInfo that matches the returned token + const ParamInfo* matchedInfo = nullptr; + for (auto& info : paramInfos) { + if (reinterpret_cast(const_cast(&info)) == paramToken) { + matchedInfo = &info; + break; + } + } + if (!matchedInfo) { + ThrowStdException("Unrecognized paramToken returned by SQLParamData"); + } + + const py::object& pyObj = matchedInfo->dataPtr; + if (pyObj.is_none()) { + SQLPutData_ptr(hStmt, nullptr, 0); + continue; + } + if (py::isinstance(pyObj)) { + std::string utf16_str = pyObj.attr("encode")("utf-16-le").cast(); + const char* dataPtr = utf16_str.data(); + SQLLEN totalBytes = static_cast(utf16_str.size()); + + const size_t chunkSize = 8192; + for (size_t offset = 0; offset < totalBytes; offset += chunkSize) { + size_t len = std::min(chunkSize, totalBytes - offset); + rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast(len)); + if (!SQL_SUCCEEDED(rc)) { + LOG("SQLPutData failed."); + return rc; + } + } + } else { + ThrowStdException("DAE only supported for str or bytes"); + } + } + + if (!SQL_SUCCEEDED(rc)) { + LOG("SQLParamData final rc: {}", rc); + return rc; + } + LOG("DAE complete, SQLExecute resumed internally."); + } + if (!SQL_SUCCEEDED(rc) && rc != SQL_NO_DATA) { LOG("DDBCSQLExecute: Error during execution of the statement"); return rc; } - // TODO: Handle huge input parameters by checking rc == SQL_NEED_DATA // Unbind the bound buffers for all parameters coz the buffers' memory will // be freed when this function exits (parambuffers goes out of scope) rc = SQLFreeStmt_ptr(hStmt, SQL_RESET_PARAMS); - return rc; } } @@ -2731,8 +2771,11 @@ PYBIND11_MODULE(ddbc_bindings, m) { .def_readwrite("paramCType", &ParamInfo::paramCType) .def_readwrite("paramSQLType", &ParamInfo::paramSQLType) .def_readwrite("columnSize", &ParamInfo::columnSize) - .def_readwrite("decimalDigits", &ParamInfo::decimalDigits); - + .def_readwrite("decimalDigits", &ParamInfo::decimalDigits) + .def_readwrite("strLenOrInd", &ParamInfo::strLenOrInd) + .def_readwrite("dataPtr", &ParamInfo::dataPtr) + .def_readwrite("isDAE", &ParamInfo::isDAE); + // Define numeric data class py::class_(m, "NumericData") .def(py::init<>()) diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index f28f610cc..12b69bfd4 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -203,6 +203,9 @@ typedef SQLRETURN (SQL_API* SQLFreeStmtFunc)(SQLHSTMT, SQLUSMALLINT); typedef SQLRETURN (SQL_API* SQLGetDiagRecFunc)(SQLSMALLINT, SQLHANDLE, SQLSMALLINT, SQLWCHAR*, SQLINTEGER*, SQLWCHAR*, SQLSMALLINT, SQLSMALLINT*); +// DAE APIs +typedef SQLRETURN (SQL_API* SQLParamDataFunc)(SQLHSTMT, SQLPOINTER*); +typedef SQLRETURN (SQL_API* SQLPutDataFunc)(SQLHSTMT, SQLPOINTER, SQLLEN); //------------------------------------------------------------------------------------------------- // Extern function pointer declarations (defined in ddbc_bindings.cpp) //------------------------------------------------------------------------------------------------- @@ -246,6 +249,10 @@ extern SQLFreeStmtFunc SQLFreeStmt_ptr; // Diagnostic APIs extern SQLGetDiagRecFunc SQLGetDiagRec_ptr; +// DAE APIs +extern SQLParamDataFunc SQLParamData_ptr; +extern SQLPutDataFunc SQLPutData_ptr; + // Logging utility template void LOG(const std::string& formatString, Args&&... args); From 0d3b6bfc4fed85a6d96f7d87712e2950bd5fb876 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 13 Aug 2025 11:04:25 +0530 Subject: [PATCH 02/12] cleanup --- mssql_python/cursor.py | 10 ---------- mssql_python/pybind/ddbc_bindings.cpp | 25 +++++++++++-------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 097c25a02..4d6eadb90 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -408,16 +408,6 @@ def _map_sql_type(self, param, parameters_list, i): 0, False, ) - - # if isinstance(param, (bytes, bytearray)): - # is_large = len(param) > 8000 - # return ( - # ddbc_sql_const.SQL_VARBINARY.value if is_large else ddbc_sql_const.SQL_BINARY.value, - # ddbc_sql_const.SQL_C_BINARY.value, - # len(param), - # 0, - # is_large, - # ) if isinstance(param, datetime.datetime): return ( diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index a07d1c65c..926017f0b 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -30,7 +30,7 @@ #ifndef ARCHITECTURE #define ARCHITECTURE "win64" // Default to win64 if not defined during compilation #endif - +#define DAE_CHUNK_SIZE 8192 //------------------------------------------------------------------------------------------------- // Class definitions //------------------------------------------------------------------------------------------------- @@ -43,11 +43,8 @@ struct ParamInfo { SQLSMALLINT paramSQLType; SQLULEN columnSize; SQLSMALLINT decimalDigits; - // TODO: Reuse python buffer for large data using Python buffer protocol - // Stores pointer to the python object that holds parameter value - SQLLEN strLenOrInd = 0; // Required for DAE - bool isDAE = false; // Indicates if we need to stream via SQLPutData + bool isDAE = false; // Indicates if we need to stream py::object dataPtr; }; @@ -252,14 +249,13 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, !py::isinstance(param)) { ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex)); } - if (paramInfo.isDAE) { // deferred execution LOG("Parameter[{}] is marked for DAE streaming", paramIndex); dataPtr = const_cast(reinterpret_cast(¶mInfos[paramIndex])); strLenOrIndPtr = AllocateParamBuffer(paramBuffers); *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0); - bufferLength = 0; // Not used + bufferLength = 0; } else { // Normal small-string case std::wstring* strParam = @@ -1175,7 +1171,7 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, LOG("Beginning SQLParamData/SQLPutData loop for DAE."); SQLPOINTER paramToken = nullptr; while ((rc = SQLParamData_ptr(hStmt, ¶mToken)) == SQL_NEED_DATA) { - // Find the paramInfo that matches the returned token + // Finding the paramInfo that matches the returned token const ParamInfo* matchedInfo = nullptr; for (auto& info : paramInfos) { if (reinterpret_cast(const_cast(&info)) == paramToken) { @@ -1186,18 +1182,21 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, if (!matchedInfo) { ThrowStdException("Unrecognized paramToken returned by SQLParamData"); } - const py::object& pyObj = matchedInfo->dataPtr; if (pyObj.is_none()) { SQLPutData_ptr(hStmt, nullptr, 0); continue; } if (py::isinstance(pyObj)) { - std::string utf16_str = pyObj.attr("encode")("utf-16-le").cast(); + std::string utf16_str; + try { + utf16_str = pyObj.attr("encode")("utf-16-le").cast(); + } catch (const std::exception& e) { + ThrowStdException("Error encoding string to UTF-16: " + std::string(e.what())); + } const char* dataPtr = utf16_str.data(); SQLLEN totalBytes = static_cast(utf16_str.size()); - - const size_t chunkSize = 8192; + const size_t chunkSize = DAE_CHUNK_SIZE; for (size_t offset = 0; offset < totalBytes; offset += chunkSize) { size_t len = std::min(chunkSize, totalBytes - offset); rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast(len)); @@ -1210,14 +1209,12 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, ThrowStdException("DAE only supported for str or bytes"); } } - if (!SQL_SUCCEEDED(rc)) { LOG("SQLParamData final rc: {}", rc); return rc; } LOG("DAE complete, SQLExecute resumed internally."); } - if (!SQL_SUCCEEDED(rc) && rc != SQL_NO_DATA) { LOG("DDBCSQLExecute: Error during execution of the statement"); return rc; From e2bd8b815f09f77282707447f3c874eaf67ce7cb Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 13 Aug 2025 13:41:49 +0530 Subject: [PATCH 03/12] removing warning --- mssql_python/pybind/CMakeLists.txt | 2 +- mssql_python/pybind/ddbc_bindings.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mssql_python/pybind/CMakeLists.txt b/mssql_python/pybind/CMakeLists.txt index 8f58b31c9..489dfd459 100644 --- a/mssql_python/pybind/CMakeLists.txt +++ b/mssql_python/pybind/CMakeLists.txt @@ -272,7 +272,7 @@ target_compile_definitions(ddbc_bindings PRIVATE # Add warning level flags for MSVC if(MSVC) - target_compile_options(ddbc_bindings PRIVATE /W4 ) + target_compile_options(ddbc_bindings PRIVATE /W4 /WX) endif() # Add macOS-specific string conversion fix diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 926017f0b..3f867eeb5 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -1195,7 +1195,7 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, ThrowStdException("Error encoding string to UTF-16: " + std::string(e.what())); } const char* dataPtr = utf16_str.data(); - SQLLEN totalBytes = static_cast(utf16_str.size()); + size_t totalBytes = utf16_str.size(); const size_t chunkSize = DAE_CHUNK_SIZE; for (size_t offset = 0; offset < totalBytes; offset += chunkSize) { size_t len = std::min(chunkSize, totalBytes - offset); From 4eecb6913c9f7759d6ead4c8871e78f95e45c02f Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Tue, 26 Aug 2025 23:59:15 +0530 Subject: [PATCH 04/12] resolved comments --- mssql_python/cursor.py | 30 +++++++++++++-------------- mssql_python/pybind/ddbc_bindings.cpp | 27 ++++++++++-------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 4d6eadb90..65f1d3893 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -18,6 +18,8 @@ from mssql_python.exceptions import InterfaceError, NotSupportedError, ProgrammingError from .row import Row +# Constants for string handling +MAX_INLINE_CHAR = 4000 # NVARCHAR/VARCHAR inline limit; this triggers NVARCHAR(MAX)/VARCHAR(MAX) + DAE class Cursor: """ @@ -340,14 +342,12 @@ def _map_sql_type(self, param, parameters_list, i): # String mapping logic here is_unicode = self._is_unicode_string(param) - # TODO: revisit - if len(param) > 4000: # Long strings + if len(param) > MAX_INLINE_CHAR: # Long strings if is_unicode: - utf16_len = len(param.encode("utf-16-le")) // 2 return ( ddbc_sql_const.SQL_WLONGVARCHAR.value, ddbc_sql_const.SQL_C_WCHAR.value, - utf16_len, + len(param), 0, True, ) @@ -382,7 +382,7 @@ def _map_sql_type(self, param, parameters_list, i): ddbc_sql_const.SQL_C_BINARY.value, len(param), 0, - True, + False, ) return ( ddbc_sql_const.SQL_BINARY.value, @@ -436,13 +436,8 @@ def _map_sql_type(self, param, parameters_list, i): False, ) - return ( - ddbc_sql_const.SQL_VARCHAR.value, - ddbc_sql_const.SQL_C_CHAR.value, - len(str(param)), - 0, - False, - ) + # For safety: unknown/unhandled Python types should not silently go to SQL + raise TypeError("Unsupported parameter type: The driver cannot safely convert it to a SQL type.") def _initialize_cursor(self) -> None: """ @@ -529,7 +524,6 @@ def _create_parameter_types_list(self, parameter, param_info, parameters_list, i paraminfo.isDAE = is_dae if is_dae: - paraminfo.strLenOrInd = -1 # Tells ODBC this is streamed data paraminfo.dataPtr = parameter # Will be converted to py::object* in C++ return paraminfo @@ -791,9 +785,15 @@ def execute( self.is_stmt_prepared, use_prepare, ) - + # Check return code + try: + # Check for errors but don't raise exceptions for info/warning messages - check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret) + check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret) + except Exception as e: + log('warning', "Execute failed, resetting cursor: %s", e) + self._reset_cursor() + # Capture any diagnostic messages (SQL_SUCCESS_WITH_INFO, etc.) if self.hstmt: diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 3f867eeb5..b850fcc3c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -260,13 +260,7 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, // Normal small-string case std::wstring* strParam = AllocateParamBuffer(paramBuffers, param.cast()); - LOG("SQL_C_WCHAR Parameter[{}]: Length={}, Content='{}'", - paramIndex, - strParam->size(), - (strParam->size() <= 100 - ? WideToUTF8(std::wstring(strParam->begin(), strParam->end())) - : WideToUTF8(std::wstring(strParam->begin(), strParam->begin() + 100)) + "...")); - + LOG("SQL_C_WCHAR Parameter[{}]: Length={}, isDAE={}", paramIndex, strParam->size(), paramInfo.isDAE); #if defined(__APPLE__) || defined(__linux__) std::vector* sqlwcharBuffer = AllocateParamBuffer>(paramBuffers); @@ -1188,20 +1182,21 @@ SQLRETURN SQLExecute_wrap(const SqlHandlePtr statementHandle, continue; } if (py::isinstance(pyObj)) { - std::string utf16_str; - try { - utf16_str = pyObj.attr("encode")("utf-16-le").cast(); - } catch (const std::exception& e) { - ThrowStdException("Error encoding string to UTF-16: " + std::string(e.what())); - } - const char* dataPtr = utf16_str.data(); - size_t totalBytes = utf16_str.size(); + std::wstring wstr = pyObj.cast(); +#if defined(__APPLE__) || defined(__linux__) + auto utf16Buf = WStringToSQLWCHAR(wstr); + const char* dataPtr = reinterpret_cast(utf16Buf.data()); + size_t totalBytes = (utf16Buf.size() - 1) * sizeof(SQLWCHAR); +#else + const char* dataPtr = reinterpret_cast(wstr.data()); + size_t totalBytes = wstr.size() * sizeof(wchar_t); +#endif const size_t chunkSize = DAE_CHUNK_SIZE; for (size_t offset = 0; offset < totalBytes; offset += chunkSize) { size_t len = std::min(chunkSize, totalBytes - offset); rc = SQLPutData_ptr(hStmt, (SQLPOINTER)(dataPtr + offset), static_cast(len)); if (!SQL_SUCCEEDED(rc)) { - LOG("SQLPutData failed."); + LOG("SQLPutData failed at offset {} of {}", offset, totalBytes); return rc; } } From db1df084c23516dac21010a3873b7ab6ee6ab273 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 27 Aug 2025 00:10:42 +0530 Subject: [PATCH 05/12] resolved comments-2 --- mssql_python/cursor.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 65f1d3893..a841575f4 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -233,7 +233,7 @@ def _map_sql_type(self, param, parameters_list, i): return ( ddbc_sql_const.SQL_VARCHAR.value, # TODO: Add SQLDescribeParam to get correct type ddbc_sql_const.SQL_C_DEFAULT.value, - 1, + 0, 0, False, ) @@ -513,6 +513,18 @@ def _create_parameter_types_list(self, parameter, param_info, parameters_list, i paraminfo. """ paraminfo = param_info() + # Explicit None handling + if parameter is None: + paraminfo.paramSQLType = ddbc_sql_const.SQL_VARCHAR.value + paraminfo.paramCType = ddbc_sql_const.SQL_C_CHAR.value + paraminfo.columnSize = 0 + paraminfo.decimalDigits = 0 + paraminfo.isDAE = False + paraminfo.inputOutputType = ddbc_sql_const.SQL_PARAM_INPUT.value + paraminfo.strLenOrInd = ddbc_sql_const.SQL_NULL_DATA.value + paraminfo.dataPtr = None + return paraminfo + sql_type, c_type, column_size, decimal_digits, is_dae = self._map_sql_type( parameter, parameters_list, i ) From 93f47bc674cd484069780d384f4ebc1115569694 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 27 Aug 2025 00:23:05 +0530 Subject: [PATCH 06/12] resolved comments-3 --- mssql_python/cursor.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index a841575f4..65f1d3893 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -233,7 +233,7 @@ def _map_sql_type(self, param, parameters_list, i): return ( ddbc_sql_const.SQL_VARCHAR.value, # TODO: Add SQLDescribeParam to get correct type ddbc_sql_const.SQL_C_DEFAULT.value, - 0, + 1, 0, False, ) @@ -513,18 +513,6 @@ def _create_parameter_types_list(self, parameter, param_info, parameters_list, i paraminfo. """ paraminfo = param_info() - # Explicit None handling - if parameter is None: - paraminfo.paramSQLType = ddbc_sql_const.SQL_VARCHAR.value - paraminfo.paramCType = ddbc_sql_const.SQL_C_CHAR.value - paraminfo.columnSize = 0 - paraminfo.decimalDigits = 0 - paraminfo.isDAE = False - paraminfo.inputOutputType = ddbc_sql_const.SQL_PARAM_INPUT.value - paraminfo.strLenOrInd = ddbc_sql_const.SQL_NULL_DATA.value - paraminfo.dataPtr = None - return paraminfo - sql_type, c_type, column_size, decimal_digits, is_dae = self._map_sql_type( parameter, parameters_list, i ) From 48338d67ead34943b9ab24e8dbd1f95da390be04 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Wed, 27 Aug 2025 00:33:41 +0530 Subject: [PATCH 07/12] resolved comments-4 --- mssql_python/cursor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 65f1d3893..e41793e14 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -437,7 +437,13 @@ def _map_sql_type(self, param, parameters_list, i): ) # For safety: unknown/unhandled Python types should not silently go to SQL - raise TypeError("Unsupported parameter type: The driver cannot safely convert it to a SQL type.") + # raise TypeError("Unsupported parameter type: The driver cannot safely convert it to a SQL type.") + return ( + ddbc_sql_const.SQL_VARCHAR.value, + ddbc_sql_const.SQL_C_CHAR.value, + len(str(param)), + 0, + ) def _initialize_cursor(self) -> None: """ @@ -793,6 +799,7 @@ def execute( except Exception as e: log('warning', "Execute failed, resetting cursor: %s", e) self._reset_cursor() + raise # Capture any diagnostic messages (SQL_SUCCESS_WITH_INFO, etc.) From fcafb8d032feee2a978bf085643b4e44c08492f3 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Thu, 28 Aug 2025 22:04:15 +0530 Subject: [PATCH 08/12] minor --- mssql_python/cursor.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index e41793e14..5bdeaed9f 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -437,13 +437,7 @@ def _map_sql_type(self, param, parameters_list, i): ) # For safety: unknown/unhandled Python types should not silently go to SQL - # raise TypeError("Unsupported parameter type: The driver cannot safely convert it to a SQL type.") - return ( - ddbc_sql_const.SQL_VARCHAR.value, - ddbc_sql_const.SQL_C_CHAR.value, - len(str(param)), - 0, - ) + raise TypeError("Unsupported parameter type: The driver cannot safely convert it to a SQL type.") def _initialize_cursor(self) -> None: """ From 24f83f9f3787f3e57d5bec294833e949f7ec2070 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Thu, 28 Aug 2025 22:27:37 +0530 Subject: [PATCH 09/12] minor --- mssql_python/pybind/ddbc_bindings.h | 214 ++++++++++++++++------------ 1 file changed, 122 insertions(+), 92 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 12b69bfd4..52b30d08e 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -32,108 +32,138 @@ using namespace pybind11::literals; #include #include -#if defined(__APPLE__) || defined(__linux__) -#include - -// Unicode constants for surrogate ranges and max scalar value -constexpr uint32_t UNICODE_SURROGATE_HIGH_START = 0xD800; -constexpr uint32_t UNICODE_SURROGATE_HIGH_END = 0xDBFF; -constexpr uint32_t UNICODE_SURROGATE_LOW_START = 0xDC00; -constexpr uint32_t UNICODE_SURROGATE_LOW_END = 0xDFFF; -constexpr uint32_t UNICODE_MAX_CODEPOINT = 0x10FFFF; -constexpr uint32_t UNICODE_REPLACEMENT_CHAR = 0xFFFD; - -// Validate whether a code point is a legal Unicode scalar value -// (excludes surrogate halves and values beyond U+10FFFF) -inline bool IsValidUnicodeScalar(uint32_t cp) { - return cp <= UNICODE_MAX_CODEPOINT && - !(cp >= UNICODE_SURROGATE_HIGH_START && cp <= UNICODE_SURROGATE_LOW_END); -} +// #if defined(__APPLE__) || defined(__linux__) +// #include + +// // Unicode constants for surrogate ranges and max scalar value +// constexpr uint32_t UNICODE_SURROGATE_HIGH_START = 0xD800; +// constexpr uint32_t UNICODE_SURROGATE_HIGH_END = 0xDBFF; +// constexpr uint32_t UNICODE_SURROGATE_LOW_START = 0xDC00; +// constexpr uint32_t UNICODE_SURROGATE_LOW_END = 0xDFFF; +// constexpr uint32_t UNICODE_MAX_CODEPOINT = 0x10FFFF; +// constexpr uint32_t UNICODE_REPLACEMENT_CHAR = 0xFFFD; + +// // Validate whether a code point is a legal Unicode scalar value +// // (excludes surrogate halves and values beyond U+10FFFF) +// inline bool IsValidUnicodeScalar(uint32_t cp) { +// return cp <= UNICODE_MAX_CODEPOINT && +// !(cp >= UNICODE_SURROGATE_HIGH_START && cp <= UNICODE_SURROGATE_LOW_END); +// } + +// inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { +// if (!sqlwStr) return std::wstring(); + +// if (length == SQL_NTS) { +// size_t i = 0; +// while (sqlwStr[i] != 0) ++i; +// length = i; +// } +// std::wstring result; +// result.reserve(length); + +// if constexpr (sizeof(SQLWCHAR) == 2) { +// // Decode UTF-16 to UTF-32 (with surrogate pair handling) +// for (size_t i = 0; i < length; ++i) { +// uint16_t wc = static_cast(sqlwStr[i]); +// // Check if this is a high surrogate (U+D800–U+DBFF) +// if (wc >= UNICODE_SURROGATE_HIGH_START && wc <= UNICODE_SURROGATE_HIGH_END && i + 1 < length) { +// uint16_t low = static_cast(sqlwStr[i + 1]); +// // Check if the next code unit is a low surrogate (U+DC00–U+DFFF) +// if (low >= UNICODE_SURROGATE_LOW_START && low <= UNICODE_SURROGATE_LOW_END) { +// // Combine surrogate pair into a single code point +// uint32_t cp = (((wc - UNICODE_SURROGATE_HIGH_START) << 10) | (low - UNICODE_SURROGATE_LOW_START)) + 0x10000; +// result.push_back(static_cast(cp)); +// ++i; // Skip the low surrogate +// continue; +// } +// } +// // If valid scalar then append, else append replacement char (U+FFFD) +// if (IsValidUnicodeScalar(wc)) { +// result.push_back(static_cast(wc)); +// } else { +// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); +// } +// } +// } else { +// // SQLWCHAR is UTF-32, so just copy with validation +// for (size_t i = 0; i < length; ++i) { +// uint32_t cp = static_cast(sqlwStr[i]); +// if (IsValidUnicodeScalar(cp)) { +// result.push_back(static_cast(cp)); +// } else { +// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); +// } +// } +// } +// return result; +// } + +// inline std::vector WStringToSQLWCHAR(const std::wstring& str) { +// std::vector result; +// result.reserve(str.size() + 2); +// if constexpr (sizeof(SQLWCHAR) == 2) { +// // Encode UTF-32 to UTF-16 +// for (wchar_t wc : str) { +// uint32_t cp = static_cast(wc); +// if (!IsValidUnicodeScalar(cp)) { +// cp = UNICODE_REPLACEMENT_CHAR; +// } +// if (cp <= 0xFFFF) { +// // Fits in a single UTF-16 code unit +// result.push_back(static_cast(cp)); +// } else { +// // Encode as surrogate pair +// cp -= 0x10000; +// SQLWCHAR high = static_cast((cp >> 10) + UNICODE_SURROGATE_HIGH_START); +// SQLWCHAR low = static_cast((cp & 0x3FF) + UNICODE_SURROGATE_LOW_START); +// result.push_back(high); +// result.push_back(low); +// } +// } +// } else { +// // Encode UTF-32 directly +// for (wchar_t wc : str) { +// uint32_t cp = static_cast(wc); +// if (IsValidUnicodeScalar(cp)) { +// result.push_back(static_cast(cp)); +// } else { +// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); +// } +// } +// } +// result.push_back(0); // null terminator +// return result; +// } +// #endif -inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { - if (!sqlwStr) return std::wstring(); +#if defined(__APPLE__) || defined(__linux__) + // macOS-specific headers + #include - if (length == SQL_NTS) { - size_t i = 0; - while (sqlwStr[i] != 0) ++i; - length = i; - } - std::wstring result; - result.reserve(length); + inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { + if (!sqlwStr) return std::wstring(); - if constexpr (sizeof(SQLWCHAR) == 2) { - // Decode UTF-16 to UTF-32 (with surrogate pair handling) - for (size_t i = 0; i < length; ++i) { - uint16_t wc = static_cast(sqlwStr[i]); - // Check if this is a high surrogate (U+D800–U+DBFF) - if (wc >= UNICODE_SURROGATE_HIGH_START && wc <= UNICODE_SURROGATE_HIGH_END && i + 1 < length) { - uint16_t low = static_cast(sqlwStr[i + 1]); - // Check if the next code unit is a low surrogate (U+DC00–U+DFFF) - if (low >= UNICODE_SURROGATE_LOW_START && low <= UNICODE_SURROGATE_LOW_END) { - // Combine surrogate pair into a single code point - uint32_t cp = (((wc - UNICODE_SURROGATE_HIGH_START) << 10) | (low - UNICODE_SURROGATE_LOW_START)) + 0x10000; - result.push_back(static_cast(cp)); - ++i; // Skip the low surrogate - continue; - } - } - // If valid scalar then append, else append replacement char (U+FFFD) - if (IsValidUnicodeScalar(wc)) { - result.push_back(static_cast(wc)); - } else { - result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); - } + if (length == SQL_NTS) { + size_t i = 0; + while (sqlwStr[i] != 0) ++i; + length = i; } - } else { - // SQLWCHAR is UTF-32, so just copy with validation + + std::wstring result; + result.reserve(length); for (size_t i = 0; i < length; ++i) { - uint32_t cp = static_cast(sqlwStr[i]); - if (IsValidUnicodeScalar(cp)) { - result.push_back(static_cast(cp)); - } else { - result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); - } + result.push_back(static_cast(sqlwStr[i])); } + return result; } - return result; -} -inline std::vector WStringToSQLWCHAR(const std::wstring& str) { - std::vector result; - result.reserve(str.size() + 2); - if constexpr (sizeof(SQLWCHAR) == 2) { - // Encode UTF-32 to UTF-16 - for (wchar_t wc : str) { - uint32_t cp = static_cast(wc); - if (!IsValidUnicodeScalar(cp)) { - cp = UNICODE_REPLACEMENT_CHAR; - } - if (cp <= 0xFFFF) { - // Fits in a single UTF-16 code unit - result.push_back(static_cast(cp)); - } else { - // Encode as surrogate pair - cp -= 0x10000; - SQLWCHAR high = static_cast((cp >> 10) + UNICODE_SURROGATE_HIGH_START); - SQLWCHAR low = static_cast((cp & 0x3FF) + UNICODE_SURROGATE_LOW_START); - result.push_back(high); - result.push_back(low); - } - } - } else { - // Encode UTF-32 directly - for (wchar_t wc : str) { - uint32_t cp = static_cast(wc); - if (IsValidUnicodeScalar(cp)) { - result.push_back(static_cast(cp)); - } else { - result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); - } + inline std::vector WStringToSQLWCHAR(const std::wstring& str) { + std::vector result(str.size() + 1, 0); // +1 for null terminator + for (size_t i = 0; i < str.size(); ++i) { + result[i] = static_cast(str[i]); } + return result; } - result.push_back(0); // null terminator - return result; -} #endif #if defined(__APPLE__) || defined(__linux__) From 35ee0f6548d1b7017e0f98e64a67eead4bfff1a8 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Thu, 28 Aug 2025 22:43:55 +0530 Subject: [PATCH 10/12] linux-1 --- mssql_python/pybind/ddbc_bindings.h | 214 ++++++++++++---------------- 1 file changed, 92 insertions(+), 122 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 52b30d08e..12b69bfd4 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -32,138 +32,108 @@ using namespace pybind11::literals; #include #include -// #if defined(__APPLE__) || defined(__linux__) -// #include - -// // Unicode constants for surrogate ranges and max scalar value -// constexpr uint32_t UNICODE_SURROGATE_HIGH_START = 0xD800; -// constexpr uint32_t UNICODE_SURROGATE_HIGH_END = 0xDBFF; -// constexpr uint32_t UNICODE_SURROGATE_LOW_START = 0xDC00; -// constexpr uint32_t UNICODE_SURROGATE_LOW_END = 0xDFFF; -// constexpr uint32_t UNICODE_MAX_CODEPOINT = 0x10FFFF; -// constexpr uint32_t UNICODE_REPLACEMENT_CHAR = 0xFFFD; - -// // Validate whether a code point is a legal Unicode scalar value -// // (excludes surrogate halves and values beyond U+10FFFF) -// inline bool IsValidUnicodeScalar(uint32_t cp) { -// return cp <= UNICODE_MAX_CODEPOINT && -// !(cp >= UNICODE_SURROGATE_HIGH_START && cp <= UNICODE_SURROGATE_LOW_END); -// } - -// inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { -// if (!sqlwStr) return std::wstring(); - -// if (length == SQL_NTS) { -// size_t i = 0; -// while (sqlwStr[i] != 0) ++i; -// length = i; -// } -// std::wstring result; -// result.reserve(length); - -// if constexpr (sizeof(SQLWCHAR) == 2) { -// // Decode UTF-16 to UTF-32 (with surrogate pair handling) -// for (size_t i = 0; i < length; ++i) { -// uint16_t wc = static_cast(sqlwStr[i]); -// // Check if this is a high surrogate (U+D800–U+DBFF) -// if (wc >= UNICODE_SURROGATE_HIGH_START && wc <= UNICODE_SURROGATE_HIGH_END && i + 1 < length) { -// uint16_t low = static_cast(sqlwStr[i + 1]); -// // Check if the next code unit is a low surrogate (U+DC00–U+DFFF) -// if (low >= UNICODE_SURROGATE_LOW_START && low <= UNICODE_SURROGATE_LOW_END) { -// // Combine surrogate pair into a single code point -// uint32_t cp = (((wc - UNICODE_SURROGATE_HIGH_START) << 10) | (low - UNICODE_SURROGATE_LOW_START)) + 0x10000; -// result.push_back(static_cast(cp)); -// ++i; // Skip the low surrogate -// continue; -// } -// } -// // If valid scalar then append, else append replacement char (U+FFFD) -// if (IsValidUnicodeScalar(wc)) { -// result.push_back(static_cast(wc)); -// } else { -// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); -// } -// } -// } else { -// // SQLWCHAR is UTF-32, so just copy with validation -// for (size_t i = 0; i < length; ++i) { -// uint32_t cp = static_cast(sqlwStr[i]); -// if (IsValidUnicodeScalar(cp)) { -// result.push_back(static_cast(cp)); -// } else { -// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); -// } -// } -// } -// return result; -// } - -// inline std::vector WStringToSQLWCHAR(const std::wstring& str) { -// std::vector result; -// result.reserve(str.size() + 2); -// if constexpr (sizeof(SQLWCHAR) == 2) { -// // Encode UTF-32 to UTF-16 -// for (wchar_t wc : str) { -// uint32_t cp = static_cast(wc); -// if (!IsValidUnicodeScalar(cp)) { -// cp = UNICODE_REPLACEMENT_CHAR; -// } -// if (cp <= 0xFFFF) { -// // Fits in a single UTF-16 code unit -// result.push_back(static_cast(cp)); -// } else { -// // Encode as surrogate pair -// cp -= 0x10000; -// SQLWCHAR high = static_cast((cp >> 10) + UNICODE_SURROGATE_HIGH_START); -// SQLWCHAR low = static_cast((cp & 0x3FF) + UNICODE_SURROGATE_LOW_START); -// result.push_back(high); -// result.push_back(low); -// } -// } -// } else { -// // Encode UTF-32 directly -// for (wchar_t wc : str) { -// uint32_t cp = static_cast(wc); -// if (IsValidUnicodeScalar(cp)) { -// result.push_back(static_cast(cp)); -// } else { -// result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); -// } -// } -// } -// result.push_back(0); // null terminator -// return result; -// } -// #endif - #if defined(__APPLE__) || defined(__linux__) - // macOS-specific headers - #include +#include + +// Unicode constants for surrogate ranges and max scalar value +constexpr uint32_t UNICODE_SURROGATE_HIGH_START = 0xD800; +constexpr uint32_t UNICODE_SURROGATE_HIGH_END = 0xDBFF; +constexpr uint32_t UNICODE_SURROGATE_LOW_START = 0xDC00; +constexpr uint32_t UNICODE_SURROGATE_LOW_END = 0xDFFF; +constexpr uint32_t UNICODE_MAX_CODEPOINT = 0x10FFFF; +constexpr uint32_t UNICODE_REPLACEMENT_CHAR = 0xFFFD; + +// Validate whether a code point is a legal Unicode scalar value +// (excludes surrogate halves and values beyond U+10FFFF) +inline bool IsValidUnicodeScalar(uint32_t cp) { + return cp <= UNICODE_MAX_CODEPOINT && + !(cp >= UNICODE_SURROGATE_HIGH_START && cp <= UNICODE_SURROGATE_LOW_END); +} - inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { - if (!sqlwStr) return std::wstring(); +inline std::wstring SQLWCHARToWString(const SQLWCHAR* sqlwStr, size_t length = SQL_NTS) { + if (!sqlwStr) return std::wstring(); - if (length == SQL_NTS) { - size_t i = 0; - while (sqlwStr[i] != 0) ++i; - length = i; - } + if (length == SQL_NTS) { + size_t i = 0; + while (sqlwStr[i] != 0) ++i; + length = i; + } + std::wstring result; + result.reserve(length); - std::wstring result; - result.reserve(length); + if constexpr (sizeof(SQLWCHAR) == 2) { + // Decode UTF-16 to UTF-32 (with surrogate pair handling) + for (size_t i = 0; i < length; ++i) { + uint16_t wc = static_cast(sqlwStr[i]); + // Check if this is a high surrogate (U+D800–U+DBFF) + if (wc >= UNICODE_SURROGATE_HIGH_START && wc <= UNICODE_SURROGATE_HIGH_END && i + 1 < length) { + uint16_t low = static_cast(sqlwStr[i + 1]); + // Check if the next code unit is a low surrogate (U+DC00–U+DFFF) + if (low >= UNICODE_SURROGATE_LOW_START && low <= UNICODE_SURROGATE_LOW_END) { + // Combine surrogate pair into a single code point + uint32_t cp = (((wc - UNICODE_SURROGATE_HIGH_START) << 10) | (low - UNICODE_SURROGATE_LOW_START)) + 0x10000; + result.push_back(static_cast(cp)); + ++i; // Skip the low surrogate + continue; + } + } + // If valid scalar then append, else append replacement char (U+FFFD) + if (IsValidUnicodeScalar(wc)) { + result.push_back(static_cast(wc)); + } else { + result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); + } + } + } else { + // SQLWCHAR is UTF-32, so just copy with validation for (size_t i = 0; i < length; ++i) { - result.push_back(static_cast(sqlwStr[i])); + uint32_t cp = static_cast(sqlwStr[i]); + if (IsValidUnicodeScalar(cp)) { + result.push_back(static_cast(cp)); + } else { + result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); + } } - return result; } + return result; +} - inline std::vector WStringToSQLWCHAR(const std::wstring& str) { - std::vector result(str.size() + 1, 0); // +1 for null terminator - for (size_t i = 0; i < str.size(); ++i) { - result[i] = static_cast(str[i]); +inline std::vector WStringToSQLWCHAR(const std::wstring& str) { + std::vector result; + result.reserve(str.size() + 2); + if constexpr (sizeof(SQLWCHAR) == 2) { + // Encode UTF-32 to UTF-16 + for (wchar_t wc : str) { + uint32_t cp = static_cast(wc); + if (!IsValidUnicodeScalar(cp)) { + cp = UNICODE_REPLACEMENT_CHAR; + } + if (cp <= 0xFFFF) { + // Fits in a single UTF-16 code unit + result.push_back(static_cast(cp)); + } else { + // Encode as surrogate pair + cp -= 0x10000; + SQLWCHAR high = static_cast((cp >> 10) + UNICODE_SURROGATE_HIGH_START); + SQLWCHAR low = static_cast((cp & 0x3FF) + UNICODE_SURROGATE_LOW_START); + result.push_back(high); + result.push_back(low); + } + } + } else { + // Encode UTF-32 directly + for (wchar_t wc : str) { + uint32_t cp = static_cast(wc); + if (IsValidUnicodeScalar(cp)) { + result.push_back(static_cast(cp)); + } else { + result.push_back(static_cast(UNICODE_REPLACEMENT_CHAR)); + } } - return result; } + result.push_back(0); // null terminator + return result; +} #endif #if defined(__APPLE__) || defined(__linux__) From f4f3862bf32c207901b1f8d923b82d30fb07cd9a Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Thu, 28 Aug 2025 23:08:58 +0530 Subject: [PATCH 11/12] linux-2 --- mssql_python/pybind/ddbc_bindings.cpp | 40 +++++++++++++++++++-------- mssql_python/pybind/ddbc_bindings.h | 7 +++++ 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index b850fcc3c..6c6343403 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -258,24 +258,42 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, bufferLength = 0; } else { // Normal small-string case + // std::wstring* strParam = + // AllocateParamBuffer(paramBuffers, param.cast()); + // LOG("SQL_C_WCHAR Parameter[{}]: Length={}, isDAE={}", paramIndex, strParam->size(), paramInfo.isDAE); + // #if defined(__APPLE__) || defined(__linux__) + // std::vector* sqlwcharBuffer = + // AllocateParamBuffer>(paramBuffers); + // sqlwcharBuffer->resize(strParam->size() + 1, 0); + // for (size_t i = 0; i < strParam->size(); i++) { + // (*sqlwcharBuffer)[i] = static_cast((*strParam)[i]); + // } + // dataPtr = sqlwcharBuffer->data(); + // bufferLength = (strParam->size() + 1) * sizeof(SQLWCHAR); + // #else + // dataPtr = const_cast(static_cast(strParam->c_str())); + // bufferLength = (strParam->size() + 1) * sizeof(wchar_t); + // #endif + // strLenOrIndPtr = AllocateParamBuffer(paramBuffers); + // *strLenOrIndPtr = SQL_NTS; + // Normal small-string case std::wstring* strParam = AllocateParamBuffer(paramBuffers, param.cast()); + LOG("SQL_C_WCHAR Parameter[{}]: Length={}, isDAE={}", paramIndex, strParam->size(), paramInfo.isDAE); - #if defined(__APPLE__) || defined(__linux__) + + // Always transcode wstring -> UTF-16 (SQLWCHAR) using the helper std::vector* sqlwcharBuffer = - AllocateParamBuffer>(paramBuffers); - sqlwcharBuffer->resize(strParam->size() + 1, 0); - for (size_t i = 0; i < strParam->size(); i++) { - (*sqlwcharBuffer)[i] = static_cast((*strParam)[i]); - } + AllocateParamBuffer>(paramBuffers, WStringToSQLWCHAR(*strParam)); + dataPtr = sqlwcharBuffer->data(); - bufferLength = (strParam->size() + 1) * sizeof(SQLWCHAR); - #else - dataPtr = const_cast(static_cast(strParam->c_str())); - bufferLength = (strParam->size() + 1) * sizeof(wchar_t); - #endif + // IMPORTANT: bufferLength must reflect the *encoded* length, including null + bufferLength = sqlwcharBuffer->size() * sizeof(SQLWCHAR); + strLenOrIndPtr = AllocateParamBuffer(paramBuffers); + // For SQL_C_WCHAR with a null-terminated buffer, SQL_NTS is correct *strLenOrIndPtr = SQL_NTS; + } break; } diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 12b69bfd4..e1e624ee9 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -31,6 +31,13 @@ using namespace pybind11::literals; #include #include +#if defined(_WIN32) +inline std::vector WStringToSQLWCHAR(const std::wstring& str) { + std::vector result(str.begin(), str.end()); + result.push_back(0); + return result; +} +#endif #if defined(__APPLE__) || defined(__linux__) #include From e1f31560392e3a5f73494d482c751846cca38c32 Mon Sep 17 00:00:00 2001 From: gargsaumya Date: Thu, 28 Aug 2025 23:17:54 +0530 Subject: [PATCH 12/12] linux-3 --- mssql_python/pybind/ddbc_bindings.cpp | 26 -------------------------- mssql_python/pybind/ddbc_bindings.h | 1 + 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 6c6343403..8a88688ab 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -258,40 +258,14 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, bufferLength = 0; } else { // Normal small-string case - // std::wstring* strParam = - // AllocateParamBuffer(paramBuffers, param.cast()); - // LOG("SQL_C_WCHAR Parameter[{}]: Length={}, isDAE={}", paramIndex, strParam->size(), paramInfo.isDAE); - // #if defined(__APPLE__) || defined(__linux__) - // std::vector* sqlwcharBuffer = - // AllocateParamBuffer>(paramBuffers); - // sqlwcharBuffer->resize(strParam->size() + 1, 0); - // for (size_t i = 0; i < strParam->size(); i++) { - // (*sqlwcharBuffer)[i] = static_cast((*strParam)[i]); - // } - // dataPtr = sqlwcharBuffer->data(); - // bufferLength = (strParam->size() + 1) * sizeof(SQLWCHAR); - // #else - // dataPtr = const_cast(static_cast(strParam->c_str())); - // bufferLength = (strParam->size() + 1) * sizeof(wchar_t); - // #endif - // strLenOrIndPtr = AllocateParamBuffer(paramBuffers); - // *strLenOrIndPtr = SQL_NTS; - // Normal small-string case std::wstring* strParam = AllocateParamBuffer(paramBuffers, param.cast()); - LOG("SQL_C_WCHAR Parameter[{}]: Length={}, isDAE={}", paramIndex, strParam->size(), paramInfo.isDAE); - - // Always transcode wstring -> UTF-16 (SQLWCHAR) using the helper std::vector* sqlwcharBuffer = AllocateParamBuffer>(paramBuffers, WStringToSQLWCHAR(*strParam)); - dataPtr = sqlwcharBuffer->data(); - // IMPORTANT: bufferLength must reflect the *encoded* length, including null bufferLength = sqlwcharBuffer->size() * sizeof(SQLWCHAR); - strLenOrIndPtr = AllocateParamBuffer(paramBuffers); - // For SQL_C_WCHAR with a null-terminated buffer, SQL_NTS is correct *strLenOrIndPtr = SQL_NTS; } diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index e1e624ee9..2ae134596 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -31,6 +31,7 @@ using namespace pybind11::literals; #include #include + #if defined(_WIN32) inline std::vector WStringToSQLWCHAR(const std::wstring& str) { std::vector result(str.begin(), str.end());