From 8e34fa731f4c163cfb2da1745b46db19107f72c7 Mon Sep 17 00:00:00 2001 From: "github@rplusplus.com" Date: Fri, 18 Sep 2020 18:39:41 +0200 Subject: [PATCH 1/4] Basic support of unicode paths for windows --- src/readstat_io_unistd.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/readstat_io_unistd.c b/src/readstat_io_unistd.c index dd19dbc6..249a19c5 100644 --- a/src/readstat_io_unistd.c +++ b/src/readstat_io_unistd.c @@ -1,9 +1,13 @@ #include #include +#include #if !defined(_MSC_VER) # include #endif +#if defined _WIN32 +# include +#endif #include "readstat.h" #include "readstat_io_unistd.h" @@ -24,8 +28,34 @@ #endif +int open_with_unicode(const char *path, int options) /* opens the UTF-8 encoded path; returns the file descriptor, or -1 on failure */ +{ +#if defined _WIN32 + const int buffer_size = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); /* CP_UTF8 requires dwFlags of 0 or MB_ERR_INVALID_CHARS; result is the size in wchar_t units, terminator included */ + + if(buffer_size <= 0) + return -1; + + wchar_t* wpath = malloc((size_t)buffer_size * sizeof(wchar_t)); /* buffer_size counts wide characters, not bytes */ + const int res = wpath ? MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, buffer_size) : 0; /* res stays 0 if malloc failed, so the check below also covers allocation failure */ + + if(res <= 0) + { + free(wpath); + return -1; + } + + int fd = _wopen(wpath, options); + + free(wpath); + return fd; +#else + return open(path, options); +#endif +} + int unistd_open_handler(const char *path, void *io_ctx) { - int fd = open(path, UNISTD_OPEN_OPTIONS); + int fd = open_with_unicode(path, UNISTD_OPEN_OPTIONS); ((unistd_io_ctx_t*) io_ctx)->fd = fd; return fd; } From 3c26f258bfd573fafcbef98ba68e63487a5ddb93 Mon Sep 17 00:00:00 2001 From: "github@rplusplus.com" Date: Mon, 21 Sep 2020 11:44:05 +0200 Subject: [PATCH 2/4] More advanced support of unicode paths for windows --- Makefile.am | 1 + VS17/ReadStat_App/ReadStat_App.vcxproj | 1 + .../ReadStat_App/ReadStat_App.vcxproj.filters | 3 + src/bin/extract_metadata.c | 3 +- src/bin/readstat.c | 3 +- src/bin/util/main.h | 55 +++++++++++++++++++ src/readstat_io_unistd.c | 18 ++++-- 7 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 src/bin/util/main.h diff --git a/Makefile.am 
b/Makefile.am index e04fa400..9563e12a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -128,6 +128,7 @@ noinst_HEADERS = \ src/bin/write/mod_xlsx.h \ src/bin/write/module.h \ src/bin/write/module_util.h \ + src/bin/util/main.h \ src/bin/util/file_format.h \ src/bin/util/quote_and_escape.h \ src/bin/util/readstat_dta_days.h \ diff --git a/VS17/ReadStat_App/ReadStat_App.vcxproj b/VS17/ReadStat_App/ReadStat_App.vcxproj index 775a1606..b89f422f 100644 --- a/VS17/ReadStat_App/ReadStat_App.vcxproj +++ b/VS17/ReadStat_App/ReadStat_App.vcxproj @@ -57,6 +57,7 @@ + diff --git a/VS17/ReadStat_App/ReadStat_App.vcxproj.filters b/VS17/ReadStat_App/ReadStat_App.vcxproj.filters index 15473541..155fe62f 100644 --- a/VS17/ReadStat_App/ReadStat_App.vcxproj.filters +++ b/VS17/ReadStat_App/ReadStat_App.vcxproj.filters @@ -131,6 +131,9 @@ Header Files + + Header Files + Header Files diff --git a/src/bin/extract_metadata.c b/src/bin/extract_metadata.c index 71c2ac06..a1bf4fae 100644 --- a/src/bin/extract_metadata.c +++ b/src/bin/extract_metadata.c @@ -10,6 +10,7 @@ #include "util/readstat_dta_days.h" #include "util/quote_and_escape.h" #include "util/file_format.h" +#include "util/main.h" #include "extract_metadata.h" #include "write/json/write_missing_values.h" #include "write/json/write_value_labels.h" @@ -304,7 +305,7 @@ cleanup: readstat_parser_free(parser); return ret; } -int main(int argc, char *argv[]) { +int portable_main(int argc, char *argv[]) { if (argc != 3) { printf("Usage: %s \n", argv[0]); return 1; diff --git a/src/bin/readstat.c b/src/bin/readstat.c index b9fece0f..41e1321c 100644 --- a/src/bin/readstat.c +++ b/src/bin/readstat.c @@ -46,6 +46,7 @@ int gettimeofday(struct timeval* t, void* timezone) #endif #include "util/file_format.h" +#include "util/main.h" #if defined _MSC_VER #define unlink _unlink @@ -482,7 +483,7 @@ static int dump_file(const char *input_filename) { return 0; } -int main(int argc, char** argv) { +int portable_main(int argc, char** argv) { char 
*input_filename = NULL; char *catalog_filename = NULL; char *output_filename = NULL; diff --git a/src/bin/util/main.h b/src/bin/util/main.h new file mode 100644 index 00000000..3a794d1e --- /dev/null +++ b/src/bin/util/main.h @@ -0,0 +1,55 @@ +#include +#include + + +// True main for all platforms +int portable_main(int argc, char *argv[]); + + +#if defined _WIN32 + // Standard way of decoding wide-string command-line arguments one Windows. + // Call portable_main with UTF-8 strings. + int wmain(int argc, wchar_t *argv[]) { + int ret = 1; + char** utf8_argv = calloc(argc, sizeof(char*)); + + for (int i=0; i Date: Mon, 21 Sep 2020 15:50:55 +0200 Subject: [PATCH 3/4] Try of proper unicode encoding/decoding on windows --- src/bin/util/main.h | 8 ++++---- src/readstat_io_unistd.c | 18 ++++++------------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/bin/util/main.h b/src/bin/util/main.h index 3a794d1e..32c3f9c1 100644 --- a/src/bin/util/main.h +++ b/src/bin/util/main.h @@ -14,17 +14,17 @@ int portable_main(int argc, char *argv[]); char** utf8_argv = calloc(argc, sizeof(char*)); for (int i=0; i Date: Mon, 21 Sep 2020 16:39:21 +0200 Subject: [PATCH 4/4] Fix MinGW build/issues --- src/bin/util/main.h | 31 +++++++++++++++++++++++-------- src/readstat_io_unistd.c | 3 ++- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/bin/util/main.h b/src/bin/util/main.h index 32c3f9c1..04143718 100644 --- a/src/bin/util/main.h +++ b/src/bin/util/main.h @@ -7,14 +7,27 @@ int portable_main(int argc, char *argv[]); #if defined _WIN32 - // Standard way of decoding wide-string command-line arguments one Windows. +#include + // Standard way of decoding wide-string command-line arguments on Windows. // Call portable_main with UTF-8 strings. 
- int wmain(int argc, wchar_t *argv[]) { + int main(int unused_argc, char *unused_argv[]) { + int argc; int ret = 1; - char** utf8_argv = calloc(argc, sizeof(char*)); + wchar_t** utf16_argv = NULL; + char** utf8_argv = NULL; + + // Manual standard argument decoding needed since wmain is not supported by MinGW by default. + utf16_argv = CommandLineToArgvW(GetCommandLineW(), &argc); + + if(utf16_argv == NULL) { + fprintf(stderr, "Fatal error: command line argument extraction failure\n"); + goto cleanup; + } + + utf8_argv = calloc(argc, sizeof(char*)); for (int i=0; i