From 0b3ce13787464fed14c6dd2c26e3d42c93967b80 Mon Sep 17 00:00:00 2001 From: Bhargav Date: Mon, 27 May 2024 17:27:48 +0530 Subject: [PATCH 1/4] Update readme file --- README.md | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 1799174..ecee8af 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,11 @@ ## Table of Contents 1. [What is Csv2sql ?](#what) 2. [Why Csv2sql ?](#why) -3. [Using from Command Line](#cmd) - 1. [Installation and usage](#cmdinstall) - 2. [Available command line arguments](#cmdargs) - 3. [Examples of usage](#cmdexamples) -4. [Using the browser based interface](#dashboard) +3. [Using the browser based interface](#dashboard) 1. [Installation and usage](#dashboardinstall) -5. [Running from source](#sourceinstall) -6. [Supported data types](#support) -7. [Handling custom date/datetime formats](#datetime) +4. [Running from source](#sourceinstall) +5. [Supported data types](#support) +6. [Handling custom date/datetime formats](#datetime) 7. [Known issues, caveats and troubleshooting](#issues) 8. [Future plans](#future) @@ -165,20 +161,13 @@ For datetime ## Known issues, caveats and troubleshooting: -* Sometimes the app might fail when run for the first time with some error like.. - -``` -%MyXQL.Error{connection_id: 9, message: "(1067) (ER_INVALID_DEFAULT) Invalid default value... -``` -In this case, please try running the app again. - * Timestamp columns will lose there fractional seconds data or time zone information when importing to mysql. -* When importing into a postgres database you must create the database manually before running the application, otherwise it will fail. +* When importing into a mysql/postgres database you must create the database manually before running the application, otherwise it will fail. * Csvsql uses the csv file names as table names, make sure that the csv file names are valid table names. 
-* Make sure your csvs have correct encoding and valid column names to avoid errors.(like a csv having duplicated column names will lead to errors when inserting in to the database). +* Make sure your csvs have correct encoding and valid column names to avoid errors. * If you face database connection timeout errors try reducing the worker and db_worker count in the configurations or change the database timeout, pool size and other related database configurations. From b62a14fa94a1ec21c324f452b9c2a7fe869635e6 Mon Sep 17 00:00:00 2001 From: Bhargav Date: Mon, 27 May 2024 18:34:01 +0530 Subject: [PATCH 2/4] calculate file stats only if required --- apps/csv2sql/lib/csv2sql/stages/analyze.ex | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/apps/csv2sql/lib/csv2sql/stages/analyze.ex b/apps/csv2sql/lib/csv2sql/stages/analyze.ex index 8591e98..2bdfff3 100644 --- a/apps/csv2sql/lib/csv2sql/stages/analyze.ex +++ b/apps/csv2sql/lib/csv2sql/stages/analyze.ex @@ -146,18 +146,22 @@ defmodule Csv2sql.Stages.Analyze do end defp get_file_stats(%Csv2sql.File{path: path} = file) do - %{size: size} = File.stat!(path) - {row_count, column_types} = TypeDeducer.get_count_and_types(path) - - db_row_count = Database.get_db_row_count_if_exists(path) - - %{ + if Helpers.get_config(:insert_schema) or Helpers.get_config(:insert_data) do + %{size: size} = File.stat!(path) + {row_count, column_types} = TypeDeducer.get_count_and_types(path) + + db_row_count = Database.get_db_row_count_if_exists(path) + + %{ + file + | size: Sizeable.filesize(size), + row_count: row_count, + column_types: column_types, + existing_db_row_count: db_row_count + } + else file - | size: Sizeable.filesize(size), - row_count: row_count, - column_types: column_types, - existing_db_row_count: db_row_count - } + end end defp is_csv?(filepath) do From 28d982b98f1d01450648ebba6234b0f7fd495bd3 Mon Sep 17 00:00:00 2001 From: Bhargav Date: Mon, 27 May 2024 21:19:33 +0530 Subject: [PATCH 
3/4] update readme and make varchar size as one for empty string --- README.md | 2 +- apps/csv2sql/lib/csv2sql/database/mysql.ex | 2 +- apps/csv2sql/lib/csv2sql/database/postgres.ex | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ecee8af..cf5d225 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Csv2Sql can automatically... * While you can have maximum utilization of your cpu to get excellent performance, csv2sql is fully **customizable**, also comes with [lots of options](#cmdargs) which can be changed to fine tune the application based on requirement and to lower down resource usage and database load. -* Csv2Sql supports **partial operations**, so if you only want to generate a schema file from the csvs without touching the database or you want to only insert data from the csvs into already created tables without creating the tables again or just validate already imported data, Csv2Sql has got you covered ! +* Csv2Sql supports **partial operations**: you can create only the tables, insert data from the csvs into already created tables without creating the tables again, or create the tables and insert the data from the csvs in a single run. Csv2Sql has got you covered! 
## Use csv2sql from your browser diff --git a/apps/csv2sql/lib/csv2sql/database/mysql.ex b/apps/csv2sql/lib/csv2sql/database/mysql.ex index 1f7bb12..c808b28 100644 --- a/apps/csv2sql/lib/csv2sql/database/mysql.ex +++ b/apps/csv2sql/lib/csv2sql/database/mysql.ex @@ -11,7 +11,7 @@ defmodule Csv2sql.Database.MySql do @spec type_mapping(type_map()) :: String.t() def type_mapping(type_map) do cond do - type_map[:is_empty] -> "VARCHAR(#{varchar_limit()})" + type_map[:is_empty] -> "VARCHAR(1)" type_map[:is_date] -> "DATE" type_map[:is_datetime] -> "DATETIME" type_map[:is_boolean] -> "BIT" diff --git a/apps/csv2sql/lib/csv2sql/database/postgres.ex b/apps/csv2sql/lib/csv2sql/database/postgres.ex index e173a9e..8802336 100644 --- a/apps/csv2sql/lib/csv2sql/database/postgres.ex +++ b/apps/csv2sql/lib/csv2sql/database/postgres.ex @@ -11,7 +11,7 @@ defmodule Csv2sql.Database.Postgres do @spec type_mapping(type_map()) :: String.t() def type_mapping(type_map) do cond do - type_map[:is_empty] -> "VARCHAR(#{varchar_limit()})" + type_map[:is_empty] -> "VARCHAR(1)" type_map[:is_date] -> "DATE" type_map[:is_datetime] -> "TIMESTAMP" type_map[:is_boolean] -> "BOOLEAN" From f4ba2e11784fad6b5d88022fad42ef7ab19df971 Mon Sep 17 00:00:00 2001 From: Bhargav Date: Mon, 27 May 2024 21:54:35 +0530 Subject: [PATCH 4/4] consider max_data_length of a column for deciding string datatype --- apps/csv2sql/lib/csv2sql/database/mysql.ex | 4 ++-- apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex | 3 ++- apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex | 3 ++- apps/csv2sql/test/type_deducer/type_deducer_test.exs | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/csv2sql/lib/csv2sql/database/mysql.ex b/apps/csv2sql/lib/csv2sql/database/mysql.ex index c808b28..9415b7f 100644 --- a/apps/csv2sql/lib/csv2sql/database/mysql.ex +++ b/apps/csv2sql/lib/csv2sql/database/mysql.ex @@ -17,7 +17,8 @@ defmodule Csv2sql.Database.MySql do type_map[:is_boolean] -> "BIT" type_map[:is_integer] 
-> "INT" type_map[:is_float] -> "DOUBLE" - type_map[:is_text] -> "LONGTEXT" + type_map[:is_text] and type_map[:max_data_length] > 65_535 -> "LONGTEXT" + type_map[:is_text] -> "TEXT" true -> "VARCHAR(#{varchar_limit()})" end end @@ -63,5 +64,4 @@ defmodule Csv2sql.Database.MySql do do: datetime |> DateTime.to_string() |> String.trim_trailing("Z") defp to_datetime_string(val), do: val - end diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex index a42ea8b..23ecd71 100644 --- a/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex +++ b/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex @@ -22,7 +22,8 @@ defmodule Csv2sql.TypeDeducer.TypeChecker do is_boolean: existing_type_map.is_boolean && is_boolean?(item), is_integer: existing_type_map.is_integer && is_integer?(item), is_float: existing_type_map.is_float && is_float?(item), - is_text: existing_type_map.is_text || is_text?(item_length) + is_text: existing_type_map.is_text || is_text?(item_length), + max_data_length: max(existing_type_map.max_data_length, item_length) } end end diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex index 714c4e4..193ba25 100644 --- a/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex +++ b/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex @@ -96,7 +96,8 @@ defmodule Csv2sql.TypeDeducer do is_boolean: true, is_integer: true, is_float: true, - is_text: false + is_text: false, + max_data_length: 0 } |> List.duplicate(Enum.count(headers)) end diff --git a/apps/csv2sql/test/type_deducer/type_deducer_test.exs b/apps/csv2sql/test/type_deducer/type_deducer_test.exs index 627b310..23b3696 100644 --- a/apps/csv2sql/test/type_deducer/type_deducer_test.exs +++ b/apps/csv2sql/test/type_deducer/type_deducer_test.exs @@ -22,7 +22,7 @@ defmodule Csv2sql.TypeDeducerTest do col_type_defs = [ {"id", "INT"}, - {"name", "VARCHAR(10)"}, + {"name", 
"VARCHAR(120)"}, {"description", if(db_type == :mysql, do: "LONGTEXT", else: "TEXT")}, {"salary", if(db_type == :mysql, do: "DOUBLE", else: "NUMERIC(1000, 100)")}, {"permanent", if(db_type == :mysql, do: "BIT", else: "BOOLEAN")},