kreeti · sbbhargav · May 28, 2024 · May 27, 2024 · May 27, 2024 · May 27, 2024
diff --git a/README.md b/README.md
@@ -8,15 +8,11 @@
 ## Table of Contents
 1. [What is Csv2sql ?](#what)
 2. [Why Csv2sql ?](#why)
-3. [Using from Command Line](#cmd)
-	1. [Installation and usage](#cmdinstall)
-	2. [Available command line arguments](#cmdargs)
-	3. [Examples of usage](#cmdexamples)
-4. [Using the browser based interface](#dashboard)
+3. [Using the browser based interface](#dashboard)
 	1. [Installation and usage](#dashboardinstall)
-5. [Running from source](#sourceinstall)
-6. [Supported data types](#support)
-7. [Handling custom date/datetime formats](#datetime)
+4. [Running from source](#sourceinstall)
+5. [Supported data types](#support)
+6. [Handling custom date/datetime formats](#datetime)
 7. [Known issues, caveats and troubleshooting](#issues)
 8. [Future plans](#future)
 
@@ -46,7 +42,7 @@ Csv2Sql can automatically...
 
 * While you can have maximum utilization of your cpu to get excellent performance, csv2sql is fully **customizable**, also comes with [lots of options](#cmdargs) which can be changed to fine tune the application based on requirement and to lower down resource usage and database load.
 
-* Csv2Sql supports **partial operations**, so if you only want to generate a schema file from the csvs without touching the database or you want to only insert data from the csvs into already created tables without creating the tables again or just validate already imported data, Csv2Sql has got you covered !
+* Csv2Sql supports **partial operations**: whether you want to only create the tables, only insert data from the csvs into already created tables (without creating the tables again), or both create the tables and insert the data from the csvs, Csv2Sql has got you covered!
 
 <a name="dashboard"></a>
 ## Use csv2sql from your browser
@@ -165,20 +161,13 @@ For datetime
 <a name="issues"></a>
 ## Known issues, caveats and troubleshooting:
 
-* Sometimes the app might fail when run for the first time with some error like..
-
-```
-%MyXQL.Error{connection_id: 9, message: "(1067) (ER_INVALID_DEFAULT) Invalid default value...
-```
-In this case, please try running the app again.
-
 * Timestamp columns will lose there fractional seconds data or time zone information when importing to mysql.
 
-* When importing into a postgres database you must create the database manually before running the application, otherwise it will fail.
+* When importing into a mysql/postgres database, you must create the database manually before running the application; otherwise it will fail.
 
 * Csvsql uses the csv file names as table names, make sure that the csv file names are valid table names.
 
-* Make sure your csvs have correct encoding and valid column names to avoid errors.(like a csv having duplicated column names will lead to errors when inserting in to the database).
+* Make sure your csvs have correct encoding and valid column names to avoid errors.
 
 * If you face database connection timeout errors try reducing the worker and db_worker count in the configurations or change the database timeout, pool size and other related database configurations.
 

diff --git a/apps/csv2sql/lib/csv2sql/database/mysql.ex b/apps/csv2sql/lib/csv2sql/database/mysql.ex
@@ -11,13 +11,14 @@ defmodule Csv2sql.Database.MySql do
   @spec type_mapping(type_map()) :: String.t()
   def type_mapping(type_map) do
     cond do
-      type_map[:is_empty] -> "VARCHAR(#{varchar_limit()})"
+      type_map[:is_empty] -> "VARCHAR(1)"
       type_map[:is_date] -> "DATE"
       type_map[:is_datetime] -> "DATETIME"
       type_map[:is_boolean] -> "BIT"
       type_map[:is_integer] -> "INT"
       type_map[:is_float] -> "DOUBLE"
-      type_map[:is_text] -> "LONGTEXT"
+      type_map[:is_text] and type_map[:max_data_length] > 65_535 -> "LONGTEXT"
+      type_map[:is_text] -> "TEXT"
       true -> "VARCHAR(#{varchar_limit()})"
     end
   end
@@ -63,5 +64,4 @@ defmodule Csv2sql.Database.MySql do
     do: datetime |> DateTime.to_string() |> String.trim_trailing("Z")
 
   defp to_datetime_string(val), do: val
-
 end
diff --git a/apps/csv2sql/lib/csv2sql/database/postgres.ex b/apps/csv2sql/lib/csv2sql/database/postgres.ex
@@ -11,7 +11,7 @@ defmodule Csv2sql.Database.Postgres do
   @spec type_mapping(type_map()) :: String.t()
   def type_mapping(type_map) do
     cond do
-      type_map[:is_empty] -> "VARCHAR(#{varchar_limit()})"
+      type_map[:is_empty] -> "VARCHAR(1)"
       type_map[:is_date] -> "DATE"
       type_map[:is_datetime] -> "TIMESTAMP"
       type_map[:is_boolean] -> "BOOLEAN"

diff --git a/apps/csv2sql/lib/csv2sql/stages/analyze.ex b/apps/csv2sql/lib/csv2sql/stages/analyze.ex
@@ -146,18 +146,22 @@ defmodule Csv2sql.Stages.Analyze do
   end
 
   defp get_file_stats(%Csv2sql.File{path: path} = file) do
-    %{size: size} = File.stat!(path)
-    {row_count, column_types} = TypeDeducer.get_count_and_types(path)
-
-    db_row_count = Database.get_db_row_count_if_exists(path)
-
-    %{
+    if Helpers.get_config(:insert_schema) or Helpers.get_config(:insert_data) do
+      %{size: size} = File.stat!(path)
+      {row_count, column_types} = TypeDeducer.get_count_and_types(path)
+
+      db_row_count = Database.get_db_row_count_if_exists(path)
+
+      %{
+        file
+        | size: Sizeable.filesize(size),
+          row_count: row_count,
+          column_types: column_types,
+          existing_db_row_count: db_row_count
+      }
+    else
       file
-      | size: Sizeable.filesize(size),
-        row_count: row_count,
-        column_types: column_types,
-        existing_db_row_count: db_row_count
-    }
+    end
   end
 
   defp is_csv?(filepath) do

diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex
@@ -22,7 +22,8 @@ defmodule Csv2sql.TypeDeducer.TypeChecker do
         is_boolean: existing_type_map.is_boolean && is_boolean?(item),
         is_integer: existing_type_map.is_integer && is_integer?(item),
         is_float: existing_type_map.is_float && is_float?(item),
-        is_text: existing_type_map.is_text || is_text?(item_length)
+        is_text: existing_type_map.is_text || is_text?(item_length),
+        max_data_length: max(existing_type_map.max_data_length, item_length)
       }
     end
   end

diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex
@@ -96,7 +96,8 @@ defmodule Csv2sql.TypeDeducer do
       is_boolean: true,
       is_integer: true,
       is_float: true,
-      is_text: false
+      is_text: false,
+      max_data_length: 0
     }
     |> List.duplicate(Enum.count(headers))
   end

diff --git a/apps/csv2sql/test/type_deducer/type_deducer_test.exs b/apps/csv2sql/test/type_deducer/type_deducer_test.exs
@@ -22,7 +22,7 @@ defmodule Csv2sql.TypeDeducerTest do
 
       col_type_defs = [
         {"id", "INT"},
-        {"name", "VARCHAR(10)"},
+        {"name", "VARCHAR(120)"},
         {"description", if(db_type == :mysql, do: "LONGTEXT", else: "TEXT")},
         {"salary", if(db_type == :mysql, do: "DOUBLE", else: "NUMERIC(1000, 100)")},
         {"permanent", if(db_type == :mysql, do: "BIT", else: "BOOLEAN")},