@@ -542,7 +542,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
542542)
543543def read_csv (
544544 filepath_or_buffer : FilePathOrBuffer ,
545- sep = "," ,
545+ sep = lib . no_default ,
546546 delimiter = None ,
547547 # Column and Index Locations and Names
548548 header = "infer" ,
@@ -600,93 +600,14 @@ def read_csv(
600600 float_precision = None ,
601601 storage_options : StorageOptions = None ,
602602):
603- # gh-23761
604- #
605- # When a dialect is passed, it overrides any of the overlapping
606- # parameters passed in directly. We don't want to warn if the
607- # default parameters were passed in (since it probably means
608- # that the user didn't pass them in explicitly in the first place).
609- #
610- # "delimiter" is the annoying corner case because we alias it to
611- # "sep" before doing comparison to the dialect values later on.
612- # Thus, we need a flag to indicate that we need to "override"
613- # the comparison to dialect values by checking if default values
614- # for BOTH "delimiter" and "sep" were provided.
615- default_sep = ","
616-
617- if dialect is not None :
618- sep_override = delimiter is None and sep == default_sep
619- kwds = dict (sep_override = sep_override )
620- else :
621- kwds = dict ()
622-
623- # Alias sep -> delimiter.
624- if delimiter is None :
625- delimiter = sep
603+ kwds = locals ()
604+ del kwds ["filepath_or_buffer" ]
605+ del kwds ["sep" ]
626606
627- if delim_whitespace and delimiter != default_sep :
628- raise ValueError (
629- "Specified a delimiter with both sep and "
630- "delim_whitespace=True; you can only specify one."
631- )
632-
633- if engine is not None :
634- engine_specified = True
635- else :
636- engine = "c"
637- engine_specified = False
638-
639- kwds .update (
640- delimiter = delimiter ,
641- engine = engine ,
642- dialect = dialect ,
643- compression = compression ,
644- engine_specified = engine_specified ,
645- doublequote = doublequote ,
646- escapechar = escapechar ,
647- quotechar = quotechar ,
648- quoting = quoting ,
649- skipinitialspace = skipinitialspace ,
650- lineterminator = lineterminator ,
651- header = header ,
652- index_col = index_col ,
653- names = names ,
654- prefix = prefix ,
655- skiprows = skiprows ,
656- skipfooter = skipfooter ,
657- na_values = na_values ,
658- true_values = true_values ,
659- false_values = false_values ,
660- keep_default_na = keep_default_na ,
661- thousands = thousands ,
662- comment = comment ,
663- decimal = decimal ,
664- parse_dates = parse_dates ,
665- keep_date_col = keep_date_col ,
666- dayfirst = dayfirst ,
667- date_parser = date_parser ,
668- cache_dates = cache_dates ,
669- nrows = nrows ,
670- iterator = iterator ,
671- chunksize = chunksize ,
672- converters = converters ,
673- dtype = dtype ,
674- usecols = usecols ,
675- verbose = verbose ,
676- encoding = encoding ,
677- squeeze = squeeze ,
678- memory_map = memory_map ,
679- float_precision = float_precision ,
680- na_filter = na_filter ,
681- delim_whitespace = delim_whitespace ,
682- warn_bad_lines = warn_bad_lines ,
683- error_bad_lines = error_bad_lines ,
684- low_memory = low_memory ,
685- mangle_dupe_cols = mangle_dupe_cols ,
686- infer_datetime_format = infer_datetime_format ,
687- skip_blank_lines = skip_blank_lines ,
688- storage_options = storage_options ,
607+ kwds_defaults = _check_defaults_read (
608+ dialect , delimiter , delim_whitespace , engine , sep , defaults = {"delimiter" : "," }
689609 )
610+ kwds .update (kwds_defaults )
690611
691612 return _read (filepath_or_buffer , kwds )
692613
@@ -700,7 +621,7 @@ def read_csv(
700621)
701622def read_table (
702623 filepath_or_buffer : FilePathOrBuffer ,
703- sep = " \t " ,
624+ sep = lib . no_default ,
704625 delimiter = None ,
705626 # Column and Index Locations and Names
706627 header = "infer" ,
@@ -757,17 +678,16 @@ def read_table(
757678 memory_map = False ,
758679 float_precision = None ,
759680):
760- # TODO: validation duplicated in read_csv
761- if delim_whitespace and (delimiter is not None or sep != "\t " ):
762- raise ValueError (
763- "Specified a delimiter with both sep and "
764- "delim_whitespace=True; you can only specify one."
765- )
766- if delim_whitespace :
767- # In this case sep is not used so we set it to the read_csv
768- # default to avoid a ValueError
769- sep = ","
770- return read_csv (** locals ())
681+ kwds = locals ()
682+ del kwds ["filepath_or_buffer" ]
683+ del kwds ["sep" ]
684+
685+ kwds_defaults = _check_defaults_read (
686+ dialect , delimiter , delim_whitespace , engine , sep , defaults = {"delimiter" : "\t " }
687+ )
688+ kwds .update (kwds_defaults )
689+
690+ return _read (filepath_or_buffer , kwds )
771691
772692
773693def read_fwf (
@@ -3782,3 +3702,92 @@ def _make_reader(self, f):
37823702 self .skiprows ,
37833703 self .infer_nrows ,
37843704 )
3705+
3706+
def _check_defaults_read(
    dialect: Union[str, csv.Dialect, None],
    delimiter: Union[str, object],
    delim_whitespace: bool,
    engine: Union[str, None],
    sep: Union[str, object],
    defaults: Dict[str, Any],
) -> Dict[str, Any]:
    """Resolve the delimiter and engine keywords shared by read_csv and read_table.

    Parameters
    ----------
    dialect : str, csv.Dialect or None
        Only tested against None here. When it is not None, a
        ``sep_override`` flag is added to the result.
    delimiter : str, object or None
        Alias for ``sep``. None means "not given", in which case ``sep``
        is used in its place.
    delim_whitespace : bool
        If True, neither ``sep`` nor ``delimiter`` may be given explicitly.
    engine : str or None
        Parser engine requested by the caller. None selects ``"c"``.
    sep : str or object
        A delimiter provided by the caller, or the sentinel
        ``lib.no_default`` when the caller did not provide one.
    defaults : dict
        Per-function default values. Only the ``"delimiter"`` key is read.

    Returns
    -------
    kwds : dict
        Always contains ``"delimiter"`` and ``"engine_specified"``.
        Contains ``"engine"`` only when ``engine`` was None, and
        ``"sep_override"`` only when ``dialect`` was not None.

    Raises
    ------
    ValueError
        If ``delim_whitespace`` is True and a delimiter was given through
        ``sep`` or ``delimiter``.
    """
    delim_default = defaults["delimiter"]
    kwds: Dict[str, Any] = {}

    # gh-23761
    #
    # "delimiter" is aliased to "sep" below, before any later comparison
    # against the dialect's values. Record now whether BOTH were left at
    # their defaults, so that comparison can be overridden for a delimiter
    # the caller never explicitly asked for.
    if dialect is not None:
        kwds["sep_override"] = delimiter is None and (
            sep is lib.no_default or sep == delim_default
        )

    # Alias sep -> delimiter.
    if delimiter is None:
        delimiter = sep

    # Anything other than the sentinel means the caller chose a delimiter,
    # even if it equals the default value.
    if delim_whitespace and delimiter is not lib.no_default:
        raise ValueError(
            "Specified a delimiter with both sep and "
            "delim_whitespace=True; you can only specify one."
        )

    # Replace the sentinel with the calling function's default separator.
    kwds["delimiter"] = delim_default if delimiter is lib.no_default else delimiter

    # "engine" is only written when falling back to the default, so an
    # explicit choice already held by the caller is not overwritten.
    if engine is None:
        kwds["engine"] = "c"
    kwds["engine_specified"] = engine is not None

    return kwds
0 commit comments