diff --git a/docs/requirements.txt b/docs/requirements.txt index 722fcd3d..570bdfc7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -14,11 +14,12 @@ ipython ipywidgets sphinx-gallery sphinx-plotly-directive -sphinxcontrib-mermaid +sphinxcontrib-mermaid matplotlib h5py pyyaml importlib-resources rapidfuzz lark>=1.1.5 -pint \ No newline at end of file +pint +chardet diff --git a/pyproject.toml b/pyproject.toml index f2fd2696..8a8dfe9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "An ellipsometry analysis tool for reproducible and comprehensible dynamic = ["version"] authors = [ { name = "Marius Müller", email = "marius.mueller@physik.uni-giessen.de" }, - { name = "Florian Dobener", email = "pyelli@schroedingerscat.org" } + { name = "Florian Dobener", email = "pyelli@schroedingerscat.org" }, ] requires-python = ">=3.8" license = { file = "LICENSE.txt" } @@ -19,7 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12" + "Programming Language :: Python :: 3.12", ] dependencies = [ "scipy", @@ -32,6 +32,7 @@ dependencies = [ "rapidfuzz", "lark>=1.1.5", "pint", + "chardet", ] [project.optional-dependencies] @@ -75,16 +76,16 @@ indent-width = 4 [tool.ruff.lint] select = [ - "E", # pycodestyle - "W", # pycodestyle - "PL", # pylint + "E", # pycodestyle + "W", # pycodestyle + "PL", # pylint "NPY201", # numpy ] ignore = [ - "E501", # Line too long ({width} > {limit} characters) - "E701", # Multiple statements on one line (colon) - "E731", # Do not assign a lambda expression, use a def - "E402", # Module level import not at top of file + "E501", # Line too long ({width} > {limit} characters) + "E701", # Multiple statements on one line (colon) + "E731", # Do not assign a lambda expression, use a def + "E402", # Module level import not at top of file "PLR0911", # Too many return statements "PLR0912", # Too many branches "PLR0913", # Too many arguments in function definition diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt index ad8e5c6b..c3a2283c 100644 --- a/requirements/dev-requirements.txt +++ b/requirements/dev-requirements.txt @@ -28,6 +28,12 @@ cfgv==3.4.0 \ --hash=sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9 \ --hash=sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560 # via pre-commit +chardet==5.2.0 \ + --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \ + --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970 + # via + # -r requirements/fitting-requirements.txt + # pyelli (pyproject.toml) comm==0.2.2 \ --hash=sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e \ --hash=sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3 @@ -1278,7 +1284,6 @@ typing-extensions==4.12.2 \ # -r requirements/fitting-requirements.txt # flexcache # flexparser - # ipython # pint tzdata==2024.1 \ --hash=sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd \ diff --git a/requirements/fitting-requirements.txt b/requirements/fitting-requirements.txt index 7e80f51d..8df2c2f7 100644 --- a/requirements/fitting-requirements.txt +++ b/requirements/fitting-requirements.txt @@ -14,6 +14,12 @@ asttokens==2.4.1 \ --hash=sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24 \ --hash=sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0 # via stack-data +chardet==5.2.0 \ + --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \ + --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970 + # via + # -r requirements/requirements.txt + # pyelli (pyproject.toml) comm==0.2.2 \ --hash=sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e \ --hash=sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3 @@ -764,7 +770,6 @@ typing-extensions==4.12.2 \ # -r requirements/requirements.txt # flexcache # flexparser - # ipython # pint tzdata==2024.1 \ --hash=sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd \ diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1f8fdeb9..c9653545 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -4,6 +4,10 @@ appdirs==1.4.4 \ --hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41 \ --hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 # via pint +chardet==5.2.0 \ + --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \ + --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970 + # via pyelli (pyproject.toml) flexcache==0.3 \ --hash=sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656 \ --hash=sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32 diff --git a/src/elli/importer/__init__.py b/src/elli/importer/__init__.py index e69de29b..c68271af 100644 --- a/src/elli/importer/__init__.py +++ b/src/elli/importer/__init__.py @@ -0,0 +1,14 @@ +import chardet + + +def detect_encoding(fname: str) -> str: + r"""Detects the encoding of file fname. + Args: + fname (str): Filename + Returns: + str: Encoding identifier string. + """ + with open(fname, "rb") as f: + raw_data = f.read() + result = chardet.detect(raw_data) + return result["encoding"] diff --git a/src/elli/importer/spectraray.py b/src/elli/importer/spectraray.py index ac1c98b2..210f6ff7 100644 --- a/src/elli/importer/spectraray.py +++ b/src/elli/importer/spectraray.py @@ -9,6 +9,7 @@ from packaging.version import Version, parse from ..utils import calc_rho +from . import detect_encoding def read_spectraray_psi_delta( @@ -25,10 +26,13 @@ def read_spectraray_psi_delta( pd.DataFrame: DataFrame containing the psi/delta data in the format to be further processes inside pyElli. """ + # detect encoding + encoding = detect_encoding(fname) # read data and drop empty column psi_delta_df = pd.read_csv( fname, + encoding=encoding, index_col=0, header=None, sep=sep, @@ -82,9 +86,11 @@ def read_spectraray_mmatrix( pd.DataFrame: DataFrame containing the psi/delta data in the format to be further processes inside pyElli. """ - mueller_matrix = pd.read_csv(fname, sep=sep, decimal=decimal, index_col=0).iloc[ - :, -17:-1 - ] + encoding = detect_encoding(fname) + + mueller_matrix = pd.read_csv( + fname, encoding=encoding, sep=sep, decimal=decimal, index_col=0 + ).iloc[:, -17:-1] mueller_matrix.index.name = "Wavelength" mueller_matrix.columns = [ "M11", diff --git a/src/elli/importer/woollam.py b/src/elli/importer/woollam.py index c96da134..425df52e 100644 --- a/src/elli/importer/woollam.py +++ b/src/elli/importer/woollam.py @@ -12,6 +12,7 @@ from ..units import ureg from ..utils import calc_rho +from . import detect_encoding logger = logging.getLogger(__name__) @@ -167,7 +168,9 @@ def read_woollam_psi_delta(fname: str) -> pd.DataFrame: the format to be further processes inside pyElli. """ - with open(fname, encoding="utf-8") as fobj: + encoding = detect_encoding(fname) + + with open(fname, encoding=encoding) as fobj: line_number = fobj.tell() metadata = [] file_format = ""