From a25ac002617c9756cd0f4ff8f0d0692134006592 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 15:10:47 -0400 Subject: [PATCH 01/23] Extract method for _read_inner, reducing complexity and indentation by 1. --- Lib/configparser.py | 202 +++++++++++++++++++++++--------------------- 1 file changed, 104 insertions(+), 98 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 3040e1fbe5b9c1..2e47d1b4f82302 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -975,6 +975,13 @@ def _read(self, fp, fpname): in an otherwise empty line or may be entered in lines holding values or section names. Please note that comments get stripped off when reading configuration files. """ + + try: + self._read_inner(fp, fpname) + finally: + self._join_multiline_values() + + def _read_inner(self, fp, fpname): elements_added = set() cursect = None # None, or a dictionary sectname = None @@ -983,59 +990,83 @@ def _read(self, fp, fpname): indent_level = 0 e = None # None, or an exception - try: - for lineno, line in enumerate(fp, start=1): - comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} - while comment_start == sys.maxsize and inline_prefixes: - next_prefixes = {} - for prefix, index in inline_prefixes.items(): - index = line.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and line[index-1].isspace()): - comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments - for prefix in self._comment_prefixes: - if line.strip().startswith(prefix): - comment_start = 0 - break - if comment_start == sys.maxsize: - comment_start = None - value = line[:comment_start].strip() - if not value: - if self._empty_lines_in_values: - # add empty line to the value, but only if there was no - # comment on the line - if (comment_start is None and - cursect is not None and - optname and - cursect[optname] is not None): - cursect[optname].append('') # newlines added at join - else: - # empty line marks end of value - indent_level = sys.maxsize - continue - # continuation line? - first_nonspace = self.NONSPACECRE.search(line) - cur_indent_level = first_nonspace.start() if first_nonspace else 0 - if (cursect is not None and optname and - cur_indent_level > indent_level): - if cursect[optname] is None: - raise MultilineContinuationError(fpname, lineno, line) - cursect[optname].append(value) - # a section header or option header? + for lineno, line in enumerate(fp, start=1): + comment_start = sys.maxsize + # strip inline comments + inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} + while comment_start == sys.maxsize and inline_prefixes: + next_prefixes = {} + for prefix, index in inline_prefixes.items(): + index = line.find(prefix, index+1) + if index == -1: + continue + next_prefixes[prefix] = index + if index == 0 or (index > 0 and line[index-1].isspace()): + comment_start = min(comment_start, index) + inline_prefixes = next_prefixes + # strip full line comments + for prefix in self._comment_prefixes: + if line.strip().startswith(prefix): + comment_start = 0 + break + if comment_start == sys.maxsize: + comment_start = None + value = line[:comment_start].strip() + if not value: + if self._empty_lines_in_values: + # add empty line to the value, but only if there was no + # comment on the line + if (comment_start is None and + cursect is not None and + optname and + cursect[optname] is not None): + cursect[optname].append('') # newlines added at join else: - if self._allow_unnamed_section and cursect is None: - sectname = UNNAMED_SECTION + # empty line marks end of value + indent_level = sys.maxsize + continue + # continuation line? + first_nonspace = self.NONSPACECRE.search(line) + cur_indent_level = first_nonspace.start() if first_nonspace else 0 + if (cursect is not None and optname and + cur_indent_level > indent_level): + if cursect[optname] is None: + raise MultilineContinuationError(fpname, lineno, line) + cursect[optname].append(value) + # a section header or option header? + else: + if self._allow_unnamed_section and cursect is None: + sectname = UNNAMED_SECTION + cursect = self._dict() + self._sections[sectname] = cursect + self._proxies[sectname] = SectionProxy(self, sectname) + elements_added.add(sectname) + + indent_level = cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(value) + if mo: + sectname = mo.group('header') + if sectname in self._sections: + if self._strict and sectname in elements_added: + raise DuplicateSectionError(sectname, fpname, + lineno) + cursect = self._sections[sectname] + elements_added.add(sectname) + elif sectname == self.default_section: + cursect = self._defaults + else: cursect = self._dict() self._sections[sectname] = cursect self._proxies[sectname] = SectionProxy(self, sectname) elements_added.add(sectname) - + # So sections can't start with a continuation line + optname = None + # no section header? + elif cursect is None: + raise MissingSectionHeaderError(fpname, lineno, line) + # an option line? + else: indent_level = cur_indent_level # is it a section header? mo = self.SECTCRE.match(value) @@ -1056,67 +1087,42 @@ def _read(self, fp, fpname): elements_added.add(sectname) # So sections can't start with a continuation line optname = None - # no section header? + # no section header in the file? elif cursect is None: raise MissingSectionHeaderError(fpname, lineno, line) # an option line? else: - indent_level = cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(value) + mo = self._optcre.match(value) if mo: - sectname = mo.group('header') - if sectname in self._sections: - if self._strict and sectname in elements_added: - raise DuplicateSectionError(sectname, fpname, - lineno) - cursect = self._sections[sectname] - elements_added.add(sectname) - elif sectname == self.default_section: - cursect = self._defaults + optname, vi, optval = mo.group('option', 'vi', 'value') + if not optname: + e = self._handle_error(e, fpname, lineno, line) + optname = self.optionxform(optname.rstrip()) + if (self._strict and + (sectname, optname) in elements_added): + raise DuplicateOptionError(sectname, optname, + fpname, lineno) + elements_added.add((sectname, optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + cursect[optname] = [optval] else: - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise MissingSectionHeaderError(fpname, lineno, line) - # an option line? + # valueless option handling + cursect[optname] = None else: - mo = self._optcre.match(value) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if not optname: - e = self._handle_error(e, fpname, lineno, line) - optname = self.optionxform(optname.rstrip()) - if (self._strict and - (sectname, optname) in elements_added): - raise DuplicateOptionError(sectname, optname, - fpname, lineno) - elements_added.add((sectname, optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - cursect[optname] = [optval] - else: - # valueless option handling - cursect[optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines - e = self._handle_error(e, fpname, lineno, line) - finally: - self._join_multiline_values() + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + e = self._handle_error(e, fpname, lineno, line) + # if any parsing errors occurred, raise an exception if e: raise e + def _join_multiline_values(self): defaults = self.default_section, self._defaults all_sections = itertools.chain((defaults,), From 1e69aaecf0ae53540b798b698ee2ca7a7a243aab Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 15:51:16 -0400 Subject: [PATCH 02/23] Extract method for _raise_all and yield ParseErrors from _read_inner. Reduces complexity by 1 and reduces touch points for handling errors in _read_inner. --- Lib/configparser.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 2e47d1b4f82302..9db8c35c05f306 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -302,15 +302,23 @@ def __init__(self, option, section, rawval): class ParsingError(Error): """Raised when a configuration file does not follow legal syntax.""" - def __init__(self, source): + def __init__(self, source, *args): super().__init__(f'Source contains parsing errors: {source!r}') self.source = source self.errors = [] self.args = (source, ) + if args: + self.append(*args) def append(self, lineno, line): self.errors.append((lineno, line)) - self.message += '\n\t[line %2d]: %s' % (lineno, line) + self.message += '\n\t[line %2d]: %s' % (lineno, repr(line)) + + def combine(self, others): + for other in others: + for error in other.errors: + self.append(*error) + return self class MissingSectionHeaderError(ParsingError): @@ -977,10 +985,18 @@ def _read(self, fp, fpname): """ try: - self._read_inner(fp, fpname) + self._raise_all(*self._read_inner(fp, fpname)) finally: self._join_multiline_values() + def _raise_all(self, *exceptions: ParsingError): + """ + Combine any number of ParsingErrors into one and raise it. + """ + if not exceptions: + return + raise exceptions[0].combine(exceptions[1:]) + def _read_inner(self, fp, fpname): elements_added = set() cursect = None # None, or a dictionary @@ -988,7 +1004,6 @@ def _read_inner(self, fp, fpname): optname = None lineno = 0 indent_level = 0 - e = None # None, or an exception for lineno, line in enumerate(fp, start=1): comment_start = sys.maxsize @@ -1096,7 +1111,7 @@ def _read_inner(self, fp, fpname): if mo: optname, vi, optval = mo.group('option', 'vi', 'value') if not optname: - e = self._handle_error(e, fpname, lineno, line) + yield ParsingError(fpname, lineno, line) optname = self.optionxform(optname.rstrip()) if (self._strict and (sectname, optname) in elements_added): @@ -1116,11 +1131,7 @@ def _read_inner(self, fp, fpname): # exception but keep going. the exception will be # raised at the end of the file and will contain a # list of all bogus lines - e = self._handle_error(e, fpname, lineno, line) - - # if any parsing errors occurred, raise an exception - if e: - raise e + yield ParsingError(fpname, lineno, line) def _join_multiline_values(self): @@ -1141,12 +1152,6 @@ def _read_defaults(self, defaults): for key, value in defaults.items(): self._defaults[self.optionxform(key)] = value - def _handle_error(self, exc, fpname, lineno, line): - if not exc: - exc = ParsingError(fpname) - exc.append(lineno, repr(line)) - return exc - def _unify_values(self, section, vars): """Create a sequence of lookups with 'vars' taking priority over the 'section' which takes priority over the DEFAULTSECT. From 8c4778136e1f119b864838772986b2ea3252ec43 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 15:56:19 -0400 Subject: [PATCH 03/23] Prefer iterators to splat expansion and literal indexing. --- Lib/configparser.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 9db8c35c05f306..455baeed160ec6 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -145,12 +145,14 @@ from collections.abc import MutableMapping from collections import ChainMap as _ChainMap +import contextlib import functools import io import itertools import os import re import sys +from typing import Iterator __all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", @@ -985,17 +987,16 @@ def _read(self, fp, fpname): """ try: - self._raise_all(*self._read_inner(fp, fpname)) + self._raise_all(self._read_inner(fp, fpname)) finally: self._join_multiline_values() - def _raise_all(self, *exceptions: ParsingError): + def _raise_all(self, exceptions: Iterator[ParsingError]): """ Combine any number of ParsingErrors into one and raise it. """ - if not exceptions: - return - raise exceptions[0].combine(exceptions[1:]) + with contextlib.suppress(StopIteration): + raise next(exceptions).combine(exceptions) def _read_inner(self, fp, fpname): elements_added = set() From 74798147d91270f8b084b4eadce18462e3ec871c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 16:09:09 -0400 Subject: [PATCH 04/23] Extract method for _strip_comments. Reduces complexity by 7. --- Lib/configparser.py | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 455baeed160ec6..883a485b15c9d2 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1006,27 +1006,8 @@ def _read_inner(self, fp, fpname): lineno = 0 indent_level = 0 - for lineno, line in enumerate(fp, start=1): - comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} - while comment_start == sys.maxsize and inline_prefixes: - next_prefixes = {} - for prefix, index in inline_prefixes.items(): - index = line.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and line[index-1].isspace()): - comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments - for prefix in self._comment_prefixes: - if line.strip().startswith(prefix): - comment_start = 0 - break - if comment_start == sys.maxsize: - comment_start = None + lines = map(self._strip_comments, fp) + for lineno, (line, comment_start) in enumerate(lines, start=1): value = line[:comment_start].strip() if not value: if self._empty_lines_in_values: @@ -1134,6 +1115,28 @@ def _read_inner(self, fp, fpname): # list of all bogus lines yield ParsingError(fpname, lineno, line) + def _strip_comments(self, line): + comment_start = sys.maxsize + # strip inline comments + inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} + while comment_start == sys.maxsize and inline_prefixes: + next_prefixes = {} + for prefix, index in inline_prefixes.items(): + index = line.find(prefix, index+1) + if index == -1: + continue + next_prefixes[prefix] = index + if index == 0 or (index > 0 and line[index-1].isspace()): + comment_start = min(comment_start, index) + inline_prefixes = next_prefixes + # strip full line comments + for prefix in self._comment_prefixes: + if line.strip().startswith(prefix): + comment_start = 0 + break + if comment_start == sys.maxsize: + comment_start = None + return line, comment_start def _join_multiline_values(self): defaults = self.default_section, self._defaults From a2fffee5d8d4b5122f0671acd7101743ba99c048 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 20:40:00 -0400 Subject: [PATCH 05/23] Model the file lines in a class to encapsulate the comment status and cleaned value. --- Lib/configparser.py | 66 +++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 883a485b15c9d2..9f13d385afa5ca 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -526,6 +526,31 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, "'$' must be followed by '$' or '{', " "found: %r" % (rest,)) +class _Line(str): + def _strip_comments(self, prefixes, inline_prefixes): + comment_start = sys.maxsize + # strip inline comments + inline_prefixes = {p: -1 for p in inline_prefixes} + while comment_start == sys.maxsize and inline_prefixes: + next_prefixes = {} + for prefix, index in inline_prefixes.items(): + index = self.find(prefix, index+1) + if index == -1: + continue + next_prefixes[prefix] = index + if index == 0 or (index > 0 and self[index-1].isspace()): + comment_start = min(comment_start, index) + inline_prefixes = next_prefixes + # strip full line comments + for prefix in prefixes: + if self.strip().startswith(prefix): + comment_start = 0 + break + if comment_start == sys.maxsize: + comment_start = None + self.clean = self[:comment_start].strip() + self.has_comments = comment_start is not None + class RawConfigParser(MutableMapping): """ConfigParser that does not do interpolation.""" @@ -1006,14 +1031,14 @@ def _read_inner(self, fp, fpname): lineno = 0 indent_level = 0 - lines = map(self._strip_comments, fp) - for lineno, (line, comment_start) in enumerate(lines, start=1): - value = line[:comment_start].strip() - if not value: + for lineno, line in enumerate(map(_Line, fp), start=1): + line._strip_comments(self._comment_prefixes, self._inline_comment_prefixes) + + if not line.clean: if self._empty_lines_in_values: # add empty line to the value, but only if there was no # comment on the line - if (comment_start is None and + if (not line.has_comments and cursect is not None and optname and cursect[optname] is not None): @@ -1029,7 +1054,7 @@ def _read_inner(self, fp, fpname): cur_indent_level > indent_level): if cursect[optname] is None: raise MultilineContinuationError(fpname, lineno, line) - cursect[optname].append(value) + cursect[optname].append(line.clean) # a section header or option header? else: if self._allow_unnamed_section and cursect is None: @@ -1041,7 +1066,7 @@ def _read_inner(self, fp, fpname): indent_level = cur_indent_level # is it a section header? - mo = self.SECTCRE.match(value) + mo = self.SECTCRE.match(line.clean) if mo: sectname = mo.group('header') if sectname in self._sections: @@ -1066,7 +1091,7 @@ def _read_inner(self, fp, fpname): else: indent_level = cur_indent_level # is it a section header? - mo = self.SECTCRE.match(value) + mo = self.SECTCRE.match(line.clean) if mo: sectname = mo.group('header') if sectname in self._sections: @@ -1089,7 +1114,7 @@ def _read_inner(self, fp, fpname): raise MissingSectionHeaderError(fpname, lineno, line) # an option line? else: - mo = self._optcre.match(value) + mo = self._optcre.match(line.clean) if mo: optname, vi, optval = mo.group('option', 'vi', 'value') if not optname: @@ -1115,29 +1140,6 @@ def _read_inner(self, fp, fpname): # list of all bogus lines yield ParsingError(fpname, lineno, line) - def _strip_comments(self, line): - comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} - while comment_start == sys.maxsize and inline_prefixes: - next_prefixes = {} - for prefix, index in inline_prefixes.items(): - index = line.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and line[index-1].isspace()): - comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments - for prefix in self._comment_prefixes: - if line.strip().startswith(prefix): - comment_start = 0 - break - if comment_start == sys.maxsize: - comment_start = None - return line, comment_start - def _join_multiline_values(self): defaults = self.default_section, self._defaults all_sections = itertools.chain((defaults,), From 23468cbaac87924417af3bce3f608289696a0bef Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 20:50:59 -0400 Subject: [PATCH 06/23] Encapsulate the read state as a dataclass --- Lib/configparser.py | 145 +++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 69 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 9f13d385afa5ca..ea9a6318c5f349 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -146,6 +146,7 @@ from collections.abc import MutableMapping from collections import ChainMap as _ChainMap import contextlib +from dataclasses import dataclass, field import functools import io import itertools @@ -526,6 +527,17 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, "'$' must be followed by '$' or '{', " "found: %r" % (rest,)) + +@dataclass +class _ReadState: + elements_added : set[str] = field(default_factory=set) + cursect : dict[str, str] | None = None + sectname : str | None = None + optname : str | None = None + lineno : int = 0 + indent_level : int = 0 + + class _Line(str): def _strip_comments(self, prefixes, inline_prefixes): comment_start = sys.maxsize @@ -1024,14 +1036,9 @@ def _raise_all(self, exceptions: Iterator[ParsingError]): raise next(exceptions).combine(exceptions) def _read_inner(self, fp, fpname): - elements_added = set() - cursect = None # None, or a dictionary - sectname = None - optname = None - lineno = 0 - indent_level = 0 + st = _ReadState() - for lineno, line in enumerate(map(_Line, fp), start=1): + for st.lineno, line in enumerate(map(_Line, fp), start=1): line._strip_comments(self._comment_prefixes, self._inline_comment_prefixes) if not line.clean: @@ -1039,106 +1046,106 @@ def _read_inner(self, fp, fpname): # add empty line to the value, but only if there was no # comment on the line if (not line.has_comments and - cursect is not None and - optname and - cursect[optname] is not None): - cursect[optname].append('') # newlines added at join + st.cursect is not None and + st.optname and + st.cursect[st.optname] is not None): + st.cursect[st.optname].append('') # newlines added at join else: # empty line marks end of value - indent_level = sys.maxsize + st.indent_level = sys.maxsize continue # continuation line? first_nonspace = self.NONSPACECRE.search(line) cur_indent_level = first_nonspace.start() if first_nonspace else 0 - if (cursect is not None and optname and - cur_indent_level > indent_level): - if cursect[optname] is None: - raise MultilineContinuationError(fpname, lineno, line) - cursect[optname].append(line.clean) + if (st.cursect is not None and st.optname and + cur_indent_level > st.indent_level): + if st.cursect[st.optname] is None: + raise MultilineContinuationError(fpname, st.lineno, line) + st.cursect[st.optname].append(line.clean) # a section header or option header? else: - if self._allow_unnamed_section and cursect is None: - sectname = UNNAMED_SECTION - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) - - indent_level = cur_indent_level + if self._allow_unnamed_section and st.cursect is None: + st.sectname = UNNAMED_SECTION + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + + st.indent_level = cur_indent_level # is it a section header? mo = self.SECTCRE.match(line.clean) if mo: - sectname = mo.group('header') - if sectname in self._sections: - if self._strict and sectname in elements_added: - raise DuplicateSectionError(sectname, fpname, - lineno) - cursect = self._sections[sectname] - elements_added.add(sectname) - elif sectname == self.default_section: - cursect = self._defaults + st.sectname = mo.group('header') + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults else: - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) # So sections can't start with a continuation line - optname = None + st.optname = None # no section header? - elif cursect is None: - raise MissingSectionHeaderError(fpname, lineno, line) + elif st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) # an option line? else: - indent_level = cur_indent_level + st.indent_level = cur_indent_level # is it a section header? mo = self.SECTCRE.match(line.clean) if mo: - sectname = mo.group('header') - if sectname in self._sections: - if self._strict and sectname in elements_added: - raise DuplicateSectionError(sectname, fpname, - lineno) - cursect = self._sections[sectname] - elements_added.add(sectname) - elif sectname == self.default_section: - cursect = self._defaults + st.sectname = mo.group('header') + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults else: - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) # So sections can't start with a continuation line - optname = None + st.optname = None # no section header in the file? - elif cursect is None: - raise MissingSectionHeaderError(fpname, lineno, line) + elif st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) # an option line? else: mo = self._optcre.match(line.clean) if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if not optname: - yield ParsingError(fpname, lineno, line) - optname = self.optionxform(optname.rstrip()) + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + yield ParsingError(fpname, st.lineno, line) + st.optname = self.optionxform(st.optname.rstrip()) if (self._strict and - (sectname, optname) in elements_added): - raise DuplicateOptionError(sectname, optname, - fpname, lineno) - elements_added.add((sectname, optname)) + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) # This check is fine because the OPTCRE cannot # match if it would set optval to None if optval is not None: optval = optval.strip() - cursect[optname] = [optval] + st.cursect[st.optname] = [optval] else: # valueless option handling - cursect[optname] = None + st.cursect[st.optname] = None else: # a non-fatal parsing error occurred. set up the # exception but keep going. the exception will be # raised at the end of the file and will contain a # list of all bogus lines - yield ParsingError(fpname, lineno, line) + yield ParsingError(fpname, st.lineno, line) def _join_multiline_values(self): defaults = self.default_section, self._defaults From 3d1ef0a7b05c9c05c75942e01f99a9feaf27e70f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 21:17:49 -0400 Subject: [PATCH 07/23] Extract _handle_continuation_line and _handle_rest methods. Reduces complexity by 8. --- Lib/configparser.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index ea9a6318c5f349..d82fd8ac90ed87 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1054,16 +1054,27 @@ def _read_inner(self, fp, fpname): # empty line marks end of value st.indent_level = sys.maxsize continue - # continuation line? + first_nonspace = self.NONSPACECRE.search(line) - cur_indent_level = first_nonspace.start() if first_nonspace else 0 - if (st.cursect is not None and st.optname and - cur_indent_level > st.indent_level): + st.cur_indent_level = first_nonspace.start() if first_nonspace else 0 + + if self._handle_continuation_line(st, line, fpname): + continue + + yield from self._handle_rest(st, line, fpname) + + def _handle_continuation_line(self, st, line, fpname): + # continuation line? + is_continue = (st.cursect is not None and st.optname and + st.cur_indent_level > st.indent_level) + if is_continue: if st.cursect[st.optname] is None: raise MultilineContinuationError(fpname, st.lineno, line) st.cursect[st.optname].append(line.clean) - # a section header or option header? - else: + return is_continue + + def _handle_rest(self, st, line, fpname): + # a section header or option header? if self._allow_unnamed_section and st.cursect is None: st.sectname = UNNAMED_SECTION st.cursect = self._dict() @@ -1071,7 +1082,7 @@ def _read_inner(self, fp, fpname): self._proxies[st.sectname] = SectionProxy(self, st.sectname) st.elements_added.add(st.sectname) - st.indent_level = cur_indent_level + st.indent_level = st.cur_indent_level # is it a section header? mo = self.SECTCRE.match(line.clean) if mo: @@ -1096,7 +1107,7 @@ def _read_inner(self, fp, fpname): raise MissingSectionHeaderError(fpname, st.lineno, line) # an option line? else: - st.indent_level = cur_indent_level + st.indent_level = st.cur_indent_level # is it a section header? mo = self.SECTCRE.match(line.clean) if mo: From 071baeb766e90a4d3e43ceed8ed1580e34e5eb0a Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 27 Mar 2024 21:18:43 -0400 Subject: [PATCH 08/23] Reindent --- Lib/configparser.py | 168 ++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index d82fd8ac90ed87..838735b55b68a3 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1064,99 +1064,99 @@ def _read_inner(self, fp, fpname): yield from self._handle_rest(st, line, fpname) def _handle_continuation_line(self, st, line, fpname): - # continuation line? - is_continue = (st.cursect is not None and st.optname and - st.cur_indent_level > st.indent_level) - if is_continue: - if st.cursect[st.optname] is None: - raise MultilineContinuationError(fpname, st.lineno, line) - st.cursect[st.optname].append(line.clean) - return is_continue + # continuation line? + is_continue = (st.cursect is not None and st.optname and + st.cur_indent_level > st.indent_level) + if is_continue: + if st.cursect[st.optname] is None: + raise MultilineContinuationError(fpname, st.lineno, line) + st.cursect[st.optname].append(line.clean) + return is_continue def _handle_rest(self, st, line, fpname): - # a section header or option header? - if self._allow_unnamed_section and st.cursect is None: - st.sectname = UNNAMED_SECTION + # a section header or option header? + if self._allow_unnamed_section and st.cursect is None: + st.sectname = UNNAMED_SECTION + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + + st.indent_level = st.cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(line.clean) + if mo: + st.sectname = mo.group('header') + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults + else: + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + # So sections can't start with a continuation line + st.optname = None + # no section header? + elif st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) + # an option line? + else: + st.indent_level = st.cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(line.clean) + if mo: + st.sectname = mo.group('header') + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults + else: st.cursect = self._dict() self._sections[st.sectname] = st.cursect self._proxies[st.sectname] = SectionProxy(self, st.sectname) st.elements_added.add(st.sectname) - - st.indent_level = st.cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(line.clean) + # So sections can't start with a continuation line + st.optname = None + # no section header in the file? + elif st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) + # an option line? + else: + mo = self._optcre.match(line.clean) if mo: - st.sectname = mo.group('header') - if st.sectname in self._sections: - if self._strict and st.sectname in st.elements_added: - raise DuplicateSectionError(st.sectname, fpname, - st.lineno) - st.cursect = self._sections[st.sectname] - st.elements_added.add(st.sectname) - elif st.sectname == self.default_section: - st.cursect = self._defaults + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + yield ParsingError(fpname, st.lineno, line) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] else: - st.cursect = self._dict() - self._sections[st.sectname] = st.cursect - self._proxies[st.sectname] = SectionProxy(self, st.sectname) - st.elements_added.add(st.sectname) - # So sections can't start with a continuation line - st.optname = None - # no section header? - elif st.cursect is None: - raise MissingSectionHeaderError(fpname, st.lineno, line) - # an option line? + # valueless option handling + st.cursect[st.optname] = None else: - st.indent_level = st.cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(line.clean) - if mo: - st.sectname = mo.group('header') - if st.sectname in self._sections: - if self._strict and st.sectname in st.elements_added: - raise DuplicateSectionError(st.sectname, fpname, - st.lineno) - st.cursect = self._sections[st.sectname] - st.elements_added.add(st.sectname) - elif st.sectname == self.default_section: - st.cursect = self._defaults - else: - st.cursect = self._dict() - self._sections[st.sectname] = st.cursect - self._proxies[st.sectname] = SectionProxy(self, st.sectname) - st.elements_added.add(st.sectname) - # So sections can't start with a continuation line - st.optname = None - # no section header in the file? - elif st.cursect is None: - raise MissingSectionHeaderError(fpname, st.lineno, line) - # an option line? - else: - mo = self._optcre.match(line.clean) - if mo: - st.optname, vi, optval = mo.group('option', 'vi', 'value') - if not st.optname: - yield ParsingError(fpname, st.lineno, line) - st.optname = self.optionxform(st.optname.rstrip()) - if (self._strict and - (st.sectname, st.optname) in st.elements_added): - raise DuplicateOptionError(st.sectname, st.optname, - fpname, st.lineno) - st.elements_added.add((st.sectname, st.optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - st.cursect[st.optname] = [optval] - else: - # valueless option handling - st.cursect[st.optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines - yield ParsingError(fpname, st.lineno, line) + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + yield ParsingError(fpname, st.lineno, line) def _join_multiline_values(self): defaults = self.default_section, self._defaults From 81f4ce2a52026656bbca67e86415dd19f81be8d9 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:03:00 -0400 Subject: [PATCH 09/23] At least for now, collect errors in the ReadState --- Lib/configparser.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 838735b55b68a3..d586438869ec10 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -153,7 +153,7 @@ import os import re import sys -from typing import Iterator +from typing import Iterable __all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", @@ -536,6 +536,7 @@ class _ReadState: optname : str | None = None lineno : int = 0 indent_level : int = 0 + errors : list[ParsingError] = field(default_factory=list) class _Line(str): @@ -1028,10 +1029,11 @@ def _read(self, fp, fpname): finally: self._join_multiline_values() - def _raise_all(self, exceptions: Iterator[ParsingError]): + def _raise_all(self, exceptions: Iterable[ParsingError]): """ Combine any number of ParsingErrors into one and raise it. """ + exceptions = iter(exceptions) with contextlib.suppress(StopIteration): raise next(exceptions).combine(exceptions) @@ -1061,7 +1063,9 @@ def _read_inner(self, fp, fpname): if self._handle_continuation_line(st, line, fpname): continue - yield from self._handle_rest(st, line, fpname) + self._handle_rest(st, line, fpname) + + return st.errors def _handle_continuation_line(self, st, line, fpname): # continuation line? @@ -1136,7 +1140,7 @@ def _handle_rest(self, st, line, fpname): if mo: st.optname, vi, optval = mo.group('option', 'vi', 'value') if not st.optname: - yield ParsingError(fpname, st.lineno, line) + st.errors.append(ParsingError(fpname, st.lineno, line)) st.optname = self.optionxform(st.optname.rstrip()) if (self._strict and (st.sectname, st.optname) in st.elements_added): @@ -1156,7 +1160,7 @@ def _handle_rest(self, st, line, fpname): # exception but keep going. the exception will be # raised at the end of the file and will contain a # list of all bogus lines - yield ParsingError(fpname, st.lineno, line) + st.errors.append(ParsingError(fpname, st.lineno, line)) def _join_multiline_values(self): defaults = self.default_section, self._defaults From 8942cc1522b5835e36df5a1cd04f7243b3b40de7 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:06:27 -0400 Subject: [PATCH 10/23] Check for missing section header separately. --- Lib/configparser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index d586438869ec10..7443d67715abcf 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1089,6 +1089,10 @@ def _handle_rest(self, st, line, fpname): st.indent_level = st.cur_indent_level # is it a section header? mo = self.SECTCRE.match(line.clean) + + if not mo and st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) + if mo: st.sectname = mo.group('header') if st.sectname in self._sections: @@ -1106,10 +1110,7 @@ def _handle_rest(self, st, line, fpname): st.elements_added.add(st.sectname) # So sections can't start with a continuation line st.optname = None - # no section header? - elif st.cursect is None: - raise MissingSectionHeaderError(fpname, st.lineno, line) - # an option line? + # an option line? else: st.indent_level = st.cur_indent_level # is it a section header? From 1e72168d04d9d35c0b1e5a64496fab64de329e07 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:10:57 -0400 Subject: [PATCH 11/23] Extract methods for _handle_header and _handle_option. Reduces complexity by 6. --- Lib/configparser.py | 97 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 7443d67715abcf..272c9717c804cc 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1093,6 +1093,31 @@ def _handle_rest(self, st, line, fpname): if not mo and st.cursect is None: raise MissingSectionHeaderError(fpname, st.lineno, line) + self._handle_header(st, mo, fpname) if mo else self._handle_option(st, line, fpname) + + def _handle_header(self, st, mo, fpname): + st.sectname = mo.group('header') + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults + else: + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + # So sections can't start with a continuation line + st.optname = None + + def _handle_option(self, st, line, fpname): + # an option line? + st.indent_level = st.cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(line.clean) if mo: st.sectname = mo.group('header') if st.sectname in self._sections: @@ -1110,58 +1135,36 @@ def _handle_rest(self, st, line, fpname): st.elements_added.add(st.sectname) # So sections can't start with a continuation line st.optname = None + # no section header in the file? + elif st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) # an option line? else: - st.indent_level = st.cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(line.clean) + mo = self._optcre.match(line.clean) if mo: - st.sectname = mo.group('header') - if st.sectname in self._sections: - if self._strict and st.sectname in st.elements_added: - raise DuplicateSectionError(st.sectname, fpname, - st.lineno) - st.cursect = self._sections[st.sectname] - st.elements_added.add(st.sectname) - elif st.sectname == self.default_section: - st.cursect = self._defaults + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + st.errors.append(ParsingError(fpname, st.lineno, line)) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] else: - st.cursect = self._dict() - self._sections[st.sectname] = st.cursect - self._proxies[st.sectname] = SectionProxy(self, st.sectname) - st.elements_added.add(st.sectname) - # So sections can't start with a continuation line - st.optname = None - # no section header in the file? - elif st.cursect is None: - raise MissingSectionHeaderError(fpname, st.lineno, line) - # an option line? + # valueless option handling + st.cursect[st.optname] = None else: - mo = self._optcre.match(line.clean) - if mo: - st.optname, vi, optval = mo.group('option', 'vi', 'value') - if not st.optname: - st.errors.append(ParsingError(fpname, st.lineno, line)) - st.optname = self.optionxform(st.optname.rstrip()) - if (self._strict and - (st.sectname, st.optname) in st.elements_added): - raise DuplicateOptionError(st.sectname, st.optname, - fpname, st.lineno) - st.elements_added.add((st.sectname, st.optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - st.cursect[st.optname] = [optval] - else: - # valueless option handling - st.cursect[st.optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines - st.errors.append(ParsingError(fpname, st.lineno, line)) + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + st.errors.append(ParsingError(fpname, st.lineno, line)) def _join_multiline_values(self): defaults = self.default_section, self._defaults From 0dfd797486420506be9ec92db15f8f001b237414 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:15:44 -0400 Subject: [PATCH 12/23] Remove unreachable code. Reduces complexity by 4. --- Lib/configparser.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 272c9717c804cc..89c17dbda50bc0 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1116,30 +1116,8 @@ def _handle_header(self, st, mo, fpname): def _handle_option(self, st, line, fpname): # an option line? st.indent_level = st.cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(line.clean) - if mo: - st.sectname = mo.group('header') - if st.sectname in self._sections: - if self._strict and st.sectname in st.elements_added: - raise DuplicateSectionError(st.sectname, fpname, - st.lineno) - st.cursect = self._sections[st.sectname] - st.elements_added.add(st.sectname) - elif st.sectname == self.default_section: - st.cursect = self._defaults - else: - st.cursect = self._dict() - self._sections[st.sectname] = st.cursect - self._proxies[st.sectname] = SectionProxy(self, st.sectname) - st.elements_added.add(st.sectname) - # So sections can't start with a continuation line - st.optname = None - # no section header in the file? - elif st.cursect is None: - raise MissingSectionHeaderError(fpname, st.lineno, line) - # an option line? - else: + + if True: mo = self._optcre.match(line.clean) if mo: st.optname, vi, optval = mo.group('option', 'vi', 'value') From 77ed897299011cc7a288ba91caa6f7e97f80b2fb Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:16:00 -0400 Subject: [PATCH 13/23] Remove unreachable branch --- Lib/configparser.py | 49 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 89c17dbda50bc0..70a048603b94d6 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1117,32 +1117,31 @@ def _handle_option(self, st, line, fpname): # an option line? st.indent_level = st.cur_indent_level - if True: - mo = self._optcre.match(line.clean) - if mo: - st.optname, vi, optval = mo.group('option', 'vi', 'value') - if not st.optname: - st.errors.append(ParsingError(fpname, st.lineno, line)) - st.optname = self.optionxform(st.optname.rstrip()) - if (self._strict and - (st.sectname, st.optname) in st.elements_added): - raise DuplicateOptionError(st.sectname, st.optname, - fpname, st.lineno) - st.elements_added.add((st.sectname, st.optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - st.cursect[st.optname] = [optval] - else: - # valueless option handling - st.cursect[st.optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines + mo = self._optcre.match(line.clean) + if mo: + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: st.errors.append(ParsingError(fpname, st.lineno, line)) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] + else: + # valueless option handling + st.cursect[st.optname] = None + else: + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + st.errors.append(ParsingError(fpname, st.lineno, line)) def _join_multiline_values(self): defaults = self.default_section, self._defaults From 76f42d3d856db5d5c0230b9d3ca47885608692af Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 28 Mar 2024 11:17:54 -0400 Subject: [PATCH 14/23] Handle error condition early. Reduces complexity by 1. --- Lib/configparser.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 70a048603b94d6..0e29011e9b5117 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1118,30 +1118,31 @@ def _handle_option(self, st, line, fpname): st.indent_level = st.cur_indent_level mo = self._optcre.match(line.clean) - if mo: - st.optname, vi, optval = mo.group('option', 'vi', 'value') - if not st.optname: - st.errors.append(ParsingError(fpname, st.lineno, line)) - st.optname = self.optionxform(st.optname.rstrip()) - if (self._strict and - (st.sectname, st.optname) in st.elements_added): - raise DuplicateOptionError(st.sectname, st.optname, - fpname, st.lineno) - st.elements_added.add((st.sectname, st.optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - st.cursect[st.optname] = [optval] - else: - # valueless option handling - st.cursect[st.optname] = None - else: + if not mo: # a non-fatal parsing error occurred. set up the # exception but keep going. the exception will be # raised at the end of the file and will contain a # list of all bogus lines st.errors.append(ParsingError(fpname, st.lineno, line)) + return + + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + st.errors.append(ParsingError(fpname, st.lineno, line)) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] + else: + # valueless option handling + st.cursect[st.optname] = None def _join_multiline_values(self): defaults = self.default_section, self._defaults From c18a2bb75844e132b35ab9560d5c6a38bdd8033f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 12:07:31 -0400 Subject: [PATCH 15/23] Add blurb --- .../next/Library/2024-03-29-12-07-26.gh-issue-117348.WjCYvK.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-03-29-12-07-26.gh-issue-117348.WjCYvK.rst diff --git a/Misc/NEWS.d/next/Library/2024-03-29-12-07-26.gh-issue-117348.WjCYvK.rst b/Misc/NEWS.d/next/Library/2024-03-29-12-07-26.gh-issue-117348.WjCYvK.rst new file mode 100644 index 00000000000000..cd3006c3b7b8f0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-29-12-07-26.gh-issue-117348.WjCYvK.rst @@ -0,0 +1,2 @@ +Refactored :meth:`configparser.RawConfigParser._read` to reduce cyclometric +complexity and improve comprehensibility. From 97aa785854da308047bd3026b59ee2fea73cbbac Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 12:27:13 -0400 Subject: [PATCH 16/23] Move _raise_all to ParsingError, as its behavior is most closely related to the exception class and not the reader. --- Lib/configparser.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 0e29011e9b5117..aa44d03d68c04c 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -323,6 +323,16 @@ def combine(self, others): self.append(*error) return self + @staticmethod + def _raise_all(exceptions: Iterable['ParsingError']): + """ + Combine any number of ParsingErrors into one and raise it. + """ + exceptions = iter(exceptions) + with contextlib.suppress(StopIteration): + raise next(exceptions).combine(exceptions) + + class MissingSectionHeaderError(ParsingError): """Raised when a key-value pair is found before any section header.""" @@ -1025,18 +1035,10 @@ def _read(self, fp, fpname): """ try: - self._raise_all(self._read_inner(fp, fpname)) + ParsingError._raise_all(self._read_inner(fp, fpname)) finally: self._join_multiline_values() - def _raise_all(self, exceptions: Iterable[ParsingError]): - """ - Combine any number of ParsingErrors into one and raise it. - """ - exceptions = iter(exceptions) - with contextlib.suppress(StopIteration): - raise next(exceptions).combine(exceptions) - def _read_inner(self, fp, fpname): st = _ReadState() From d310cb404a0e9f3b5b0270f1008ca510ab8065db Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 12:35:20 -0400 Subject: [PATCH 17/23] Split _strip* into separate methods. --- Lib/configparser.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index aa44d03d68c04c..90cf06502182f5 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -551,28 +551,33 @@ class _ReadState: class _Line(str): def _strip_comments(self, prefixes, inline_prefixes): + comment_start = min(self._strip_inline(inline_prefixes), self._strip_full(prefixes)) + if comment_start == sys.maxsize: + comment_start = None + self.clean = self[:comment_start].strip() + self.has_comments = comment_start is not None + + def _strip_inline(self, prefixes): comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in inline_prefixes} - while comment_start == sys.maxsize and inline_prefixes: + prefixes = {p: -1 for p in prefixes} + while comment_start == sys.maxsize and prefixes: next_prefixes = {} - for prefix, index in inline_prefixes.items(): + for prefix, index in prefixes.items(): index = self.find(prefix, index+1) if index == -1: continue next_prefixes[prefix] = index if index == 0 or (index > 0 and self[index-1].isspace()): comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments + prefixes = next_prefixes + return comment_start + + def _strip_full(self, prefixes): for prefix in prefixes: if self.strip().startswith(prefix): - comment_start = 0 + return 0 break - if comment_start == sys.maxsize: - comment_start = None - self.clean = self[:comment_start].strip() - self.has_comments = comment_start is not None + return sys.maxsize class RawConfigParser(MutableMapping): From f2a355cd7cf45099133ab8b292b04a757466ba1b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 12:41:37 -0400 Subject: [PATCH 18/23] Refactor _strip_full to compute the strip just once and use 'not any' to determine the factor. --- Lib/configparser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 90cf06502182f5..de76d2a4adde8f 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -573,11 +573,7 @@ def _strip_inline(self, prefixes): return comment_start def _strip_full(self, prefixes): - for prefix in prefixes: - if self.strip().startswith(prefix): - return 0 - break - return sys.maxsize + return sys.maxsize * (not any(map(self.strip().startswith, prefixes))) class RawConfigParser(MutableMapping): From 249261416824eec48d70ae45103263f223d46eae Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 13:03:18 -0400 Subject: [PATCH 19/23] Replace use of 'sys.maxsize' with direct computation of the stripped value. --- Lib/configparser.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index de76d2a4adde8f..f8a804a8d90431 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -551,16 +551,13 @@ class _ReadState: class _Line(str): def _strip_comments(self, prefixes, inline_prefixes): - comment_start = min(self._strip_inline(inline_prefixes), self._strip_full(prefixes)) - if comment_start == sys.maxsize: - comment_start = None - self.clean = self[:comment_start].strip() - self.has_comments = comment_start is not None + self.clean = self._strip_full(prefixes) and self._strip_inline(inline_prefixes) + self.has_comments = self.strip() != self.clean def _strip_inline(self, prefixes): - comment_start = sys.maxsize + starts = [] prefixes = {p: -1 for p in prefixes} - while comment_start == sys.maxsize and prefixes: + while not starts and prefixes: next_prefixes = {} for prefix, index in prefixes.items(): index = self.find(prefix, index+1) @@ -568,12 +565,12 @@ def _strip_inline(self, prefixes): continue next_prefixes[prefix] = index if index == 0 or (index > 0 and self[index-1].isspace()): - comment_start = min(comment_start, index) + starts.append(index) prefixes = next_prefixes - return comment_start + return self[:min(starts, default=None)].strip() def _strip_full(self, prefixes): - return sys.maxsize * (not any(map(self.strip().startswith, prefixes))) + return '' if any(map(self.strip().startswith, prefixes)) else True class RawConfigParser(MutableMapping): From 496859151f009d6c3cbf38ece82524291048fc12 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 13:04:22 -0400 Subject: [PATCH 20/23] Extract has_comments as a dynamic property. --- Lib/configparser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index f8a804a8d90431..ee5aa7f00b67a8 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -552,7 +552,10 @@ class _ReadState: class _Line(str): def _strip_comments(self, prefixes, inline_prefixes): self.clean = self._strip_full(prefixes) and self._strip_inline(inline_prefixes) - self.has_comments = self.strip() != self.clean + + @property + def has_comments(self): + return self.strip() != self.clean def _strip_inline(self, prefixes): starts = [] From 7d807bba8cc70a926af16261abdf50e98be6a94f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 13:19:05 -0400 Subject: [PATCH 21/23] Implement clean as a cached property. --- Lib/configparser.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index ee5aa7f00b67a8..a580a7a1fbf741 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -549,17 +549,31 @@ class _ReadState: errors : list[ParsingError] = field(default_factory=list) +@dataclass +class _Prefixes: + full : Iterable[str] + inline : Iterable[str] + + class _Line(str): - def _strip_comments(self, prefixes, inline_prefixes): - self.clean = self._strip_full(prefixes) and self._strip_inline(inline_prefixes) + + def __new__(cls, val, *args, **kwargs): + return super().__new__(cls, val) + + def __init__(self, val, prefixes: _Prefixes): + self.prefixes = prefixes + + @functools.cached_property + def clean(self): + return self._strip_full() and self._strip_inline() @property def has_comments(self): return self.strip() != self.clean - def _strip_inline(self, prefixes): + def _strip_inline(self): starts = [] - prefixes = {p: -1 for p in prefixes} + prefixes = {p: -1 for p in self.prefixes.inline} while not starts and prefixes: next_prefixes = {} for prefix, index in prefixes.items(): @@ -572,8 +586,8 @@ def _strip_inline(self, prefixes): prefixes = next_prefixes return self[:min(starts, default=None)].strip() - def _strip_full(self, prefixes): - return '' if any(map(self.strip().startswith, prefixes)) else True + def _strip_full(self): + return '' if any(map(self.strip().startswith, self.prefixes.full)) else True class RawConfigParser(MutableMapping): @@ -1043,9 +1057,11 @@ def _read(self, fp, fpname): def _read_inner(self, fp, fpname): st = _ReadState() - for st.lineno, line in enumerate(map(_Line, fp), start=1): - line._strip_comments(self._comment_prefixes, self._inline_comment_prefixes) - + Line = functools.partial( + _Line, + prefixes=_Prefixes(full=self._comment_prefixes, inline=self._inline_comment_prefixes), + ) + for st.lineno, line in enumerate(map(Line, fp), start=1): if not line.clean: if self._empty_lines_in_values: # add empty line to the value, but only if there was no From 29cb20f225216bc221e7b396eea1526157d83139 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 13:23:34 -0400 Subject: [PATCH 22/23] Model comment prefixes in the RawConfigParser within a prefixes namespace. --- Lib/configparser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index a580a7a1fbf741..591896d20cff36 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -656,8 +656,10 @@ def __init__(self, defaults=None, dict_type=_default_dict, else: self._optcre = re.compile(self._OPT_TMPL.format(delim=d), re.VERBOSE) - self._comment_prefixes = tuple(comment_prefixes or ()) - self._inline_comment_prefixes = tuple(inline_comment_prefixes or ()) + self._prefixes = _Prefixes( + full=tuple(comment_prefixes or ()), + inline=tuple(inline_comment_prefixes or ()), + ) self._strict = strict self._allow_no_value = allow_no_value self._empty_lines_in_values = empty_lines_in_values @@ -1057,10 +1059,7 @@ def _read(self, fp, fpname): def _read_inner(self, fp, fpname): st = _ReadState() - Line = functools.partial( - _Line, - prefixes=_Prefixes(full=self._comment_prefixes, inline=self._inline_comment_prefixes), - ) + Line = functools.partial(_Line, prefixes=self._prefixes) for st.lineno, line in enumerate(map(Line, fp), start=1): if not line.clean: if self._empty_lines_in_values: From c834c356f72d07e03ad8ece49f72ebb4ee8759b4 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 29 Mar 2024 14:49:49 -0400 Subject: [PATCH 23/23] Use a regular expression to search for the first match. Avoids mutating variables and tricky logic and over-computing all of the starts when only the first is relevant. --- Lib/configparser.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/Lib/configparser.py b/Lib/configparser.py index 591896d20cff36..d0326c60e9b907 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -572,19 +572,16 @@ def has_comments(self): return self.strip() != self.clean def _strip_inline(self): - starts = [] - prefixes = {p: -1 for p in self.prefixes.inline} - while not starts and prefixes: - next_prefixes = {} - for prefix, index in prefixes.items(): - index = self.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and self[index-1].isspace()): - starts.append(index) - prefixes = next_prefixes - return self[:min(starts, default=None)].strip() + """ + Search for the earliest prefix at the beginning of the line or following a space. + """ + matcher = re.compile( + '|'.join(fr'(^|\s)({re.escape(prefix)})' for prefix in self.prefixes.inline) + # match nothing if no prefixes + or '(?!)' + ) + match = matcher.search(self) + return self[:match.start() if match else None].strip() def _strip_full(self): return '' if any(map(self.strip().startswith, self.prefixes.full)) else True