diff --git a/.editorconfig b/.editorconfig index 5b04b32a89e3d2..25bc5935258bd1 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,11 +1,11 @@ root = true -[*.{py,c,cpp,h,js,rst,md,yml,yaml}] +[*.{py,c,cpp,h,js,rst,md,yml,yaml,gram}] trim_trailing_whitespace = true insert_final_newline = true indent_style = space -[*.{py,c,cpp,h}] +[*.{py,c,cpp,h,gram}] indent_size = 4 [*.rst] diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 05f20e12f4653d..665befea8787c7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,8 +260,8 @@ jobs: free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} - build-ubuntu-ssltests-openssl: - name: 'Ubuntu SSL tests with OpenSSL' + build-ubuntu-ssltests: + name: 'Ubuntu SSL tests' runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context @@ -269,75 +269,19 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-24.04] - openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] + include: + - { os: ubuntu-24.04, ssl: openssl, ssl_ver: 3.0.16 } + - { os: ubuntu-24.04, ssl: openssl, ssl_ver: 3.1.8 } + - { os: ubuntu-24.04, ssl: openssl, ssl_ver: 3.2.4 } + - { os: ubuntu-24.04, ssl: openssl, ssl_ver: 3.3.3 } + - { os: ubuntu-24.04, ssl: openssl, ssl_ver: 3.4.1 } + - { os: ubuntu-24.04, ssl: awslc, ssl_ver: 1.55.0 } # See Tools/ssl/make_ssl_data.py for notes on adding a new version env: - OPENSSL_VER: ${{ matrix.openssl_ver }} - MULTISSL_DIR: ${{ github.workspace }}/multissl - OPENSSL_DIR: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }} - LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - - name: Configure CPython - run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: SSL tests - run: ./python Lib/test/ssltests.py - - build-ubuntu-ssltests-awslc: - name: 'Ubuntu SSL tests with AWS-LC' - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04] - awslc_ver: [1.55.0] - env: - AWSLC_VER: ${{ matrix.awslc_ver}} + SSL_VER: ${{ matrix.ssl_ver }} MULTISSL_DIR: ${{ github.workspace }}/multissl - OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} - LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + SSL_DIR: ${{ github.workspace }}/multissl/${{ matrix.ssl }}/${{ matrix.ssl_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/${{ matrix.ssl }}/${{ matrix.ssl_ver }}/lib steps: - uses: actions/checkout@v4 with: @@ -356,22 +300,18 @@ jobs: - name: Configure SSL lib env vars run: | echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore AWS-LC build' - id: cache-aws-lc + echo "SSL_DIR=${GITHUB_WORKSPACE}/multissl/${{ matrix.ssl }}/${SSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/${{ matrix.ssl }}/${SSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore SSL build' + id: cache-ssl uses: actions/cache@v4 with: - path: ./multissl/aws-lc/${{ matrix.awslc_ver }} - key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} - - name: Install AWS-LC - if: steps.cache-aws-lc.outputs.cache-hit != 'true' + path: ./multissl/${{ env.SSL }}/${{ env.SSL_VER }} + key: ${{ matrix.os }}-multissl-${{ env.SSL }}-${{ env.SSL_VER }} + - name: Install SSL + if: steps.cache-ssl.outputs.cache-hit != 'true' run: | - python3 Tools/ssl/multissltests.py \ - --steps=library \ - --base-directory "$MULTISSL_DIR" \ - --awslc ${{ matrix.awslc_ver }} \ - --system Linux + python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --system Linux --ssl ${{ matrix.ssl }} --ssl-versions ${{ matrix.ssl_ver }} - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" @@ -381,18 +321,18 @@ jobs: save: false - name: Configure CPython run: | - ./configure CFLAGS="-fdiagnostics-format=json" \ - --config-cache \ - --enable-slower-safety \ - --with-pydebug \ - --with-openssl="$OPENSSL_DIR" \ - --with-builtin-hashlib-hashes=blake2 \ - --with-ssl-default-suites=openssl + CMD=(./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$SSL_DIR") + if [ "${{ matrix.ssl }}" = "openssl" ]; then + "${CMD[@]}" + else + "${CMD[@]}" --with-builtin-hashlib-hashes=blake2 --with-ssl-default-suites=openssl + fi - name: Build CPython - run: make -j + run: make -j4 - name: Display build info run: make pythoninfo - name: Verify python is linked to AWS-LC + if: matrix.ssl == 'aws-lc' run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC - name: SSL tests run: ./python Lib/test/ssltests.py @@ -435,7 +375,7 @@ jobs: key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - name: Install OpenSSL if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --ssl 'openssl' --ssl-versions "$OPENSSL_VER" --system Linux - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" @@ -567,7 +507,7 @@ jobs: key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - name: Install OpenSSL if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --ssl 'openssl' --ssl-versions "$OPENSSL_VER" --system Linux - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" @@ -703,8 +643,7 @@ jobs: - build-windows-msi - build-macos - build-ubuntu - - build-ubuntu-ssltests-awslc - - build-ubuntu-ssltests-openssl + - build-ubuntu-ssltests - build-wasi - test-hypothesis - build-asan @@ -719,8 +658,7 @@ jobs: with: allowed-failures: >- build-windows-msi, - build-ubuntu-ssltests-awslc, - build-ubuntu-ssltests-openssl, + build-ubuntu-ssltests, test-hypothesis, cifuzz, allowed-skips: >- @@ -738,8 +676,7 @@ jobs: check-generated-files, build-macos, build-ubuntu, - build-ubuntu-ssltests-awslc, - build-ubuntu-ssltests-openssl, + build-ubuntu-ssltests, build-wasi, test-hypothesis, build-asan, diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 657e0a6bf662f7..7b9dc4818577eb 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -66,7 +66,7 @@ jobs: run: | set -Eeuo pipefail # Build docs with the nit-picky option; write warnings to file - make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --fail-on-warning --warning-file sphinx-warnings.txt" html + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --warning-file sphinx-warnings.txt" html - name: 'Check warnings' if: github.event_name == 'pull_request' run: | @@ -75,6 +75,18 @@ jobs: --fail-if-regression \ --fail-if-improved \ --fail-if-new-news-nit + - name: 'Build EPUB documentation' + continue-on-error: true + run: | + set -Eeuo pipefail + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet" epub + pip install epubcheck + epubcheck Doc/build/epub/Python.epub &> Doc/epubcheck.txt + - name: 'Check for fatal errors in EPUB' + if: github.event_name == 'pull_request' + continue-on-error: true # until gh-136155 is fixed + run: | + python Doc/tools/check-epub.py # Run "doctest" on HEAD as new syntax doesn't exist in the latest stable release doctest: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 76b19fd5d1a72e..607e7949161812 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -60,7 +60,7 @@ jobs: key: ${{ inputs.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - name: Install OpenSSL if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --ssl openssl --ssl-versions "$OPENSSL_VER" --system Linux - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" diff --git a/Doc/c-api/float.rst b/Doc/c-api/float.rst index c5a7653efca26b..489676caa3a16a 100644 --- a/Doc/c-api/float.rst +++ b/Doc/c-api/float.rst @@ -124,15 +124,15 @@ There are two problems on non-IEEE platforms: * What this does is undefined if *x* is a NaN or infinity. * ``-0.0`` and ``+0.0`` produce the same bytes string. -.. c:function:: int PyFloat_Pack2(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack2(double x, char *p, int le) Pack a C double as the IEEE 754 binary16 half-precision format. -.. c:function:: int PyFloat_Pack4(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack4(double x, char *p, int le) Pack a C double as the IEEE 754 binary32 single precision format. -.. c:function:: int PyFloat_Pack8(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack8(double x, char *p, int le) Pack a C double as the IEEE 754 binary64 double precision format. @@ -154,14 +154,14 @@ Return value: The unpacked double. On error, this is ``-1.0`` and Note that on a non-IEEE platform this will refuse to unpack a bytes string that represents a NaN or infinity. -.. c:function:: double PyFloat_Unpack2(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack2(const char *p, int le) Unpack the IEEE 754 binary16 half-precision format as a C double. -.. c:function:: double PyFloat_Unpack4(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack4(const char *p, int le) Unpack the IEEE 754 binary32 single precision format as a C double. -.. c:function:: double PyFloat_Unpack8(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack8(const char *p, int le) Unpack the IEEE 754 binary64 double precision format as a C double. diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 4fd10224262488..24be9ead3874d1 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -975,9 +975,7 @@ PyPreConfig Set to ``0`` or ``1`` by the :option:`-X utf8 <-X>` command line option and the :envvar:`PYTHONUTF8` environment variable. - Also set to ``1`` if the ``LC_CTYPE`` locale is ``C`` or ``POSIX``. - - Default: ``-1`` in Python config and ``0`` in isolated config. + Default: ``1``. .. _c-preinit: diff --git a/Doc/conf.py b/Doc/conf.py index c1ed94d7b46ec2..1c1f36e5bc0737 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -448,6 +448,7 @@ epub_author = 'Python Documentation Authors' epub_publisher = 'Python Software Foundation' +epub_exclude_files = ('index.xhtml', 'download.xhtml') # index pages are not valid xhtml # https://github.com/sphinx-doc/sphinx/issues/12359 diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index fb43cf918b84dd..5a2c6bdd27c386 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -311,9 +311,13 @@ Which Docstrings Are Examined? The module docstring, and all function, class and method docstrings are searched. Objects imported into the module are not searched. +.. currentmodule:: None + .. attribute:: module.__test__ :no-typesetting: +.. currentmodule:: doctest + In addition, there are cases when you want tests to be part of a module but not part of the help text, which requires that the tests not be included in the docstring. Doctest looks for a module-level variable called ``__test__`` and uses it to locate other diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst index d6692033b2d4c3..57076c38086c79 100644 --- a/Doc/library/hmac.rst +++ b/Doc/library/hmac.rst @@ -12,6 +12,9 @@ -------------- This module implements the HMAC algorithm as described by :rfc:`2104`. +The interface allows to use any hash function with a *fixed* digest size. +In particular, extendable output functions such as SHAKE-128 or SHAKE-256 +cannot be used with HMAC. .. function:: new(key, msg=None, digestmod) diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index ea5a77028683b3..4f374be778d6b3 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -206,6 +206,10 @@ Functions :exc:`ModuleNotFoundError` is raised when the module being reloaded lacks a :class:`~importlib.machinery.ModuleSpec`. + .. warning:: + This function is not thread-safe. Calling it from multiple threads can result + in unexpected behavior. It's recommended to use the :class:`threading.Lock` + or other synchronization primitives for thread-safe module reloading. :mod:`importlib.abc` -- Abstract base classes related to import --------------------------------------------------------------- diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst index 4509da5891685f..e37683e32d01f4 100644 --- a/Doc/library/logging.rst +++ b/Doc/library/logging.rst @@ -1317,7 +1317,7 @@ functions. In Python versions earlier than 3.4, this function could also be passed a text level, and would return the corresponding numeric value of the level. This undocumented behaviour was considered a mistake, and was removed in - Python 3.4, but reinstated in 3.4.2 due to retain backward compatibility. + Python 3.4, but reinstated in 3.4.2 in order to retain backward compatibility. .. function:: getHandlerByName(name) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 1c1cf07a655ae7..abb0131d7d058e 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -508,9 +508,6 @@ the :mod:`glob` module.) .. versionchanged:: 3.4 Added Windows support. - .. versionchanged:: 3.6 - Accepts a :term:`path-like object`. - .. function:: split(path) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 1e54cfec609bd2..45ec6c7a51b7b0 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -108,6 +108,12 @@ Python UTF-8 Mode .. versionadded:: 3.7 See :pep:`540` for more details. +.. versionchanged:: next + + Python UTF-8 mode is now enabled by default (:pep:`686`). + It may be disabled with by setting :envvar:`PYTHONUTF8=0 ` as + an environment variable or by using the :option:`-X utf8=0 <-X>` command line option. + The Python UTF-8 Mode ignores the :term:`locale encoding` and forces the usage of the UTF-8 encoding: @@ -139,31 +145,22 @@ level APIs also exhibit different default behaviours: default so that attempting to open a binary file in text mode is likely to raise an exception rather than producing nonsense data. -The :ref:`Python UTF-8 Mode ` is enabled if the LC_CTYPE locale is -``C`` or ``POSIX`` at Python startup (see the :c:func:`PyConfig_Read` -function). - -It can be enabled or disabled using the :option:`-X utf8 <-X>` command line -option and the :envvar:`PYTHONUTF8` environment variable. - -If the :envvar:`PYTHONUTF8` environment variable is not set at all, then the -interpreter defaults to using the current locale settings, *unless* the current -locale is identified as a legacy ASCII-based locale (as described for -:envvar:`PYTHONCOERCECLOCALE`), and locale coercion is either disabled or -fails. In such legacy locales, the interpreter will default to enabling UTF-8 -mode unless explicitly instructed not to do so. - -The Python UTF-8 Mode can only be enabled at the Python startup. Its value +The :ref:`Python UTF-8 Mode ` is enabled by default. +It can be disabled using the :option:`-X utf8=0 <-X>` command line +option or the :envvar:`PYTHONUTF8=0 ` environment variable. +The Python UTF-8 Mode can only be disabled at Python startup. Its value can be read from :data:`sys.flags.utf8_mode `. +If the UTF-8 mode is disabled, the interpreter defaults to using +the current locale settings, *unless* the current locale is identified +as a legacy ASCII-based locale (as described for :envvar:`PYTHONCOERCECLOCALE`), +and locale coercion is either disabled or fails. +In such legacy locales, the interpreter will default to enabling UTF-8 mode +unless explicitly instructed not to do so. + See also the :ref:`UTF-8 mode on Windows ` and the :term:`filesystem encoding and error handler`. -.. seealso:: - - :pep:`686` - Python 3.15 will make :ref:`utf8-mode` default. - .. _os-procinfo: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 0aae14c15a6104..9fdb21b8badbbf 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -1384,6 +1384,13 @@ The :mod:`test.support.threading_helper` module provides support for threading t .. versionadded:: 3.8 +.. function:: run_concurrently(worker_func, nthreads, args=(), kwargs={}) + + Run the worker function concurrently in multiple threads. + Re-raises an exception if any thread raises one, after all threads have + finished. + + :mod:`test.support.os_helper` --- Utilities for os tests ======================================================================== diff --git a/Doc/library/venv.rst b/Doc/library/venv.rst index f16e24eac08343..de427fbafe71dc 100644 --- a/Doc/library/venv.rst +++ b/Doc/library/venv.rst @@ -22,10 +22,10 @@ The :mod:`!venv` module supports creating lightweight "virtual environments", each with their own independent set of Python packages installed in their :mod:`site` directories. A virtual environment is created on top of an existing -Python installation, known as the virtual environment's "base" Python, and may -optionally be isolated from the packages in the base environment, -so only those explicitly installed in the virtual environment are available. -See :ref:`sys-path-init-virtual-environments` and :mod:`site`'s +Python installation, known as the virtual environment's "base" Python, and by +default is isolated from the packages in the base environment, +so that only those explicitly installed in the virtual environment are +available. See :ref:`sys-path-init-virtual-environments` and :mod:`site`'s :ref:`virtual environments documentation ` for more information. diff --git a/Doc/tools/check-epub.py b/Doc/tools/check-epub.py new file mode 100644 index 00000000000000..693dc239c8ad58 --- /dev/null +++ b/Doc/tools/check-epub.py @@ -0,0 +1,24 @@ +import sys +from pathlib import Path + + +def main() -> int: + wrong_directory_msg = "Must run this script from the repo root" + if not Path("Doc").exists() or not Path("Doc").is_dir(): + raise RuntimeError(wrong_directory_msg) + + with Path("Doc/epubcheck.txt").open(encoding="UTF-8") as f: + messages = [message.split(" - ") for message in f.read().splitlines()] + + fatal_errors = [message for message in messages if message[0] == "FATAL"] + + if fatal_errors: + print("\nError: must not contain fatal errors:\n") + for error in fatal_errors: + print(" - ".join(error)) + + return len(fatal_errors) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 9628da3d2f6b12..7cc50bccb3724a 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -1006,6 +1006,9 @@ UTF-8 mode ========== .. versionadded:: 3.7 +.. versionchanged:: next + + Python UTF-8 mode is now enabled by default (:pep:`686`). Windows still uses legacy encodings for the system encoding (the ANSI Code Page). Python uses it for the default encoding of text files (e.g. @@ -1014,20 +1017,22 @@ Page). Python uses it for the default encoding of text files (e.g. This may cause issues because UTF-8 is widely used on the internet and most Unix systems, including WSL (Windows Subsystem for Linux). -You can use the :ref:`Python UTF-8 Mode ` to change the default text -encoding to UTF-8. You can enable the :ref:`Python UTF-8 Mode ` via -the ``-X utf8`` command line option, or the ``PYTHONUTF8=1`` environment -variable. See :envvar:`PYTHONUTF8` for enabling UTF-8 mode, and -:ref:`setting-envvars` for how to modify environment variables. - -When the :ref:`Python UTF-8 Mode ` is enabled, you can still use the +The :ref:`Python UTF-8 Mode `, enabled by default, can help by +changing the default text encoding to UTF-8. +When the :ref:`UTF-8 mode ` is enabled, you can still use the system encoding (the ANSI Code Page) via the "mbcs" codec. -Note that adding ``PYTHONUTF8=1`` to the default environment variables -will affect all Python 3.7+ applications on your system. -If you have any Python 3.7+ applications which rely on the legacy -system encoding, it is recommended to set the environment variable -temporarily or use the ``-X utf8`` command line option. +You can disable the :ref:`Python UTF-8 Mode ` via +the ``-X utf8=0`` command line option, or the ``PYTHONUTF8=0`` environment +variable. See :envvar:`PYTHONUTF8` for disabling UTF-8 mode, and +:ref:`setting-envvars` for how to modify environment variables. + +.. hint:: + Adding ``PYTHONUTF8={0,1}`` to the default environment variables + will affect all Python 3.7+ applications on your system. + If you have any Python 3.7+ applications which rely on the legacy + system encoding, it is recommended to set the environment variable + temporarily or use the ``-X utf8`` command line option. .. note:: Even when UTF-8 mode is disabled, Python uses UTF-8 by default diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index dd0bb6bd5b86b3..fe3d45b83a512e 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -172,11 +172,35 @@ production systems where traditional profiling approaches would be too intrusive Other language changes ====================== +* Python now uses UTF-8_ as the default encoding, independent of the system's + environment. This means that I/O operations without an explicit encoding, + e.g. ``open('flying-circus.txt')``, will use UTF-8. + UTF-8 is a widely-supported Unicode_ character encoding that has become a + *de facto* standard for representing text, including nearly every webpage + on the internet, many common file formats, programming languages, and more. + + This only applies when no ``encoding`` argument is given. For best + compatibility between versions of Python, ensure that an explicit ``encoding`` + argument is always provided. The :ref:`opt-in encoding warning ` + can be used to identify code that may be affected by this change. + The special special ``encoding='locale'`` argument uses the current locale + encoding, and has been supported since Python 3.10. + + To retain the previous behaviour, Python's UTF-8 mode may be disabled with + the :envvar:`PYTHONUTF8=0 ` environment variable or the + :option:`-X utf8=0 <-X>` command line option. + + .. seealso:: :pep:`686` for further details. + + .. _UTF-8: https://en.wikipedia.org/wiki/UTF-8 + .. _Unicode: https://home.unicode.org/ + + (Contributed by Adam Turner in :gh:`133711`; PEP 686 written by Inada Naoki.) + * Several error messages incorrectly using the term "argument" have been corrected. (Contributed by Stan Ulbrych in :gh:`133382`.) - New modules =========== diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 7ce4acfeb7177d..1c979d91a40850 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -102,15 +102,14 @@ typedef struct PyPreConfig { /* Enable UTF-8 mode? (PEP 540) - Disabled by default (equals to 0). + If equal to 1, use the UTF-8 encoding and use "surrogateescape" for the + stdin & stdout error handlers. - Set to 1 by "-X utf8" and "-X utf8=1" command line options. - Set to 1 by PYTHONUTF8=1 environment variable. + Enabled by default (equal to 1; PEP 686), or if Py_UTF8Mode=1, + or if "-X utf8=1" or PYTHONUTF8=1. - Set to 0 by "-X utf8=0" and PYTHONUTF8=0. - - If equals to -1, it is set to 1 if the LC_CTYPE locale is "C" or - "POSIX", otherwise it is set to 0. Inherit Py_UTF8Mode value value. */ + Set to 0 by "-X utf8=0" or PYTHONUTF8=0. + */ int utf8_mode; /* If non-zero, enable the Python Development Mode. diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 8956fb1242e52a..e0535d50396316 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -27,6 +27,7 @@ import linecache from dataclasses import dataclass, field import os.path +import re import sys @@ -195,7 +196,19 @@ def runsource(self, source, filename="", symbol="single"): ast.PyCF_ONLY_AST, incomplete_input=False, ) - except (SyntaxError, OverflowError, ValueError): + except SyntaxError as e: + # If it looks like pip install was entered (a common beginner + # mistake), provide a hint to use the system command prompt. + if re.match(r"^\s*(pip3?|py(thon3?)? -m pip) install.*", source): + e.add_note( + "The Python package manager (pip) can only be used" + " outside of the Python REPL.\n" + "Try the 'pip' command in a separate terminal or" + " command prompt." + ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False if tree.body: diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 9560ae779abfea..23b8fa6b9c7625 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -43,10 +43,11 @@ Console: type[ConsoleType] _error: tuple[type[Exception], ...] | type[Exception] -try: - from .unix_console import UnixConsole as Console, _error -except ImportError: + +if os.name == "nt": from .windows_console import WindowsConsole as Console, _error +else: + from .unix_console import UnixConsole as Console, _error ENCODING = sys.getdefaultencoding() or "latin1" diff --git a/Lib/doctest.py b/Lib/doctest.py index c8c95ecbb273b2..e77823f64b67e4 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -1997,8 +1997,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, from module m (or the current module if m is not supplied), starting with m.__doc__. - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; + Also test examples reachable from dict m.__test__ if it exists. + m.__test__ maps names to functions, classes and strings; function and class docstrings are tested even if the name is private; strings are tested directly, as if they were docstrings. diff --git a/Lib/locale.py b/Lib/locale.py index dfedc6386cb891..0bde7ed51c66c1 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -651,7 +651,8 @@ def getpreferredencoding(do_setlocale=True): if sys.flags.warn_default_encoding: import warnings warnings.warn( - "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.", + "UTF-8 Mode affects locale.getpreferredencoding(). " + "Consider locale.getencoding() instead.", EncodingWarning, 2) if sys.flags.utf8_mode: return 'utf-8' diff --git a/Lib/string/templatelib.py b/Lib/string/templatelib.py index 14b40e1e36e30b..8164872432ad09 100644 --- a/Lib/string/templatelib.py +++ b/Lib/string/templatelib.py @@ -1,15 +1,22 @@ """Support for template string literals (t-strings).""" -__all__ = [ - "Interpolation", - "Template", -] - t = t"{0}" Template = type(t) Interpolation = type(t.interpolations[0]) del t +def convert(obj, /, conversion): + """Convert *obj* using formatted string literal semantics.""" + if conversion is None: + return obj + if conversion == 'r': + return repr(obj) + if conversion == 's': + return str(obj) + if conversion == 'a': + return ascii(obj) + raise ValueError(f'invalid conversion specifier: {conversion}') + def _template_unpickle(*args): import itertools diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 54c2eb515b60da..79251bd5310223 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -380,8 +380,7 @@ def _text_encoding(): if sys.flags.utf8_mode: return "utf-8" - else: - return locale.getencoding() + return locale.getencoding() def call(*popenargs, timeout=None, **kwargs): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index fd39d3f7c95368..196a2e5c600e24 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -570,7 +570,7 @@ def skip_android_selinux(name): is_wasi = sys.platform == "wasi" def skip_emscripten_stack_overflow(): - return unittest.skipIf(is_emscripten, "Exhausts limited stack on Emscripten") + return unittest.skipIf(is_emscripten, "Exhausts stack on Emscripten") def skip_wasi_stack_overflow(): return unittest.skipIf(is_wasi, "Exhausts stack on WASI") diff --git a/Lib/test/support/threading_helper.py b/Lib/test/support/threading_helper.py index afa25a76f63829..3e04c344a0d66f 100644 --- a/Lib/test/support/threading_helper.py +++ b/Lib/test/support/threading_helper.py @@ -248,3 +248,27 @@ def requires_working_threading(*, module=False): raise unittest.SkipTest(msg) else: return unittest.skipUnless(can_start_thread, msg) + + +def run_concurrently(worker_func, nthreads, args=(), kwargs={}): + """ + Run the worker function concurrently in multiple threads. + """ + barrier = threading.Barrier(nthreads) + + def wrapper_func(*args, **kwargs): + # Wait for all threads to reach this point before proceeding. + barrier.wait() + worker_func(*args, **kwargs) + + with catch_threading_exception() as cm: + workers = [ + threading.Thread(target=wrapper_func, args=args, kwargs=kwargs) + for _ in range(nthreads) + ] + with start_threads(workers): + pass + + # If a worker thread raises an exception, re-raise it. + if cm.exc_value is not None: + raise cm.exc_value diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index c17d749d4a17ed..f30a1874ab96d4 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -300,6 +300,10 @@ def run_utf8_mode(arg): cmd = [sys.executable, '-X', 'utf8', '-c', code, arg] return subprocess.run(cmd, stdout=subprocess.PIPE, text=True) + def run_no_utf8_mode(arg): + cmd = [sys.executable, '-X', 'utf8=0', '-c', code, arg] + return subprocess.run(cmd, stdout=subprocess.PIPE, text=True) + valid_utf8 = 'e:\xe9, euro:\u20ac, non-bmp:\U0010ffff'.encode('utf-8') # invalid UTF-8 byte sequences with a valid UTF-8 sequence # in the middle. @@ -312,7 +316,8 @@ def run_utf8_mode(arg): ) test_args = [valid_utf8, invalid_utf8] - for run_cmd in (run_default, run_c_locale, run_utf8_mode): + for run_cmd in (run_default, run_c_locale, run_utf8_mode, + run_no_utf8_mode): with self.subTest(run_cmd=run_cmd): for arg in test_args: proc = run_cmd(arg) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index f6ec2cf5ce8b5c..8da6647c3f71fc 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3942,7 +3942,7 @@ def __del__(self): # it as a leak. del C.__del__ - @unittest.skipIf(support.is_emscripten, "Seems to works in Pyodide?") + @support.skip_emscripten_stack_overflow() @support.skip_wasi_stack_overflow() def test_slots_trash(self): # Testing slot trash... diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 89f4aebe28f4a1..22dfdb6bb6f138 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -543,7 +543,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'configure_locale': True, 'coerce_c_locale': False, 'coerce_c_locale_warn': False, - 'utf8_mode': False, + 'utf8_mode': True, } if MS_WINDOWS: PRE_CONFIG_COMPAT.update({ @@ -560,7 +560,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): configure_locale=False, isolated=True, use_environment=False, - utf8_mode=False, + utf8_mode=True, dev_mode=False, coerce_c_locale=False, ) @@ -805,12 +805,6 @@ def get_expected_config(self, expected_preconfig, expected, 'stdio_encoding', 'stdio_errors'): expected[key] = self.IGNORE_CONFIG - if not expected_preconfig['configure_locale']: - # UTF-8 Mode depends on the locale. There is no easy way - # to guess if UTF-8 Mode will be enabled or not if the locale - # is not configured. - expected_preconfig['utf8_mode'] = self.IGNORE_CONFIG - if expected_preconfig['utf8_mode'] == 1: if expected['filesystem_encoding'] is self.GET_DEFAULT_CONFIG: expected['filesystem_encoding'] = 'utf-8' diff --git a/Lib/test/test_free_threading/test_grp.py b/Lib/test/test_free_threading/test_grp.py new file mode 100644 index 00000000000000..1a47a9757702be --- /dev/null +++ b/Lib/test/test_free_threading/test_grp.py @@ -0,0 +1,35 @@ +import unittest + +from test.support import import_helper, threading_helper +from test.support.threading_helper import run_concurrently + +grp = import_helper.import_module("grp") + +from test import test_grp + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestGrp(unittest.TestCase): + def setUp(self): + self.test_grp = test_grp.GroupDatabaseTestCase() + + def test_racing_test_values(self): + # test_grp.test_values() calls grp.getgrall() and checks the entries + run_concurrently( + worker_func=self.test_grp.test_values, nthreads=NTHREADS + ) + + def test_racing_test_values_extended(self): + # test_grp.test_values_extended() calls grp.getgrall(), grp.getgrgid(), + # grp.getgrnam() and checks the entries + run_concurrently( + worker_func=self.test_grp.test_values_extended, + nthreads=NTHREADS, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_heapq.py b/Lib/test/test_free_threading/test_heapq.py index ee7adfb2b78d83..d771333ffcc9e0 100644 --- a/Lib/test/test_free_threading/test_heapq.py +++ b/Lib/test/test_free_threading/test_heapq.py @@ -3,10 +3,11 @@ import heapq from enum import Enum -from threading import Thread, Barrier, Lock +from threading import Barrier, Lock from random import shuffle, randint from test.support import threading_helper +from test.support.threading_helper import run_concurrently from test import test_heapq @@ -28,8 +29,8 @@ def test_racing_heapify(self): heap = list(range(OBJECT_COUNT)) shuffle(heap) - self.run_concurrently( - worker_func=heapq.heapify, args=(heap,), nthreads=NTHREADS + run_concurrently( + worker_func=heapq.heapify, nthreads=NTHREADS, args=(heap,) ) self.test_heapq.check_invariant(heap) @@ -40,8 +41,8 @@ def heappush_func(heap): for item in reversed(range(OBJECT_COUNT)): heapq.heappush(heap, item) - self.run_concurrently( - worker_func=heappush_func, args=(heap,), nthreads=NTHREADS + run_concurrently( + worker_func=heappush_func, nthreads=NTHREADS, args=(heap,) ) self.test_heapq.check_invariant(heap) @@ -61,10 +62,10 @@ def heappop_func(heap, pop_count): # Each local list should be sorted self.assertTrue(self.is_sorted_ascending(local_list)) - self.run_concurrently( + run_concurrently( worker_func=heappop_func, - args=(heap, per_thread_pop_count), nthreads=NTHREADS, + args=(heap, per_thread_pop_count), ) self.assertEqual(len(heap), 0) @@ -77,10 +78,10 @@ def heappushpop_func(heap, pushpop_items): popped_item = heapq.heappushpop(heap, item) self.assertTrue(popped_item <= item) - self.run_concurrently( + run_concurrently( worker_func=heappushpop_func, - args=(heap, pushpop_items), nthreads=NTHREADS, + args=(heap, pushpop_items), ) self.assertEqual(len(heap), OBJECT_COUNT) self.test_heapq.check_invariant(heap) @@ -93,10 +94,10 @@ def heapreplace_func(heap, replace_items): for item in replace_items: heapq.heapreplace(heap, item) - self.run_concurrently( + run_concurrently( worker_func=heapreplace_func, - args=(heap, replace_items), nthreads=NTHREADS, + args=(heap, replace_items), ) self.assertEqual(len(heap), OBJECT_COUNT) self.test_heapq.check_invariant(heap) @@ -105,8 +106,8 @@ def test_racing_heapify_max(self): max_heap = list(range(OBJECT_COUNT)) shuffle(max_heap) - self.run_concurrently( - worker_func=heapq.heapify_max, args=(max_heap,), nthreads=NTHREADS + run_concurrently( + worker_func=heapq.heapify_max, nthreads=NTHREADS, args=(max_heap,) ) self.test_heapq.check_max_invariant(max_heap) @@ -117,8 +118,8 @@ def heappush_max_func(max_heap): for item in range(OBJECT_COUNT): heapq.heappush_max(max_heap, item) - self.run_concurrently( - worker_func=heappush_max_func, args=(max_heap,), nthreads=NTHREADS + run_concurrently( + worker_func=heappush_max_func, nthreads=NTHREADS, args=(max_heap,) ) self.test_heapq.check_max_invariant(max_heap) @@ -138,10 +139,10 @@ def heappop_max_func(max_heap, pop_count): # Each local list should be sorted self.assertTrue(self.is_sorted_descending(local_list)) - self.run_concurrently( + run_concurrently( worker_func=heappop_max_func, - args=(max_heap, per_thread_pop_count), nthreads=NTHREADS, + args=(max_heap, per_thread_pop_count), ) self.assertEqual(len(max_heap), 0) @@ -154,10 +155,10 @@ def heappushpop_max_func(max_heap, pushpop_items): popped_item = heapq.heappushpop_max(max_heap, item) self.assertTrue(popped_item >= item) - self.run_concurrently( + run_concurrently( worker_func=heappushpop_max_func, - args=(max_heap, pushpop_items), nthreads=NTHREADS, + args=(max_heap, pushpop_items), ) self.assertEqual(len(max_heap), OBJECT_COUNT) self.test_heapq.check_max_invariant(max_heap) @@ -170,10 +171,10 @@ def heapreplace_max_func(max_heap, replace_items): for item in replace_items: heapq.heapreplace_max(max_heap, item) - self.run_concurrently( + run_concurrently( worker_func=heapreplace_max_func, - args=(max_heap, replace_items), nthreads=NTHREADS, + args=(max_heap, replace_items), ) self.assertEqual(len(max_heap), OBJECT_COUNT) self.test_heapq.check_max_invariant(max_heap) @@ -203,7 +204,7 @@ def worker(): except IndexError: pass - self.run_concurrently(worker, (), n_threads * 2) + run_concurrently(worker, n_threads * 2) @staticmethod def is_sorted_ascending(lst): @@ -241,27 +242,6 @@ def create_random_list(a, b, size): """ return [randint(-a, b) for _ in range(size)] - def run_concurrently(self, worker_func, args, nthreads): - """ - Run the worker function concurrently in multiple threads. - """ - barrier = Barrier(nthreads) - - def wrapper_func(*args): - # Wait for all threads to reach this point before proceeding. - barrier.wait() - worker_func(*args) - - with threading_helper.catch_threading_exception() as cm: - workers = ( - Thread(target=wrapper_func, args=args) for _ in range(nthreads) - ) - with threading_helper.start_threads(workers): - pass - - # Worker threads should not raise any exceptions - self.assertIsNone(cm.exc_value) - if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index a480e398722c33..c3d0a2bcea5c17 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -2,10 +2,12 @@ environment to verify things are thread-safe in a free-threaded build""" import sys +import threading import time import unittest import weakref +from contextlib import contextmanager from sys import monitoring from test.support import threading_helper from threading import Thread, _PyRLock, Barrier @@ -192,6 +194,16 @@ def during_threads(self): self.set = not self.set +class TraceBuf: + def __init__(self): + self.traces = [] + self.traces_lock = threading.Lock() + + def append(self, trace): + with self.traces_lock: + self.traces.append(trace) + + @threading_helper.requires_working_threading() class MonitoringMisc(MonitoringTestMixin, TestCase): def register_callback(self, barrier): @@ -246,6 +258,135 @@ def f(): finally: sys.settrace(None) + def test_toggle_setprofile_no_new_events(self): + # gh-136396: Make sure that profile functions are called for newly + # created threads when profiling is toggled but the set of monitoring + # events doesn't change + traces = [] + + def profiler(frame, event, arg): + traces.append((frame.f_code.co_name, event, arg)) + + def a(x, y): + return b(x, y) + + def b(x, y): + return max(x, y) + + sys.setprofile(profiler) + try: + a(1, 2) + finally: + sys.setprofile(None) + traces.clear() + + def thread_main(x, y): + sys.setprofile(profiler) + try: + a(x, y) + finally: + sys.setprofile(None) + t = Thread(target=thread_main, args=(100, 200)) + t.start() + t.join() + + expected = [ + ("a", "call", None), + ("b", "call", None), + ("b", "c_call", max), + ("b", "c_return", max), + ("b", "return", 200), + ("a", "return", 200), + ("thread_main", "c_call", sys.setprofile), + ] + self.assertEqual(traces, expected) + + def observe_threads(self, observer, buf): + def in_child(ident): + return ident + + def child(ident): + with observer(): + in_child(ident) + + def in_parent(ident): + return ident + + def parent(barrier, ident): + barrier.wait() + with observer(): + t = Thread(target=child, args=(ident,)) + t.start() + t.join() + in_parent(ident) + + num_threads = 5 + barrier = Barrier(num_threads) + threads = [] + for i in range(num_threads): + t = Thread(target=parent, args=(barrier, i)) + t.start() + threads.append(t) + for t in threads: + t.join() + + for i in range(num_threads): + self.assertIn(("in_parent", "return", i), buf.traces) + self.assertIn(("in_child", "return", i), buf.traces) + + def test_profile_threads(self): + buf = TraceBuf() + + def profiler(frame, event, arg): + buf.append((frame.f_code.co_name, event, arg)) + + @contextmanager + def profile(): + sys.setprofile(profiler) + try: + yield + finally: + sys.setprofile(None) + + self.observe_threads(profile, buf) + + def test_trace_threads(self): + buf = TraceBuf() + + def tracer(frame, event, arg): + buf.append((frame.f_code.co_name, event, arg)) + return tracer + + @contextmanager + def trace(): + sys.settrace(tracer) + try: + yield + finally: + sys.settrace(None) + + self.observe_threads(trace, buf) + + def test_monitor_threads(self): + buf = TraceBuf() + + def monitor_py_return(code, off, retval): + buf.append((code.co_name, "return", retval)) + + monitoring.register_callback( + self.tool_id, monitoring.events.PY_RETURN, monitor_py_return + ) + + monitoring.set_events( + self.tool_id, monitoring.events.PY_RETURN + ) + + @contextmanager + def noop(): + yield + + self.observe_threads(noop, buf) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 58a30c8e6ac447..b41e02c3a16379 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1651,6 +1651,18 @@ def __repr__(self): self.assertEqual(f"{1+2 = # my comment }", '1+2 = \n 3') + self.assertEqual(f'{""" # booo + """=}', '""" # booo\n """=\' # booo\\n \'') + + self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'') + self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'') + + self.assertEqual(f'{ # some comment goes here + """hello"""=}', ' \n """hello"""=\'hello\'') + self.assertEqual(f'{"""# this is not a comment + a""" # this is a comment + }', '# this is not a comment\n a') + # These next lines contains tabs. Backslash escapes don't # work in f-strings. # patchcheck doesn't like these tabs. So the only way to test diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 1e50dc43c35f5c..de3a17fe893170 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1918,11 +1918,9 @@ def test_makedir(self): support.is_wasi, "WASI's umask is a stub." ) - @unittest.skipIf( - support.is_emscripten, - "TODO: Fails in buildbot; see #135783" - ) def test_mode(self): + # Note: in some cases, the umask might already be 2 in which case this + # will pass even if os.umask is actually broken. with os_helper.temp_umask(0o002): base = os_helper.TESTFN parent = os.path.join(base, 'dir1') diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 98bae7dd703fd9..657a971f8769df 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1757,3 +1757,14 @@ def test_showrefcount(self): output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) self.assertEqual(len(matches), 3) + + def test_detect_pip_usage_in_repl(self): + for pip_cmd in ("pip", "pip3", "python -m pip", "python3 -m pip"): + with self.subTest(pip_cmd=pip_cmd): + output, exit_code = self.run_repl([f"{pip_cmd} install sampleproject", "exit"]) + self.assertIn("SyntaxError", output) + hint = ( + "The Python package manager (pip) can only be used" + " outside of the Python REPL" + ) + self.assertIn(hint, output) diff --git a/Lib/test/test_string/test_templatelib.py b/Lib/test/test_string/test_templatelib.py index adaf590e64dad6..1c86717155fd5a 100644 --- a/Lib/test/test_string/test_templatelib.py +++ b/Lib/test/test_string/test_templatelib.py @@ -1,7 +1,7 @@ import pickle import unittest from collections.abc import Iterator, Iterable -from string.templatelib import Template, Interpolation +from string.templatelib import Template, Interpolation, convert from test.test_string._support import TStringBaseCase, fstring @@ -169,5 +169,25 @@ def test_exhausted(self): self.assertRaises(StopIteration, next, template_iter) +class TestFunctions(unittest.TestCase): + def test_convert(self): + from fractions import Fraction + + for obj in ('Café', None, 3.14, Fraction(1, 2)): + with self.subTest(f'{obj=}'): + self.assertEqual(convert(obj, None), obj) + self.assertEqual(convert(obj, 's'), str(obj)) + self.assertEqual(convert(obj, 'r'), repr(obj)) + self.assertEqual(convert(obj, 'a'), ascii(obj)) + + # Invalid conversion specifier + with self.assertRaises(ValueError): + convert(obj, 'z') + with self.assertRaises(ValueError): + convert(obj, 1) + with self.assertRaises(ValueError): + convert(obj, object()) + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index f66881044e16df..b8e49440c9f7da 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -89,8 +89,8 @@ def test_env_var(self): # the UTF-8 mode if not self.posix_locale(): # PYTHONUTF8 should be ignored if -E is used - out = self.get_output('-E', '-c', code, PYTHONUTF8='1') - self.assertEqual(out, '0') + out = self.get_output('-E', '-c', code, PYTHONUTF8='0') + self.assertEqual(out, '1') # invalid mode out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True) @@ -116,7 +116,7 @@ def test_filesystemencoding(self): # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode # and has the priority over -X utf8 and PYTHONUTF8 out = self.get_output('-X', 'utf8', '-c', code, - PYTHONUTF8='strict', + PYTHONUTF8='xxx', PYTHONLEGACYWINDOWSFSENCODING='1') self.assertEqual(out, 'mbcs/replace') diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py index 5c3b1250ceb045..f89e94449b3031 100644 --- a/Lib/test/test_warnings/__init__.py +++ b/Lib/test/test_warnings/__init__.py @@ -555,13 +555,7 @@ def test_warn_explicit_non_ascii_filename(self): with self.module.catch_warnings(record=True) as w: self.module.resetwarnings() self.module.filterwarnings("always", category=UserWarning) - filenames = ["nonascii\xe9\u20ac"] - if not support.is_emscripten: - # JavaScript does not like surrogates. - # Invalid UTF-8 leading byte 0x80 encountered when - # deserializing a UTF-8 string in wasm memory to a JS - # string! - filenames.append("surrogate\udc80") + filenames = ["nonascii\xe9\u20ac", "surrogate\udc80"] for filename in filenames: try: os.fsencode(filename) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 9ed0f4096a45e3..270b9d6da8e7b9 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -58,7 +58,7 @@ def test_del_attribute(self): self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) @support.skip_wasi_stack_overflow() - @unittest.skipIf(support.is_emscripten, "segfaults") + @support.skip_emscripten_stack_overflow() def test_trashcan(self): # If this test fails, it will most likely die via segfault. e = root = cET.Element('root') diff --git a/Lib/traceback.py b/Lib/traceback.py index a1f175dbbaa421..31aa8695735f2b 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -287,6 +287,12 @@ class FrameSummary: of code that was running when the frame was captured. - :attr:`locals` Either None if locals were not supplied, or a dict mapping the name to the repr() of the variable. + - :attr:`end_lineno` The last line number of the source code for this frame. + By default, it is set to lineno and indexation starts from 1. + - :attr:`colno` The column number of the source code for this frame. + By default, it is None and indexation starts from 0. + - :attr:`end_colno` The last column number of the source code for this frame. + By default, it is None and indexation starts from 0. """ __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno', diff --git a/Misc/ACKS b/Misc/ACKS index 3814509aea030a..fabd79b9f74210 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1744,6 +1744,7 @@ Joel Shprentz Yue Shuaijie Jaysinh Shukla Terrel Shumway +Richard Si Eric Siegerman Reilly Tucker Siemens Paul Sijben @@ -1988,6 +1989,7 @@ Olivier Vielpeau Kannan Vijayan Kurt Vile Norman Vine +Tom Viner Pauli Virtanen Frank Visser Long Vo diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst new file mode 100644 index 00000000000000..c8d3d62763dc12 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-08-22-19-10.gh-issue-133711.e91wUy.rst @@ -0,0 +1,2 @@ +Implement :pep:`686`: Enable :ref:`Python UTF-8 Mode ` by +default. Patch by Adam Turner. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst new file mode 100644 index 00000000000000..9b1f62433b45ed --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst @@ -0,0 +1,3 @@ +Fixed a bug where f-string debug expressions (using =) would incorrectly +strip out parts of strings containing escaped quotes and # characters. Patch +by Pablo Galindo. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-00-03-34.gh-issue-116738.iBBAdo.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-00-03-34.gh-issue-116738.iBBAdo.rst new file mode 100644 index 00000000000000..2a1ed2944d8ddf --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-12-00-03-34.gh-issue-116738.iBBAdo.rst @@ -0,0 +1 @@ +Make functions in :mod:`grp` thread-safe on the :term:`free threaded ` build. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-15-53-16.gh-issue-136525.xAko0e.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-15-53-16.gh-issue-136525.xAko0e.rst new file mode 100644 index 00000000000000..f28eb2ca3b71e8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-10-15-53-16.gh-issue-136525.xAko0e.rst @@ -0,0 +1,2 @@ +Fix issue where per-thread bytecode was not instrumented for newly created +threads. diff --git a/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst b/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst new file mode 100644 index 00000000000000..70f54936c80f55 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2025-07-01-23-00-58.gh-issue-136155.4siQQO.rst @@ -0,0 +1 @@ +We are now checking for fatal errors in EPUB builds in CI. diff --git a/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst b/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst new file mode 100644 index 00000000000000..f305abb655a6f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-07-16-46-55.gh-issue-72327.wLvRuj.rst @@ -0,0 +1,2 @@ +Suggest using the system command prompt when ``pip install`` is typed into +the REPL. Patch by Tom Viner, Richard Si, and Brian Schubert. diff --git a/Misc/NEWS.d/next/Library/2025-07-15-16-37-34.gh-issue-136669.Yexwah.rst b/Misc/NEWS.d/next/Library/2025-07-15-16-37-34.gh-issue-136669.Yexwah.rst new file mode 100644 index 00000000000000..0d93397ff35d0b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-15-16-37-34.gh-issue-136669.Yexwah.rst @@ -0,0 +1 @@ +:mod:`!_asyncio` is now statically linked for improved performance. diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 3a38a60a152e8c..86c8eb27c0a6c7 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -32,7 +32,6 @@ ############################################################################ # Modules that should always be present (POSIX and Windows): @MODULE_ARRAY_TRUE@array arraymodule.c -@MODULE__ASYNCIO_TRUE@_asyncio _asynciomodule.c @MODULE__BISECT_TRUE@_bisect _bisectmodule.c @MODULE__CSV_TRUE@_csv _csv.c @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c @@ -193,3 +192,9 @@ # Limited API template modules; must be built as shared modules. @MODULE_XXLIMITED_TRUE@xxlimited xxlimited.c @MODULE_XXLIMITED_35_TRUE@xxlimited_35 xxlimited_35.c + + +# for performance +*static* + +@MODULE__ASYNCIO_TRUE@_asyncio _asynciomodule.c diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index d72031137e0a4e..b50e5e403a1a19 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -811,7 +811,7 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) } #elif defined(__linux__) // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + address = search_linux_map_for_section(handle, "AsyncioDebug", "python"); if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); @@ -820,10 +820,10 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) } #elif defined(__APPLE__) && TARGET_OS_OSX // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + address = search_map_for_section(handle, "AsyncioDebug", "libpython"); if (address == 0) { PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + address = search_map_for_section(handle, "AsyncioDebug", "python"); } if (address == 0) { // Error out: 'python' substring covers both executable and DLL diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 22f426c5192bf2..65f5f8c9267b6c 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -380,7 +380,7 @@ PyDoc_STRVAR(os_chdir__doc__, "\n" "path may always be specified as a string.\n" "On some platforms, path may also be specified as an open file descriptor.\n" -" If this functionality is unavailable, using it raises an exception."); +"If this functionality is unavailable, using it raises an exception."); #define OS_CHDIR_METHODDEF \ {"chdir", _PyCFunction_CAST(os_chdir), METH_FASTCALL|METH_KEYWORDS, os_chdir__doc__}, @@ -13440,4 +13440,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=5341daae6581a62b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6cfddb3b77dc7a40 input=a9049054013a1b77]*/ diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c index 29da9936b65504..652958618a2c4c 100644 --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -55,6 +55,11 @@ get_grp_state(PyObject *module) static struct PyModuleDef grpmodule; +/* Mutex to protect calls to getgrgid(), getgrnam(), and getgrent(). + * These functions return pointer to static data structure, which + * may be overwritten by any subsequent calls. */ +static PyMutex group_db_mutex = {0}; + #define DEFAULT_BUFFER_SIZE 1024 static PyObject * @@ -168,9 +173,15 @@ grp_getgrgid_impl(PyObject *module, PyObject *id) Py_END_ALLOW_THREADS #else + PyMutex_Lock(&group_db_mutex); + // The getgrgid() function need not be thread-safe. + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/getgrgid.html p = getgrgid(gid); #endif if (p == NULL) { +#ifndef HAVE_GETGRGID_R + PyMutex_Unlock(&group_db_mutex); +#endif PyMem_RawFree(buf); if (nomem == 1) { return PyErr_NoMemory(); @@ -185,6 +196,8 @@ grp_getgrgid_impl(PyObject *module, PyObject *id) retval = mkgrent(module, p); #ifdef HAVE_GETGRGID_R PyMem_RawFree(buf); +#else + PyMutex_Unlock(&group_db_mutex); #endif return retval; } @@ -249,9 +262,15 @@ grp_getgrnam_impl(PyObject *module, PyObject *name) Py_END_ALLOW_THREADS #else + PyMutex_Lock(&group_db_mutex); + // The getgrnam() function need not be thread-safe. + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/getgrnam.html p = getgrnam(name_chars); #endif if (p == NULL) { +#ifndef HAVE_GETGRNAM_R + PyMutex_Unlock(&group_db_mutex); +#endif if (nomem == 1) { PyErr_NoMemory(); } @@ -261,6 +280,9 @@ grp_getgrnam_impl(PyObject *module, PyObject *name) goto out; } retval = mkgrent(module, p); +#ifndef HAVE_GETGRNAM_R + PyMutex_Unlock(&group_db_mutex); +#endif out: PyMem_RawFree(buf); Py_DECREF(bytes); @@ -285,8 +307,7 @@ grp_getgrall_impl(PyObject *module) return NULL; } - static PyMutex getgrall_mutex = {0}; - PyMutex_Lock(&getgrall_mutex); + PyMutex_Lock(&group_db_mutex); setgrent(); struct group *p; @@ -306,7 +327,7 @@ grp_getgrall_impl(PyObject *module) done: endgrent(); - PyMutex_Unlock(&getgrall_mutex); + PyMutex_Unlock(&group_db_mutex); return d; } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index b570f81b7cf7c2..47eaf5cd428a53 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -3477,12 +3477,12 @@ Change the current working directory to the specified path. path may always be specified as a string. On some platforms, path may also be specified as an open file descriptor. - If this functionality is unavailable, using it raises an exception. +If this functionality is unavailable, using it raises an exception. [clinic start generated code]*/ static PyObject * os_chdir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=3be6400eee26eaae input=1a4a15b4d12cb15d]*/ +/*[clinic end generated code: output=3be6400eee26eaae input=a74ceab5d72adf74]*/ { int result; diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ba178abc0c071e..42e021679b583f 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3330,12 +3330,29 @@ _PyCodeArray_New(Py_ssize_t size) return arr; } +// Get the underlying code unit, leaving instrumentation +static _Py_CODEUNIT +deopt_code_unit(PyCodeObject *code, int i) +{ + _Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i; + _Py_CODEUNIT inst = { + .cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)}; + int opcode = inst.op.code; + if (opcode < MIN_INSTRUMENTED_OPCODE) { + inst.op.code = _PyOpcode_Deopt[opcode]; + assert(inst.op.code < MIN_SPECIALIZED_OPCODE); + } + // JIT should not be enabled with free-threading + assert(inst.op.code != ENTER_EXECUTOR); + return inst; +} + static void copy_code(_Py_CODEUNIT *dst, PyCodeObject *co) { int code_len = (int) Py_SIZE(co); for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) { - dst[i] = _Py_GetBaseCodeUnit(co, i); + dst[i] = deopt_code_unit(co, i); } _PyCode_Quicken(dst, code_len, 1); } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index be62ae5eefd00d..0ed52ac5e87b6e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -217,7 +217,7 @@ set_values(PyDictObject *mp, PyDictValues *values) #define LOAD_KEYS_NENTRIES(keys) _Py_atomic_load_ssize_relaxed(&keys->dk_nentries) #define INCREF_KEYS_FT(dk) dictkeys_incref(dk) -#define DECREF_KEYS_FT(dk, shared) dictkeys_decref(_PyInterpreterState_GET(), dk, shared) +#define DECREF_KEYS_FT(dk, shared) dictkeys_decref(dk, shared) static inline void split_keys_entry_added(PyDictKeysObject *keys) { @@ -380,8 +380,7 @@ equally good collision statistics, needed less code & used less memory. */ -static int dictresize(PyInterpreterState *interp, PyDictObject *mp, - uint8_t log_newsize, int unicode); +static int dictresize(PyDictObject *mp, uint8_t log_newsize, int unicode); static PyObject* dict_iter(PyObject *dict); @@ -444,7 +443,7 @@ dictkeys_incref(PyDictKeysObject *dk) } static inline void -dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) +dictkeys_decref(PyDictKeysObject *dk, bool use_qsbr) { if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); @@ -753,7 +752,7 @@ _PyDict_CheckConsistency(PyObject *op, int check_content) static PyDictKeysObject* -new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) +new_keys_object(uint8_t log2_size, bool unicode) { Py_ssize_t usable; int log2_bytes; @@ -867,8 +866,7 @@ free_values(PyDictValues *values, bool use_qsbr) /* Consumes a reference to the keys object */ static PyObject * -new_dict(PyInterpreterState *interp, - PyDictKeysObject *keys, PyDictValues *values, +new_dict(PyDictKeysObject *keys, PyDictValues *values, Py_ssize_t used, int free_values_on_failure) { assert(keys != NULL); @@ -876,7 +874,7 @@ new_dict(PyInterpreterState *interp, if (mp == NULL) { mp = PyObject_GC_New(PyDictObject, &PyDict_Type); if (mp == NULL) { - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); if (free_values_on_failure) { free_values(values, false); } @@ -894,7 +892,7 @@ new_dict(PyInterpreterState *interp, } static PyObject * -new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) +new_dict_with_shared_keys(PyDictKeysObject *keys) { size_t size = shared_keys_usable_size(keys); PyDictValues *values = new_values(size); @@ -905,7 +903,7 @@ new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) for (size_t i = 0; i < size; i++) { values->values[i] = NULL; } - return new_dict(interp, keys, values, 0, 1); + return new_dict(keys, values, 0, 1); } @@ -971,9 +969,8 @@ clone_combined_dict_keys(PyDictObject *orig) PyObject * PyDict_New(void) { - PyInterpreterState *interp = _PyInterpreterState_GET(); /* We don't incref Py_EMPTY_KEYS here because it is immortal. */ - return new_dict(interp, Py_EMPTY_KEYS, NULL, 0, 0); + return new_dict(Py_EMPTY_KEYS, NULL, 0, 0); } /* Search index of hash table from offset of entry table */ @@ -1714,9 +1711,9 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash) } static int -insertion_resize(PyInterpreterState *interp, PyDictObject *mp, int unicode) +insertion_resize(PyDictObject *mp, int unicode) { - return dictresize(interp, mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); + return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); } static inline int @@ -1725,7 +1722,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, { if (mp->ma_keys->dk_usable <= 0) { /* Need to resize. */ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { return -1; } } @@ -1823,7 +1820,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, ASSERT_DICT_LOCKED(mp); if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { - if (insertion_resize(interp, mp, 0) < 0) + if (insertion_resize(mp, 0) < 0) goto Fail; assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } @@ -1838,7 +1835,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, } /* No space in shared keys. Resize and continue below. */ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { goto Fail; } } @@ -1893,8 +1890,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, ASSERT_DICT_LOCKED(mp); int unicode = PyUnicode_CheckExact(key); - PyDictKeysObject *newkeys = new_keys_object( - interp, PyDict_LOG_MINSIZE, unicode); + PyDictKeysObject *newkeys = new_keys_object(PyDict_LOG_MINSIZE, unicode); if (newkeys == NULL) { Py_DECREF(key); Py_DECREF(value); @@ -1989,7 +1985,7 @@ This function supports: - Generic -> Generic */ static int -dictresize(PyInterpreterState *interp, PyDictObject *mp, +dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode) { PyDictKeysObject *oldkeys, *newkeys; @@ -2017,7 +2013,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, */ /* Allocate a new table. */ - newkeys = new_keys_object(interp, log2_newsize, unicode); + newkeys = new_keys_object(log2_newsize, unicode); if (newkeys == NULL) { return -1; } @@ -2060,7 +2056,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } UNLOCK_KEYS(oldkeys); set_keys(mp, newkeys); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(mp)); set_values(mp, NULL); if (oldvalues->embedded) { assert(oldvalues->embedded == 1); @@ -2141,7 +2137,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } static PyObject * -dict_new_presized(PyInterpreterState *interp, Py_ssize_t minused, bool unicode) +dict_new_presized(Py_ssize_t minused, bool unicode) { const uint8_t log2_max_presize = 17; const Py_ssize_t max_presize = ((Py_ssize_t)1) << log2_max_presize; @@ -2162,17 +2158,16 @@ dict_new_presized(PyInterpreterState *interp, Py_ssize_t minused, bool unicode) log2_newsize = estimate_log2_keysize(minused); } - new_keys = new_keys_object(interp, log2_newsize, unicode); + new_keys = new_keys_object(log2_newsize, unicode); if (new_keys == NULL) return NULL; - return new_dict(interp, new_keys, NULL, 0, 0); + return new_dict(new_keys, NULL, 0, 0); } PyObject * _PyDict_NewPresized(Py_ssize_t minused) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - return dict_new_presized(interp, minused, false); + return dict_new_presized(minused, false); } PyObject * @@ -2182,7 +2177,6 @@ _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, { bool unicode = true; PyObject *const *ks = keys; - PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = 0; i < length; i++) { if (!PyUnicode_CheckExact(*ks)) { @@ -2192,7 +2186,7 @@ _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, ks += keys_offset; } - PyObject *dict = dict_new_presized(interp, length, unicode); + PyObject *dict = dict_new_presized(length, unicode); if (dict == NULL) { return NULL; } @@ -2895,7 +2889,7 @@ clear_lock_held(PyObject *op) if (oldvalues == NULL) { set_keys(mp, Py_EMPTY_KEYS); assert(oldkeys->dk_refcnt == 1); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(mp)); } else { n = oldkeys->dk_nentries; @@ -2909,7 +2903,7 @@ clear_lock_held(PyObject *op) set_values(mp, NULL); set_keys(mp, Py_EMPTY_KEYS); free_values(oldvalues, IS_DICT_SHARED(mp)); - dictkeys_decref(interp, oldkeys, false); + dictkeys_decref(oldkeys, false); } } ASSERT_CONSISTENT(mp); @@ -3161,7 +3155,7 @@ dict_dict_fromkeys(PyInterpreterState *interp, PyDictObject *mp, uint8_t new_size = Py_MAX( estimate_log2_keysize(PyDict_GET_SIZE(iterable)), DK_LOG_SIZE(mp->ma_keys)); - if (dictresize(interp, mp, new_size, unicode)) { + if (dictresize(mp, new_size, unicode)) { Py_DECREF(mp); return NULL; } @@ -3186,7 +3180,7 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, uint8_t new_size = Py_MAX( estimate_log2_keysize(PySet_GET_SIZE(iterable)), DK_LOG_SIZE(mp->ma_keys)); - if (dictresize(interp, mp, new_size, 0)) { + if (dictresize(mp, new_size, 0)) { Py_DECREF(mp); return NULL; } @@ -3298,11 +3292,11 @@ dict_dealloc(PyObject *self) } free_values(values, false); } - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } else if (keys != NULL) { assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS); - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } if (Py_IS_TYPE(mp, &PyDict_Type)) { _Py_FREELIST_FREE(dicts, mp, Py_TYPE(mp)->tp_free); @@ -3832,7 +3826,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe return -1; ensure_shared_on_resize(mp); - dictkeys_decref(interp, mp->ma_keys, IS_DICT_SHARED(mp)); + dictkeys_decref(mp->ma_keys, IS_DICT_SHARED(mp)); set_keys(mp, keys); STORE_USED(mp, other->ma_used); ASSERT_CONSISTENT(mp); @@ -3851,8 +3845,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe */ if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) { int unicode = DK_IS_UNICODE(other->ma_keys); - if (dictresize(interp, mp, - estimate_log2_keysize(mp->ma_used + other->ma_used), + if (dictresize(mp, estimate_log2_keysize(mp->ma_used + other->ma_used), unicode)) { return -1; } @@ -4117,7 +4110,7 @@ copy_lock_held(PyObject *o) if (keys == NULL) { return NULL; } - PyDictObject *new = (PyDictObject *)new_dict(interp, keys, NULL, 0, 0); + PyDictObject *new = (PyDictObject *)new_dict(keys, NULL, 0, 0); if (new == NULL) { /* In case of an error, `new_dict()` takes care of cleaning up `keys`. */ @@ -4362,7 +4355,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } if (!PyUnicode_CheckExact(key) && DK_IS_UNICODE(mp->ma_keys)) { - if (insertion_resize(interp, mp, 0) < 0) { + if (insertion_resize(mp, 0) < 0) { if (result) { *result = NULL; } @@ -4386,7 +4379,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } /* No space in shared keys. Resize and continue below. */ - if (insertion_resize(interp, mp, 1) < 0) { + if (insertion_resize(mp, 1) < 0) { goto error; } } @@ -4555,7 +4548,7 @@ dict_popitem_impl(PyDictObject *self) } /* Convert split table to combined table */ if (_PyDict_HasSplitTable(self)) { - if (dictresize(interp, self, DK_LOG_SIZE(self->ma_keys), 1) < 0) { + if (dictresize(self, DK_LOG_SIZE(self->ma_keys), 1) < 0) { Py_DECREF(res); return NULL; } @@ -6725,10 +6718,7 @@ dictvalues_reversed(PyObject *self, PyObject *Py_UNUSED(ignored)) PyDictKeysObject * _PyDict_NewKeysForClass(PyHeapTypeObject *cls) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - - PyDictKeysObject *keys = new_keys_object( - interp, NEXT_LOG2_SHARED_KEYS_MAX_SIZE, 1); + PyDictKeysObject *keys = new_keys_object(NEXT_LOG2_SHARED_KEYS_MAX_SIZE, 1); if (keys == NULL) { PyErr_Clear(); } @@ -6792,8 +6782,7 @@ _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp) } static PyDictObject * -make_dict_from_instance_attributes(PyInterpreterState *interp, - PyDictKeysObject *keys, PyDictValues *values) +make_dict_from_instance_attributes(PyDictKeysObject *keys, PyDictValues *values) { dictkeys_incref(keys); Py_ssize_t used = 0; @@ -6804,7 +6793,7 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, used += 1; } } - PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0); + PyDictObject *res = (PyDictObject *)new_dict(keys, values, used, 0); return res; } @@ -6818,9 +6807,8 @@ _PyObject_MaterializeManagedDict_LockHeld(PyObject *obj) PyDictValues *values = _PyObject_InlineValues(obj); PyDictObject *dict; if (values->valid) { - PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); - dict = make_dict_from_instance_attributes(interp, keys, values); + dict = make_dict_from_instance_attributes(keys, values); } else { dict = (PyDictObject *)PyDict_New(); @@ -6916,7 +6904,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, if (dict == NULL) { // Make the dict but don't publish it in the object // so that no one else will see it. - dict = make_dict_from_instance_attributes(PyInterpreterState_Get(), keys, values); + dict = make_dict_from_instance_attributes(keys, values); if (dict == NULL || _PyDict_SetItem_LockHeld(dict, name, value) < 0) { Py_XDECREF(dict); @@ -7449,11 +7437,10 @@ PyObject_ClearManagedDict(PyObject *obj) "clearing an object managed dict"); /* Clear the dict */ Py_BEGIN_CRITICAL_SECTION(dict); - PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *oldkeys = dict->ma_keys; set_keys(dict, Py_EMPTY_KEYS); dict->ma_values = NULL; - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict)); + dictkeys_decref(oldkeys, IS_DICT_SHARED(dict)); STORE_USED(dict, 0); clear_inline_values(_PyObject_InlineValues(obj)); Py_END_CRITICAL_SECTION(); @@ -7490,8 +7477,7 @@ ensure_managed_dict(PyObject *obj) goto done; } #endif - dict = (PyDictObject *)new_dict_with_shared_keys(_PyInterpreterState_GET(), - CACHED_KEYS(tp)); + dict = (PyDictObject *)new_dict_with_shared_keys(CACHED_KEYS(tp)); FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)dict); @@ -7520,9 +7506,8 @@ ensure_nonmanaged_dict(PyObject *obj, PyObject **dictptr) #endif PyTypeObject *tp = Py_TYPE(obj); if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) { - PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)); - dict = new_dict_with_shared_keys(interp, cached); + dict = new_dict_with_shared_keys(cached); } else { dict = PyDict_New(); @@ -7578,8 +7563,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject *obj, PyObject **dictptr, void _PyDictKeys_DecRef(PyDictKeysObject *keys) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - dictkeys_decref(interp, keys, false); + dictkeys_decref(keys, false); } static inline uint32_t diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 0a078dd594148c..81363cf8e810fe 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { } PyObject *res = NULL; - // Check if there is a # character in the expression + // Look for a # character outside of string literals int hash_detected = 0; + int in_string = 0; + char quote_char = 0; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { + char ch = tok_mode->last_expr_buffer[i]; + + // Skip escaped characters + if (ch == '\\') { + i++; + continue; + } + + // Handle quotes + if (ch == '"' || ch == '\'') { + // The following if/else block works becase there is an off number + // of quotes in STRING tokens and the lexer only ever reaches this + // function with valid STRING tokens. + // For example: """hello""" + // First quote: in_string = 1 + // Second quote: in_string = 0 + // Third quote: in_string = 1 + if (!in_string) { + in_string = 1; + quote_char = ch; + } + else if (ch == quote_char) { + in_string = 0; + } + continue; + } + + // Check for # outside strings + if (ch == '#' && !in_string) { hash_detected = 1; break; } } - + // If we found a # character in the expression, we need to handle comments if (hash_detected) { - Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); + // Allocate buffer for processed result + char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char)); if (!result) { return -1; } - Py_ssize_t i = 0; - Py_ssize_t j = 0; + Py_ssize_t i = 0; // Input position + Py_ssize_t j = 0; // Output position + in_string = 0; // Whether we're in a string + quote_char = 0; // Current string quote char - for (i = 0, j = 0; i < input_length; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - // Skip characters until newline or end of string - while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') { - if (tok_mode->last_expr_buffer[i] == '\n') { - result[j++] = tok_mode->last_expr_buffer[i]; - break; - } + // Process each character + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + char ch = tok_mode->last_expr_buffer[i]; + + // Handle string quotes + if (ch == '"' || ch == '\'') { + // See comment above to understand this part + if (!in_string) { + in_string = 1; + quote_char = ch; + } else if (ch == quote_char) { + in_string = 0; + } + result[j++] = ch; + } + // Skip comments + else if (ch == '#' && !in_string) { + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && + tok_mode->last_expr_buffer[i] != '\n') { i++; } - } else { - result[j++] = tok_mode->last_expr_buffer[i]; + if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + result[j++] = '\n'; + } + } + // Copy other chars + else { + result[j++] = ch; } + i++; } result[j] = '\0'; // Null-terminate the result string @@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { tok_mode->last_expr_size - tok_mode->last_expr_end, NULL ); - } - - if (!res) { + if (!res) { return -1; } token->metadata = res; diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 577da65c7cdafa..88936bbc699c30 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1854,9 +1854,9 @@ static int test_initconfig_get_api(void) assert(initconfig_getint(config, "dev_mode") == 1); // test PyInitConfig_GetInt() on a PyPreConfig option - assert(initconfig_getint(config, "utf8_mode") == 0); - assert(PyInitConfig_SetInt(config, "utf8_mode", 1) == 0); assert(initconfig_getint(config, "utf8_mode") == 1); + assert(PyInitConfig_SetInt(config, "utf8_mode", 0) == 0); + assert(initconfig_getint(config, "utf8_mode") == 0); // test PyInitConfig_GetStr() char *str; diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c new file mode 100644 index 00000000000000..bb80f979420ec1 --- /dev/null +++ b/Python/emscripten_syscalls.c @@ -0,0 +1,39 @@ +#include "emscripten.h" + +// If we're running in node, report the UID of the user in the native system as +// the UID of the user. Since the nodefs will report the uid correctly, if we +// don't make getuid report it correctly too we'll see some permission errors. +// Normally __syscall_getuid32 is a stub that always returns 0 but it is +// defined with weak linkage so we can override it. +EM_JS(int, __syscall_getuid32_js, (void), { + // If we're in node and we can, report the native uid + if (ENVIRONMENT_IS_NODE) { + return process.getuid(); + } + // Fall back to the stub case of returning 0. + return 0; +}) + +int __syscall_getuid32(void) { + return __syscall_getuid32_js(); +} + +EM_JS(int, __syscall_umask_js, (int mask), { + // If we're in node and we can, call native process.umask() + if (ENVIRONMENT_IS_NODE) { + try { + return process.umask(mask); + } catch(e) { + // oops... + // NodeJS docs: "In Worker threads, process.umask(mask) will throw an exception." + // umask docs: "This system call always succeeds" + return 0; + } + } + // Fall back to the stub case of returning 0. + return 0; +}) + +int __syscall_umask(int mask) { + return __syscall_umask_js(mask); +} diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d46598b23b3b2f..0b0ddf227e4952 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1073,6 +1073,14 @@ validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } + // This assert mirrors the one in Python/gc.c:update_refs(). There must be + // no tracked objects with a reference count of 0 when the cyclic + // collector starts. If there is, then the collector will double dealloc + // the object. The likely cause for hitting this is a faulty .tp_dealloc. + // Also see the comment in `update_refs()`. + _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0, + "tracked objects must have a reference count > 0"); + _PyObject_ASSERT_WITH_MSG(op, !gc_is_unreachable(op), "object should not be marked as unreachable yet"); @@ -1422,13 +1430,6 @@ static int deduce_unreachable_heap(PyInterpreterState *interp, struct collection_state *state) { - -#ifdef GC_DEBUG - // Check that all objects are marked as unreachable and that the computed - // reference count difference (stored in `ob_tid`) is non-negative. - gc_visit_heaps(interp, &validate_refcounts, &state->base); -#endif - // Identify objects that are directly reachable from outside the GC heap // by computing the difference between the refcount and the number of // incoming references. @@ -2158,6 +2159,13 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } +#ifdef GC_DEBUG + // Before we start, check that the heap is in a good condition. There must + // be no objects with a zero reference count. And `ob_tid` must only have a + // thread if the refcount is unmerged. + gc_visit_heaps(interp, &validate_refcounts, &state->base); +#endif + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; diff --git a/Python/initconfig.c b/Python/initconfig.c index 73a9a9bf1ca460..cc0db19d416058 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -459,7 +459,7 @@ static const char usage_envvars[] = /* --- Global configuration variables ----------------------------- */ -/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change +/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change stdin and stdout error handler to "surrogateescape". */ int Py_UTF8Mode = 0; int Py_DebugFlag = 0; /* Needed by parser.c */ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 13bdd041becd69..5b9da271d2745e 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1530,7 +1530,7 @@ initialize_lines(PyCodeObject *code, int bytes_per_entry) case END_FOR: case END_SEND: case RESUME: - case POP_ITER: + case POP_ITER: /* END_FOR cannot start a line, as it is skipped by FOR_ITER * END_SEND cannot start a line, as it is skipped by SEND * RESUME and POP_ITER must not be instrumented with INSTRUMENTED_LINE */ diff --git a/Python/jit.c b/Python/jit.c index e232cc1f7d9250..01bc0076497c6d 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -431,8 +431,10 @@ void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *s #if defined(__aarch64__) || defined(_M_ARM64) #define TRAMPOLINE_SIZE 16 + #define DATA_ALIGN 8 #else #define TRAMPOLINE_SIZE 0 + #define DATA_ALIGN 1 #endif // Generate and patch AArch64 trampolines. The symbols to jump to are stored @@ -522,8 +524,9 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz // Round up to the nearest page: size_t page_size = get_page_size(); assert((page_size & (page_size - 1)) == 0); - size_t padding = page_size - ((code_size + state.trampolines.size + data_size) & (page_size - 1)); - size_t total_size = code_size + state.trampolines.size + data_size + padding; + size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1)); + size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1)); + size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding; unsigned char *memory = jit_alloc(total_size); if (memory == NULL) { return -1; @@ -545,7 +548,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz // Loop again to emit the code: unsigned char *code = memory; state.trampolines.mem = memory + code_size; - unsigned char *data = memory + code_size + state.trampolines.size; + unsigned char *data = memory + code_size + state.trampolines.size + code_padding; // Compile the shim, which handles converting between the native // calling convention and the calling convention used by jitted code // (which may be different for efficiency reasons). @@ -567,7 +570,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz code += group->code_size; data += group->data_size; assert(code == memory + code_size); - assert(data == memory + code_size + state.trampolines.size + data_size); + assert(data == memory + code_size + state.trampolines.size + code_padding + data_size); #ifdef MAP_JIT pthread_jit_write_protect_np(1); #endif diff --git a/Python/preconfig.c b/Python/preconfig.c index 67b2d2f2dc186d..e4cd10d9e3d40d 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -291,12 +291,12 @@ _PyPreConfig_InitCompatConfig(PyPreConfig *config) config->use_environment = -1; config->configure_locale = 1; - /* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540) - are disabled by default using the Compat configuration. + /* gh-80624: C locale coercion (PEP 538) is disabled by default using + the Compat configuration. - Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable + Py_UTF8Mode=0 disables the UTF-8 mode. PYTHONUTF8 environment variable is ignored (even if use_environment=1). */ - config->utf8_mode = 0; + config->utf8_mode = 1; config->coerce_c_locale = 0; config->coerce_c_locale_warn = 0; @@ -317,8 +317,8 @@ PyPreConfig_InitPythonConfig(PyPreConfig *config) config->isolated = 0; config->parse_argv = 1; config->use_environment = 1; - /* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540) - depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE + /* Set to -1 to enable C locale coercion (PEP 538) depending on + the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE environment variables. */ config->coerce_c_locale = -1; config->coerce_c_locale_warn = -1; @@ -338,7 +338,7 @@ PyPreConfig_InitIsolatedConfig(PyPreConfig *config) config->configure_locale = 0; config->isolated = 1; config->use_environment = 0; - config->utf8_mode = 0; + config->utf8_mode = 1; config->dev_mode = 0; #ifdef MS_WINDOWS config->legacy_windows_fs_encoding = 0; @@ -649,23 +649,7 @@ preconfig_init_utf8_mode(PyPreConfig *config, const _PyPreCmdline *cmdline) return _PyStatus_OK(); } - -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - config->utf8_mode = 1; - } - } -#endif - - if (config->utf8_mode < 0) { - config->utf8_mode = 0; - } + config->utf8_mode = 1; return _PyStatus_OK(); } diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 037fe11ea223c7..cfbf0d14348499 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -66,6 +66,7 @@ def clean_lines(text): Python/dynload_dl.c # dl.h Python/dynload_hpux.c # dl.h Python/emscripten_signal.c +Python/emscripten_syscalls.c Python/thread_pthread.h Python/thread_pthread_stubs.h diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 15b18f5286b399..64a9f11a944176 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -167,6 +167,7 @@ Python/sysmodule.c - _preinit_xoptions - # XXX need race protection? Modules/faulthandler.c faulthandler_dump_traceback reentrant - Modules/faulthandler.c faulthandler_dump_c_stack reentrant - +Modules/grpmodule.c - group_db_mutex - Python/pylifecycle.c _Py_FatalErrorFormat reentrant - Python/pylifecycle.c fatal_error reentrant - diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 1077e4106fdfbd..33db110b728dba 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -70,21 +70,21 @@ class Optimizer: path: pathlib.Path _: dataclasses.KW_ONLY - # prefix used to mangle symbols on some platforms: - prefix: str = "" + # Prefixes used to mangle local labels and symbols: + label_prefix: str + symbol_prefix: str # The first block in the linked list: _root: _Block = dataclasses.field(init=False, default_factory=_Block) _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) # No groups: _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( - r"\s*(?:\.|#|//|$)" + r"\s*(?:\.|#|//|;|$)" ) # One group (label): _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( r'\s*(?P