diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index ba6e46858f9d26..95b6c2bb4da026 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -50,11 +50,12 @@ URL Parsing The URL parsing functions focus on splitting a URL string into its components, or on combining URL components into a URL string. -.. function:: urlparse(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False) +.. function:: urlsplit(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False) - Parse a URL into six components, returning a 6-item :term:`named tuple`. This - corresponds to the general structure of a URL: - ``scheme://netloc/path;parameters?query#fragment``. + Parse a URL into five components, returning a 5-item :term:`named tuple` + :class:`SplitResult` or :class:`SplitResultBytes`. + This corresponds to the general structure of a URL: + ``scheme://netloc/path?query#fragment``. Each tuple item is a string, possibly empty, or ``None`` if *missing_as_none* is true. Not defined component are represented an empty string (by default) or @@ -68,15 +69,15 @@ or on combining URL components into a URL string. .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> from urllib.parse import urlparse - >>> urlparse("scheme://netloc/path;parameters?query#fragment") - ParseResult(scheme='scheme', netloc='netloc', path='/path;parameters', params='', + >>> from urllib.parse import urlsplit + >>> urlsplit("scheme://netloc/path?query#fragment") + SplitResult(scheme='scheme', netloc='netloc', path='/path', query='query', fragment='fragment') - >>> o = urlparse("http://docs.python.org:80/3/library/urllib.parse.html?" + >>> o = urlsplit("http://docs.python.org:80/3/library/urllib.parse.html?" ... "highlight=params#url-parsing") >>> o - ParseResult(scheme='http', netloc='docs.python.org:80', - path='/3/library/urllib.parse.html', params='', + SplitResult(scheme='http', netloc='docs.python.org:80', + path='/3/library/urllib.parse.html', query='highlight=params', fragment='url-parsing') >>> o.scheme 'http' @@ -88,14 +89,14 @@ or on combining URL components into a URL string. 80 >>> o._replace(fragment="").geturl() 'http://docs.python.org:80/3/library/urllib.parse.html?highlight=params' - >>> urlparse("http://docs.python.org?") - ParseResult(scheme='http', netloc='docs.python.org', - path='', params='', query='', fragment='') - >>> urlparse("http://docs.python.org?", missing_as_none=True) - ParseResult(scheme='http', netloc='docs.python.org', - path='', params=None, query='', fragment=None) - - Following the syntax specifications in :rfc:`1808`, urlparse recognizes + >>> urlsplit("http://docs.python.org?") + SplitResult(scheme='http', netloc='docs.python.org', path='', + query='', fragment='') + >>> urlsplit("http://docs.python.org?", missing_as_none=True) + SplitResult(scheme='http', netloc='docs.python.org', path='', + query='', fragment=None) + + Following the syntax specifications in :rfc:`1808`, :func:`!urlsplit` recognizes a netloc only if it is properly introduced by '//'. Otherwise the input is presumed to be a relative URL and thus to start with a path component. @@ -103,19 +104,19 @@ or on combining URL components into a URL string. .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> from urllib.parse import urlparse - >>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html') - ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') - >>> urlparse('www.cwi.nl/%7Eguido/Python.html') - ParseResult(scheme='', netloc='', path='www.cwi.nl/%7Eguido/Python.html', - params='', query='', fragment='') - >>> urlparse('help/Python.html') - ParseResult(scheme='', netloc='', path='help/Python.html', - params='', query='', fragment='') - >>> urlparse('help/Python.html', missing_as_none=True) - ParseResult(scheme=None, netloc=None, path='help/Python.html', - params=None, query=None, fragment=None) + >>> from urllib.parse import urlsplit + >>> urlsplit('//www.cwi.nl:80/%7Eguido/Python.html') + SplitResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', + query='', fragment='') + >>> urlsplit('www.cwi.nl/%7Eguido/Python.html') + SplitResult(scheme='', netloc='', path='www.cwi.nl/%7Eguido/Python.html', + query='', fragment='') + >>> urlsplit('help/Python.html') + SplitResult(scheme='', netloc='', path='help/Python.html', + query='', fragment='') + >>> urlsplit('help/Python.html', missing_as_none=True) + SplitResult(scheme=None, netloc=None, path='help/Python.html', + query=None, fragment=None) The *scheme* argument gives the default addressing scheme, to be used only if the URL does not specify one. It should be the same type @@ -123,7 +124,7 @@ or on combining URL components into a URL string. always allowed, and is automatically converted to ``b''`` if appropriate. If the *allow_fragments* argument is false, fragment identifiers are not - recognized. Instead, they are parsed as part of the path, parameters + recognized. Instead, they are parsed as part of the path or query component, and :attr:`fragment` is set to ``None`` or the empty string (depending on the value of *missing_as_none*) in the return value. @@ -140,12 +141,9 @@ or on combining URL components into a URL string. +------------------+-------+-------------------------+-------------------------------+ | :attr:`path` | 2 | Hierarchical path | empty string | +------------------+-------+-------------------------+-------------------------------+ - | :attr:`params` | 3 | Parameters for last | ``None`` or empty string [1]_ | - | | | path element | | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`query` | 4 | Query component | ``None`` or empty string [1]_ | + | :attr:`query` | 3 | Query component | ``None`` or empty string [1]_ | +------------------+-------+-------------------------+-------------------------------+ - | :attr:`fragment` | 5 | Fragment identifier | ``None`` or empty string [1]_ | + | :attr:`fragment` | 4 | Fragment identifier | ``None`` or empty string [1]_ | +------------------+-------+-------------------------+-------------------------------+ | :attr:`username` | | User name | ``None`` | +------------------+-------+-------------------------+-------------------------------+ @@ -171,26 +169,30 @@ or on combining URL components into a URL string. ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is decomposed before parsing, no error will be raised. + Following some of the `WHATWG spec`_ that updates :rfc:`3986`, leading C0 + control and space characters are stripped from the URL. ``\n``, + ``\r`` and tab ``\t`` characters are removed from the URL at any position. + As is the case with all named tuples, the subclass has a few additional methods and attributes that are particularly useful. One such method is :meth:`_replace`. - The :meth:`_replace` method will return a new ParseResult object replacing specified - fields with new values. + The :meth:`_replace` method will return a new :class:`SplitResult` object + replacing specified fields with new values. .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> from urllib.parse import urlparse - >>> u = urlparse('//www.cwi.nl:80/%7Eguido/Python.html') + >>> from urllib.parse import urlsplit + >>> u = urlsplit('//www.cwi.nl:80/%7Eguido/Python.html') >>> u - ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') + SplitResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', + query='', fragment='') >>> u._replace(scheme='http') - ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', - params='', query='', fragment='') + SplitResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', + query='', fragment='') .. warning:: - :func:`urlparse` does not perform validation. See :ref:`URL parsing + :func:`urlsplit` does not perform validation. See :ref:`URL parsing security ` for details. .. versionchanged:: 3.2 @@ -209,9 +211,17 @@ or on combining URL components into a URL string. Characters that affect netloc parsing under NFKC normalization will now raise :exc:`ValueError`. + .. versionchanged:: 3.10 + ASCII newline and tab characters are stripped from the URL. + + .. versionchanged:: 3.12 + Leading WHATWG C0 control and space characters are stripped from the URL. + .. versionchanged:: next Added the *missing_as_none* parameter. +.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser + .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&') @@ -306,11 +316,11 @@ or on combining URL components into a URL string. separator key, with ``&`` as the default separator. -.. function:: urlunparse(parts) - urlunparse(parts, *, keep_empty) +.. function:: urlunsplit(parts) + urlunsplit(parts, *, keep_empty) - Construct a URL from a tuple as returned by ``urlparse()``. The *parts* - argument can be any six-item iterable. + Construct a URL from a tuple as returned by :func:`urlsplit`. The *parts* + argument can be any five-item iterable. This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had unnecessary delimiters (for example, @@ -321,97 +331,33 @@ or on combining URL components into a URL string. This allows rebuilding a URL that was parsed with option ``missing_as_none=True``. By default, *keep_empty* is true if *parts* is the result of the - :func:`urlparse` call with ``missing_as_none=True``. + :func:`urlsplit` call with ``missing_as_none=True``. .. versionchanged:: next Added the *keep_empty* parameter. -.. function:: urlsplit(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False) - - This is similar to :func:`urlparse`, but does not split the params from the URL. - This should generally be used instead of :func:`urlparse` if the more recent URL - syntax allowing parameters to be applied to each segment of the *path* portion - of the URL (see :rfc:`2396`) is wanted. A separate function is needed to - separate the path segments and parameters. This function returns a 5-item - :term:`named tuple`:: - - (addressing scheme, network location, path, query, fragment identifier). - - The return value is a :term:`named tuple`, its items can be accessed by index - or as named attributes: - - +------------------+-------+-------------------------+-------------------------------+ - | Attribute | Index | Value | Value if not present | - +==================+=======+=========================+===============================+ - | :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter or | - | | | | empty string [1]_ | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`netloc` | 1 | Network location part | ``None`` or empty string [2]_ | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`path` | 2 | Hierarchical path | empty string | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`query` | 3 | Query component | ``None`` or empty string [2]_ | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`fragment` | 4 | Fragment identifier | ``None`` or empty string [2]_ | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`username` | | User name | ``None`` | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`password` | | Password | ``None`` | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`hostname` | | Host name (lower case) | ``None`` | - +------------------+-------+-------------------------+-------------------------------+ - | :attr:`port` | | Port number as integer, | ``None`` | - | | | if present | | - +------------------+-------+-------------------------+-------------------------------+ - - .. [2] Depending on the value of the *missing_as_none* argument. - - Reading the :attr:`port` attribute will raise a :exc:`ValueError` if - an invalid port is specified in the URL. See section - :ref:`urlparse-result-object` for more information on the result object. - - Unmatched square brackets in the :attr:`netloc` attribute will raise a - :exc:`ValueError`. - - Characters in the :attr:`netloc` attribute that decompose under NFKC - normalization (as used by the IDNA encoding) into any of ``/``, ``?``, - ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is - decomposed before parsing, no error will be raised. - - Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0 - control and space characters are stripped from the URL. ``\n``, - ``\r`` and tab ``\t`` characters are removed from the URL at any position. - - .. warning:: - - :func:`urlsplit` does not perform validation. See :ref:`URL parsing - security ` for details. - - .. versionchanged:: 3.6 - Out-of-range port numbers now raise :exc:`ValueError`, instead of - returning ``None``. - - .. versionchanged:: 3.8 - Characters that affect netloc parsing under NFKC normalization will - now raise :exc:`ValueError`. - - .. versionchanged:: 3.10 - ASCII newline and tab characters are stripped from the URL. - - .. versionchanged:: 3.12 - Leading WHATWG C0 control and space characters are stripped from the URL. +.. function:: urlparse(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False) - .. versionchanged:: next - Added the *missing_as_none* parameter. + This is similar to :func:`urlsplit`, but additionally splits the *path* + component on *path* and *params*. + This function returns a 6-item :term:`named tuple` :class:`ParseResult` + or :class:`ParseResultBytes`. + Its items are the same as for the :func:`!urlsplit` result, except that + *params* is inserted at index 3, between *path* and *query*. -.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser + This function is based on obsoleted :rfc:`1738` and :rfc:`1808`, which + listed *params* as the main URL component. + The more recent URL syntax allows parameters to be applied to each segment + of the *path* portion of the URL (see :rfc:`3986`). + :func:`urlsplit` should generally be used instead of :func:`urlparse`. + A separate function is needed to separate the path segments and parameters. -.. function:: urlunsplit(parts) - urlunsplit(parts, *, keep_empty) +.. function:: urlunparse(parts) + urlunparse(parts, *, keep_empty) - Combine the elements of a tuple as returned by :func:`urlsplit` into a - complete URL as a string. The *parts* argument can be any five-item + Combine the elements of a tuple as returned by :func:`urlparse` into a + complete URL as a string. The *parts* argument can be any six-item iterable. This may result in a slightly different, but equivalent URL, if the @@ -423,7 +369,7 @@ or on combining URL components into a URL string. This allows rebuilding a URL that was parsed with option ``missing_as_none=True``. By default, *keep_empty* is true if *parts* is the result of the - :func:`urlsplit` call with ``missing_as_none=True``. + :func:`urlparse` call with ``missing_as_none=True``. .. versionchanged:: next Added the *keep_empty* parameter. @@ -441,7 +387,7 @@ or on combining URL components into a URL string. 'http://www.cwi.nl/%7Eguido/FAQ.html' The *allow_fragments* argument has the same meaning and default as for - :func:`urlparse`. + :func:`urlsplit`. .. note:: @@ -587,7 +533,7 @@ individual URL quoting functions. Structured Parse Results ------------------------ -The result objects from the :func:`urlparse`, :func:`urlsplit` and +The result objects from the :func:`urlsplit`, :func:`urlparse` and :func:`urldefrag` functions are subclasses of the :class:`tuple` type. These subclasses add the attributes listed in the documentation for those functions, the encoding and decoding support described in the diff --git a/Doc/library/venv.rst b/Doc/library/venv.rst index 4c000a92e87d0e..c75d7348343e16 100644 --- a/Doc/library/venv.rst +++ b/Doc/library/venv.rst @@ -550,7 +550,7 @@ subclass which installs setuptools and pip into a created virtual environment:: from subprocess import Popen, PIPE import sys from threading import Thread - from urllib.parse import urlparse + from urllib.parse import urlsplit from urllib.request import urlretrieve import venv @@ -621,7 +621,7 @@ subclass which installs setuptools and pip into a created virtual environment:: stream.close() def install_script(self, context, name, url): - _, _, path, _, _, _ = urlparse(url) + _, _, path, _, _ = urlsplit(url) fn = os.path.split(path)[-1] binpath = context.bin_path distpath = os.path.join(binpath, fn) diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst index 381c993834753d..157d7058931c16 100644 --- a/Doc/library/wsgiref.rst +++ b/Doc/library/wsgiref.rst @@ -11,6 +11,11 @@ -------------- +.. warning:: + + :mod:`wsgiref` is a reference implementation and is not recommended for + production. The module only implements basic security checks. + The Web Server Gateway Interface (WSGI) is a standard interface between web server software and web applications written in Python. Having a standard interface makes it easy to use an application that supports WSGI with a number diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 2291228e721ffd..05cd7404066167 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -968,10 +968,10 @@ unittest urllib.parse ------------ -* Add the *missing_as_none* parameter to :func:`~urllib.parse.urlparse`, - :func:`~urllib.parse.urlsplit` and :func:`~urllib.parse.urldefrag` functions. - Add the *keep_empty* parameter to :func:`~urllib.parse.urlunparse` and - :func:`~urllib.parse.urlunsplit` functions. +* Add the *missing_as_none* parameter to :func:`~urllib.parse.urlsplit`, + :func:`~urllib.parse.urlparse` and :func:`~urllib.parse.urldefrag` functions. + Add the *keep_empty* parameter to :func:`~urllib.parse.urlunsplit` and + :func:`~urllib.parse.urlunparse` functions. This allows to distinguish between empty and not defined URI components and preserve empty components. (Contributed by Serhiy Storchaka in :gh:`67041`.) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 49a292df934bfc..8e4da0e0f09919 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1916,63 +1916,63 @@ def test_splittype_deprecation(self): urllib.parse.splittype('') self.assertEqual(str(cm.warning), 'urllib.parse.splittype() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splithost_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splithost('') self.assertEqual(str(cm.warning), 'urllib.parse.splithost() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splituser_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splituser('') self.assertEqual(str(cm.warning), 'urllib.parse.splituser() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitpasswd_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splitpasswd('') self.assertEqual(str(cm.warning), 'urllib.parse.splitpasswd() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitport_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splitport('') self.assertEqual(str(cm.warning), 'urllib.parse.splitport() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitnport_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splitnport('') self.assertEqual(str(cm.warning), 'urllib.parse.splitnport() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitquery_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splitquery('') self.assertEqual(str(cm.warning), 'urllib.parse.splitquery() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splittag_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splittag('') self.assertEqual(str(cm.warning), 'urllib.parse.splittag() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitattr_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splitattr('') self.assertEqual(str(cm.warning), 'urllib.parse.splitattr() is deprecated as of 3.8, ' - 'use urllib.parse.urlparse() instead') + 'use urllib.parse.urlsplit() instead') def test_splitvalue_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index e917f8b61bbd9e..d64f678d235b6f 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -1,6 +1,6 @@ """Parse (absolute and relative) URLs. -urlparse module is based upon the following RFC specifications. +urllib.parse module is based upon the following RFC specifications. RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding and L. Masinter, January 2005. @@ -20,7 +20,7 @@ McCahill, December 1994 RFC 3986 is considered the current standard and any future changes to -urlparse module should conform with it. The urlparse module is +urllib.parse module should conform with it. The urllib.parse module is currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in @@ -463,6 +463,8 @@ def urlparse(url, scheme=None, allow_fragments=True, *, missing_as_none=_MISSING path or query. Note that % escapes are not expanded. + + urlsplit() should generally be used instead of urlparse(). """ url, scheme, _coerce_result = _coerce_args(url, scheme) if url is None: @@ -1233,7 +1235,7 @@ def unwrap(url): def splittype(url): warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splittype(url) @@ -1254,7 +1256,7 @@ def _splittype(url): def splithost(url): warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splithost(url) @@ -1277,7 +1279,7 @@ def _splithost(url): def splituser(host): warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splituser(host) @@ -1290,7 +1292,7 @@ def _splituser(host): def splitpasswd(user): warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splitpasswd(user) @@ -1303,7 +1305,7 @@ def _splitpasswd(user): def splitport(host): warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splitport(host) @@ -1326,7 +1328,7 @@ def _splitport(host): def splitnport(host, defport=-1): warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splitnport(host, defport) @@ -1350,7 +1352,7 @@ def _splitnport(host, defport=-1): def splitquery(url): warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splitquery(url) @@ -1365,7 +1367,7 @@ def _splitquery(url): def splittag(url): warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splittag(url) @@ -1380,7 +1382,7 @@ def _splittag(url): def splitattr(url): warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", + "use urllib.parse.urlsplit() instead", DeprecationWarning, stacklevel=2) return _splitattr(url) diff --git a/Misc/NEWS.d/next/Library/2026-01-31-17-15-49.gh-issue-144363.X9f0sU.rst b/Misc/NEWS.d/next/Library/2026-01-31-17-15-49.gh-issue-144363.X9f0sU.rst new file mode 100644 index 00000000000000..c17cea6613d06b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-31-17-15-49.gh-issue-144363.X9f0sU.rst @@ -0,0 +1 @@ +Update bundled `libexpat `_ to 2.7.4 diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index e9554bf78374a6..c79bbd2878271e 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -6,11 +6,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "39e6f567a10e36b2e77727e98e60bbcb3eb3af0b" + "checksumValue": "f1b1126ed7da8f2068302e7a692b0600e6f94b07" }, { "algorithm": "SHA256", - "checksumValue": "122f2c27000472a201d337b9b31f7eb2b52d091b02857061a8880371612d9534" + "checksumValue": "31b15de82aa19a845156169a17a5488bf597e561b2c318d159ed583139b25e87" } ], "fileName": "Modules/expat/COPYING" @@ -48,11 +48,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "a4395dd0589a97aab0904f7a5f5dc5781a086aa2" + "checksumValue": "9bd33bd279c0d7ea37b0f2d7e07c7c53b7053507" }, { "algorithm": "SHA256", - "checksumValue": "610b844bbfa3ec955772cc825db4d4db470827d57adcb214ad372d0eaf00e591" + "checksumValue": "d20997001462356b5ce3810ebf5256c8205f58462c64f21eb9bf80f8d1822b08" } ], "fileName": "Modules/expat/expat.h" @@ -62,11 +62,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c22196e3d8bee88fcdda715623b3b9d2119d2fb3" + "checksumValue": "e658ee5d638ab326109282ff09f1541e27fff8c2" }, { "algorithm": "SHA256", - "checksumValue": "f2c2283ba03b057e92beefc7f81ba901ebb6dfc1a45b036c8a7d65808eb77a84" + "checksumValue": "dbe0582b8f8a8140aca97009e8760105ceed9e7df01ea9d8b3fe47cebf2e5b2d" } ], "fileName": "Modules/expat/expat_external.h" @@ -90,11 +90,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7dce7d98943c5db33ae05e54801dcafb4547b9dd" + "checksumValue": "6a4a232233ba1034c3f2b459159d502e9b2d413b" }, { "algorithm": "SHA256", - "checksumValue": "6bfe307d52e7e4c71dbc30d3bd902a4905cdd83bbe4226a7e8dfa8e4c462a157" + "checksumValue": "c803935722f0dbdeeede7f040028fb119135e96dfad949479f8a5304b885bdd6" } ], "fileName": "Modules/expat/internal.h" @@ -174,11 +174,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4c81a1f04fc653877c63c834145c18f93cd95f3e" + "checksumValue": "7d3d7d72aa56c53fb5b9e10c0e74e161381f0255" }, { "algorithm": "SHA256", - "checksumValue": "04a379615f476d55f95ca1853107e20627b48ca4afe8d0fd5981ac77188bf0a6" + "checksumValue": "f4f87aa0268d92f2b8f5e663788bfadd2e926477d0b061ed4463c02ad29a3e25" } ], "fileName": "Modules/expat/xmlparse.c" @@ -188,11 +188,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "ef767128d2dda99436712dcf3465dde5dbaab876" + "checksumValue": "c8769fcb93f00272a6e6ca560be633649c817ff7" }, { "algorithm": "SHA256", - "checksumValue": "71fb52aa302cf6f56e41943009965804f49ff2210d9bd15b258f70aaf70db772" + "checksumValue": "5b81f0eb0e144b611dbd1bc9e6037075a16bff94f823d57a81eb2a3e4999e91a" } ], "fileName": "Modules/expat/xmlrole.c" @@ -216,11 +216,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "1e2d35d90a1c269217f83d3bdf3c71cc22cb4c3f" + "checksumValue": "63e4766a09e63760c6518670509198f8d638f4ad" }, { "algorithm": "SHA256", - "checksumValue": "98d0fc735041956cc2e7bbbe2fb8f03130859410e0aee5e8015f406a37c02a3c" + "checksumValue": "0ad3f915f2748dc91bf4e4b4a50cf40bf2c95769d0eca7e3b293a230d82bb779" } ], "fileName": "Modules/expat/xmltok.c" @@ -272,11 +272,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2d82d0a1201f78d478b30d108ff8fc27ee3e2672" + "checksumValue": "41b8c8fc275882c76d4210b7d40a18e506b07147" }, { "algorithm": "SHA256", - "checksumValue": "6ce6d03193279078d55280150fe91e7370370b504a6c123a79182f28341f3e90" + "checksumValue": "b2188c7e5fa5b33e355cf6cf342dfb8f6e23859f2a6b1ddf79841d7f84f7b196" } ], "fileName": "Modules/expat/xmltok_ns.c" @@ -1730,14 +1730,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "821ac9710d2c073eaf13e1b1895a9c9aa66c1157a99635c639fbff65cdbdd732" + "checksumValue": "461ecc8aa98ab1a68c2db788175665d1a4db640dc05bf0e289b6ea17122144ec" } ], - "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_7_3/expat-2.7.3.tar.gz", + "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_7_4/expat-2.7.4.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.7.3:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.7.4:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1745,7 +1745,7 @@ "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.7.3" + "versionInfo": "2.7.4" }, { "SPDXID": "SPDXRef-PACKAGE-hacl-star", diff --git a/Modules/expat/COPYING b/Modules/expat/COPYING index ce9e5939291e45..c6d184a8aae845 100644 --- a/Modules/expat/COPYING +++ b/Modules/expat/COPYING @@ -1,5 +1,5 @@ Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper -Copyright (c) 2001-2022 Expat maintainers +Copyright (c) 2001-2025 Expat maintainers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 290dfeb0f6dd6a..6c7c4186927725 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -11,7 +11,7 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James @@ -1082,7 +1082,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ # define XML_MAJOR_VERSION 2 # define XML_MINOR_VERSION 7 -# define XML_MICRO_VERSION 3 +# define XML_MICRO_VERSION 4 # ifdef __cplusplus } diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h index e7d9499d9078d9..09d3161dbc0fb2 100644 --- a/Modules/expat/expat_config.h +++ b/Modules/expat/expat_config.h @@ -3,7 +3,7 @@ * distribution. */ #ifndef EXPAT_CONFIG_H -#define EXPAT_CONFIG_H +#define EXPAT_CONFIG_H 1 #include #ifdef WORDS_BIGENDIAN diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h index 0f01a05d0e9560..6f3f3c48ce9cff 100644 --- a/Modules/expat/expat_external.h +++ b/Modules/expat/expat_external.h @@ -12,7 +12,7 @@ Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2016 Cristian Rodríguez - Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2016-2025 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2018 Yury Gribov Licensed under the MIT license: @@ -91,8 +91,7 @@ # ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) \ - && ! defined(__CYGWIN__) +# if defined(_MSC_VER) && ! defined(__BEOS__) && ! defined(__CYGWIN__) # define XMLIMPORT __declspec(dllimport) # endif diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h index 8f5edf48ef7c00..61266ebb7723d1 100644 --- a/Modules/expat/internal.h +++ b/Modules/expat/internal.h @@ -128,7 +128,7 @@ # elif ULONG_MAX == 18446744073709551615u // 2^64-1 # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" -# elif defined(EMSCRIPTEN) // 32bit mode Emscripten +# elif defined(__wasm32__) // 32bit mode Emscripten or WASI SDK # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu" # else diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh index d1bf5d19afa007..550340467a15a7 100755 --- a/Modules/expat/refresh.sh +++ b/Modules/expat/refresh.sh @@ -12,9 +12,9 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. These values are used for verifying the SBOM, too. -expected_libexpat_tag="R_2_7_3" -expected_libexpat_version="2.7.3" -expected_libexpat_sha256="821ac9710d2c073eaf13e1b1895a9c9aa66c1157a99635c639fbff65cdbdd732" +expected_libexpat_tag="R_2_7_4" +expected_libexpat_version="2.7.4" +expected_libexpat_sha256="461ecc8aa98ab1a68c2db788175665d1a4db640dc05bf0e289b6ea17122144ec" expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")" cd ${expat_dir} @@ -24,6 +24,9 @@ curl --location "https://github.com/libexpat/libexpat/releases/download/${expect echo "${expected_libexpat_sha256} libexpat.tar.gz" | sha256sum --check # Step 2: Pull files from the libexpat distribution + +tar xzvf libexpat.tar.gz "expat-${expected_libexpat_version}/COPYING" --strip-components 2 + declare -a lib_files lib_files=( ascii.h diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index a187a3a18f1994..086fca59112ee1 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) +/* fab937ab8b186d7d296013669c332e6dfce2f99567882cff1f8eb24223c524a7 (2.7.4+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie Copyright (c) 2016 Eric Rahm - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Gaurav Copyright (c) 2016 Thomas Beutlich Copyright (c) 2016 Gustavo Grieco @@ -42,6 +42,9 @@ Copyright (c) 2024-2025 Berkay Eren Ürün Copyright (c) 2024 Hanno Böck Copyright (c) 2025 Matthew Fernandez + Copyright (c) 2025 Atrem Borovik + Copyright (c) 2025 Alfonso Gregory + Copyright (c) 2026 Rosen Penev Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -101,7 +104,7 @@ #include /* INT_MAX, UINT_MAX */ #include /* fprintf */ #include /* getenv, rand_s */ -#include /* uintptr_t */ +#include /* SIZE_MAX, uintptr_t */ #include /* isnan */ #ifdef _WIN32 @@ -134,11 +137,6 @@ # endif /* defined(GRND_NONBLOCK) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ -#if defined(HAVE_LIBBSD) \ - && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) -# include -#endif - #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 #endif @@ -155,8 +153,6 @@ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ - * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ - * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. \ \ @@ -311,8 +307,11 @@ typedef struct tag { const char *rawName; /* tagName in the original encoding */ int rawNameLength; TAG_NAME name; /* tagName in the API encoding */ - char *buf; /* buffer for name components */ - char *bufEnd; /* end of the buffer */ + union { + char *raw; /* for byte-level access (rawName storage) */ + XML_Char *str; /* for character-level access (converted name) */ + } buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; @@ -349,7 +348,7 @@ typedef struct { typedef struct block { struct block *next; int size; - XML_Char s[1]; + XML_Char s[]; } BLOCK; typedef struct { @@ -1230,8 +1229,11 @@ generate_hash_secret_salt(XML_Parser parser) { # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ + entropy = gather_time_entropy(); +# if ! defined(__wasi__) /* Process ID is 0 bits entropy if attacker has local access */ - entropy = gather_time_entropy() ^ getpid(); + entropy ^= getpid(); +# endif /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ if (sizeof(unsigned long) == 4) { @@ -1754,6 +1756,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, XML_ExternalEntityRefHandler oldExternalEntityRefHandler; XML_SkippedEntityHandler oldSkippedEntityHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler; + void *oldUnknownEncodingHandlerData; XML_ElementDeclHandler oldElementDeclHandler; XML_AttlistDeclHandler oldAttlistDeclHandler; XML_EntityDeclHandler oldEntityDeclHandler; @@ -1799,6 +1802,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; oldSkippedEntityHandler = parser->m_skippedEntityHandler; oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData; oldElementDeclHandler = parser->m_elementDeclHandler; oldAttlistDeclHandler = parser->m_attlistDeclHandler; oldEntityDeclHandler = parser->m_entityDeclHandler; @@ -1859,6 +1863,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; parser->m_skippedEntityHandler = oldSkippedEntityHandler; parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData; parser->m_elementDeclHandler = oldElementDeclHandler; parser->m_attlistDeclHandler = oldAttlistDeclHandler; parser->m_entityDeclHandler = oldEntityDeclHandler; @@ -1934,7 +1939,7 @@ XML_ParserFree(XML_Parser parser) { } p = tagList; tagList = tagList->parent; - FREE(parser, p->buf); + FREE(parser, p->buf.raw); destroyBindings(p->bindings, parser); FREE(parser, p); } @@ -2599,7 +2604,7 @@ XML_GetBuffer(XML_Parser parser, int len) { // NOTE: We are avoiding MALLOC(..) here to leave limiting // the input size to the application using Expat. newBuf = parser->m_mem.malloc_fcn(bufferSize); - if (newBuf == 0) { + if (newBuf == NULL) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } @@ -3126,7 +3131,7 @@ storeRawNames(XML_Parser parser) { size_t bufSize; size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; - char *rawNameBuf = tag->buf + nameLen; + char *rawNameBuf = tag->buf.raw + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a @@ -3142,22 +3147,22 @@ storeRawNames(XML_Parser parser) { if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; bufSize = nameLen + rawNameLen; - if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { - char *temp = REALLOC(parser, tag->buf, bufSize); + if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) { + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_FALSE; - /* if tag->name.str points to tag->buf (only when namespace + /* if tag->name.str points to tag->buf.str (only when namespace processing is off) then we have to update it */ - if (tag->name.str == (XML_Char *)tag->buf) + if (tag->name.str == tag->buf.str) tag->name.str = (XML_Char *)temp; /* if tag->name.localPart is set (when namespace processing is on) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) tag->name.localPart - = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); - tag->buf = temp; + = (XML_Char *)temp + (tag->name.localPart - tag->buf.str); + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; } @@ -3472,12 +3477,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, tag = MALLOC(parser, sizeof(TAG)); if (! tag) return XML_ERROR_NO_MEMORY; - tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE); - if (! tag->buf) { + tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf.raw) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; } - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE; } tag->bindings = NULL; tag->parent = parser->m_tagStack; @@ -3490,31 +3495,32 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, { const char *rawNameEnd = tag->rawName + tag->rawNameLength; const char *fromPtr = tag->rawName; - toPtr = (XML_Char *)tag->buf; + toPtr = tag->buf.str; for (;;) { - int bufSize; int convLen; const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = (int)(toPtr - (XML_Char *)tag->buf); + convLen = (int)(toPtr - tag->buf.str); if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } - bufSize = (int)(tag->bufEnd - tag->buf) << 1; + if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw)) + return XML_ERROR_NO_MEMORY; + const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2; { - char *temp = REALLOC(parser, tag->buf, bufSize); + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; - tag->buf = temp; + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; toPtr = (XML_Char *)temp + convLen; } } } - tag->name.str = (XML_Char *)tag->buf; + tag->name.str = tag->buf.str; *toPtr = XML_T('\0'); result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); @@ -3878,7 +3884,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -3897,7 +3903,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ # if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -4073,7 +4079,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { + if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; @@ -4256,7 +4262,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4502,7 +4508,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4529,7 +4535,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -5920,15 +5926,18 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; } #endif int *const new_scaff_index = REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); - if (new_scaff_index == NULL) + if (new_scaff_index == NULL) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; + } dtd->scaffIndex = new_scaff_index; } } else { @@ -7190,7 +7199,7 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { + if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7666,8 +7675,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, * from -Wtype-limits on platforms where * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((size_t)oldE->nDefaultAtts - > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { + if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7869,7 +7877,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { unsigned long newMask = (unsigned long)newSize - 1; /* Detect and prevent integer overflow */ - if (newSize > (size_t)(-1) / sizeof(NAMED *)) { + if (newSize > SIZE_MAX / sizeof(NAMED *)) { return NULL; } @@ -8105,7 +8113,7 @@ poolBytesToAllocateFor(int blockSize) { static XML_Bool FASTCALL poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { - if (pool->start == 0) { + if (pool->start == NULL) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; pool->blocks->next = NULL; @@ -8217,7 +8225,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { return -1; } #endif @@ -8244,7 +8252,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { + if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) { return -1; } #endif @@ -8294,15 +8302,15 @@ build_model(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { + if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) { return NULL; } - if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { + if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) { return NULL; } #endif if (dtd->scaffCount * sizeof(XML_Content) - > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { + > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) { return NULL; } diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c index 2c48bf40867953..d56bee82dd2d13 100644 --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -16,6 +16,7 @@ Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo Copyright (c) 2021 Donghee Na + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -46,7 +47,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmlrole.h" #include "ascii.h" diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 95d5e84b67f11c..32cd5f147e9322 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -24,6 +24,7 @@ Copyright (c) 2022 Martin Ettl Copyright (c) 2022 Sean McBride Copyright (c) 2023 Hanno Böck + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -56,7 +57,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmltok.h" #include "nametab.h" diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c index fbdd3e3c7b7999..810ca2c6d0485e 100644 --- a/Modules/expat/xmltok_ns.c +++ b/Modules/expat/xmltok_ns.c @@ -12,6 +12,7 @@ Copyright (c) 2002 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2017-2021 Sebastian Pipping + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -98,13 +99,13 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) - return 0; + return NULL; *p = 0; if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) - return 0; + return NULL; return NS(encodings)[i]; }