summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- sphinx/builders/epub3.py 18
-rw-r--r-- sphinx/util/__init__.py 31
-rw-r--r-- tests/test_build_epub.py 8
-rw-r--r-- tests/test_util.py 9
4 files changed, 31 insertions, 35 deletions
diff --git a/sphinx/builders/epub3.py b/sphinx/builders/epub3.py
index adb1aaac1..9b01eec8a 100644
--- a/sphinx/builders/epub3.py
+++ b/sphinx/builders/epub3.py
@@ -6,6 +6,7 @@ Originally derived from epub.py.
from __future__ import annotations
import html
+import re
from os import path
from typing import Any, NamedTuple
@@ -14,7 +15,7 @@ from sphinx.application import Sphinx
from sphinx.builders import _epub_base
from sphinx.config import ENUM, Config
from sphinx.locale import __
-from sphinx.util import logging, xmlname_checker
+from sphinx.util import logging
from sphinx.util.fileutil import copy_asset_file
from sphinx.util.i18n import format_date
from sphinx.util.osutil import make_filename
@@ -50,6 +51,19 @@ HTML_TAG = (
'xmlns:epub="http://www.idpf.org/2007/ops">'
)
+# https://www.w3.org/TR/REC-xml/#NT-Name
+_xml_name_start_char = (
+ ':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]'
+ '|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]'
+ '|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]'
+ '|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]'
+ '|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF]'
+)
+_xml_name_char = (
+ _xml_name_start_char + r'|\-|\.' '|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]'
+)
+_XML_NAME_PATTERN = re.compile(f'({_xml_name_start_char})({_xml_name_char})*')
+
class Epub3Builder(_epub_base.EpubBuilder):
"""
@@ -187,7 +201,7 @@ def validate_config_values(app: Sphinx) -> None:
logger.warning(__('conf value "epub_language" (or "language") '
'should not be empty for EPUB3'))
# <package> unique-identifier attribute
- if not xmlname_checker().match(app.config.epub_uid):
+ if not _XML_NAME_PATTERN.match(app.config.epub_uid):
logger.warning(__('conf value "epub_uid" should be XML NAME for EPUB3'))
# dc:title
if not app.config.epub_title:
diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py
index 837e41af8..eaa007b1a 100644
--- a/sphinx/util/__init__.py
+++ b/sphinx/util/__init__.py
@@ -371,32 +371,11 @@ def isurl(url: str) -> bool:
return bool(url) and '://' in url
-def xmlname_checker() -> re.Pattern:
- # https://www.w3.org/TR/REC-xml/#NT-Name
- name_start_chars = [
- ':', ['A', 'Z'], '_', ['a', 'z'], ['\u00C0', '\u00D6'],
- ['\u00D8', '\u00F6'], ['\u00F8', '\u02FF'], ['\u0370', '\u037D'],
- ['\u037F', '\u1FFF'], ['\u200C', '\u200D'], ['\u2070', '\u218F'],
- ['\u2C00', '\u2FEF'], ['\u3001', '\uD7FF'], ['\uF900', '\uFDCF'],
- ['\uFDF0', '\uFFFD'], ['\U00010000', '\U000EFFFF']]
-
- name_chars = [
- "\\-", "\\.", ['0', '9'], '\u00B7', ['\u0300', '\u036F'],
- ['\u203F', '\u2040']
- ]
-
- def convert(entries: Any, splitter: str = '|') -> str:
- results = []
- for entry in entries:
- if isinstance(entry, list):
- results.append('[%s]' % convert(entry, '-'))
- else:
- results.append(entry)
- return splitter.join(results)
+def _xml_name_checker():
+ # to prevent import cycles
+ from sphinx.builders.epub3 import _XML_NAME_PATTERN
- start_chars_regex = convert(name_start_chars)
- name_chars_regex = convert(name_chars)
- return re.compile(f'({start_chars_regex})({start_chars_regex}|{name_chars_regex})*')
+ return _XML_NAME_PATTERN
deprecated_alias('sphinx.util',
@@ -410,6 +389,7 @@ deprecated_alias('sphinx.util',
'rfc1123_to_epoch': _http_date.rfc1123_to_epoch,
'save_traceback': _exceptions.save_traceback,
'format_exception_cut_frames': _exceptions.format_exception_cut_frames,
+ 'xmlname_checker': _xml_name_checker,
},
RemovedInSphinx70Warning,
{
@@ -422,4 +402,5 @@ deprecated_alias('sphinx.util',
'rfc1123_to_epoch': 'sphinx.http_date.rfc1123_to_epoch',
'save_traceback': 'sphinx.exceptions.save_traceback',
'format_exception_cut_frames': 'sphinx.exceptions.format_exception_cut_frames', # NoQA: E501
+ 'xmlname_checker': 'sphinx.builders.epub3._XML_NAME_PATTERN',
})
diff --git a/tests/test_build_epub.py b/tests/test_build_epub.py
index becde92cd..a50c51e25 100644
--- a/tests/test_build_epub.py
+++ b/tests/test_build_epub.py
@@ -7,6 +7,8 @@ from xml.etree import ElementTree
import pytest
+from sphinx.builders.epub3 import _XML_NAME_PATTERN
+
# check given command is runnable
def runnable(command):
@@ -382,3 +384,9 @@ def test_run_epubcheck(app):
print(exc.stdout.decode('utf-8'))
print(exc.stderr.decode('utf-8'))
raise AssertionError('epubcheck exited with return code %s' % exc.returncode)
+
+
+def test_xml_name_pattern_check():
+ assert _XML_NAME_PATTERN.match('id-pub')
+ assert _XML_NAME_PATTERN.match('webpage')
+ assert not _XML_NAME_PATTERN.match('1bfda21')
diff --git a/tests/test_util.py b/tests/test_util.py
index 226b5b4ed..bb4f10a8c 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -6,7 +6,7 @@ import tempfile
import pytest
from sphinx.errors import ExtensionError
-from sphinx.util import encode_uri, ensuredir, import_object, parselinenos, xmlname_checker
+from sphinx.util import encode_uri, ensuredir, import_object, parselinenos
def test_encode_uri():
@@ -75,10 +75,3 @@ def test_parselinenos():
parselinenos('-', 10)
with pytest.raises(ValueError):
parselinenos('3-1', 10)
-
-
-def test_xmlname_check():
- checker = xmlname_checker()
- assert checker.match('id-pub')
- assert checker.match('webpage')
- assert not checker.match('1bfda21')