diff options
17 files changed, 254 insertions, 3 deletions
diff --git a/src/qdoc/qdoc/utilities.cpp b/src/qdoc/qdoc/utilities.cpp index d0f18338b..d4e1f8c07 100644 --- a/src/qdoc/qdoc/utilities.cpp +++ b/src/qdoc/qdoc/utilities.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include <QtCore/qprocess.h> +#include <QCryptographicHash> +#include "location.h" #include "utilities.h" QT_BEGIN_NAMESPACE @@ -96,11 +98,22 @@ QString comma(qsizetype wordPosition, qsizetype numberOfWords) */ QString canonicalizeFileName(const QString &name) { + auto legal_ascii = [](const uint value) { + const uint start_ascii_subset{ 32 }; + const uint end_ascii_subset{ 126 }; + + return value >= start_ascii_subset && value <= end_ascii_subset; + }; + QString result; bool begun = false; + bool has_non_alnum_content{ false }; const auto *data{name.constData()}; for (qsizetype i = 0; i < name.size(); ++i) { char16_t u{data[i].unicode()}; + if (!legal_ascii(u)) + has_non_alnum_content = true; + if (u >= 'A' && u <= 'Z') u += 'a' - 'A'; if ((u >= 'a' && u <= 'z') || (u >= '0' && u <= '9')) { @@ -114,6 +127,15 @@ QString canonicalizeFileName(const QString &name) if (result.endsWith(QLatin1Char('-'))) result.chop(1); + if (has_non_alnum_content) { + auto title_hash = QString::fromLocal8Bit( + QCryptographicHash::hash(name.toUtf8(), QCryptographicHash::Md5).toHex()); + title_hash.truncate(8); + if (!result.isEmpty()) + result.append(QLatin1Char('-')); + result.append(title_hash); + } + return result; } diff --git a/tests/auto/qdoc/generatedoutput/expected_output/8b5c72eb.html b/tests/auto/qdoc/generatedoutput/expected_output/8b5c72eb.html new file mode 100644 index 000000000..f52b65618 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/8b5c72eb.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$圣马苏里拉.html-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page with non-latin characters in its name.</p> +</div> +<!-- @@@圣马苏里拉.html --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/e85685de.html b/tests/auto/qdoc/generatedoutput/expected_output/e85685de.html new file mode 100644 index 000000000..ea52873e7 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/e85685de.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$موزاريلا-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page with right-to-left script in its name.</p> +</div> +<!-- @@@موزاريلا --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/8b5c72eb.webxml b/tests/auto/qdoc/generatedoutput/expected_output/html/8b5c72eb.webxml new file mode 100644 index 000000000..375c43732 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/8b5c72eb.webxml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<WebXML> + <document> + <page name="圣马苏里拉.html" href="8b5c72eb.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""> + <description> + <para>This page exists solely to understand how QDoc will generate the file name for a page with non-latin characters in its name.</para> + </description> + </page> + </document> +</WebXML> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/e85685de.webxml b/tests/auto/qdoc/generatedoutput/expected_output/html/e85685de.webxml new file mode 100644 index 000000000..beb4df518 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/e85685de.webxml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<WebXML> + <document> + <page name="موزاريلا" href="e85685de.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""> + <description> + <para>This page exists solely to understand how QDoc will generate the file name for a page with right-to-left script in its name.</para> + </description> + </page> + </document> +</WebXML> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/mozzarella-7c883eff.webxml b/tests/auto/qdoc/generatedoutput/expected_output/html/mozzarella-7c883eff.webxml new file mode 100644 index 000000000..13ce91b72 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/mozzarella-7c883eff.webxml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<WebXML> + <document> + <page name="桑塔mozzarella.html" href="mozzarella-7c883eff.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""> + <description> + <para>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with non-latin characters in its name.</para> + </description> + </page> + </document> +</WebXML> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/nonasciicharacterinput.index b/tests/auto/qdoc/generatedoutput/expected_output/html/nonasciicharacterinput.index index 613bb05a4..aa306d7eb 100644 --- a/tests/auto/qdoc/generatedoutput/expected_output/html/nonasciicharacterinput.index +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/nonasciicharacterinput.index @@ -10,5 +10,10 @@ <contents name="further-details" title="Further details" level="1"/> <contents name="ascii-characters-that-are-non-printable-ascii-such-as-or-521d09f0" title="Ascii characters that are non-printable ascii, such as ß, ü, or ø" level="2"/> </page> + <page name="SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm" href="seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""/> + <page name="santaموزاريلا.html" href="santa-14209312.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""/> + <page name="موزاريلا" href="e85685de.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""/> + <page name="圣马苏里拉.html" href="8b5c72eb.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""/> + <page name="桑塔mozzarella.html" href="mozzarella-7c883eff.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""/> </namespace> </INDEX> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/santa-14209312.webxml b/tests/auto/qdoc/generatedoutput/expected_output/html/santa-14209312.webxml new file mode 100644 index 000000000..9d07c9da8 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/santa-14209312.webxml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<WebXML> + <document> + <page name="santaموزاريلا.html" href="santa-14209312.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""> + <description> + <para>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with right-to-left script in its name.</para> + </description> + </page> + </document> +</WebXML> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/html/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.webxml b/tests/auto/qdoc/generatedoutput/expected_output/html/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.webxml new file mode 100644 index 000000000..8fe3c93ee --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/html/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.webxml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<WebXML> + <document> + <page name="SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm" href="seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html" status="active" location="adventures_with_non_ascii_characters.qdoc" documented="true" subtype="page" title="" fulltitle="" subtitle=""> + <description> + <para>This page exists solely to understand how QDoc will generate the file name for a page with non-ascii-printable latin characters in its name.</para> + </description> + </page> + </document> +</WebXML> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/mozzarella-7c883eff.html b/tests/auto/qdoc/generatedoutput/expected_output/mozzarella-7c883eff.html new file mode 100644 index 000000000..bb4b3651d --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/mozzarella-7c883eff.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$桑塔mozzarella.html-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with non-latin characters in its name.</p> +</div> +<!-- @@@桑塔mozzarella.html --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/mozzarella.html b/tests/auto/qdoc/generatedoutput/expected_output/mozzarella.html new file mode 100644 index 000000000..bb4b3651d --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/mozzarella.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$桑塔mozzarella.html-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with non-latin characters in its name.</p> +</div> +<!-- @@@桑塔mozzarella.html --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/santa-14209312.html b/tests/auto/qdoc/generatedoutput/expected_output/santa-14209312.html new file mode 100644 index 000000000..f40feed36 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/santa-14209312.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$santaموزاريلا.html-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with right-to-left script in its name.</p> +</div> +<!-- @@@santaموزاريلا.html --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/santa.html b/tests/auto/qdoc/generatedoutput/expected_output/santa.html new file mode 100644 index 000000000..f40feed36 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/santa.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$santaموزاريلا.html-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page that mixes printable ascii with right-to-left script in its name.</p> +</div> +<!-- @@@santaموزاريلا.html --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html b/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html new file mode 100644 index 000000000..16df49755 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page with non-ascii-printable latin characters in its name.</p> +</div> +<!-- @@@SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm.html b/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm.html new file mode 100644 index 000000000..16df49755 --- /dev/null +++ b/tests/auto/qdoc/generatedoutput/expected_output/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm.html @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> +<!-- adventures_with_non_ascii_characters.qdoc --> + <title>NonAsciiCharacterInput</title> +</head> +<body> +<div class="sidebar"><div class="sidebar-content" id="sidebar-content"></div></div> +<!-- $$$SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm-description --> +<div class="descr" id="details"> +<p>This page exists solely to understand how QDoc will generate the file name for a page with non-ascii-printable latin characters in its name.</p> +</div> +<!-- @@@SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm --> +</body> +</html> diff --git a/tests/auto/qdoc/generatedoutput/testdata/non_ascii_character_input/adventures_with_non_ascii_characters.qdoc b/tests/auto/qdoc/generatedoutput/testdata/non_ascii_character_input/adventures_with_non_ascii_characters.qdoc index b1f23adf7..c5f09cb1c 100644 --- a/tests/auto/qdoc/generatedoutput/testdata/non_ascii_character_input/adventures_with_non_ascii_characters.qdoc +++ b/tests/auto/qdoc/generatedoutput/testdata/non_ascii_character_input/adventures_with_non_ascii_characters.qdoc @@ -52,3 +52,38 @@ to such section titles works as expected. It's made a section2 to exercise the behavior for other section levels than 1. */ + +/*! + \page SEITE_MIT_AUSSCHLIEßLICH_GROßBUCHSTABEN_IM_TITEL_ÜBERSCHRIFT.htm + + This page exists solely to understand how QDoc will generate the file name + for a page with non-ascii-printable latin characters in its name. +*/ + +/*! + \page موزاريلا سانتا.html + + This page exists solely to understand how QDoc will generate the file name + for a page with right-to-left script in its name. +*/ + +/*! + \page 圣马苏里拉.html + + This page exists solely to understand how QDoc will generate the file name + for a page with non-latin characters in its name. +*/ + +/*! + \page santaموزاريلا.html + + This page exists solely to understand how QDoc will generate the file name + for a page that mixes printable ascii with right-to-left script in its name. +*/ + +/*! + \page 桑塔mozzarella.html + + This page exists solely to understand how QDoc will generate the file name + for a page that mixes printable ascii with non-latin characters in its name. +*/ diff --git a/tests/auto/qdoc/generatedoutput/tst_generatedoutput.cpp b/tests/auto/qdoc/generatedoutput/tst_generatedoutput.cpp index 269739242..0e3b5a4ee 100644 --- a/tests/auto/qdoc/generatedoutput/tst_generatedoutput.cpp +++ b/tests/auto/qdoc/generatedoutput/tst_generatedoutput.cpp @@ -591,9 +591,20 @@ void tst_generatedOutput::proxyPage() void tst_generatedOutput::nonAsciiCharacterInput() { - testAndCompare("testdata/non_ascii_character_input/non_ascii_character_input.qdocconf", - "html/nonasciicharacterinput.index " - "adventures-with-non-ascii-characters.html"); + testAndCompare( + "testdata/non_ascii_character_input/non_ascii_character_input.qdocconf", + "html/nonasciicharacterinput.index " + "html/mozzarella-7c883eff.webxml " + "html/santa-14209312.webxml " + "html/seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.webxml " + "html/8b5c72eb.webxml " + "html/e85685de.webxml " + "seite-mit-ausschlie-lich-gro-buchstaben-im-titel-berschrift-htm-bfa91582.html " + "mozzarella-7c883eff.html " + "santa-14209312.html " + "8b5c72eb.html " + "e85685de.html " + "adventures-with-non-ascii-characters.html"); } |