diff options
author | Paul Wicking <paul.wicking@qt.io> | 2023-05-15 11:10:23 +0200 |
---|---|---|
committer | Paul Wicking <paul.wicking@qt.io> | 2023-05-15 14:56:54 +0200 |
commit | 24085ee19751fa39db5f70d2d850ff5b82a7e400 (patch) | |
tree | 94dff7645e21cab80126abd6cef6c8c86b992c86 | |
parent | 52c8f1dbdd52f6c6e5e5419a55c456c832d8cdf4 (diff) | |
download | qttools-24085ee19751fa39db5f70d2d850ff5b82a7e400.tar.gz |
QDoc: Merge Doc::canonicalTitle and Utilities::canonicalizeFileName
Following recent bug fixes in QDoc, it has become apparent that the
implementations of `Utilities::canonicalizeFileName` and
`Doc::canonicalTitle` are mostly identical. This violates the DRY
principle. The use of the former method to normalize generate values for
HTML class attributes, shows that the problem the method solves is also
closely related to generating HTML. The latter method specifically
generates "URL friendly" strings for fragment identifiers in URLs. This
indicates that both methods are poorly named, and that they both see use
relating to the same problem domain.
This patch merges the two implementations to reduce code duplication.
`Doc::canonicalTitle` is removed, and `Utilities::canonicalizeFileName`
is renamed to `asAsciiPrintable` to better represent its purpose. The
documentation is updated. All call sites are updated to use the new
method. As there is no unit test for either method, no tests are
modified.
Fixes: QTBUG-113606
Change-Id: I1fd6a654075fdf8e719bf504b1d702737dd1e42e
Reviewed-by: Topi Reiniö <topi.reinio@qt.io>
-rw-r--r-- | src/qdoc/qdoc/doc.cpp | 79 | ||||
-rw-r--r-- | src/qdoc/qdoc/doc.h | 4 | ||||
-rw-r--r-- | src/qdoc/qdoc/docbookgenerator.cpp | 16 | ||||
-rw-r--r-- | src/qdoc/qdoc/generator.cpp | 4 | ||||
-rw-r--r-- | src/qdoc/qdoc/htmlgenerator.cpp | 12 | ||||
-rw-r--r-- | src/qdoc/qdoc/qdocindexfiles.cpp | 6 | ||||
-rw-r--r-- | src/qdoc/qdoc/tree.cpp | 18 | ||||
-rw-r--r-- | src/qdoc/qdoc/utilities.cpp | 40 | ||||
-rw-r--r-- | src/qdoc/qdoc/utilities.h | 2 | ||||
-rw-r--r-- | src/qdoc/qdoc/webxmlgenerator.cpp | 6 |
10 files changed, 63 insertions, 124 deletions
diff --git a/src/qdoc/qdoc/doc.cpp b/src/qdoc/qdoc/doc.cpp index a4d196e36..1f4e517d3 100644 --- a/src/qdoc/qdoc/doc.cpp +++ b/src/qdoc/qdoc/doc.cpp @@ -12,6 +12,7 @@ #include "qmltypenode.h" #include "quoter.h" #include "text.h" +#include "utilities.h" #include <qcryptographichash.h> @@ -409,84 +410,6 @@ CodeMarker *Doc::quoteFromFile(const Location &location, Quoter "er, Resolve return marker; } -/*! - \brief Generates a url-friendly string representation from \a title. - - "Url-friendly" in this context is a string that contains only a subset of - printable ascii characters. - - The subset includes alphanumeric (alnum) characters ([a-zA-Z0-9]), printable - ascii characters, space, punctuation characters, and common symbols. - Non-alnum characters in this subset are replaced by a single dash. Leading - and trailing dashes are removed, such that the resulting string does not - start or end with a dash. Any capital character is replaced by its lowercase - counterpart. - - If any character in \a title is non-latin, or latin and not found in the - aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a title is - appended to the final string. - - Returns a string that is normalized for the purpose of generating fragment - identifiers for \a title in URLs. - */ -QString Doc::canonicalTitle(const QString &title) -{ - auto legal_ascii = [](const uint value) { - const uint start_ascii_subset{ 32 }; - const uint end_ascii_subset{ 126 }; - - return value >= start_ascii_subset && value <= end_ascii_subset; - }; - - // The code below is equivalent to the following chunk, but - // has been measured to be approximately 4 times faster. - // - // QRegularExpression attributeExpr("[^A-Za-z0-9]+"); - // QString result = title.toLower(); - // result.replace(attributeExpr, " "); - // result = result.simplified(); - // result.replace(QLatin1Char(' '), QLatin1Char('-')); - - QString result; - result.reserve(title.size()); - - bool dashAppended{false}; - bool begun{false}; - qsizetype lastAlnum{0}; - bool has_non_alnum_content{false}; - - for (const auto &i : title) { - uint c = i.unicode(); - - if (!legal_ascii(c)) - has_non_alnum_content = true; - if (c >= 'A' && c <= 'Z') - c += 'a' - 'A'; - bool alnum = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); - if (alnum) { - result += QLatin1Char(c); - begun = true; - dashAppended = false; - lastAlnum = result.size(); - } else if (!dashAppended) { - if (begun) - result += QLatin1Char('-'); - dashAppended = true; - } - } - result.truncate(lastAlnum); - - if (has_non_alnum_content) { - auto title_hash = QString::fromLocal8Bit( - QCryptographicHash::hash(title.toUtf8(), QCryptographicHash::Md5).toHex()); - title_hash.truncate(8); - if (!result.isEmpty()) - result.append(QLatin1Char('-')); - result.append(title_hash); - } - return result; -} - void Doc::detach() { if (m_priv == nullptr) { diff --git a/src/qdoc/qdoc/doc.h b/src/qdoc/qdoc/doc.h index a4917e3e0..948c6756b 100644 --- a/src/qdoc/qdoc/doc.h +++ b/src/qdoc/qdoc/doc.h @@ -76,8 +76,8 @@ public: static void initialize(FileResolver& file_resolver); static void terminate(); static void trimCStyleComment(Location &location, QString &str); - static CodeMarker *quoteFromFile(const Location &location, Quoter "er, ResolvedFile resolved_file); - static QString canonicalTitle(const QString &title); + static CodeMarker *quoteFromFile(const Location &location, Quoter "er, + ResolvedFile resolved_file); private: void detach(); diff --git a/src/qdoc/qdoc/docbookgenerator.cpp b/src/qdoc/qdoc/docbookgenerator.cpp index 115a9a6e1..d6e72b3b4 100644 --- a/src/qdoc/qdoc/docbookgenerator.cpp +++ b/src/qdoc/qdoc/docbookgenerator.cpp @@ -1052,7 +1052,8 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) } // If there is an anchor just after with the same ID, skip it. - if (matchAhead(atom, Atom::Target) && Doc::canonicalTitle(atom->next()->string()) == id) { + if (matchAhead(atom, Atom::Target) + && Utilities::asAsciiPrintable(atom->next()->string()) == id) { ++skipAhead; } } else { @@ -1514,7 +1515,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) sectionLevels.push(currentSectionLevel); m_writer->writeStartElement(dbNamespace, "section"); - writeXmlId(Doc::canonicalTitle(Text::sectionHeading(atom).toString())); + writeXmlId(Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString())); newLine(); // Unlike startSectionBegin, don't start a title here. } @@ -1624,7 +1625,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) const Atom *next = atom->next(); QString id{""}; if (matchAhead(atom, Atom::Target)) { - id = Doc::canonicalTitle(next->string()); + id = Utilities::asAsciiPrintable(next->string()); next = next->next(); ++skipAhead; } @@ -1671,7 +1672,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) QString id{""}; bool hasTarget {false}; if (matchAhead(atom, Atom::Target)) { - id = Doc::canonicalTitle(atom->next()->string()); + id = Utilities::asAsciiPrintable(atom->next()->string()); ++skipAhead; hasTarget = true; } @@ -1768,13 +1769,14 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) case Atom::Target: // Sometimes, there is a \target just before a section title with the same ID. Only outut one xml:id. if (matchAhead(atom, Atom::SectionRight) && matchAhead(atom->next(), Atom::SectionLeft)) { - QString nextId = Doc::canonicalTitle(Text::sectionHeading(atom->next()->next()).toString()); - QString ownId = Doc::canonicalTitle(atom->string()); + QString nextId = Utilities::asAsciiPrintable( + Text::sectionHeading(atom->next()->next()).toString()); + QString ownId = Utilities::asAsciiPrintable(atom->string()); if (nextId == ownId) break; } - writeAnchor(Doc::canonicalTitle(atom->string())); + writeAnchor(Utilities::asAsciiPrintable(atom->string())); break; case Atom::UnhandledFormat: m_writer->writeStartElement(dbNamespace, "emphasis"); diff --git a/src/qdoc/qdoc/generator.cpp b/src/qdoc/qdoc/generator.cpp index ad58b7f44..865e7cf59 100644 --- a/src/qdoc/qdoc/generator.cpp +++ b/src/qdoc/qdoc/generator.cpp @@ -306,7 +306,7 @@ QString Generator::fileBase(const Node *node) const } } - QString canonicalName{Utilities::canonicalizeFileName(base)}; + QString canonicalName{ Utilities::asAsciiPrintable(base) }; Node *n = const_cast<Node *>(node); n->setFileNameBase(canonicalName); return canonicalName; @@ -323,7 +323,7 @@ QString Generator::linkForExampleFile(const QString &path, const QString &fileEx QString link{path}; link.prepend(s_project.toLower() + QLatin1Char('-')); - QString canonicalName{Utilities::canonicalizeFileName(link)}; + QString canonicalName{ Utilities::asAsciiPrintable(link) }; canonicalName.append(QLatin1Char('.')); canonicalName.append(fileExt.isEmpty() ? fileExtension() : fileExt); return canonicalName; diff --git a/src/qdoc/qdoc/htmlgenerator.cpp b/src/qdoc/qdoc/htmlgenerator.cpp index 545541aad..c7faf7276 100644 --- a/src/qdoc/qdoc/htmlgenerator.cpp +++ b/src/qdoc/qdoc/htmlgenerator.cpp @@ -528,7 +528,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co for (const auto §ion : sinceSections) { if (!section.members().isEmpty()) { out() << "<li>" - << "<a href=\"#" << Doc::canonicalTitle(section.title()) << "\">" + << "<a href=\"#" << Utilities::asAsciiPrintable(section.title()) << "\">" << section.title() << "</a></li>\n"; } } @@ -537,7 +537,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co int index = 0; for (const auto §ion : sinceSections) { if (!section.members().isEmpty()) { - out() << "<h3 id=\"" << Doc::canonicalTitle(section.title()) << "\">" + out() << "<h3 id=\"" << Utilities::asAsciiPrintable(section.title()) << "\">" << protectEnc(section.title()) << "</h3>\n"; if (index == Sections::SinceClasses) generateCompactList(Generic, relative, ncmap, false, QStringLiteral("Q")); @@ -867,7 +867,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co case Atom::SectionHeadingLeft: { int unit = atom->string().toInt() + hOffset(relative); out() << "<h" + QString::number(unit) + QLatin1Char(' ') << "id=\"" - << Doc::canonicalTitle(Text::sectionHeading(atom).toString()) << "\">"; + << Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()) << "\">"; m_inSectionHeading = true; break; } @@ -970,7 +970,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co case Atom::Keyword: break; case Atom::Target: - out() << "<span id=\"" << Doc::canonicalTitle(atom->string()) << "\"></span>"; + out() << "<span id=\"" << Utilities::asAsciiPrintable(atom->string()) << "\"></span>"; break; case Atom::UnhandledFormat: out() << "<b class=\"redFont\"><Missing HTML></b>"; @@ -2123,7 +2123,7 @@ void HtmlGenerator::addStatusToMap(const Aggregate *aggregate, QMap<QString, Tex if (aggregate->status() == Node::Deprecated) spanClass = u"deprecated"_s; // Disregard any version info else - spanClass = Utilities::canonicalizeFileName(status.value()); + spanClass = Utilities::asAsciiPrintable(status.value()); text.clear(); text << Atom(Atom::String, status.value()) @@ -2367,7 +2367,7 @@ void HtmlGenerator::generateTableOfContents(const Node *node, CodeMarker *marker Text headingText = Text::sectionHeading(atom); QString s = headingText.toString(); out() << "<li class=\"level" << sectionNumber << "\">"; - out() << "<a href=\"" << '#' << Doc::canonicalTitle(s) << "\">"; + out() << "<a href=\"" << '#' << Utilities::asAsciiPrintable(s) << "\">"; generateAtomList(headingText.firstAtom(), node, marker, true, numAtoms); out() << "</a></li>\n"; } diff --git a/src/qdoc/qdoc/qdocindexfiles.cpp b/src/qdoc/qdoc/qdocindexfiles.cpp index 94ca8a111..e9bdda715 100644 --- a/src/qdoc/qdoc/qdocindexfiles.cpp +++ b/src/qdoc/qdoc/qdocindexfiles.cpp @@ -1082,7 +1082,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, const auto &targets = node->doc().targets(); for (const Atom *target : targets) { const QString &title = target->string(); - QString name = Doc::canonicalTitle(title); + QString name = Utilities::asAsciiPrintable(title); writer.writeStartElement("target"); if (!external) writer.writeAttribute("name", name); @@ -1097,7 +1097,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, const auto &keywords = node->doc().keywords(); for (const Atom *keyword : keywords) { const QString &title = keyword->string(); - QString name = Doc::canonicalTitle(title); + QString name = Utilities::asAsciiPrintable(title); writer.writeStartElement("keyword"); writer.writeAttribute("name", name); if (name != title) @@ -1120,7 +1120,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, int level = node->doc().tableOfContentsLevels()[i]; QString title = Text::sectionHeading(item).toString(); writer.writeStartElement("contents"); - writer.writeAttribute("name", Doc::canonicalTitle(title)); + writer.writeAttribute("name", Utilities::asAsciiPrintable(title)); writer.writeAttribute("title", title); writer.writeAttribute("level", QString::number(level)); writer.writeEndElement(); // contents diff --git a/src/qdoc/qdoc/tree.cpp b/src/qdoc/qdoc/tree.cpp index 02f422061..900b299c3 100644 --- a/src/qdoc/qdoc/tree.cpp +++ b/src/qdoc/qdoc/tree.cpp @@ -722,7 +722,7 @@ QString Tree::getRef(const QString &target, const Node *node) const ++it; } while (it != m_nodesByTargetTitle.constEnd() && it.key() == target); } - QString key = Doc::canonicalTitle(target); + QString key = Utilities::asAsciiPrintable(target); it = m_nodesByTargetRef.constFind(key); if (it != m_nodesByTargetRef.constEnd()) { do { @@ -758,7 +758,7 @@ void Tree::resolveTargets(Aggregate *root) QString key = node->title(); if (!key.isEmpty()) { if (key.contains(QChar(' '))) - key = Doc::canonicalTitle(key); + key = Utilities::asAsciiPrintable(key); QList<PageNode *> nodes = m_pageNodesByTitle.values(key); bool alreadyThere = false; if (!nodes.empty()) { @@ -782,7 +782,7 @@ void Tree::resolveTargets(Aggregate *root) QString ref = refForAtom(i); QString title = Text::sectionHeading(i).toString(); if (!ref.isEmpty() && !title.isEmpty()) { - QString key = Doc::canonicalTitle(title); + QString key = Utilities::asAsciiPrintable(title); auto *target = new TargetRec(ref, TargetRec::Contents, child, 3); m_nodesByTargetRef.insert(key, target); m_nodesByTargetTitle.insert(title, target); @@ -796,7 +796,7 @@ void Tree::resolveTargets(Aggregate *root) QString title = i->string(); if (!ref.isEmpty() && !title.isEmpty()) { auto *target = new TargetRec(ref, TargetRec::Keyword, child, 1); - m_nodesByTargetRef.insert(Doc::canonicalTitle(title), target); + m_nodesByTargetRef.insert(Utilities::asAsciiPrintable(title), target); m_nodesByTargetTitle.insert(title, target); } } @@ -807,7 +807,7 @@ void Tree::resolveTargets(Aggregate *root) QString ref = refForAtom(i); QString title = i->string(); if (!ref.isEmpty() && !title.isEmpty()) { - QString key = Doc::canonicalTitle(title); + QString key = Utilities::asAsciiPrintable(title); auto *target = new TargetRec(ref, TargetRec::Target, child, 2); m_nodesByTargetRef.insert(key, target); m_nodesByTargetTitle.insert(title, target); @@ -841,7 +841,7 @@ const TargetRec *Tree::findUnambiguousTarget(const QString &target, Node::Genus TargetRec *bestTarget = findBestCandidate(m_nodesByTargetTitle, target); if (!bestTarget) - bestTarget = findBestCandidate(m_nodesByTargetRef, Doc::canonicalTitle(target)); + bestTarget = findBestCandidate(m_nodesByTargetRef, Utilities::asAsciiPrintable(target)); return bestTarget; } @@ -853,7 +853,7 @@ const PageNode *Tree::findPageNodeByTitle(const QString &title) const { PageNodeMultiMap::const_iterator it; if (title.contains(QChar(' '))) - it = m_pageNodesByTitle.constFind(Doc::canonicalTitle(title)); + it = m_pageNodesByTitle.constFind(Utilities::asAsciiPrintable(title)); else it = m_pageNodesByTitle.constFind(title); if (it != m_pageNodesByTitle.constEnd()) { @@ -890,9 +890,9 @@ QString Tree::refForAtom(const Atom *atom) { if (atom) { if (atom->type() == Atom::SectionLeft) - return Doc::canonicalTitle(Text::sectionHeading(atom).toString()); + return Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()); if ((atom->type() == Atom::Target) || (atom->type() == Atom::Keyword)) - return Doc::canonicalTitle(atom->string()); + return Utilities::asAsciiPrintable(atom->string()); } return QString(); } diff --git a/src/qdoc/qdoc/utilities.cpp b/src/qdoc/qdoc/utilities.cpp index d4e1f8c07..2825804d6 100644 --- a/src/qdoc/qdoc/utilities.cpp +++ b/src/qdoc/qdoc/utilities.cpp @@ -81,22 +81,35 @@ QString comma(qsizetype wordPosition, qsizetype numberOfWords) } /*! - \internal - Replace non-alphanum characters in \a str with hyphens - and convert all characters to lowercase. Returns the - result of the conversion with leading, trailing, and - consecutive hyphens removed. + \brief Returns an ascii-printable representation of \a str. - The implementation is equivalent to: + Replace non-ascii-printable characters in \a str from a subset of such + characters. The subset includes alphanumeric (alnum) characters + ([a-zA-Z0-9]), space, punctuation characters, and common symbols. Non-alnum + characters in this subset are replaced by a single hyphen. Leading, + trailing, and consecutive hyphens are removed, such that the resulting + string does not start or end with a hyphen. All characters are converted to + lowercase. - \code + If any character in \a str is non-latin, or latin and not found in the + aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a str is appended + to the final string. + + Returns a string that is normalized for use where ascii-printable strings + are required, such as file names or fragment identifiers in URLs. + + The implementation is equivalent to: + + \code name.replace(QRegularExpression("[^A-Za-z0-9]+"), " "); name = name.simplified(); name.replace(QLatin1Char(' '), QLatin1Char('-')); name = name.toLower(); - \endcode + \endcode + + However, it has been measured to be approximately four times faster. */ -QString canonicalizeFileName(const QString &name) +QString asAsciiPrintable(const QString &str) { auto legal_ascii = [](const uint value) { const uint start_ascii_subset{ 32 }; @@ -108,12 +121,11 @@ QString canonicalizeFileName(const QString &name) QString result; bool begun = false; bool has_non_alnum_content{ false }; - const auto *data{name.constData()}; - for (qsizetype i = 0; i < name.size(); ++i) { - char16_t u{data[i].unicode()}; + + for (const auto &c : str) { + char16_t u = c.unicode(); if (!legal_ascii(u)) has_non_alnum_content = true; - if (u >= 'A' && u <= 'Z') u += 'a' - 'A'; if ((u >= 'a' && u <= 'z') || (u >= '0' && u <= '9')) { @@ -129,7 +141,7 @@ QString canonicalizeFileName(const QString &name) if (has_non_alnum_content) { auto title_hash = QString::fromLocal8Bit( - QCryptographicHash::hash(name.toUtf8(), QCryptographicHash::Md5).toHex()); + QCryptographicHash::hash(str.toUtf8(), QCryptographicHash::Md5).toHex()); title_hash.truncate(8); if (!result.isEmpty()) result.append(QLatin1Char('-')); diff --git a/src/qdoc/qdoc/utilities.h b/src/qdoc/qdoc/utilities.h index 998da48d3..0d485f650 100644 --- a/src/qdoc/qdoc/utilities.h +++ b/src/qdoc/qdoc/utilities.h @@ -19,7 +19,7 @@ bool debugging(); QString separator(qsizetype wordPosition, qsizetype numberOfWords); QString comma(qsizetype wordPosition, qsizetype numberOfWords); -QString canonicalizeFileName(const QString &name); +QString asAsciiPrintable(const QString &name); QStringList getInternalIncludePaths(const QString &compiler); } diff --git a/src/qdoc/qdoc/webxmlgenerator.cpp b/src/qdoc/qdoc/webxmlgenerator.cpp index c4f7b0340..674ae000a 100644 --- a/src/qdoc/qdoc/webxmlgenerator.cpp +++ b/src/qdoc/qdoc/webxmlgenerator.cpp @@ -11,6 +11,7 @@ #include "propertynode.h" #include "qdocdatabase.h" #include "quoter.h" +#include "utilities.h" #include <QtCore/qxmlstream.h> @@ -638,7 +639,8 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato case Atom::SectionLeft: writer.writeStartElement("section"); - writer.writeAttribute("id", Doc::canonicalTitle(Text::sectionHeading(atom).toString())); + writer.writeAttribute("id", + Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString())); break; case Atom::SectionRight: @@ -743,7 +745,7 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato case Atom::Target: writer.writeStartElement("target"); - writer.writeAttribute("name", Doc::canonicalTitle(atom->string())); + writer.writeAttribute("name", Utilities::asAsciiPrintable(atom->string())); writer.writeEndElement(); break; |