summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Wicking <paul.wicking@qt.io>2023-05-15 11:10:23 +0200
committerPaul Wicking <paul.wicking@qt.io>2023-05-15 14:56:54 +0200
commit24085ee19751fa39db5f70d2d850ff5b82a7e400 (patch)
tree94dff7645e21cab80126abd6cef6c8c86b992c86
parent52c8f1dbdd52f6c6e5e5419a55c456c832d8cdf4 (diff)
downloadqttools-24085ee19751fa39db5f70d2d850ff5b82a7e400.tar.gz
QDoc: Merge Doc::canonicalTitle and Utilities::canonicalizeFileName
Following recent bug fixes in QDoc, it has become apparent that the implementations of `Utilities::canonicalizeFileName` and `Doc::canonicalTitle` are mostly identical. This violates the DRY principle. The use of the former method to normalize generate values for HTML class attributes, shows that the problem the method solves is also closely related to generating HTML. The latter method specifically generates "URL friendly" strings for fragment identifiers in URLs. This indicates that both methods are poorly named, and that they both see use relating to the same problem domain. This patch merges the two implementations to reduce code duplication. `Doc::canonicalTitle` is removed, and `Utilities::canonicalizeFileName` is renamed to `asAsciiPrintable` to better represent its purpose. The documentation is updated. All call sites are updated to use the new method. As there is no unit test for either method, no tests are modified. Fixes: QTBUG-113606 Change-Id: I1fd6a654075fdf8e719bf504b1d702737dd1e42e Reviewed-by: Topi Reiniö <topi.reinio@qt.io>
-rw-r--r--src/qdoc/qdoc/doc.cpp79
-rw-r--r--src/qdoc/qdoc/doc.h4
-rw-r--r--src/qdoc/qdoc/docbookgenerator.cpp16
-rw-r--r--src/qdoc/qdoc/generator.cpp4
-rw-r--r--src/qdoc/qdoc/htmlgenerator.cpp12
-rw-r--r--src/qdoc/qdoc/qdocindexfiles.cpp6
-rw-r--r--src/qdoc/qdoc/tree.cpp18
-rw-r--r--src/qdoc/qdoc/utilities.cpp40
-rw-r--r--src/qdoc/qdoc/utilities.h2
-rw-r--r--src/qdoc/qdoc/webxmlgenerator.cpp6
10 files changed, 63 insertions, 124 deletions
diff --git a/src/qdoc/qdoc/doc.cpp b/src/qdoc/qdoc/doc.cpp
index a4d196e36..1f4e517d3 100644
--- a/src/qdoc/qdoc/doc.cpp
+++ b/src/qdoc/qdoc/doc.cpp
@@ -12,6 +12,7 @@
#include "qmltypenode.h"
#include "quoter.h"
#include "text.h"
+#include "utilities.h"
#include <qcryptographichash.h>
@@ -409,84 +410,6 @@ CodeMarker *Doc::quoteFromFile(const Location &location, Quoter &quoter, Resolve
return marker;
}
-/*!
- \brief Generates a url-friendly string representation from \a title.
-
- "Url-friendly" in this context is a string that contains only a subset of
- printable ascii characters.
-
- The subset includes alphanumeric (alnum) characters ([a-zA-Z0-9]), printable
- ascii characters, space, punctuation characters, and common symbols.
- Non-alnum characters in this subset are replaced by a single dash. Leading
- and trailing dashes are removed, such that the resulting string does not
- start or end with a dash. Any capital character is replaced by its lowercase
- counterpart.
-
- If any character in \a title is non-latin, or latin and not found in the
- aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a title is
- appended to the final string.
-
- Returns a string that is normalized for the purpose of generating fragment
- identifiers for \a title in URLs.
- */
-QString Doc::canonicalTitle(const QString &title)
-{
- auto legal_ascii = [](const uint value) {
- const uint start_ascii_subset{ 32 };
- const uint end_ascii_subset{ 126 };
-
- return value >= start_ascii_subset && value <= end_ascii_subset;
- };
-
- // The code below is equivalent to the following chunk, but
- // has been measured to be approximately 4 times faster.
- //
- // QRegularExpression attributeExpr("[^A-Za-z0-9]+");
- // QString result = title.toLower();
- // result.replace(attributeExpr, " ");
- // result = result.simplified();
- // result.replace(QLatin1Char(' '), QLatin1Char('-'));
-
- QString result;
- result.reserve(title.size());
-
- bool dashAppended{false};
- bool begun{false};
- qsizetype lastAlnum{0};
- bool has_non_alnum_content{false};
-
- for (const auto &i : title) {
- uint c = i.unicode();
-
- if (!legal_ascii(c))
- has_non_alnum_content = true;
- if (c >= 'A' && c <= 'Z')
- c += 'a' - 'A';
- bool alnum = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
- if (alnum) {
- result += QLatin1Char(c);
- begun = true;
- dashAppended = false;
- lastAlnum = result.size();
- } else if (!dashAppended) {
- if (begun)
- result += QLatin1Char('-');
- dashAppended = true;
- }
- }
- result.truncate(lastAlnum);
-
- if (has_non_alnum_content) {
- auto title_hash = QString::fromLocal8Bit(
- QCryptographicHash::hash(title.toUtf8(), QCryptographicHash::Md5).toHex());
- title_hash.truncate(8);
- if (!result.isEmpty())
- result.append(QLatin1Char('-'));
- result.append(title_hash);
- }
- return result;
-}
-
void Doc::detach()
{
if (m_priv == nullptr) {
diff --git a/src/qdoc/qdoc/doc.h b/src/qdoc/qdoc/doc.h
index a4917e3e0..948c6756b 100644
--- a/src/qdoc/qdoc/doc.h
+++ b/src/qdoc/qdoc/doc.h
@@ -76,8 +76,8 @@ public:
static void initialize(FileResolver& file_resolver);
static void terminate();
static void trimCStyleComment(Location &location, QString &str);
- static CodeMarker *quoteFromFile(const Location &location, Quoter &quoter, ResolvedFile resolved_file);
- static QString canonicalTitle(const QString &title);
+ static CodeMarker *quoteFromFile(const Location &location, Quoter &quoter,
+ ResolvedFile resolved_file);
private:
void detach();
diff --git a/src/qdoc/qdoc/docbookgenerator.cpp b/src/qdoc/qdoc/docbookgenerator.cpp
index 115a9a6e1..d6e72b3b4 100644
--- a/src/qdoc/qdoc/docbookgenerator.cpp
+++ b/src/qdoc/qdoc/docbookgenerator.cpp
@@ -1052,7 +1052,8 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative)
}
// If there is an anchor just after with the same ID, skip it.
- if (matchAhead(atom, Atom::Target) && Doc::canonicalTitle(atom->next()->string()) == id) {
+ if (matchAhead(atom, Atom::Target)
+ && Utilities::asAsciiPrintable(atom->next()->string()) == id) {
++skipAhead;
}
} else {
@@ -1514,7 +1515,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative)
sectionLevels.push(currentSectionLevel);
m_writer->writeStartElement(dbNamespace, "section");
- writeXmlId(Doc::canonicalTitle(Text::sectionHeading(atom).toString()));
+ writeXmlId(Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()));
newLine();
// Unlike startSectionBegin, don't start a title here.
}
@@ -1624,7 +1625,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative)
const Atom *next = atom->next();
QString id{""};
if (matchAhead(atom, Atom::Target)) {
- id = Doc::canonicalTitle(next->string());
+ id = Utilities::asAsciiPrintable(next->string());
next = next->next();
++skipAhead;
}
@@ -1671,7 +1672,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative)
QString id{""};
bool hasTarget {false};
if (matchAhead(atom, Atom::Target)) {
- id = Doc::canonicalTitle(atom->next()->string());
+ id = Utilities::asAsciiPrintable(atom->next()->string());
++skipAhead;
hasTarget = true;
}
@@ -1768,13 +1769,14 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative)
case Atom::Target:
// Sometimes, there is a \target just before a section title with the same ID. Only outut one xml:id.
if (matchAhead(atom, Atom::SectionRight) && matchAhead(atom->next(), Atom::SectionLeft)) {
- QString nextId = Doc::canonicalTitle(Text::sectionHeading(atom->next()->next()).toString());
- QString ownId = Doc::canonicalTitle(atom->string());
+ QString nextId = Utilities::asAsciiPrintable(
+ Text::sectionHeading(atom->next()->next()).toString());
+ QString ownId = Utilities::asAsciiPrintable(atom->string());
if (nextId == ownId)
break;
}
- writeAnchor(Doc::canonicalTitle(atom->string()));
+ writeAnchor(Utilities::asAsciiPrintable(atom->string()));
break;
case Atom::UnhandledFormat:
m_writer->writeStartElement(dbNamespace, "emphasis");
diff --git a/src/qdoc/qdoc/generator.cpp b/src/qdoc/qdoc/generator.cpp
index ad58b7f44..865e7cf59 100644
--- a/src/qdoc/qdoc/generator.cpp
+++ b/src/qdoc/qdoc/generator.cpp
@@ -306,7 +306,7 @@ QString Generator::fileBase(const Node *node) const
}
}
- QString canonicalName{Utilities::canonicalizeFileName(base)};
+ QString canonicalName{ Utilities::asAsciiPrintable(base) };
Node *n = const_cast<Node *>(node);
n->setFileNameBase(canonicalName);
return canonicalName;
@@ -323,7 +323,7 @@ QString Generator::linkForExampleFile(const QString &path, const QString &fileEx
QString link{path};
link.prepend(s_project.toLower() + QLatin1Char('-'));
- QString canonicalName{Utilities::canonicalizeFileName(link)};
+ QString canonicalName{ Utilities::asAsciiPrintable(link) };
canonicalName.append(QLatin1Char('.'));
canonicalName.append(fileExt.isEmpty() ? fileExtension() : fileExt);
return canonicalName;
diff --git a/src/qdoc/qdoc/htmlgenerator.cpp b/src/qdoc/qdoc/htmlgenerator.cpp
index 545541aad..c7faf7276 100644
--- a/src/qdoc/qdoc/htmlgenerator.cpp
+++ b/src/qdoc/qdoc/htmlgenerator.cpp
@@ -528,7 +528,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co
for (const auto &section : sinceSections) {
if (!section.members().isEmpty()) {
out() << "<li>"
- << "<a href=\"#" << Doc::canonicalTitle(section.title()) << "\">"
+ << "<a href=\"#" << Utilities::asAsciiPrintable(section.title()) << "\">"
<< section.title() << "</a></li>\n";
}
}
@@ -537,7 +537,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co
int index = 0;
for (const auto &section : sinceSections) {
if (!section.members().isEmpty()) {
- out() << "<h3 id=\"" << Doc::canonicalTitle(section.title()) << "\">"
+ out() << "<h3 id=\"" << Utilities::asAsciiPrintable(section.title()) << "\">"
<< protectEnc(section.title()) << "</h3>\n";
if (index == Sections::SinceClasses)
generateCompactList(Generic, relative, ncmap, false, QStringLiteral("Q"));
@@ -867,7 +867,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co
case Atom::SectionHeadingLeft: {
int unit = atom->string().toInt() + hOffset(relative);
out() << "<h" + QString::number(unit) + QLatin1Char(' ') << "id=\""
- << Doc::canonicalTitle(Text::sectionHeading(atom).toString()) << "\">";
+ << Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()) << "\">";
m_inSectionHeading = true;
break;
}
@@ -970,7 +970,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co
case Atom::Keyword:
break;
case Atom::Target:
- out() << "<span id=\"" << Doc::canonicalTitle(atom->string()) << "\"></span>";
+ out() << "<span id=\"" << Utilities::asAsciiPrintable(atom->string()) << "\"></span>";
break;
case Atom::UnhandledFormat:
out() << "<b class=\"redFont\">&lt;Missing HTML&gt;</b>";
@@ -2123,7 +2123,7 @@ void HtmlGenerator::addStatusToMap(const Aggregate *aggregate, QMap<QString, Tex
if (aggregate->status() == Node::Deprecated)
spanClass = u"deprecated"_s; // Disregard any version info
else
- spanClass = Utilities::canonicalizeFileName(status.value());
+ spanClass = Utilities::asAsciiPrintable(status.value());
text.clear();
text << Atom(Atom::String, status.value())
@@ -2367,7 +2367,7 @@ void HtmlGenerator::generateTableOfContents(const Node *node, CodeMarker *marker
Text headingText = Text::sectionHeading(atom);
QString s = headingText.toString();
out() << "<li class=\"level" << sectionNumber << "\">";
- out() << "<a href=\"" << '#' << Doc::canonicalTitle(s) << "\">";
+ out() << "<a href=\"" << '#' << Utilities::asAsciiPrintable(s) << "\">";
generateAtomList(headingText.firstAtom(), node, marker, true, numAtoms);
out() << "</a></li>\n";
}
diff --git a/src/qdoc/qdoc/qdocindexfiles.cpp b/src/qdoc/qdoc/qdocindexfiles.cpp
index 94ca8a111..e9bdda715 100644
--- a/src/qdoc/qdoc/qdocindexfiles.cpp
+++ b/src/qdoc/qdoc/qdocindexfiles.cpp
@@ -1082,7 +1082,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node,
const auto &targets = node->doc().targets();
for (const Atom *target : targets) {
const QString &title = target->string();
- QString name = Doc::canonicalTitle(title);
+ QString name = Utilities::asAsciiPrintable(title);
writer.writeStartElement("target");
if (!external)
writer.writeAttribute("name", name);
@@ -1097,7 +1097,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node,
const auto &keywords = node->doc().keywords();
for (const Atom *keyword : keywords) {
const QString &title = keyword->string();
- QString name = Doc::canonicalTitle(title);
+ QString name = Utilities::asAsciiPrintable(title);
writer.writeStartElement("keyword");
writer.writeAttribute("name", name);
if (name != title)
@@ -1120,7 +1120,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node,
int level = node->doc().tableOfContentsLevels()[i];
QString title = Text::sectionHeading(item).toString();
writer.writeStartElement("contents");
- writer.writeAttribute("name", Doc::canonicalTitle(title));
+ writer.writeAttribute("name", Utilities::asAsciiPrintable(title));
writer.writeAttribute("title", title);
writer.writeAttribute("level", QString::number(level));
writer.writeEndElement(); // contents
diff --git a/src/qdoc/qdoc/tree.cpp b/src/qdoc/qdoc/tree.cpp
index 02f422061..900b299c3 100644
--- a/src/qdoc/qdoc/tree.cpp
+++ b/src/qdoc/qdoc/tree.cpp
@@ -722,7 +722,7 @@ QString Tree::getRef(const QString &target, const Node *node) const
++it;
} while (it != m_nodesByTargetTitle.constEnd() && it.key() == target);
}
- QString key = Doc::canonicalTitle(target);
+ QString key = Utilities::asAsciiPrintable(target);
it = m_nodesByTargetRef.constFind(key);
if (it != m_nodesByTargetRef.constEnd()) {
do {
@@ -758,7 +758,7 @@ void Tree::resolveTargets(Aggregate *root)
QString key = node->title();
if (!key.isEmpty()) {
if (key.contains(QChar(' ')))
- key = Doc::canonicalTitle(key);
+ key = Utilities::asAsciiPrintable(key);
QList<PageNode *> nodes = m_pageNodesByTitle.values(key);
bool alreadyThere = false;
if (!nodes.empty()) {
@@ -782,7 +782,7 @@ void Tree::resolveTargets(Aggregate *root)
QString ref = refForAtom(i);
QString title = Text::sectionHeading(i).toString();
if (!ref.isEmpty() && !title.isEmpty()) {
- QString key = Doc::canonicalTitle(title);
+ QString key = Utilities::asAsciiPrintable(title);
auto *target = new TargetRec(ref, TargetRec::Contents, child, 3);
m_nodesByTargetRef.insert(key, target);
m_nodesByTargetTitle.insert(title, target);
@@ -796,7 +796,7 @@ void Tree::resolveTargets(Aggregate *root)
QString title = i->string();
if (!ref.isEmpty() && !title.isEmpty()) {
auto *target = new TargetRec(ref, TargetRec::Keyword, child, 1);
- m_nodesByTargetRef.insert(Doc::canonicalTitle(title), target);
+ m_nodesByTargetRef.insert(Utilities::asAsciiPrintable(title), target);
m_nodesByTargetTitle.insert(title, target);
}
}
@@ -807,7 +807,7 @@ void Tree::resolveTargets(Aggregate *root)
QString ref = refForAtom(i);
QString title = i->string();
if (!ref.isEmpty() && !title.isEmpty()) {
- QString key = Doc::canonicalTitle(title);
+ QString key = Utilities::asAsciiPrintable(title);
auto *target = new TargetRec(ref, TargetRec::Target, child, 2);
m_nodesByTargetRef.insert(key, target);
m_nodesByTargetTitle.insert(title, target);
@@ -841,7 +841,7 @@ const TargetRec *Tree::findUnambiguousTarget(const QString &target, Node::Genus
TargetRec *bestTarget = findBestCandidate(m_nodesByTargetTitle, target);
if (!bestTarget)
- bestTarget = findBestCandidate(m_nodesByTargetRef, Doc::canonicalTitle(target));
+ bestTarget = findBestCandidate(m_nodesByTargetRef, Utilities::asAsciiPrintable(target));
return bestTarget;
}
@@ -853,7 +853,7 @@ const PageNode *Tree::findPageNodeByTitle(const QString &title) const
{
PageNodeMultiMap::const_iterator it;
if (title.contains(QChar(' ')))
- it = m_pageNodesByTitle.constFind(Doc::canonicalTitle(title));
+ it = m_pageNodesByTitle.constFind(Utilities::asAsciiPrintable(title));
else
it = m_pageNodesByTitle.constFind(title);
if (it != m_pageNodesByTitle.constEnd()) {
@@ -890,9 +890,9 @@ QString Tree::refForAtom(const Atom *atom)
{
if (atom) {
if (atom->type() == Atom::SectionLeft)
- return Doc::canonicalTitle(Text::sectionHeading(atom).toString());
+ return Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString());
if ((atom->type() == Atom::Target) || (atom->type() == Atom::Keyword))
- return Doc::canonicalTitle(atom->string());
+ return Utilities::asAsciiPrintable(atom->string());
}
return QString();
}
diff --git a/src/qdoc/qdoc/utilities.cpp b/src/qdoc/qdoc/utilities.cpp
index d4e1f8c07..2825804d6 100644
--- a/src/qdoc/qdoc/utilities.cpp
+++ b/src/qdoc/qdoc/utilities.cpp
@@ -81,22 +81,35 @@ QString comma(qsizetype wordPosition, qsizetype numberOfWords)
}
/*!
- \internal
- Replace non-alphanum characters in \a str with hyphens
- and convert all characters to lowercase. Returns the
- result of the conversion with leading, trailing, and
- consecutive hyphens removed.
+ \brief Returns an ascii-printable representation of \a str.
- The implementation is equivalent to:
+ Replace non-ascii-printable characters in \a str from a subset of such
+ characters. The subset includes alphanumeric (alnum) characters
+ ([a-zA-Z0-9]), space, punctuation characters, and common symbols. Non-alnum
+ characters in this subset are replaced by a single hyphen. Leading,
+ trailing, and consecutive hyphens are removed, such that the resulting
+ string does not start or end with a hyphen. All characters are converted to
+ lowercase.
- \code
+ If any character in \a str is non-latin, or latin and not found in the
+ aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a str is appended
+ to the final string.
+
+ Returns a string that is normalized for use where ascii-printable strings
+ are required, such as file names or fragment identifiers in URLs.
+
+ The implementation is equivalent to:
+
+ \code
name.replace(QRegularExpression("[^A-Za-z0-9]+"), " ");
name = name.simplified();
name.replace(QLatin1Char(' '), QLatin1Char('-'));
name = name.toLower();
- \endcode
+ \endcode
+
+ However, it has been measured to be approximately four times faster.
*/
-QString canonicalizeFileName(const QString &name)
+QString asAsciiPrintable(const QString &str)
{
auto legal_ascii = [](const uint value) {
const uint start_ascii_subset{ 32 };
@@ -108,12 +121,11 @@ QString canonicalizeFileName(const QString &name)
QString result;
bool begun = false;
bool has_non_alnum_content{ false };
- const auto *data{name.constData()};
- for (qsizetype i = 0; i < name.size(); ++i) {
- char16_t u{data[i].unicode()};
+
+ for (const auto &c : str) {
+ char16_t u = c.unicode();
if (!legal_ascii(u))
has_non_alnum_content = true;
-
if (u >= 'A' && u <= 'Z')
u += 'a' - 'A';
if ((u >= 'a' && u <= 'z') || (u >= '0' && u <= '9')) {
@@ -129,7 +141,7 @@ QString canonicalizeFileName(const QString &name)
if (has_non_alnum_content) {
auto title_hash = QString::fromLocal8Bit(
- QCryptographicHash::hash(name.toUtf8(), QCryptographicHash::Md5).toHex());
+ QCryptographicHash::hash(str.toUtf8(), QCryptographicHash::Md5).toHex());
title_hash.truncate(8);
if (!result.isEmpty())
result.append(QLatin1Char('-'));
diff --git a/src/qdoc/qdoc/utilities.h b/src/qdoc/qdoc/utilities.h
index 998da48d3..0d485f650 100644
--- a/src/qdoc/qdoc/utilities.h
+++ b/src/qdoc/qdoc/utilities.h
@@ -19,7 +19,7 @@ bool debugging();
QString separator(qsizetype wordPosition, qsizetype numberOfWords);
QString comma(qsizetype wordPosition, qsizetype numberOfWords);
-QString canonicalizeFileName(const QString &name);
+QString asAsciiPrintable(const QString &name);
QStringList getInternalIncludePaths(const QString &compiler);
}
diff --git a/src/qdoc/qdoc/webxmlgenerator.cpp b/src/qdoc/qdoc/webxmlgenerator.cpp
index c4f7b0340..674ae000a 100644
--- a/src/qdoc/qdoc/webxmlgenerator.cpp
+++ b/src/qdoc/qdoc/webxmlgenerator.cpp
@@ -11,6 +11,7 @@
#include "propertynode.h"
#include "qdocdatabase.h"
#include "quoter.h"
+#include "utilities.h"
#include <QtCore/qxmlstream.h>
@@ -638,7 +639,8 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato
case Atom::SectionLeft:
writer.writeStartElement("section");
- writer.writeAttribute("id", Doc::canonicalTitle(Text::sectionHeading(atom).toString()));
+ writer.writeAttribute("id",
+ Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()));
break;
case Atom::SectionRight:
@@ -743,7 +745,7 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato
case Atom::Target:
writer.writeStartElement("target");
- writer.writeAttribute("name", Doc::canonicalTitle(atom->string()));
+ writer.writeAttribute("name", Utilities::asAsciiPrintable(atom->string()));
writer.writeEndElement();
break;