diff options
-rw-r--r-- | src/qdoc/qdoc/doc.cpp | 79 | ||||
-rw-r--r-- | src/qdoc/qdoc/doc.h | 4 | ||||
-rw-r--r-- | src/qdoc/qdoc/docbookgenerator.cpp | 16 | ||||
-rw-r--r-- | src/qdoc/qdoc/generator.cpp | 4 | ||||
-rw-r--r-- | src/qdoc/qdoc/htmlgenerator.cpp | 12 | ||||
-rw-r--r-- | src/qdoc/qdoc/qdocindexfiles.cpp | 6 | ||||
-rw-r--r-- | src/qdoc/qdoc/tree.cpp | 18 | ||||
-rw-r--r-- | src/qdoc/qdoc/utilities.cpp | 40 | ||||
-rw-r--r-- | src/qdoc/qdoc/utilities.h | 2 | ||||
-rw-r--r-- | src/qdoc/qdoc/webxmlgenerator.cpp | 6 |
10 files changed, 63 insertions, 124 deletions
diff --git a/src/qdoc/qdoc/doc.cpp b/src/qdoc/qdoc/doc.cpp index a4d196e36..1f4e517d3 100644 --- a/src/qdoc/qdoc/doc.cpp +++ b/src/qdoc/qdoc/doc.cpp @@ -12,6 +12,7 @@ #include "qmltypenode.h" #include "quoter.h" #include "text.h" +#include "utilities.h" #include <qcryptographichash.h> @@ -409,84 +410,6 @@ CodeMarker *Doc::quoteFromFile(const Location &location, Quoter "er, Resolve return marker; } -/*! - \brief Generates a url-friendly string representation from \a title. - - "Url-friendly" in this context is a string that contains only a subset of - printable ascii characters. - - The subset includes alphanumeric (alnum) characters ([a-zA-Z0-9]), printable - ascii characters, space, punctuation characters, and common symbols. - Non-alnum characters in this subset are replaced by a single dash. Leading - and trailing dashes are removed, such that the resulting string does not - start or end with a dash. Any capital character is replaced by its lowercase - counterpart. - - If any character in \a title is non-latin, or latin and not found in the - aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a title is - appended to the final string. - - Returns a string that is normalized for the purpose of generating fragment - identifiers for \a title in URLs. - */ -QString Doc::canonicalTitle(const QString &title) -{ - auto legal_ascii = [](const uint value) { - const uint start_ascii_subset{ 32 }; - const uint end_ascii_subset{ 126 }; - - return value >= start_ascii_subset && value <= end_ascii_subset; - }; - - // The code below is equivalent to the following chunk, but - // has been measured to be approximately 4 times faster. - // - // QRegularExpression attributeExpr("[^A-Za-z0-9]+"); - // QString result = title.toLower(); - // result.replace(attributeExpr, " "); - // result = result.simplified(); - // result.replace(QLatin1Char(' '), QLatin1Char('-')); - - QString result; - result.reserve(title.size()); - - bool dashAppended{false}; - bool begun{false}; - qsizetype lastAlnum{0}; - bool has_non_alnum_content{false}; - - for (const auto &i : title) { - uint c = i.unicode(); - - if (!legal_ascii(c)) - has_non_alnum_content = true; - if (c >= 'A' && c <= 'Z') - c += 'a' - 'A'; - bool alnum = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); - if (alnum) { - result += QLatin1Char(c); - begun = true; - dashAppended = false; - lastAlnum = result.size(); - } else if (!dashAppended) { - if (begun) - result += QLatin1Char('-'); - dashAppended = true; - } - } - result.truncate(lastAlnum); - - if (has_non_alnum_content) { - auto title_hash = QString::fromLocal8Bit( - QCryptographicHash::hash(title.toUtf8(), QCryptographicHash::Md5).toHex()); - title_hash.truncate(8); - if (!result.isEmpty()) - result.append(QLatin1Char('-')); - result.append(title_hash); - } - return result; -} - void Doc::detach() { if (m_priv == nullptr) { diff --git a/src/qdoc/qdoc/doc.h b/src/qdoc/qdoc/doc.h index a4917e3e0..948c6756b 100644 --- a/src/qdoc/qdoc/doc.h +++ b/src/qdoc/qdoc/doc.h @@ -76,8 +76,8 @@ public: static void initialize(FileResolver& file_resolver); static void terminate(); static void trimCStyleComment(Location &location, QString &str); - static CodeMarker *quoteFromFile(const Location &location, Quoter "er, ResolvedFile resolved_file); - static QString canonicalTitle(const QString &title); + static CodeMarker *quoteFromFile(const Location &location, Quoter "er, + ResolvedFile resolved_file); private: void detach(); diff --git a/src/qdoc/qdoc/docbookgenerator.cpp b/src/qdoc/qdoc/docbookgenerator.cpp index 115a9a6e1..d6e72b3b4 100644 --- a/src/qdoc/qdoc/docbookgenerator.cpp +++ b/src/qdoc/qdoc/docbookgenerator.cpp @@ -1052,7 +1052,8 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) } // If there is an anchor just after with the same ID, skip it. - if (matchAhead(atom, Atom::Target) && Doc::canonicalTitle(atom->next()->string()) == id) { + if (matchAhead(atom, Atom::Target) + && Utilities::asAsciiPrintable(atom->next()->string()) == id) { ++skipAhead; } } else { @@ -1514,7 +1515,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) sectionLevels.push(currentSectionLevel); m_writer->writeStartElement(dbNamespace, "section"); - writeXmlId(Doc::canonicalTitle(Text::sectionHeading(atom).toString())); + writeXmlId(Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString())); newLine(); // Unlike startSectionBegin, don't start a title here. } @@ -1624,7 +1625,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) const Atom *next = atom->next(); QString id{""}; if (matchAhead(atom, Atom::Target)) { - id = Doc::canonicalTitle(next->string()); + id = Utilities::asAsciiPrintable(next->string()); next = next->next(); ++skipAhead; } @@ -1671,7 +1672,7 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) QString id{""}; bool hasTarget {false}; if (matchAhead(atom, Atom::Target)) { - id = Doc::canonicalTitle(atom->next()->string()); + id = Utilities::asAsciiPrintable(atom->next()->string()); ++skipAhead; hasTarget = true; } @@ -1768,13 +1769,14 @@ qsizetype DocBookGenerator::generateAtom(const Atom *atom, const Node *relative) case Atom::Target: // Sometimes, there is a \target just before a section title with the same ID. Only outut one xml:id. if (matchAhead(atom, Atom::SectionRight) && matchAhead(atom->next(), Atom::SectionLeft)) { - QString nextId = Doc::canonicalTitle(Text::sectionHeading(atom->next()->next()).toString()); - QString ownId = Doc::canonicalTitle(atom->string()); + QString nextId = Utilities::asAsciiPrintable( + Text::sectionHeading(atom->next()->next()).toString()); + QString ownId = Utilities::asAsciiPrintable(atom->string()); if (nextId == ownId) break; } - writeAnchor(Doc::canonicalTitle(atom->string())); + writeAnchor(Utilities::asAsciiPrintable(atom->string())); break; case Atom::UnhandledFormat: m_writer->writeStartElement(dbNamespace, "emphasis"); diff --git a/src/qdoc/qdoc/generator.cpp b/src/qdoc/qdoc/generator.cpp index ad58b7f44..865e7cf59 100644 --- a/src/qdoc/qdoc/generator.cpp +++ b/src/qdoc/qdoc/generator.cpp @@ -306,7 +306,7 @@ QString Generator::fileBase(const Node *node) const } } - QString canonicalName{Utilities::canonicalizeFileName(base)}; + QString canonicalName{ Utilities::asAsciiPrintable(base) }; Node *n = const_cast<Node *>(node); n->setFileNameBase(canonicalName); return canonicalName; @@ -323,7 +323,7 @@ QString Generator::linkForExampleFile(const QString &path, const QString &fileEx QString link{path}; link.prepend(s_project.toLower() + QLatin1Char('-')); - QString canonicalName{Utilities::canonicalizeFileName(link)}; + QString canonicalName{ Utilities::asAsciiPrintable(link) }; canonicalName.append(QLatin1Char('.')); canonicalName.append(fileExt.isEmpty() ? fileExtension() : fileExt); return canonicalName; diff --git a/src/qdoc/qdoc/htmlgenerator.cpp b/src/qdoc/qdoc/htmlgenerator.cpp index 545541aad..c7faf7276 100644 --- a/src/qdoc/qdoc/htmlgenerator.cpp +++ b/src/qdoc/qdoc/htmlgenerator.cpp @@ -528,7 +528,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co for (const auto §ion : sinceSections) { if (!section.members().isEmpty()) { out() << "<li>" - << "<a href=\"#" << Doc::canonicalTitle(section.title()) << "\">" + << "<a href=\"#" << Utilities::asAsciiPrintable(section.title()) << "\">" << section.title() << "</a></li>\n"; } } @@ -537,7 +537,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co int index = 0; for (const auto §ion : sinceSections) { if (!section.members().isEmpty()) { - out() << "<h3 id=\"" << Doc::canonicalTitle(section.title()) << "\">" + out() << "<h3 id=\"" << Utilities::asAsciiPrintable(section.title()) << "\">" << protectEnc(section.title()) << "</h3>\n"; if (index == Sections::SinceClasses) generateCompactList(Generic, relative, ncmap, false, QStringLiteral("Q")); @@ -867,7 +867,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co case Atom::SectionHeadingLeft: { int unit = atom->string().toInt() + hOffset(relative); out() << "<h" + QString::number(unit) + QLatin1Char(' ') << "id=\"" - << Doc::canonicalTitle(Text::sectionHeading(atom).toString()) << "\">"; + << Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()) << "\">"; m_inSectionHeading = true; break; } @@ -970,7 +970,7 @@ qsizetype HtmlGenerator::generateAtom(const Atom *atom, const Node *relative, Co case Atom::Keyword: break; case Atom::Target: - out() << "<span id=\"" << Doc::canonicalTitle(atom->string()) << "\"></span>"; + out() << "<span id=\"" << Utilities::asAsciiPrintable(atom->string()) << "\"></span>"; break; case Atom::UnhandledFormat: out() << "<b class=\"redFont\"><Missing HTML></b>"; @@ -2123,7 +2123,7 @@ void HtmlGenerator::addStatusToMap(const Aggregate *aggregate, QMap<QString, Tex if (aggregate->status() == Node::Deprecated) spanClass = u"deprecated"_s; // Disregard any version info else - spanClass = Utilities::canonicalizeFileName(status.value()); + spanClass = Utilities::asAsciiPrintable(status.value()); text.clear(); text << Atom(Atom::String, status.value()) @@ -2367,7 +2367,7 @@ void HtmlGenerator::generateTableOfContents(const Node *node, CodeMarker *marker Text headingText = Text::sectionHeading(atom); QString s = headingText.toString(); out() << "<li class=\"level" << sectionNumber << "\">"; - out() << "<a href=\"" << '#' << Doc::canonicalTitle(s) << "\">"; + out() << "<a href=\"" << '#' << Utilities::asAsciiPrintable(s) << "\">"; generateAtomList(headingText.firstAtom(), node, marker, true, numAtoms); out() << "</a></li>\n"; } diff --git a/src/qdoc/qdoc/qdocindexfiles.cpp b/src/qdoc/qdoc/qdocindexfiles.cpp index 94ca8a111..e9bdda715 100644 --- a/src/qdoc/qdoc/qdocindexfiles.cpp +++ b/src/qdoc/qdoc/qdocindexfiles.cpp @@ -1082,7 +1082,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, const auto &targets = node->doc().targets(); for (const Atom *target : targets) { const QString &title = target->string(); - QString name = Doc::canonicalTitle(title); + QString name = Utilities::asAsciiPrintable(title); writer.writeStartElement("target"); if (!external) writer.writeAttribute("name", name); @@ -1097,7 +1097,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, const auto &keywords = node->doc().keywords(); for (const Atom *keyword : keywords) { const QString &title = keyword->string(); - QString name = Doc::canonicalTitle(title); + QString name = Utilities::asAsciiPrintable(title); writer.writeStartElement("keyword"); writer.writeAttribute("name", name); if (name != title) @@ -1120,7 +1120,7 @@ bool QDocIndexFiles::generateIndexSection(QXmlStreamWriter &writer, Node *node, int level = node->doc().tableOfContentsLevels()[i]; QString title = Text::sectionHeading(item).toString(); writer.writeStartElement("contents"); - writer.writeAttribute("name", Doc::canonicalTitle(title)); + writer.writeAttribute("name", Utilities::asAsciiPrintable(title)); writer.writeAttribute("title", title); writer.writeAttribute("level", QString::number(level)); writer.writeEndElement(); // contents diff --git a/src/qdoc/qdoc/tree.cpp b/src/qdoc/qdoc/tree.cpp index 02f422061..900b299c3 100644 --- a/src/qdoc/qdoc/tree.cpp +++ b/src/qdoc/qdoc/tree.cpp @@ -722,7 +722,7 @@ QString Tree::getRef(const QString &target, const Node *node) const ++it; } while (it != m_nodesByTargetTitle.constEnd() && it.key() == target); } - QString key = Doc::canonicalTitle(target); + QString key = Utilities::asAsciiPrintable(target); it = m_nodesByTargetRef.constFind(key); if (it != m_nodesByTargetRef.constEnd()) { do { @@ -758,7 +758,7 @@ void Tree::resolveTargets(Aggregate *root) QString key = node->title(); if (!key.isEmpty()) { if (key.contains(QChar(' '))) - key = Doc::canonicalTitle(key); + key = Utilities::asAsciiPrintable(key); QList<PageNode *> nodes = m_pageNodesByTitle.values(key); bool alreadyThere = false; if (!nodes.empty()) { @@ -782,7 +782,7 @@ void Tree::resolveTargets(Aggregate *root) QString ref = refForAtom(i); QString title = Text::sectionHeading(i).toString(); if (!ref.isEmpty() && !title.isEmpty()) { - QString key = Doc::canonicalTitle(title); + QString key = Utilities::asAsciiPrintable(title); auto *target = new TargetRec(ref, TargetRec::Contents, child, 3); m_nodesByTargetRef.insert(key, target); m_nodesByTargetTitle.insert(title, target); @@ -796,7 +796,7 @@ void Tree::resolveTargets(Aggregate *root) QString title = i->string(); if (!ref.isEmpty() && !title.isEmpty()) { auto *target = new TargetRec(ref, TargetRec::Keyword, child, 1); - m_nodesByTargetRef.insert(Doc::canonicalTitle(title), target); + m_nodesByTargetRef.insert(Utilities::asAsciiPrintable(title), target); m_nodesByTargetTitle.insert(title, target); } } @@ -807,7 +807,7 @@ void Tree::resolveTargets(Aggregate *root) QString ref = refForAtom(i); QString title = i->string(); if (!ref.isEmpty() && !title.isEmpty()) { - QString key = Doc::canonicalTitle(title); + QString key = Utilities::asAsciiPrintable(title); auto *target = new TargetRec(ref, TargetRec::Target, child, 2); m_nodesByTargetRef.insert(key, target); m_nodesByTargetTitle.insert(title, target); @@ -841,7 +841,7 @@ const TargetRec *Tree::findUnambiguousTarget(const QString &target, Node::Genus TargetRec *bestTarget = findBestCandidate(m_nodesByTargetTitle, target); if (!bestTarget) - bestTarget = findBestCandidate(m_nodesByTargetRef, Doc::canonicalTitle(target)); + bestTarget = findBestCandidate(m_nodesByTargetRef, Utilities::asAsciiPrintable(target)); return bestTarget; } @@ -853,7 +853,7 @@ const PageNode *Tree::findPageNodeByTitle(const QString &title) const { PageNodeMultiMap::const_iterator it; if (title.contains(QChar(' '))) - it = m_pageNodesByTitle.constFind(Doc::canonicalTitle(title)); + it = m_pageNodesByTitle.constFind(Utilities::asAsciiPrintable(title)); else it = m_pageNodesByTitle.constFind(title); if (it != m_pageNodesByTitle.constEnd()) { @@ -890,9 +890,9 @@ QString Tree::refForAtom(const Atom *atom) { if (atom) { if (atom->type() == Atom::SectionLeft) - return Doc::canonicalTitle(Text::sectionHeading(atom).toString()); + return Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString()); if ((atom->type() == Atom::Target) || (atom->type() == Atom::Keyword)) - return Doc::canonicalTitle(atom->string()); + return Utilities::asAsciiPrintable(atom->string()); } return QString(); } diff --git a/src/qdoc/qdoc/utilities.cpp b/src/qdoc/qdoc/utilities.cpp index d4e1f8c07..2825804d6 100644 --- a/src/qdoc/qdoc/utilities.cpp +++ b/src/qdoc/qdoc/utilities.cpp @@ -81,22 +81,35 @@ QString comma(qsizetype wordPosition, qsizetype numberOfWords) } /*! - \internal - Replace non-alphanum characters in \a str with hyphens - and convert all characters to lowercase. Returns the - result of the conversion with leading, trailing, and - consecutive hyphens removed. + \brief Returns an ascii-printable representation of \a str. - The implementation is equivalent to: + Replace non-ascii-printable characters in \a str from a subset of such + characters. The subset includes alphanumeric (alnum) characters + ([a-zA-Z0-9]), space, punctuation characters, and common symbols. Non-alnum + characters in this subset are replaced by a single hyphen. Leading, + trailing, and consecutive hyphens are removed, such that the resulting + string does not start or end with a hyphen. All characters are converted to + lowercase. - \code + If any character in \a str is non-latin, or latin and not found in the + aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a str is appended + to the final string. + + Returns a string that is normalized for use where ascii-printable strings + are required, such as file names or fragment identifiers in URLs. + + The implementation is equivalent to: + + \code name.replace(QRegularExpression("[^A-Za-z0-9]+"), " "); name = name.simplified(); name.replace(QLatin1Char(' '), QLatin1Char('-')); name = name.toLower(); - \endcode + \endcode + + However, it has been measured to be approximately four times faster. */ -QString canonicalizeFileName(const QString &name) +QString asAsciiPrintable(const QString &str) { auto legal_ascii = [](const uint value) { const uint start_ascii_subset{ 32 }; @@ -108,12 +121,11 @@ QString canonicalizeFileName(const QString &name) QString result; bool begun = false; bool has_non_alnum_content{ false }; - const auto *data{name.constData()}; - for (qsizetype i = 0; i < name.size(); ++i) { - char16_t u{data[i].unicode()}; + + for (const auto &c : str) { + char16_t u = c.unicode(); if (!legal_ascii(u)) has_non_alnum_content = true; - if (u >= 'A' && u <= 'Z') u += 'a' - 'A'; if ((u >= 'a' && u <= 'z') || (u >= '0' && u <= '9')) { @@ -129,7 +141,7 @@ QString canonicalizeFileName(const QString &name) if (has_non_alnum_content) { auto title_hash = QString::fromLocal8Bit( - QCryptographicHash::hash(name.toUtf8(), QCryptographicHash::Md5).toHex()); + QCryptographicHash::hash(str.toUtf8(), QCryptographicHash::Md5).toHex()); title_hash.truncate(8); if (!result.isEmpty()) result.append(QLatin1Char('-')); diff --git a/src/qdoc/qdoc/utilities.h b/src/qdoc/qdoc/utilities.h index 998da48d3..0d485f650 100644 --- a/src/qdoc/qdoc/utilities.h +++ b/src/qdoc/qdoc/utilities.h @@ -19,7 +19,7 @@ bool debugging(); QString separator(qsizetype wordPosition, qsizetype numberOfWords); QString comma(qsizetype wordPosition, qsizetype numberOfWords); -QString canonicalizeFileName(const QString &name); +QString asAsciiPrintable(const QString &name); QStringList getInternalIncludePaths(const QString &compiler); } diff --git a/src/qdoc/qdoc/webxmlgenerator.cpp b/src/qdoc/qdoc/webxmlgenerator.cpp index c4f7b0340..674ae000a 100644 --- a/src/qdoc/qdoc/webxmlgenerator.cpp +++ b/src/qdoc/qdoc/webxmlgenerator.cpp @@ -11,6 +11,7 @@ #include "propertynode.h" #include "qdocdatabase.h" #include "quoter.h" +#include "utilities.h" #include <QtCore/qxmlstream.h> @@ -638,7 +639,8 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato case Atom::SectionLeft: writer.writeStartElement("section"); - writer.writeAttribute("id", Doc::canonicalTitle(Text::sectionHeading(atom).toString())); + writer.writeAttribute("id", + Utilities::asAsciiPrintable(Text::sectionHeading(atom).toString())); break; case Atom::SectionRight: @@ -743,7 +745,7 @@ const Atom *WebXMLGenerator::addAtomElements(QXmlStreamWriter &writer, const Ato case Atom::Target: writer.writeStartElement("target"); - writer.writeAttribute("name", Doc::canonicalTitle(atom->string())); + writer.writeAttribute("name", Utilities::asAsciiPrintable(atom->string())); writer.writeEndElement(); break; |