summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRadosław Koppel <r.koppel@k-el.com>2020-06-01 00:47:21 +0200
committerGitHub <noreply@github.com>2020-05-31 18:47:21 -0400
commite5dcee226cc2163bafc1cb06c01bd86826b490e4 (patch)
tree5f730a08cbb061542e26e9186456ff7b86e5a960
parent8eda9b05f6c6409191dbbdcb11d660755036eeab (diff)
downloadasciidoc-py3-e5dcee226cc2163bafc1cb06c01bd86826b490e4.tar.gz
Fixed character decomposition when ascii-ids is used (#110)
When ascii-ids variable is set asciidoc is expected to normalize all non-ascii characters when creating id from section name. Originally it decomposes unicode characters to base character and accent and then removes all the accents codes. This solution does not work in many cases: (like polish ł->l, german ß -> ss, or russian... well basically any russian character). This commit allows to use trans package when available to normalize chapter ids. Signed-off-by: Radosław Koppel <r.koppel@k-el.com> Co-authored-by: Matthew Peveler <matt.peveler@gmail.com>
-rw-r--r--CHANGELOG.txt1
-rwxr-xr-xasciidoc.py6
-rw-r--r--doc/asciidoc.txt13
-rw-r--r--tests/data/ascii-ids1-docbook.xml19
-rw-r--r--tests/data/ascii-ids1-docbook5.xml18
-rw-r--r--tests/data/ascii-ids1-html4.html22
-rw-r--r--tests/data/ascii-ids1-html5.html761
-rw-r--r--tests/data/ascii-ids1-xhtml11.html763
-rw-r--r--tests/data/ascii-ids1.txt9
-rw-r--r--tests/testasciidoc.conf6
10 files changed, 1613 insertions, 5 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 5e3df83..a980659 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -22,6 +22,7 @@ Version 9.0.0 (Unreleased)
- Add simplified Chinese translation (thanks @muirmok)
- vim-asciidoc: speed up the refresh process for big files (thanks @aerostitch)
- Allow specifying floatstyle attribute for figures, tables, equations, examples in docbook (thanks @psaris)
+- Use https://pypi.org/project/trans/[trans python module] (if available) to better handle character decomposition to ascii for ascii-ids (thanks @rkel)
.Bug fixes
- Fix index terms requiring two characters instead of just one (see https://github.com/asciidoc/asciidoc-py3/pull/2#issuecomment-392605876)
diff --git a/asciidoc.py b/asciidoc.py
index 85e62e7..f9e27f9 100755
--- a/asciidoc.py
+++ b/asciidoc.py
@@ -2265,7 +2265,11 @@ class Section:
base_id = re.sub(r'\W+', '_', title).strip('_').lower()
if 'ascii-ids' in document.attributes:
# Replace non-ASCII characters with ASCII equivalents.
- base_id = unicodedata.normalize('NFKD', base_id).encode('ascii', 'ignore').decode('ascii')
+ try:
+ from trans import trans
+ base_id = trans(base_id)
+ except ImportError:
+ base_id = unicodedata.normalize('NFKD', base_id).encode('ascii', 'ignore').decode('ascii')
# Prefix the ID name with idprefix attribute or underscore if not
# defined. Prefix ensures the ID does not clash with existing IDs.
idprefix = document.attributes.get('idprefix', '_')
diff --git a/doc/asciidoc.txt b/doc/asciidoc.txt
index 65e18aa..b0b2714 100644
--- a/doc/asciidoc.txt
+++ b/doc/asciidoc.txt
@@ -810,10 +810,15 @@ The IDs are generated by the following algorithm:
- A numbered suffix (`_2`, `_3` ...) is added if a same named
auto-generated section ID exists.
- If the `ascii-ids` attribute is defined then non-ASCII characters
- are replaced with ASCII equivalents. This attribute may be
- deprecated in future releases and *should be avoided*, it's sole
- purpose is to accommodate deficient downstream applications that
- cannot process non-ASCII ID attributes.
+ are replaced with ASCII equivalents. This attribute should be
+ *should be avoided* if possible as its sole purpose is to accommodate
+ deficient downstream applications that cannot process non-ASCII ID
+ attributes. If available, it will use the
+ https://pypi.org/project/trans/[trans python module], otherwise it
+ will fallback to using NFKD algorithm, which cannot handle all
+ unicode characters. For example, 'Wstęp żółtej łąki' will be
+ translated to 'Wstep zoltej laki' under trans and 'Wstep zotej aki'
+ under NFKD.
Example: the title 'Jim's House' would generate the ID `_jim_s_house`.
diff --git a/tests/data/ascii-ids1-docbook.xml b/tests/data/ascii-ids1-docbook.xml
new file mode 100644
index 0000000..0068002
--- /dev/null
+++ b/tests/data/ascii-ids1-docbook.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+
+<article lang="en">
+<articleinfo>
+ <date>2002-11-25</date>
+</articleinfo>
+<section id="_wstep_zoltej_laki">
+<title>Wstęp żółtej łąki</title>
+</section>
+<section id="_schon_grussen">
+<title>schön Grüßen</title>
+</section>
+<section id="_belye_motyl">
+<title>Белые Мотыль</title>
+</section>
+</article>
diff --git a/tests/data/ascii-ids1-docbook5.xml b/tests/data/ascii-ids1-docbook5.xml
new file mode 100644
index 0000000..969faaa
--- /dev/null
+++ b/tests/data/ascii-ids1-docbook5.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+
+<article xmlns="http://docbook.org/ns/docbook" xmlns:xl="http://www.w3.org/1999/xlink" version="5.0" xml:lang="en">
+<info>
+ <date>2002-11-25</date>
+</info>
+<section xml:id="_wstep_zoltej_laki">
+<title>Wstęp żółtej łąki</title>
+</section>
+<section xml:id="_schon_grussen">
+<title>schön Grüßen</title>
+</section>
+<section xml:id="_belye_motyl">
+<title>Белые Мотыль</title>
+</section>
+</article>
diff --git a/tests/data/ascii-ids1-html4.html b/tests/data/ascii-ids1-html4.html
new file mode 100644
index 0000000..d6a0103
--- /dev/null
+++ b/tests/data/ascii-ids1-html4.html
@@ -0,0 +1,22 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc test">
+<title></title>
+</head>
+<body>
+<hr>
+<h2><a name="_wstep_zoltej_laki"></a>Wstęp żółtej łąki</h2>
+<hr>
+<h2><a name="_schon_grussen"></a>schön Grüßen</h2>
+<hr>
+<h2><a name="_belye_motyl"></a>Белые Мотыль</h2>
+<p></p>
+<p></p>
+<hr><p><small>
+Last updated
+ 2002-11-25 00:37:42 UTC
+</small></p>
+</body>
+</html>
diff --git a/tests/data/ascii-ids1-html5.html b/tests/data/ascii-ids1-html5.html
new file mode 100644
index 0000000..10afcc5
--- /dev/null
+++ b/tests/data/ascii-ids1-html5.html
@@ -0,0 +1,761 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc test">
+<title></title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) {
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) {
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) {
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "")
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0)
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+ // Delete existing footnote entries in case we're reloading the footnodes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {};
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0)
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) {
+ var timerId;
+
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) {
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() {
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500);
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_wstep_zoltej_laki">Wstęp żółtej łąki</h2>
+<div class="sectionbody">
+</div>
+</div>
+<div class="sect1">
+<h2 id="_schon_grussen">schön Grüßen</h2>
+<div class="sectionbody">
+</div>
+</div>
+<div class="sect1">
+<h2 id="_belye_motyl">Белые Мотыль</h2>
+<div class="sectionbody">
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2002-11-25 00:37:42 UTC
+</div>
+</div>
+</body>
+</html>
diff --git a/tests/data/ascii-ids1-xhtml11.html b/tests/data/ascii-ids1-xhtml11.html
new file mode 100644
index 0000000..89cfd20
--- /dev/null
+++ b/tests/data/ascii-ids1-xhtml11.html
@@ -0,0 +1,763 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+<head>
+<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
+<meta name="generator" content="AsciiDoc test" />
+<title></title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) {
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) {
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) {
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "")
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0)
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+ // Delete existing footnote entries in case we're reloading the footnodes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {};
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0)
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) {
+ var timerId;
+
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) {
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() {
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500);
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install();
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_wstep_zoltej_laki">Wstęp żółtej łąki</h2>
+<div class="sectionbody">
+</div>
+</div>
+<div class="sect1">
+<h2 id="_schon_grussen">schön Grüßen</h2>
+<div class="sectionbody">
+</div>
+</div>
+<div class="sect1">
+<h2 id="_belye_motyl">Белые Мотыль</h2>
+<div class="sectionbody">
+</div>
+</div>
+</div>
+<div id="footnotes"><hr /></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2002-11-25 00:37:42 UTC
+</div>
+</div>
+</body>
+</html>
diff --git a/tests/data/ascii-ids1.txt b/tests/data/ascii-ids1.txt
new file mode 100644
index 0000000..67958c4
--- /dev/null
+++ b/tests/data/ascii-ids1.txt
@@ -0,0 +1,9 @@
+// Test of generated chapter ID normalized to ASCII
+:ascii-ids: 1
+
+== Wstęp żółtej łąki
+
+== schön Grüßen
+
+== Белые Мотыль
+
diff --git a/tests/testasciidoc.conf b/tests/testasciidoc.conf
index 3a646fd..a2adc91 100644
--- a/tests/testasciidoc.conf
+++ b/tests/testasciidoc.conf
@@ -885,3 +885,9 @@ data/lang-en-test.txt
{'footer-style':'revdate'}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+!Generated chapter IDs normalized to ASCII test
+
+% source
+data/ascii-ids1.txt
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%