summaryrefslogtreecommitdiff
path: root/libvaladoc/highlighter
diff options
context:
space:
mode:
authorRico Tzschichholz <ricotz@ubuntu.com>2017-06-27 12:21:44 +0200
committerRico Tzschichholz <ricotz@ubuntu.com>2017-06-27 12:21:44 +0200
commit93d9fe647be1f2effc0bfeeec903b5e030182f6c (patch)
treed28aab5aa5c0e4a149e40b22246246dd2b2d5957 /libvaladoc/highlighter
parentb4f3f73a01b86fe7f9edde7a3991b493346eea23 (diff)
downloadvala-93d9fe647be1f2effc0bfeeec903b5e030182f6c.tar.gz
Prepare valadoc source-tree for merge
Diffstat (limited to 'libvaladoc/highlighter')
-rw-r--r--libvaladoc/highlighter/codescanner.vala572
-rw-r--r--libvaladoc/highlighter/codetoken.vala58
-rw-r--r--libvaladoc/highlighter/highlighter.vala366
-rw-r--r--libvaladoc/highlighter/scanner.vala32
-rw-r--r--libvaladoc/highlighter/xmlscanner.vala374
5 files changed, 1402 insertions, 0 deletions
diff --git a/libvaladoc/highlighter/codescanner.vala b/libvaladoc/highlighter/codescanner.vala
new file mode 100644
index 000000000..8b15ee755
--- /dev/null
+++ b/libvaladoc/highlighter/codescanner.vala
@@ -0,0 +1,572 @@
+/* codescanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * A cheap scanner used to highlight C and Vala source code.
+ */
+public class Valadoc.Highlighter.CodeScanner : Object, Scanner {
+ // Maps a word to the token type it is highlighted with (looked up in queue_keyword ()).
+ private Gee.HashMap<string, CodeTokenType?> keywords;
+ // Recognize @"..." strings and highlight $name / $(...) inside them.
+ private bool enable_string_templates;
+ // Recognize """...""" strings. NOTE: "enabel" is a misspelling of "enable"; the name is kept as-is.
+ private bool enabel_verbatim_string;
+ // Enables the additional define-style preprocessor prefixes checked in next ().
+ private bool enable_preprocessor_define;
+ // Recognize "#include" lines (see queue_c_include ()).
+ private bool enable_preprocessor_include;
+ // Treat '@' as a character that may start an identifier (see isidstartchar ()).
+ private bool enable_keyword_escape;
+
+
+ // Tokens that were already scanned but not yet handed out by next ().
+ private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
+ // Beginning of the scanned text; unowned, i.e. not kept alive by the scanner.
+ private unowned string content;
+ // Current scan position inside content.
+ private unowned string pos;
+
+
+ /**
+  * Creates a scanner that starts at the beginning of //content//.
+  *
+  * The string is stored as an unowned reference and is not copied here.
+  *
+  * @param content source text to scan
+  * @param enable_string_templates recognize template strings (@"...")
+  * @param enabel_verbatim_string recognize verbatim strings ("""...""")
+  * @param enable_preprocessor_define recognize define-style preprocessor lines
+  * @param enable_preprocessor_include recognize #include lines
+  * @param enable_keyword_escape allow '@' as identifier start character
+  * @param keywords words to highlight and the token type used for each of them
+  */
+ public CodeScanner (string content, bool enable_string_templates, bool enabel_verbatim_string,
+     bool enable_preprocessor_define, bool enable_preprocessor_include, bool enable_keyword_escape,
+     Gee.HashMap<string, CodeTokenType?> keywords)
+ {
+     // Language configuration
+     this.keywords = keywords;
+     this.enable_keyword_escape = enable_keyword_escape;
+     this.enable_preprocessor_include = enable_preprocessor_include;
+     this.enable_preprocessor_define = enable_preprocessor_define;
+     this.enabel_verbatim_string = enabel_verbatim_string;
+     this.enable_string_templates = enable_string_templates;
+
+     // Scan state: start at the first character
+     this.pos = content;
+     this.content = content;
+ }
+
+ /**
+  * Returns the next token of the scanned text.
+  *
+  * Tokens queued by an earlier call are handed out first. Otherwise the text
+  * is scanned up to the next highlightable construct (preprocessor line,
+  * character or string literal, number, comment or keyword). Text in front of
+  * that construct is returned as a PLAIN token while the construct itself
+  * stays queued for the following calls. At the end of the text an EOF token
+  * is produced.
+  *
+  * @return the next token
+  */
+ public CodeToken next () {
+     if (!token_queue.is_empty ()) {
+         return token_queue.pop_head ();
+     }
+
+
+     unowned string start;
+
+     for (start = pos; pos[0] != '\0'; pos = pos.next_char ()) {
+         // Preprocessor directives are only looked for at the beginning of a line.
+         if (((char*) pos) == ((char*) content) || pos[0] == '\n') {
+             unowned string line_start = pos;
+
+             while (pos[0] == ' ' || pos[0] == '\t' || pos[0] == '\n') {
+                 pos = pos.offset (1);
+             }
+
+             if (pos[0] == '\0') {
+                 break;
+             } else if (enable_preprocessor_include && pos.has_prefix ("#include")) {
+                 unowned string end = pos;
+                 if (queue_c_include ()) {
+                     return dispatch (start, end);
+                 } else {
+                     // Not a well-formed include: go back and scan the line as ordinary text.
+                     pos = line_start;
+                     continue;
+                 }
+             } else if (pos.has_prefix ("#if") || pos.has_prefix ("#else") || pos.has_prefix ("#elif") || pos.has_prefix ("#endif")
+                 || (enable_preprocessor_define && (pos.has_prefix ("#define") || pos.has_prefix ("#ifdef")))) {
+                 // "#define" (formerly misspelled "#defined") so that real define lines match.
+
+                 unowned string end = pos;
+                 queue_until ('\n', CodeTokenType.PREPROCESSOR);
+                 return dispatch (start, end);
+             }
+         }
+
+         // pos[-1] must not be read at the very beginning of the buffer.
+         bool at_content_start = ((char*) pos) == ((char*) content);
+
+         if (pos[0] == '\'') {
+             unowned string end = pos;
+             queue_string_literal ("\'");
+             return dispatch (start, end);
+         }
+
+         if (pos[0] == '"' || (enable_string_templates && pos[0] == '@' && pos[1] == '"')) {
+             unowned string end = pos;
+             if (enabel_verbatim_string && (pos.has_prefix ("\"\"\"") || (enable_string_templates && pos.has_prefix ("@\"\"\"")))) {
+                 queue_string_literal ("\"\"\"");
+             } else {
+                 queue_string_literal ("\"");
+             }
+             return dispatch (start, end);
+         }
+
+         // A digit only starts a number when it is not part of an identifier
+         // such as "x1"; otherwise the trailing digits would be highlighted.
+         if (pos[0] >= '0' && pos[0] <= '9' && (at_content_start || !isidchar (pos[-1]))) {
+             unowned string end = pos;
+             queue_numeric_literal ();
+             return dispatch (start, end);
+         }
+
+         if (pos.has_prefix ("/*")) {
+             unowned string end = pos;
+             queue_multiline_comment ();
+             return dispatch (start, end);
+         }
+
+         if (pos.has_prefix ("//")) {
+             unowned string end = pos;
+             queue_until ('\n', CodeTokenType.COMMENT);
+             return dispatch (start, end);
+         }
+
+         // Keywords are only looked up at the first character of a word.
+         if ((at_content_start || !isidstartchar (pos[-1])) && isidstartchar (pos[0])) {
+             unowned string end = pos;
+             if (queue_keyword ()) {
+                 return dispatch (start, end);
+             } else {
+                 continue;
+             }
+         }
+     }
+
+     token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
+     return dispatch (start, pos);
+ }
+
+ /**
+  * Scans an "#include" line starting at the current position.
+  *
+  * On success the directive is queued as PREPROCESSOR / LITERAL / PREPROCESSOR
+  * tokens and the position is left at the end of the line. On failure the
+  * queue is cleared and the position is reset to the start of the directive.
+  *
+  * @return true if the line is a well-formed include
+  */
+ private bool queue_c_include () {
+     unowned string directive_begin = pos;
+     unowned string segment_begin = pos;
+
+     // Step over "#include" (8 bytes) and the blanks behind it.
+     pos = pos.offset (8);
+     while (pos[0] == ' ' || pos[0] == '\t') {
+         pos = pos.offset (1);
+     }
+
+     // Determine the delimiter closing the file name, if there is a file name.
+     char? end_char = null;
+     if (pos[0] == '<') {
+         end_char = '>';
+     } else if (pos[0] == '"') {
+         end_char = '"';
+     }
+
+     if (end_char != null) {
+         queue_token (segment_begin, pos, CodeTokenType.PREPROCESSOR);
+
+         unowned string name_begin = pos;
+         pos = pos.offset (1);
+         while (pos[0] != end_char && pos[0] != '\n' && pos[0] != '\0') {
+             pos = pos.offset (1);
+         }
+
+         if (pos[0] != end_char) {
+             // The file name is not terminated on this line.
+             pos = directive_begin;
+             token_queue.clear ();
+             return false;
+         }
+
+         pos = pos.offset (1);
+         queue_token (name_begin, pos, CodeTokenType.LITERAL);
+         segment_begin = pos;
+     }
+
+     while (pos[0] == ' ' || pos[0] == '\t') {
+         pos = pos.offset (1);
+     }
+
+     if (pos[0] != '\n' && pos[0] != '\0') {
+         // Something other than blanks follows the directive.
+         pos = directive_begin;
+         token_queue.clear ();
+         return false;
+     }
+
+     queue_token (segment_begin, pos, CodeTokenType.PREPROCESSOR);
+     return true;
+ }
+
+ /**
+  * Reads the word at the current position and queues it if it is a keyword.
+  *
+  * A leading '@' is read as part of the word. If the word is not contained in
+  * the keyword table the position is restored and nothing is queued.
+  *
+  * @return true if a token was queued
+  */
+ private bool queue_keyword () {
+     unowned string word_begin = pos;
+
+     if (pos[0] == '@') {
+         pos = pos.offset (1);
+     }
+     while (isidchar (pos[0])) {
+         pos = pos.offset (1);
+     }
+
+     string word = word_begin.substring (0, word_begin.pointer_to_offset (pos));
+     CodeTokenType? token_type = keywords.get (word);
+     if (token_type != null) {
+         token_queue.push_tail (new CodeToken (token_type, word));
+         return true;
+     }
+
+     pos = word_begin;
+     return false;
+ }
+
+ /**
+  * Queues the comment starting at the current position as a COMMENT token.
+  *
+  * The position is expected to be at the two-character comment opener. An
+  * unterminated comment extends to the end of the text.
+  */
+ private void queue_multiline_comment () {
+     unowned string comment_begin = pos;
+
+     // Step over the opener, then search for the closing "*/".
+     pos = pos.offset (2);
+     while (pos[0] != '\0' && !(pos[0] == '*' && pos[1] == '/')) {
+         pos = pos.offset (1);
+     }
+
+     // Include the closing "*/" when it was found.
+     if (pos[0] != '\0') {
+         pos = pos.offset (2);
+     }
+
+     queue_token (comment_begin, pos, CodeTokenType.COMMENT);
+ }
+
+ /**
+  * Queues everything from the current position up to //end_char//.
+  *
+  * The terminating character is part of the token unless it is a line break;
+  * if it is missing the token extends to the end of the text.
+  *
+  * @param end_char character that terminates the token
+  * @param token_type type of the queued token
+  */
+ private void queue_until (char end_char, CodeTokenType token_type) {
+     unowned string token_begin = pos;
+
+     // The first character always belongs to the token.
+     do {
+         pos = pos.offset (1);
+     } while (pos[0] != end_char && pos[0] != '\0');
+
+     if (pos[0] != '\n' && pos[0] != '\0') {
+         pos = pos.offset (1);
+     }
+
+     queue_token (token_begin, pos, token_type);
+ }
+
+ /**
+  * Queues the string or character literal at the current position.
+  *
+  * The literal is split into LITERAL tokens interleaved with ESCAPE tokens for
+  * printf format specifications, backslash escapes and, when the literal
+  * starts with '@', template expressions. An unterminated literal extends to
+  * the end of the text.
+  *
+  * @param end_chars delimiter that opens and closes the literal
+  */
+ private void queue_string_literal (string end_chars) {
+     unowned string segment_begin = pos;
+     bool is_template = (pos[0] == '@');
+
+     // Step over the opening delimiter (and the '@' of a template).
+     pos = pos.offset (is_template ? end_chars.length + 1 : end_chars.length);
+
+     while (pos[0] != '\0' && !pos.has_prefix (end_chars)) {
+         long skip = 0;
+         bool is_escape;
+
+         switch (pos[0]) {
+         case '%':
+             is_escape = has_printf_format_prefix (out skip);
+             break;
+         case '\\':
+             is_escape = has_escape_prefix (out skip);
+             break;
+         case '$':
+             is_escape = is_template && has_template_literal_prefix (out skip);
+             break;
+         default:
+             is_escape = false;
+             break;
+         }
+
+         if (!is_escape) {
+             pos = pos.offset (1);
+             continue;
+         }
+
+         // Flush the text collected so far, then the escape itself.
+         queue_token (segment_begin, pos, CodeTokenType.LITERAL);
+
+         unowned string escape_begin = pos;
+         pos = pos.offset (skip);
+         queue_token (escape_begin, pos, CodeTokenType.ESCAPE);
+         segment_begin = pos;
+     }
+
+     // Include the closing delimiter when it was found.
+     if (pos[0] != '\0') {
+         pos = pos.offset (end_chars.length);
+     }
+
+     queue_token (segment_begin, pos, CodeTokenType.LITERAL);
+ }
+
+ /**
+  * Checks for a template expression ("$name" or "$(...)") at the current
+  * position, which is expected to be at the '$'.
+  *
+  * @param skip length of the expression including the '$', 0 if there is none
+  * @return true if a template expression was found
+  */
+ private bool has_template_literal_prefix (out long skip) {
+     if (isidchar (pos[1])) {
+         // "$name": runs to the end of the identifier
+         for (skip = 1; isidchar (pos[skip]); skip++) {
+         }
+         return true;
+     }
+
+     if (pos[1] != '(') {
+         skip = 0;
+         return false;
+     }
+
+     // "$(...)": search the matching closing parenthesis
+     int depth = 1;
+     for (skip = 2; depth > 0; skip++) {
+         char c = pos[skip];
+         if (c == '\0') {
+             skip = 0;
+             return false;
+         } else if (c == '(') {
+             depth++;
+         } else if (c == ')') {
+             depth--;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Checks for a backslash escape sequence at the current position, which is
+  * expected to be at the backslash.
+  *
+  * Recognized are the single-character escapes, "\x" followed by hexadecimal
+  * digits, and a backslash followed by one to three digits.
+  *
+  * @param skip length of the sequence including the backslash, 0 if there is none
+  * @return true if an escape sequence was found
+  */
+ private bool has_escape_prefix (out long skip) {
+     switch (pos[1]) {
+     case 'a':
+     case 'b':
+     case 'f':
+     case 'n':
+     case 'r':
+     case 't':
+     case 'v':
+     case '\\':
+     case '\'':
+     case '\"':
+     case '?':
+         skip = 2;
+         return true;
+
+     case 'x':
+         if (pos[2].isxdigit ()) {
+             // Advance exactly one position per hexadecimal digit so that the
+             // sequence ends at the first non-digit and the terminating NUL
+             // is never stepped over.
+             skip = 2;
+             while (pos[skip].isxdigit ()) {
+                 skip++;
+             }
+
+             return true;
+         }
+
+         skip = 0;
+         return false;
+
+     default:
+         if (pos[1].isdigit ()) {
+             skip = 2;
+
+             if (pos[2].isdigit ()) {
+                 skip++;
+
+                 if (pos[3].isdigit ()) {
+                     skip++;
+                 }
+             }
+
+             return true;
+         }
+
+         skip = 0;
+         return false;
+     }
+ }
+
+ /**
+  * Checks for a printf format specification at the current position, which is
+  * expected to be at the '%'.
+  *
+  * @param skip length of the specification including the '%', 0 if there is none
+  * @return true if a format specification (or "%%") was found
+  */
+ private bool has_printf_format_prefix (out long skip) {
+     // %[flag][min width][precision][length modifier][conversion specifier]
+     // Work on a local cursor; the scanner position itself is not moved here.
+     unowned string pos = this.pos;
+     unowned string start = pos;
+
+     // '%'
+     pos = pos.offset (1);
+
+     if (pos[0] == '%') {
+         skip = 2;
+         return true;
+     }
+
+
+     // flags:
+     // Compared one by one: a string search reports '#' at index 0, which the
+     // former "> 0" test rejected, and would also match the terminating NUL.
+     while (pos[0] == '#' || pos[0] == '0' || pos[0] == '+' || pos[0] == '-' || pos[0] == ' ') {
+         pos = pos.offset (1);
+     }
+
+     // min width:
+     while (pos[0].isdigit ()) {
+         pos = pos.offset (1);
+     }
+
+     // precision
+     if (pos[0] == '.' && pos[1].isdigit ()) {
+         pos = pos.offset (2);
+         while (pos[0].isdigit ()) {
+             pos = pos.offset (1);
+         }
+     }
+
+     // length:
+     switch (pos[0]) {
+     case 'h':
+         pos = pos.offset (1);
+         if (pos[0] == 'h') {
+             pos = pos.offset (1);
+         }
+         break;
+
+     case 'l':
+         pos = pos.offset (1);
+         if (pos[0] == 'l') {
+             pos = pos.offset (1);
+         }
+         break;
+
+     case 'j':
+     case 'z':
+     case 't':
+     case 'L':
+         pos = pos.offset (1);
+         break;
+     }
+
+     // conversion specifier:
+     switch (pos[0]) {
+     case 'd':
+     case 'i':
+     case 'u':
+     case 'o':
+     case 'x':
+     case 'X':
+     case 'f':
+     case 'F':
+     case 'e':
+     case 'E':
+     case 'g':
+     case 'G':
+     case 'a':
+     case 'A':
+     case 'c':
+     case 's':
+     case 'p':
+     case 'n':
+         pos = pos.offset (1);
+         break;
+
+     default:
+         skip = 0;
+         return false;
+     }
+
+     skip = start.pointer_to_offset (pos);
+     return true;
+ }
+
+ // Classification used by queue_numeric_literal () while scanning a number.
+ private enum NumericType {
+ // digits without fraction, exponent or real suffix
+ INTEGER,
+ // has a fraction, an exponent or an f/F/d/D suffix
+ REAL,
+ // followed by further alphanumeric characters; queued as PLAIN text
+ NONE
+ }
+
+ /**
+  * Queues the number starting at the current position (based on libvala).
+  *
+  * The number is queued as a LITERAL token. If it is directly followed by
+  * another alphanumeric character, the text read so far is queued as PLAIN.
+  */
+ private void queue_numeric_literal () {
+     unowned string literal_begin = pos;
+     NumericType kind = NumericType.INTEGER;
+
+     // integer part: either "0x" plus hexadecimal digits or decimal digits
+     bool is_hex = pos[0] == '0' && pos[1] == 'x' && pos[2].isxdigit ();
+     if (is_hex) {
+         pos = pos.offset (2);
+         while (pos[0].isxdigit ()) {
+             pos = pos.offset (1);
+         }
+     } else {
+         while (pos[0].isdigit ()) {
+             pos = pos.offset (1);
+         }
+     }
+
+     // fractional part: '.' followed by at least one digit
+     if (pos[0] == '.' && pos[1].isdigit ()) {
+         kind = NumericType.REAL;
+         do {
+             pos = pos.offset (1);
+         } while (pos[0].isdigit ());
+     }
+
+     // exponent part: 'e' or 'E', optional sign, digits
+     if (pos[0] == 'e' || pos[0] == 'E') {
+         kind = NumericType.REAL;
+         pos = pos.offset (1);
+         if (pos[0] == '+' || pos[0] == '-') {
+             pos = pos.offset (1);
+         }
+         while (pos[0].isdigit ()) {
+             pos = pos.offset (1);
+         }
+     }
+
+     // type suffix
+     char suffix = pos[0];
+     if (suffix == 'l' || suffix == 'L') {
+         // l, ll (integers only)
+         if (kind == NumericType.INTEGER) {
+             pos = pos.offset (1);
+             if (pos[0] == 'l' || pos[0] == 'L') {
+                 pos = pos.offset (1);
+             }
+         }
+     } else if (suffix == 'u' || suffix == 'U') {
+         // u, ul, ull (integers only)
+         if (kind == NumericType.INTEGER) {
+             pos = pos.offset (1);
+             if (pos[0] == 'l' || pos[0] == 'L') {
+                 pos = pos.offset (1);
+                 if (pos[0] == 'l' || pos[0] == 'L') {
+                     pos = pos.offset (1);
+                 }
+             }
+         }
+     } else if (suffix == 'f' || suffix == 'F' || suffix == 'd' || suffix == 'D') {
+         kind = NumericType.REAL;
+         pos = pos.offset (1);
+     }
+
+     // A number running into further letters or digits is not a valid literal.
+     if (pos[0].isalnum ()) {
+         kind = NumericType.NONE;
+     }
+
+     CodeTokenType token_type = (kind == NumericType.NONE)
+         ? CodeTokenType.PLAIN
+         : CodeTokenType.LITERAL;
+     queue_token (literal_begin, pos, token_type);
+ }
+
+ /**
+  * Returns the token to hand out after something was queued.
+  *
+  * If unhighlighted text lies between //start// and //end// it is returned as
+  * a PLAIN token and the queued tokens stay in the queue; otherwise the first
+  * queued token is returned. The queue must not be empty.
+  *
+  * @param start beginning of the text that was not handed out yet
+  * @param end beginning of the first queued token
+  * @return the next token
+  */
+ private CodeToken dispatch (string start, string end) {
+     assert (token_queue.is_empty () == false);
+
+     if (((char*) start) == ((char*) end)) {
+         return token_queue.pop_head ();
+     }
+
+     // substring () expects a length in bytes, so use the pointer distance.
+     // pointer_to_offset () counts UTF-8 characters and would cut off text
+     // that contains non-ASCII characters.
+     long length = (long) (((char*) end) - ((char*) start));
+     string content = start.substring (0, length);
+     return new CodeToken (CodeTokenType.PLAIN, content);
+ }
+
+ /**
+  * Appends the text between //start// and //end// to the token queue.
+  *
+  * @param start beginning of the token text
+  * @param end position right behind the token text
+  * @param token_type type of the queued token
+  */
+ private void queue_token (string start, string end, CodeTokenType token_type) {
+     // substring () expects a length in bytes, so use the pointer distance.
+     // pointer_to_offset () counts UTF-8 characters and would cut off tokens
+     // (e.g. comments or strings) that contain non-ASCII characters.
+     long length = (long) (((char*) end) - ((char*) start));
+     string content = start.substring (0, length);
+     token_queue.push_tail (new CodeToken (token_type, content));
+ }
+
+ // Returns true if c may appear inside an identifier (letter, digit or '_').
+ private inline bool isidchar (char c) {
+     if (c == '_') {
+         return true;
+     }
+     return c.isalnum ();
+ }
+
+ // Returns true if c is accepted as the first character of a word: a letter,
+ // a digit, '_' or, when keyword escaping is enabled, '@'.
+ private inline bool isidstartchar (char c) {
+     if (c == '@') {
+         return enable_keyword_escape;
+     }
+     return c == '_' || c.isalnum ();
+ }
+}
+
diff --git a/libvaladoc/highlighter/codetoken.vala b/libvaladoc/highlighter/codetoken.vala
new file mode 100644
index 000000000..1a02195ea
--- /dev/null
+++ b/libvaladoc/highlighter/codetoken.vala
@@ -0,0 +1,58 @@
+/* codetoken.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+
+/**
+ * A piece of scanned text together with the type it is highlighted with.
+ */
+public class Valadoc.Highlighter.CodeToken {
+    /**
+     * The kind of this token.
+     */
+    public CodeTokenType token_type { get; private set; }
+
+    /**
+     * The text covered by this token.
+     */
+    public string content { get; private set; }
+
+    /**
+     * Creates a token of the given type for //content//.
+     */
+    public CodeToken (CodeTokenType type, string content) {
+        this.content = content;
+        this.token_type = type;
+    }
+}
+
+
+/**
+ * Token kinds produced by the highlighter scanners.
+ */
+public enum Valadoc.Highlighter.CodeTokenType {
+    // XML
+    XML_ESCAPE,
+    XML_ELEMENT,
+    XML_ATTRIBUTE,
+    XML_ATTRIBUTE_VALUE,
+    XML_COMMENT,
+    XML_CDATA,
+
+    // C and Vala
+    PREPROCESSOR,
+    COMMENT,
+    KEYWORD,
+    LITERAL,
+    ESCAPE,
+    PLAIN,
+    TYPE,
+    EOF;
+
+    /**
+     * Returns the nick of this value as registered in the enum class.
+     */
+    public unowned string to_string () {
+        EnumClass enum_class = (EnumClass) typeof (CodeTokenType).class_ref ();
+        unowned EnumValue? enum_value = enum_class.get_value (this);
+        return_val_if_fail (enum_value != null, null);
+        return enum_value.value_nick;
+    }
+}
diff --git a/libvaladoc/highlighter/highlighter.vala b/libvaladoc/highlighter/highlighter.vala
new file mode 100644
index 000000000..3c2578033
--- /dev/null
+++ b/libvaladoc/highlighter/highlighter.vala
@@ -0,0 +1,366 @@
+/* highlighter.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+using Valadoc.Content;
+
+
+public class Valadoc.Highlighter.Highlighter : Object {
+ // Keyword tables, created on first use by highlight_vala () and highlight_c ().
+ private Gee.HashMap<string, CodeTokenType?> vala_keywords;
+ private Gee.HashMap<string, CodeTokenType?> c_keywords;
+
+
+ /**
+  * Highlights Vala source code.
+  *
+  * The keyword table is built on the first call and reused afterwards.
+  *
+  * @param source_code the code to highlight
+  * @return a monospaced run containing the highlighted code
+  */
+ public Run highlight_vala (string source_code) {
+     if (vala_keywords == null) {
+         string[] type_names = {
+             "string", "bool", "void",
+             "double", "float",
+             "char", "uchar", "unichar",
+             "short", "ushort",
+             "long", "ulong",
+             "size_t", "ssize_t",
+             "int", "int8", "int16", "int32", "int64",
+             "uint", "uint8", "uint16", "uint32", "uint64"
+         };
+
+         string[] literal_names = {
+             "null", "true", "false"
+         };
+
+         string[] keyword_names = {
+             "return", "lock", "var", "yield", "global", "construct",
+             "value", "get", "set",
+             "owned", "unowned", "const", "weak", "dynamic",
+             "out", "ref",
+             "break", "continue",
+             "if", "else", "switch", "case", "default",
+             "do", "while", "for", "foreach", "in",
+             "try", "catch", "finally", "throw",
+             "class", "interface", "struct", "enum", "delegate", "errordomain",
+             "abstract", "virtual", "override", "signal", "extern", "static", "async", "inline", "new",
+             "public", "private", "protected", "internal",
+             "throws", "requires", "ensures", "assert",
+             "namespace", "using",
+             "as", "is", "delete", "sizeof", "typeof",
+             "this", "base"
+         };
+
+         vala_keywords = new Gee.HashMap<string, CodeTokenType?> ();
+
+         foreach (unowned string name in type_names) {
+             vala_keywords.set (name, CodeTokenType.TYPE);
+         }
+         foreach (unowned string name in literal_names) {
+             vala_keywords.set (name, CodeTokenType.LITERAL);
+         }
+         foreach (unowned string name in keyword_names) {
+             vala_keywords.set (name, CodeTokenType.KEYWORD);
+         }
+     }
+
+     // Vala: template and verbatim strings and @-escaped keywords are
+     // enabled; the define and include preprocessor options are not.
+     bool enable_string_templates = true;
+     bool enabel_verbatim_string = true;
+     bool enable_keyword_escape = true;
+     bool enable_preprocessor_define = false;
+     bool enable_preprocessor_include = false;
+
+     CodeScanner scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string,
+         enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape,
+         vala_keywords);
+
+     return highlight_code (scanner);
+ }
+
+ /**
+  * Highlights C source code.
+  *
+  * The keyword table is built on the first call and reused afterwards.
+  *
+  * @param source_code the code to highlight
+  * @return a monospaced run containing the highlighted code
+  */
+ public Run highlight_c (string source_code) {
+     if (c_keywords == null) {
+         string[] type_names = {
+             "auto", "char", "const", "double", "extern", "int", "float", "long",
+             "register", "short", "signed", "static", "unsigned", "void", "volatile",
+
+             "gboolean", "gpointer", "gconstpointer",
+             "gchar", "guchar", "gint", "guint", "gshort", "gushort", "glong", "gulong",
+             "gint8", "guint8", "gint16", "guint16", "gint32", "guint32", "gint64", "guint64",
+             "gfloat", "gdouble", "gsize", "gssize", "goffset", "gintptr", "guintptr"
+         };
+
+         string[] literal_names = {
+             "NULL", "TRUE", "FALSE"
+         };
+
+         string[] keyword_names = {
+             "break", "case", "continue", "default", "do", "else", "enum", "for", "goto",
+             "if", "return", "sizeof", "struct", "switch", "typedef", "union", "while", "assert"
+         };
+
+         c_keywords = new Gee.HashMap<string, CodeTokenType?> ();
+
+         foreach (unowned string name in type_names) {
+             c_keywords.set (name, CodeTokenType.TYPE);
+         }
+         foreach (unowned string name in literal_names) {
+             c_keywords.set (name, CodeTokenType.LITERAL);
+         }
+         foreach (unowned string name in keyword_names) {
+             c_keywords.set (name, CodeTokenType.KEYWORD);
+         }
+     }
+
+     // C: the define and include preprocessor options are enabled; template
+     // strings, verbatim strings and @-escaped keywords are not.
+     bool enable_preprocessor_define = true;
+     bool enable_preprocessor_include = true;
+     bool enable_string_templates = false;
+     bool enabel_verbatim_string = false;
+     bool enable_keyword_escape = false;
+
+     CodeScanner scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string,
+         enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape,
+         c_keywords);
+
+     return highlight_code (scanner);
+ }
+
+ /**
+  * Highlights XML source code.
+  *
+  * @param source_code the markup to highlight
+  * @return a monospaced run containing the highlighted markup
+  */
+ public Run highlight_xml (string source_code) {
+     return highlight_code (new XmlScanner (source_code));
+ }
+
+ /**
+  * Turns the tokens delivered by //scanner// into a monospaced run.
+  *
+  * Every token with a dedicated style is wrapped into a run of that style;
+  * all other tokens are added as plain text. Scanning stops at the EOF token.
+  *
+  * @param scanner the token source
+  * @return the run containing the highlighted code
+  */
+ private Run highlight_code (Scanner scanner) {
+     Run code = new Run (Run.Style.MONOSPACED);
+
+     CodeToken token = scanner.next ();
+     while (token.token_type != CodeTokenType.EOF) {
+         // Look up the style belonging to the token type.
+         bool has_style = true;
+         Run.Style style;
+
+         switch (token.token_type) {
+         case CodeTokenType.PREPROCESSOR:
+             style = Run.Style.LANG_PREPROCESSOR;
+             break;
+
+         case CodeTokenType.COMMENT:
+             style = Run.Style.LANG_COMMENT;
+             break;
+
+         case CodeTokenType.KEYWORD:
+             style = Run.Style.LANG_KEYWORD;
+             break;
+
+         case CodeTokenType.LITERAL:
+             style = Run.Style.LANG_LITERAL;
+             break;
+
+         case CodeTokenType.TYPE:
+             style = Run.Style.LANG_BASIC_TYPE;
+             break;
+
+         case CodeTokenType.ESCAPE:
+             style = Run.Style.LANG_ESCAPE;
+             break;
+
+         case CodeTokenType.XML_ESCAPE:
+             style = Run.Style.XML_ESCAPE;
+             break;
+
+         case CodeTokenType.XML_ELEMENT:
+             style = Run.Style.XML_ELEMENT;
+             break;
+
+         case CodeTokenType.XML_ATTRIBUTE:
+             style = Run.Style.XML_ATTRIBUTE;
+             break;
+
+         case CodeTokenType.XML_ATTRIBUTE_VALUE:
+             style = Run.Style.XML_ATTRIBUTE_VALUE;
+             break;
+
+         case CodeTokenType.XML_COMMENT:
+             style = Run.Style.XML_COMMENT;
+             break;
+
+         case CodeTokenType.XML_CDATA:
+             style = Run.Style.XML_CDATA;
+             break;
+
+         default:
+             // Placeholder only; never used because has_style is false.
+             style = Run.Style.MONOSPACED;
+             has_style = false;
+             break;
+         }
+
+         if (has_style) {
+             Run run = new Run (style);
+             run.content.add (new Text (token.content));
+             code.content.add (run);
+         } else {
+             code.content.add (new Text (token.content));
+         }
+
+         token = scanner.next ();
+     }
+
+     return code;
+ }
+}
+
+
diff --git a/libvaladoc/highlighter/scanner.vala b/libvaladoc/highlighter/scanner.vala
new file mode 100644
index 000000000..20eedcfb2
--- /dev/null
+++ b/libvaladoc/highlighter/scanner.vala
@@ -0,0 +1,32 @@
+/* scanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * Scanner interface used to highlight source code.
+ */
+public interface Valadoc.Highlighter.Scanner : Object {
+
+ /**
+  * Returns the next token. The highlighter calls this repeatedly until a
+  * token of type {@link CodeTokenType.EOF} is returned.
+  */
+ public abstract CodeToken next ();
+}
diff --git a/libvaladoc/highlighter/xmlscanner.vala b/libvaladoc/highlighter/xmlscanner.vala
new file mode 100644
index 000000000..38b87c5ee
--- /dev/null
+++ b/libvaladoc/highlighter/xmlscanner.vala
@@ -0,0 +1,374 @@
+/* xmlscanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * A cheap scanner used to highlight XML.
+ *
+ * State kept by the scanner:
+ *
+ * token_queue: tokens that were already recognised but not yet handed out;
+ * next () drains this queue before it scans any further.
+ *
+ * content: the input as passed to the constructor.
+ *
+ * pos: the current scan position inside the input.
+ *
+ * Both content and pos are unowned references into the string given to the
+ * constructor, so that string has to stay alive for as long as the scanner
+ * is in use.
+ */
+public class Valadoc.Highlighter.XmlScanner : Object, Scanner {
+ private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
+ private unowned string content;
+ private unowned string pos;
+
+
+	/**
+	 * Creates a scanner positioned at the first byte of //content//.
+	 *
+	 * Only unowned references to //content// are stored, so the caller has
+	 * to keep the string alive while the scanner is used.
+	 *
+	 * @param content the XML text to tokenize
+	 */
+	public XmlScanner (string content) {
+		// The scan cursor starts at the very beginning of the input.
+		this.pos = content;
+		this.content = content;
+	}
+
+	/**
+	 * Returns the next token of the input.
+	 *
+	 * Tokens queued by an earlier call are handed out first. Otherwise the
+	 * input is scanned forward until an escape, an end tag, a comment, a
+	 * CDATA delimiter or a start tag is recognised. Text that precedes the
+	 * recognised construct is returned as a PLAIN token and the construct
+	 * itself stays queued for the following calls. At the end of the input
+	 * an EOF token is queued.
+	 *
+	 * @return the next token
+	 */
+	public CodeToken next () {
+		if (!token_queue.is_empty ()) {
+			return token_queue.pop_head ();
+		}
+
+		// Everything between plain_start and a recognised construct is plain text.
+		unowned string plain_start = pos;
+
+		while (pos[0] != '\0') {
+			// Where the candidate construct begins; the queue_* helpers
+			// reset pos to this position when they fail.
+			unowned string mark = pos;
+			bool recognised = false;
+
+			if (pos[0] == '&') {
+				recognised = queue_escape ();
+			} else if (pos.has_prefix ("</")) {
+				recognised = queue_end_element ();
+			} else if (pos.has_prefix ("<!--")) {
+				recognised = queue_comment ();
+			} else if (pos.has_prefix ("<![CDATA[")) {
+				pos = pos.offset (9);
+				token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "<![CDATA["));
+				recognised = true;
+			} else if (pos[0] == '<') {
+				recognised = queue_start_element (plain_start, pos[1] == '?');
+			} else if (pos.has_prefix ("]]>")) {
+				pos = pos.offset (3);
+				token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "]]>"));
+				recognised = true;
+			}
+
+			if (recognised) {
+				return dispatch (plain_start, mark);
+			}
+
+			// Nothing recognised here: treat the character as plain text.
+			pos = pos.next_char ();
+		}
+
+		token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
+		return dispatch (plain_start, pos);
+	}
+
+	/**
+	 * Tries to scan a start tag (or an XML declaration / processing
+	 * instruction when //xml_decl// is set) at the current position.
+	 *
+	 * On success the tag is queued as an XML_ELEMENT token for the opening
+	 * part, the tokens produced by queue_attributes () and a final
+	 * XML_ELEMENT token for the closing ">", "/>" or "?>". On failure the
+	 * queue is cleared and pos is moved back to where it was on entry.
+	 *
+	 * @param dispatch_start start of the pending plain text; currently unused
+	 * @param xml_decl true if the construct starts with "<?" instead of "<"
+	 * @return true if a complete tag was queued
+	 */
+	private bool queue_start_element (string dispatch_start, bool xml_decl) {
+		assert (token_queue.is_empty ());
+
+		unowned string tag_begin = pos;
+
+		// Step over "<" or "<?".
+		pos = pos.offset (xml_decl ? 2 : 1);
+		skip_optional_spaces (ref pos);
+
+		bool well_formed = skip_id (ref pos);
+		if (well_formed) {
+			skip_optional_spaces (ref pos);
+			queue_token (tag_begin, pos, CodeTokenType.XML_ELEMENT);
+			well_formed = queue_attributes ();
+		}
+
+		if (well_formed) {
+			unowned string close_begin = pos;
+
+			// Length of the closing delimiter, 0 if there is none.
+			int close_length = 0;
+			if (xml_decl) {
+				if (pos[0] == '?' && pos[1] == '>') {
+					close_length = 2;
+				}
+			} else if (pos[0] == '>') {
+				close_length = 1;
+			} else if (pos[0] == '/' && pos[1] == '>') {
+				close_length = 2;
+			}
+
+			if (close_length > 0) {
+				pos = pos.offset (close_length);
+				queue_token (close_begin, pos, CodeTokenType.XML_ELEMENT);
+				return true;
+			}
+		}
+
+		// Not a tag after all: drop partial results and rewind.
+		token_queue.clear ();
+		pos = tag_begin;
+		return false;
+	}
+
+	/**
+	 * Queues the attributes found at the current position.
+	 *
+	 * Every attribute yields an XML_ATTRIBUTE token (name, "=" and the
+	 * surrounding whitespace) followed by an XML_ATTRIBUTE_VALUE token (the
+	 * quoted value and trailing whitespace). Values may be enclosed in
+	 * double or in single quotes; the closing quote has to match the
+	 * opening one.
+	 *
+	 * On failure neither pos nor the token queue is restored; the caller
+	 * (queue_start_element) takes care of that.
+	 *
+	 * @return false if a malformed attribute was found, true otherwise
+	 */
+	private bool queue_attributes () {
+		while (is_id_char (pos[0])) {
+			unowned string begin = pos;
+
+			if (skip_id (ref pos) == false) {
+				return false;
+			}
+
+			skip_optional_spaces (ref pos);
+
+			if (pos[0] != '=') {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			skip_optional_spaces (ref pos);
+
+			queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE);
+			begin = pos;
+
+			// XML permits both quote characters around a value.
+			char quote = pos[0];
+			if (quote != '"' && quote != '\'') {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			while (pos[0] != quote && pos[0] != '\0') {
+				pos = pos.offset (1);
+			}
+
+			// End of input before the closing quote.
+			if (pos[0] != quote) {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			skip_optional_spaces (ref pos);
+
+			queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE_VALUE);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Tries to scan an end tag ("</name>") at the current position.
+	 *
+	 * Whitespace is tolerated after "</" and before ">". On success the
+	 * whole tag is queued as one XML_ELEMENT token; on failure pos is moved
+	 * back to where it was on entry.
+	 *
+	 * @return true if an end tag was queued
+	 */
+	private bool queue_end_element () {
+		unowned string tag_begin = pos;
+
+		// Step over "</".
+		pos = pos.offset (2);
+		skip_optional_spaces (ref pos);
+
+		bool well_formed = skip_id (ref pos);
+		if (well_formed) {
+			skip_optional_spaces (ref pos);
+			well_formed = (pos[0] == '>');
+		}
+
+		if (!well_formed) {
+			pos = tag_begin;
+			return false;
+		}
+
+		pos = pos.offset (1);
+		queue_token (tag_begin, pos, CodeTokenType.XML_ELEMENT);
+		return true;
+	}
+
+	/**
+	 * Tries to scan an entity or character reference at the current
+	 * position, which has to point at "&".
+	 *
+	 * Accepted forms are named references ("&amp;"), decimal character
+	 * references ("&#10;") and hexadecimal character references
+	 * ("&#x1F;"). On success the reference is queued as an XML_ESCAPE
+	 * token; on failure pos is moved back to where it was on entry.
+	 *
+	 * @return true if a reference was queued
+	 */
+	private bool queue_escape () {
+		unowned string start = pos;
+		pos = pos.offset (1);
+
+		bool has_name;
+
+		if (pos[0] == '#') {
+			// Character reference: "&#" digits ";" or "&#x" hex digits ";".
+			pos = pos.offset (1);
+
+			bool hex = (pos[0] == 'x');
+			if (hex) {
+				pos = pos.offset (1);
+			}
+
+			has_name = false;
+			while (hex ? pos[0].isxdigit () : pos[0].isdigit ()) {
+				pos = pos.offset (1);
+				has_name = true;
+			}
+		} else {
+			has_name = skip_id (ref pos);
+		}
+
+		if (has_name == false || pos[0] != ';') {
+			pos = start;
+			return false;
+		}
+
+		pos = pos.offset (1);
+		queue_token (start, pos, CodeTokenType.XML_ESCAPE);
+		return true;
+	}
+
+	/**
+	 * Tries to scan a comment at the current position, which has to point
+	 * at "<!--".
+	 *
+	 * The comment extends up to and including the next "-->"; a ">" on its
+	 * own is part of the comment text. On success the comment is queued as
+	 * one XML_COMMENT token; if the input ends before "-->" is found, pos
+	 * is moved back to where it was on entry.
+	 *
+	 * @return true if a comment was queued
+	 */
+	private bool queue_comment () {
+		unowned string start = pos;
+
+		// Step over "<!--".
+		pos = pos.offset (4);
+
+		while (pos[0] != '\0' && !pos.has_prefix ("-->")) {
+			pos = pos.offset (1);
+		}
+
+		// Unterminated comment.
+		if (pos[0] == '\0') {
+			pos = start;
+			return false;
+		}
+
+		// Step over "-->".
+		pos = pos.offset (3);
+
+		queue_token (start, pos, CodeTokenType.XML_COMMENT);
+		return true;
+	}
+
+	/**
+	 * Advances //pos// over an identifier made of one or more segments of
+	 * id characters separated by ":".
+	 *
+	 * //pos// is advanced over everything that was consumed, even when the
+	 * result is false (for instance for "name:" with nothing after the
+	 * colon).
+	 *
+	 * @param pos the scan position, updated in place
+	 * @return true if the last segment that was looked at is not empty
+	 */
+	private static bool skip_id (ref unowned string pos) {
+		while (true) {
+			bool segment_found = false;
+
+			while (is_id_char (pos[0])) {
+				pos = pos.offset (1);
+				segment_found = true;
+			}
+
+			// Only a colon directly after a non-empty segment starts another one.
+			if (!segment_found || pos[0] != ':') {
+				return segment_found;
+			}
+
+			pos = pos.offset (1);
+		}
+	}
+
+	/**
+	 * Advances //pos// over any whitespace at the current position.
+	 *
+	 * @param pos the scan position, updated in place
+	 * @return true if at least one whitespace character was skipped
+	 */
+	private static bool skip_optional_spaces (ref unowned string pos) {
+		unowned string begin = pos;
+
+		while (pos[0].isspace ()) {
+			pos = pos.offset (1);
+		}
+
+		// The position moved exactly when something was skipped.
+		return ((char*) pos) != ((char*) begin);
+	}
+
+	/**
+	 * Hands out the next token after a construct has been queued.
+	 *
+	 * If there is text between //start// and //end// it is returned as a
+	 * PLAIN token and the queued tokens are kept for the following calls
+	 * to next (). Otherwise the head of the token queue is returned.
+	 *
+	 * @param start beginning of the pending plain text
+	 * @param end position right after the pending plain text; has to point
+	 *            into the same string as //start//
+	 * @return the PLAIN token or the head of the queue
+	 */
+	private CodeToken dispatch (string start, string end) {
+		assert (token_queue.is_empty () == false);
+
+		if (((char*) start) == ((char*) end)) {
+			return token_queue.pop_head ();
+		}
+
+		// string.substring () expects a length in bytes, so the distance is
+		// measured in bytes. A character count would cut off text that
+		// contains multi-byte UTF-8 sequences.
+		long length = (long) (((char*) end) - ((char*) start));
+		string text = start.substring (0, length);
+		return new CodeToken (CodeTokenType.PLAIN, text);
+	}
+
+	/**
+	 * Appends a token of type //token_type// holding the text between
+	 * //start// and //end// to the token queue.
+	 *
+	 * @param start beginning of the token text
+	 * @param end position right after the token text; has to point into
+	 *            the same string as //start//
+	 * @param token_type the type of the new token
+	 */
+	private void queue_token (string start, string end, CodeTokenType token_type) {
+		// string.substring () expects a length in bytes, so the distance is
+		// measured in bytes. A character count would cut off text that
+		// contains multi-byte UTF-8 sequences.
+		long length = (long) (((char*) end) - ((char*) start));
+		string text = start.substring (0, length);
+		token_queue.push_tail (new CodeToken (token_type, text));
+	}
+
+	/**
+	 * Tells whether //c// may be part of an identifier segment: a letter,
+	 * a digit, "_" or "-".
+	 *
+	 * @param c the character to test
+	 * @return true if //c// is an id character
+	 */
+	private static inline bool is_id_char (char c) {
+		if (c == '_' || c == '-') {
+			return true;
+		}
+
+		return c.isalnum ();
+	}
+
+	/**
+	 * Checks whether //source// looks like XML.
+	 *
+	 * After optional leading whitespace the text has to start with a
+	 * comment ("<!--"), a CDATA section ("<![CDATA[") or a complete start
+	 * tag / processing instruction whose attributes all have the form
+	 * name="value". Whitespace is allowed around "=", and an attribute
+	 * value must not span several lines.
+	 *
+	 * @param source the text to examine
+	 * @return true if //source// starts like an XML document or fragment
+	 */
+	internal static bool is_xml (string source) {
+		unowned string pos = source;
+
+		skip_optional_spaces (ref pos);
+
+		if (pos[0] != '<') {
+			return false;
+		}
+
+		// Comment or CDATA:
+		if (pos.has_prefix ("<!--") || pos.has_prefix ("<![CDATA[")) {
+			return true;
+		}
+
+		// Start tag or processing instruction:
+		bool proc_instr = false;
+		pos = pos.offset (1);
+
+		if (pos[0] == '?') {
+			pos = pos.offset (1);
+			proc_instr = true;
+		}
+
+		// Element name:
+		if (skip_id (ref pos) == false) {
+			return false;
+		}
+
+		skip_optional_spaces (ref pos);
+
+		// Attributes:
+		while (skip_id (ref pos)) {
+			// Accept whitespace before "=" the same way queue_attributes ()
+			// does, so both agree on what an attribute looks like.
+			skip_optional_spaces (ref pos);
+
+			if (pos[0] != '=') {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			skip_optional_spaces (ref pos);
+
+			if (pos[0] != '"') {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			while (pos[0] != '\0' && pos[0] != '\n' && pos[0] != '"') {
+				pos = pos.offset (1);
+			}
+
+			if (pos[0] != '"') {
+				return false;
+			}
+			pos = pos.offset (1);
+
+			skip_optional_spaces (ref pos);
+		}
+
+		// Closing delimiter:
+		if (proc_instr) {
+			return pos[0] == '?' && pos[1] == '>';
+		}
+
+		return pos[0] == '>' || (pos[0] == '/' && pos[1] == '>');
+	}
+}
+