diff options
author | Rico Tzschichholz <ricotz@ubuntu.com> | 2017-06-27 12:21:44 +0200 |
---|---|---|
committer | Rico Tzschichholz <ricotz@ubuntu.com> | 2017-06-27 12:21:44 +0200 |
commit | 93d9fe647be1f2effc0bfeeec903b5e030182f6c (patch) | |
tree | d28aab5aa5c0e4a149e40b22246246dd2b2d5957 /libvaladoc/highlighter | |
parent | b4f3f73a01b86fe7f9edde7a3991b493346eea23 (diff) | |
download | vala-93d9fe647be1f2effc0bfeeec903b5e030182f6c.tar.gz |
Prepare valadoc source-tree for merge
Diffstat (limited to 'libvaladoc/highlighter')
-rw-r--r-- | libvaladoc/highlighter/codescanner.vala | 572 | ||||
-rw-r--r-- | libvaladoc/highlighter/codetoken.vala | 58 | ||||
-rw-r--r-- | libvaladoc/highlighter/highlighter.vala | 366 | ||||
-rw-r--r-- | libvaladoc/highlighter/scanner.vala | 32 | ||||
-rw-r--r-- | libvaladoc/highlighter/xmlscanner.vala | 374 |
5 files changed, 1402 insertions, 0 deletions
diff --git a/libvaladoc/highlighter/codescanner.vala b/libvaladoc/highlighter/codescanner.vala new file mode 100644 index 000000000..8b15ee755 --- /dev/null +++ b/libvaladoc/highlighter/codescanner.vala @@ -0,0 +1,572 @@ +/* codescanner.vala + * + * Copyright (C) 2015 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Florian Brosch <flo.brosch@gmail.com> + */ + +using GLib; + + +/** + * A cheap scanner used to highlight C and Vala source code. 
+ */ +public class Valadoc.Highlighter.CodeScanner : Object, Scanner { + private Gee.HashMap<string, CodeTokenType?> keywords; /* identifier -> token type; words missing from the map stay plain text */ + private bool enable_string_templates; /* recognise @"..." string templates */ + private bool enabel_verbatim_string; /* recognise triple-quoted verbatim strings (field name spelling kept as-is) */ + private bool enable_preprocessor_define; /* also treat #define lines as preprocessor tokens */ + private bool enable_preprocessor_include; /* tokenise #include lines, with the file name as a literal */ + private bool enable_keyword_escape; /* accept '@' as the first character of an identifier */ + + + private Queue<CodeToken> token_queue = new Queue<CodeToken> (); /* tokens already recognised but not yet returned by next () */ + private unowned string content; /* not copied: the string given to the constructor must outlive the scanner */ + private unowned string pos; /* current scan position inside content */ + + + public CodeScanner (string content, bool enable_string_templates, bool enabel_verbatim_string, + bool enable_preprocessor_define, bool enable_preprocessor_include, bool enable_keyword_escape, + Gee.HashMap<string, CodeTokenType?> keywords) + { /* stores its arguments only; scanning starts with the first call to next () */ + this.content = content; + this.pos = content; + + this.enable_string_templates = enable_string_templates; + this.enabel_verbatim_string = enabel_verbatim_string; + this.enable_preprocessor_define = enable_preprocessor_define; + this.enable_preprocessor_include = enable_preprocessor_include; + this.enable_keyword_escape = enable_keyword_escape; + + this.keywords = keywords; + } + + public CodeToken next () { /* returns the next token; plain text in front of a recognised token is handed out first, and an EOF token is produced at the end of the input */ + if (!token_queue.is_empty ()) { + return token_queue.pop_head (); + } + + + unowned string start; + + for (start = pos; pos[0] != '\0'; pos = pos.next_char ()) { + if (((char*) pos) == ((char*) content) || pos[0] == '\n') { /* at the start of a line: look for preprocessor directives */ + unowned string line_start = pos; + + while (pos[0] == ' ' || pos[0] == '\t' || pos[0] == '\n') { + pos = pos.offset (1); + } + + if (pos[0] == '\0') { + break; + } else if (enable_preprocessor_include && pos.has_prefix ("#include")) { + unowned string end = pos; + if (queue_c_include ()) { + return dispatch (start, end); + } else { + pos = line_start; + continue; + } + } else if (pos.has_prefix ("#if") || pos.has_prefix ("#else") || pos.has_prefix ("#elif") || pos.has_prefix ("#endif") + || (enable_preprocessor_define && (pos.has_prefix ("#define") || pos.has_prefix ("#ifdef")))) { /* "#define" is the real directive and still matches the previously tested "#defined" */ + + unowned string end = pos; + queue_until ('\n', 
CodeTokenType.PREPROCESSOR); + return dispatch (start, end); + } + } + + if (pos[0] == '\'') { + unowned string end = pos; + queue_string_literal ("\'"); + return dispatch (start, end); + } + + if (pos[0] == '"' || (enable_string_templates && pos[0] == '@' && pos[1] == '"')) { + unowned string end = pos; + if (enabel_verbatim_string && (pos.has_prefix ("\"\"\"") || (enable_string_templates && pos.has_prefix ("@\"\"\"")))) { + queue_string_literal ("\"\"\""); + } else { + queue_string_literal ("\""); + } + return dispatch (start, end); + } + + if (pos[0] >= '0' && pos[0] <= '9') { + unowned string end = pos; + queue_numeric_literal (); + return dispatch (start, end); + } + + if (pos.has_prefix ("/*")) { + unowned string end = pos; + queue_multiline_comment (); + return dispatch (start, end); + } + + if (pos.has_prefix ("//")) { + unowned string end = pos; + queue_until ('\n', CodeTokenType.COMMENT); + return dispatch (start, end); + } + + if ((((char*) pos) == ((char*) content) || !isidstartchar (pos[-1])) && isidstartchar (pos[0])) { + unowned string end = pos; + if (queue_keyword ()) { + return dispatch (start, end); + } else { + continue; + } + } + } + + token_queue.push_tail (new CodeToken (CodeTokenType.EOF, "")); + return dispatch (start, pos); + } + + private bool queue_c_include () { + unowned string include_start = pos; + unowned string start = pos; + pos = pos.offset (8); + + while (pos[0] == ' ' || pos[0] == '\t') { + pos = pos.offset (1); + } + + char? 
end_char = null; + if (pos[0] == '"') { + end_char = '"'; + } else if (pos[0] == '<') { + end_char = '>'; + } + + if (end_char != null) { + queue_token (start, pos, CodeTokenType.PREPROCESSOR); + + unowned string literal_start = pos; + pos = pos.offset (1); + + while (pos[0] != end_char && pos[0] != '\n' && pos[0] != '\0') { + pos = pos.offset (1); + } + + if (pos[0] == end_char) { + pos = pos.offset (1); + + queue_token (literal_start, pos, CodeTokenType.LITERAL); + start = pos; + } else { + pos = include_start; + token_queue.clear (); + return false; + } + } + + while (pos[0] == ' ' || pos[0] == '\t') { + pos = pos.offset (1); + } + + if (pos[0] == '\n' || pos[0] == '\0') { + queue_token (start, pos, CodeTokenType.PREPROCESSOR); + return true; + } else { + pos = include_start; + token_queue.clear (); + return false; + } + } + + private bool queue_keyword () { + unowned string start = pos; + if (pos[0] == '@') { + pos = pos.offset (1); + } + while (isidchar (pos[0])) { + pos = pos.offset (1); + } + + long length = start.pointer_to_offset (pos); + string word = start.substring (0, length); + CodeTokenType? 
token_type = keywords.get (word); + if (token_type == null) { + pos = start; + return false; + } + + token_queue.push_tail (new CodeToken (token_type, word)); + return true; + } + + private void queue_multiline_comment () { /* queues one COMMENT token running from the comment opener at pos through its terminator, or to the end of the input if the comment is never closed */ + unowned string start = pos; + pos = pos.offset (2); + + while (!(pos[0] == '*' && pos[1] == '/') && pos[0] != '\0') { + pos = pos.offset (1); + } + + if (pos[0] != '\0') { + pos = pos.offset (2); + } + + queue_token (start, pos, CodeTokenType.COMMENT); + } + + private void queue_until (char end_char, CodeTokenType token_type) { /* queues one token of the given type from pos up to end_char; the end character is included unless it is a newline */ + unowned string start = pos; + pos = pos.offset (1); /* the character at pos itself is never compared with end_char */ + + while (pos[0] != end_char && pos[0] != '\0') { + pos = pos.offset (1); + } + + if (pos[0] != '\0' && pos[0] != '\n') { + pos = pos.offset (1); + } + + queue_token (start, pos, token_type); + } + + private void queue_string_literal (string end_chars) { /* queues a literal delimited by end_chars as LITERAL tokens; printf formats, backslash escapes and (in @-templates) $-expressions inside it are queued as separate ESCAPE tokens */ + unowned string start = pos; + bool is_template = false; + + if (pos[0] == '@') { /* template prefix: skip the '@' together with the opening delimiter */ + pos = pos.offset (end_chars.length + 1); + is_template = true; + } else { + pos = pos.offset (end_chars.length); + } + + while (!pos.has_prefix (end_chars) && pos[0] != '\0') { + long skip = 0; + + if ((pos[0] == '%' && has_printf_format_prefix (out skip)) + || (pos[0] == '\\' && has_escape_prefix (out skip)) + || (is_template && pos[0] == '$' && has_template_literal_prefix (out skip))) + { + queue_token (start, pos, CodeTokenType.LITERAL); /* literal text collected so far */ + + unowned string sub_start = pos; + pos = pos.offset (skip); + queue_token (sub_start, pos, CodeTokenType.ESCAPE); + start = pos; + } else { + pos = pos.offset (1); + } + } + + if (pos[0] != '\0') { /* closing delimiter found: make it part of the last LITERAL token */ + pos = pos.offset (end_chars.length); + } + + queue_token (start, pos, CodeTokenType.LITERAL); + } + + private bool has_template_literal_prefix (out long skip) { /* true if the character after pos starts an identifier or a balanced parenthesised expression; skip then holds the length in bytes counted from pos, otherwise 0 */ + if (isidchar (pos[1])) { + skip = 1; + while (isidchar (pos[skip])) { + skip++; + } + return true; + } + + if (pos[1] == '(') { + int level = 1; + skip = 2; + + while (level > 0) { + switch (pos[skip]) { + case '(': + level++; + break; + 
case ')': + level--; + break; + case '\0': + skip = 0; + return false; + } + skip++; + } + return true; + } + + skip = 0; + return false; + } + + private bool has_escape_prefix (out long skip) { /* true if pos (a backslash) starts a recognised escape sequence; skip then holds its length in bytes, otherwise 0 */ + switch (pos[1]) { + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '\\': + case '\'': + case '\"': + case '?': + skip = 2; + return true; + + case 'x': + if (pos[2].isxdigit ()) { + /* backslash, 'x', then hex digits: advance exactly one byte per digit so the scan stops on the first non-digit and never passes the terminating NUL */ + skip = 2; + while (pos[skip].isxdigit ()) { + skip++; + } + return true; + } + + skip = 0; + return false; + + default: + if (pos[1].isdigit ()) { /* backslash followed by one to three decimal digits */ + skip = 2; + + if (pos[2].isdigit ()) { + skip++; + + if (pos[3].isdigit ()) { + skip++; + } + } + + return true; + } + + skip = 0; + return false; + } + } + + private bool has_printf_format_prefix (out long skip) { /* true if pos (a '%') starts a printf conversion; skip then holds its length in bytes, otherwise 0 */ + // %[flag][min width][precision][length modifier][conversion specifier] + unowned string pos = this.pos; + unowned string start = pos; + + // '%' + pos = pos.offset (1); + + if (pos[0] == '%') { + pos = pos.offset (1); + skip = 2; + return true; + } + + + // flags: '#' sits at index 0 of the flag set, so the test must be ">= 0"; NUL is excluded explicitly so the scan cannot run past the end of the input + while (pos[0] != '\0' && "#0+- ".index_of_char (pos[0]) >= 0) { + pos = pos.offset (1); + } + + // min width: + while (pos[0].isdigit ()) { + pos = pos.offset (1); + } + + // precision + if (pos[0] == '.' 
&& pos[1].isdigit ()) { + pos = pos.offset (2); + while (pos[0].isdigit ()) { + pos = pos.offset (1); + } + } + + // length: + switch (pos[0]) { + case 'h': + pos = pos.offset (1); + if (pos[0] == 'h') { + pos = pos.offset (1); + } + break; + + case 'l': + pos = pos.offset (1); + if (pos[0] == 'l') { + pos = pos.offset (1); + } + break; + + case 'j': + case 'z': + case 't': + case 'L': + pos = pos.offset (1); + break; + } + + // conversion specifier: + switch (pos[0]) { + case 'd': + case 'i': + case 'u': + case 'o': + case 'x': + case 'X': + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': + case 'c': + case 's': + case 'p': + case 'n': + pos = pos.offset (1); + break; + + default: + skip = 0; + return false; + } + + skip = start.pointer_to_offset (pos); + return true; + } + + private enum NumericType { + INTEGER, + REAL, + NONE + } + + // based on libvala + private void queue_numeric_literal () { + NumericType numeric_type = NumericType.INTEGER; + unowned string start = pos; + + + // integer part + if (pos[0] == '0' && pos[1] == 'x' && pos[2].isxdigit ()) { + // hexadecimal integer literal + pos = pos.offset (2); + while (pos[0].isxdigit ()) { + pos = pos.offset (1); + } + } else { + // decimal number + while (pos[0].isdigit ()) { + pos = pos.offset (1); + } + } + + + // fractional part + if (pos[0] == '.' 
&& pos[1].isdigit ()) { + numeric_type = NumericType.REAL; + pos = pos.offset (1); + while (pos[0].isdigit ()) { + pos = pos.offset (1); + } + } + + + // exponent part + if (pos[0] == 'e' || pos[0] == 'E') { + numeric_type = NumericType.REAL; + pos = pos.offset (1); + if (pos[0] == '+' || pos[0] == '-') { + pos = pos.offset (1); + } + while (pos[0].isdigit ()) { + pos = pos.offset (1); + } + } + + + // type suffix + switch (pos[0]) { + case 'l': + case 'L': + if (numeric_type == NumericType.INTEGER) { + pos = pos.offset (1); + if (pos[0] == 'l' || pos[0] == 'L') { + pos = pos.offset (1); + } + } + break; + + case 'u': + case 'U': + if (numeric_type == NumericType.INTEGER) { + pos = pos.offset (1); + if (pos[0] == 'l' || pos[0] == 'L') { + pos = pos.offset (1); + if (pos[0] == 'l' || pos[0] == 'L') { + pos = pos.offset (1); + } + } + } + break; + + case 'f': + case 'F': + case 'd': + case 'D': + numeric_type = NumericType.REAL; + pos = pos.offset (1); + break; + } + + if (pos[0].isalnum ()) { + numeric_type = NumericType.NONE; + } + + queue_token (start, pos, (numeric_type != NumericType.NONE) + ? 
CodeTokenType.LITERAL + : CodeTokenType.PLAIN); + } + + private CodeToken dispatch (string start, string end) { /* returns the text between start and end as a PLAIN token, or the first queued token when that range is empty; at least one token must already be queued */ + assert (token_queue.is_empty () == false); + + if (((char*) start) == ((char*) end)) { + return token_queue.pop_head (); + } + + long length = (long) (((char*) end) - ((char*) start)); /* byte distance: substring () counts bytes, whereas pointer_to_offset () counts UTF-8 characters and truncated multi-byte text */ + string content = start.substring (0, length); + return new CodeToken (CodeTokenType.PLAIN, content); + } + + private void queue_token (string start, string end, CodeTokenType token_type) { /* appends the text between start and end to the queue as a token of the given type */ + long length = (long) (((char*) end) - ((char*) start)); /* byte distance, for the same reason as in dispatch () */ + string content = start.substring (0, length); + token_queue.push_tail (new CodeToken (token_type, content)); + } + + private inline bool isidchar (char c) { /* true for characters that may appear inside an identifier: letters, digits and '_' */ + return c.isalnum () || c == '_'; + } + + private inline bool isidstartchar (char c) { /* like isidchar (), plus '@' when keyword escaping is enabled; digits are accepted here as well */ + return c.isalnum () || c == '_' || (c == '@' && enable_keyword_escape); + } +} + diff --git a/libvaladoc/highlighter/codetoken.vala b/libvaladoc/highlighter/codetoken.vala new file mode 100644 index 000000000..1a02195ea --- /dev/null +++ b/libvaladoc/highlighter/codetoken.vala @@ -0,0 +1,58 @@ +/* codetoken.vala + * + * Copyright (C) 2015 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Florian Brosch <flo.brosch@gmail.com> + */ + + +public class Valadoc.Highlighter.CodeToken { /* one scanned token: a token type together with the text it covers */ + public CodeTokenType token_type { get; private set; } /* kind of token; only set by the constructor */ + public string content { get; private set;} /* text of the token; only set by the constructor */ + + public CodeToken (CodeTokenType type, string content) { /* creates a token of the given type holding the given text */ + this.token_type = type; + this.content = content; + } +} + + +public enum Valadoc.Highlighter.CodeTokenType { /* all token kinds; the XML_* values are presumably produced by the XML scanner, which is not visible here */ + XML_ESCAPE, + XML_ELEMENT, + XML_ATTRIBUTE, + XML_ATTRIBUTE_VALUE, + XML_COMMENT, + XML_CDATA, + + PREPROCESSOR, + COMMENT, + KEYWORD, + LITERAL, + ESCAPE, + PLAIN, + TYPE, + EOF; + + public unowned string to_string () { /* returns the nick of this value as registered in its GLib enum class; returns null through return_val_if_fail if the value is not registered */ + EnumClass enumc = (EnumClass) typeof (CodeTokenType).class_ref (); + unowned EnumValue? eval = enumc.get_value (this); + return_val_if_fail (eval != null, null); + return eval.value_nick; + } +} diff --git a/libvaladoc/highlighter/highlighter.vala b/libvaladoc/highlighter/highlighter.vala new file mode 100644 index 000000000..3c2578033 --- /dev/null +++ b/libvaladoc/highlighter/highlighter.vala @@ -0,0 +1,366 @@ +/* codehighlighter.vala + * + * Copyright (C) 2015 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Florian Brosch <flo.brosch@gmail.com> + */ + +using GLib; +using Valadoc.Content; + + +public class Valadoc.Highlighter.Highlighter : Object { + private Gee.HashMap<string, CodeTokenType?> vala_keywords; + private Gee.HashMap<string, CodeTokenType?> c_keywords; + + + /** + * Used to highlight vala source code. + */ + public Run highlight_vala (string source_code) { + if (vala_keywords == null) { + vala_keywords = new Gee.HashMap<string, CodeTokenType?> (); + + // ** Types: ** + vala_keywords.set ("string", CodeTokenType.TYPE); + vala_keywords.set ("bool", CodeTokenType.TYPE); + vala_keywords.set ("void", CodeTokenType.TYPE); + + vala_keywords.set ("double", CodeTokenType.TYPE); + vala_keywords.set ("float", CodeTokenType.TYPE); + + vala_keywords.set ("char", CodeTokenType.TYPE); + vala_keywords.set ("uchar", CodeTokenType.TYPE); + vala_keywords.set ("unichar", CodeTokenType.TYPE); + + vala_keywords.set ("short", CodeTokenType.TYPE); + vala_keywords.set ("ushort", CodeTokenType.TYPE); + + vala_keywords.set ("long", CodeTokenType.TYPE); + vala_keywords.set ("ulong", CodeTokenType.TYPE); + + vala_keywords.set ("size_t", CodeTokenType.TYPE); + vala_keywords.set ("ssize_t", CodeTokenType.TYPE); + + vala_keywords.set ("int", CodeTokenType.TYPE); + vala_keywords.set ("int8", CodeTokenType.TYPE); + vala_keywords.set ("int16", CodeTokenType.TYPE); + vala_keywords.set ("int32", CodeTokenType.TYPE); + vala_keywords.set ("int64", CodeTokenType.TYPE); + + vala_keywords.set ("uint", CodeTokenType.TYPE); + vala_keywords.set ("uint8", CodeTokenType.TYPE); + vala_keywords.set ("uint16", CodeTokenType.TYPE); + vala_keywords.set ("uint32", CodeTokenType.TYPE); + vala_keywords.set ("uint64", CodeTokenType.TYPE); + + + // ** Literals: ** + 
vala_keywords.set ("null", CodeTokenType.LITERAL); + vala_keywords.set ("true", CodeTokenType.LITERAL); + vala_keywords.set ("false", CodeTokenType.LITERAL); + + + // ** Keywords: ** + vala_keywords.set ("return", CodeTokenType.KEYWORD); + vala_keywords.set ("lock", CodeTokenType.KEYWORD); + vala_keywords.set ("var", CodeTokenType.KEYWORD); + vala_keywords.set ("yield", CodeTokenType.KEYWORD); + vala_keywords.set ("global", CodeTokenType.KEYWORD); + vala_keywords.set ("construct", CodeTokenType.KEYWORD); + + vala_keywords.set ("value", CodeTokenType.KEYWORD); + vala_keywords.set ("get", CodeTokenType.KEYWORD); + vala_keywords.set ("set", CodeTokenType.KEYWORD); + + vala_keywords.set ("owned", CodeTokenType.KEYWORD); + vala_keywords.set ("unowned", CodeTokenType.KEYWORD); + vala_keywords.set ("const", CodeTokenType.KEYWORD); + vala_keywords.set ("weak", CodeTokenType.KEYWORD); + vala_keywords.set ("dynamic", CodeTokenType.KEYWORD); + + vala_keywords.set ("out", CodeTokenType.KEYWORD); + vala_keywords.set ("ref", CodeTokenType.KEYWORD); + + vala_keywords.set ("break", CodeTokenType.KEYWORD); + vala_keywords.set ("continue", CodeTokenType.KEYWORD); + vala_keywords.set ("return", CodeTokenType.KEYWORD); + + vala_keywords.set ("if", CodeTokenType.KEYWORD); + vala_keywords.set ("else", CodeTokenType.KEYWORD); + vala_keywords.set ("switch", CodeTokenType.KEYWORD); + vala_keywords.set ("case", CodeTokenType.KEYWORD); + vala_keywords.set ("default", CodeTokenType.KEYWORD); + + vala_keywords.set ("do", CodeTokenType.KEYWORD); + vala_keywords.set ("while", CodeTokenType.KEYWORD); + vala_keywords.set ("for", CodeTokenType.KEYWORD); + vala_keywords.set ("foreach", CodeTokenType.KEYWORD); + vala_keywords.set ("in", CodeTokenType.KEYWORD); + + vala_keywords.set ("try", CodeTokenType.KEYWORD); + vala_keywords.set ("catch", CodeTokenType.KEYWORD); + vala_keywords.set ("finally", CodeTokenType.KEYWORD); + vala_keywords.set ("throw", CodeTokenType.KEYWORD); + + vala_keywords.set 
("class", CodeTokenType.KEYWORD); + vala_keywords.set ("interface", CodeTokenType.KEYWORD); + vala_keywords.set ("struct", CodeTokenType.KEYWORD); + vala_keywords.set ("enum", CodeTokenType.KEYWORD); + vala_keywords.set ("delegate", CodeTokenType.KEYWORD); + vala_keywords.set ("errordomain", CodeTokenType.KEYWORD); + + vala_keywords.set ("abstract", CodeTokenType.KEYWORD); + vala_keywords.set ("virtual", CodeTokenType.KEYWORD); + vala_keywords.set ("override", CodeTokenType.KEYWORD); + vala_keywords.set ("signal", CodeTokenType.KEYWORD); + vala_keywords.set ("extern", CodeTokenType.KEYWORD); + vala_keywords.set ("static", CodeTokenType.KEYWORD); + vala_keywords.set ("async", CodeTokenType.KEYWORD); + vala_keywords.set ("inline", CodeTokenType.KEYWORD); + vala_keywords.set ("new", CodeTokenType.KEYWORD); + + vala_keywords.set ("public", CodeTokenType.KEYWORD); + vala_keywords.set ("private", CodeTokenType.KEYWORD); + vala_keywords.set ("protected", CodeTokenType.KEYWORD); + vala_keywords.set ("internal", CodeTokenType.KEYWORD); + + vala_keywords.set ("throws", CodeTokenType.KEYWORD); + vala_keywords.set ("requires", CodeTokenType.KEYWORD); + vala_keywords.set ("ensures", CodeTokenType.KEYWORD); + vala_keywords.set ("assert", CodeTokenType.KEYWORD); + + vala_keywords.set ("namespace", CodeTokenType.KEYWORD); + vala_keywords.set ("using", CodeTokenType.KEYWORD); + + vala_keywords.set ("as", CodeTokenType.KEYWORD); + vala_keywords.set ("is", CodeTokenType.KEYWORD); + vala_keywords.set ("in", CodeTokenType.KEYWORD); + vala_keywords.set ("new", CodeTokenType.KEYWORD); + vala_keywords.set ("delete", CodeTokenType.KEYWORD); + vala_keywords.set ("sizeof", CodeTokenType.KEYWORD); + vala_keywords.set ("typeof", CodeTokenType.KEYWORD); + + vala_keywords.set ("this", CodeTokenType.KEYWORD); + vala_keywords.set ("base", CodeTokenType.KEYWORD); + } + + bool enable_string_templates = true; + bool enable_preprocessor_define = false; + bool enable_preprocessor_include = false; + 
bool enable_keyword_escape = true; + bool enabel_verbatim_string = true; + + CodeScanner scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string, + enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape, + vala_keywords); + + return highlight_code (scanner); + } + + /** + * Used to highlight C source code. + * + * On first use the c_keywords table is filled with C and GLib type names (TYPE), NULL/TRUE/FALSE (LITERAL) + * and C keywords (KEYWORD). The source is then tokenized by a CodeScanner created with the preprocessor + * define/include flags set to true and the string template, verbatim string and keyword escape flags set to false. + * + * @param source_code the C source code to highlight + * @return the Run built by highlight_code () from the scanned tokens + */ + public Run highlight_c (string source_code) { + if (c_keywords == null) { + c_keywords = new Gee.HashMap<string, CodeTokenType?> (); + + // ** Types: ** + c_keywords.set ("auto", CodeTokenType.TYPE); + c_keywords.set ("char", CodeTokenType.TYPE); + c_keywords.set ("const", CodeTokenType.TYPE); + c_keywords.set ("double", CodeTokenType.TYPE); + c_keywords.set ("extern", CodeTokenType.TYPE); + c_keywords.set ("int", CodeTokenType.TYPE); + c_keywords.set ("float", CodeTokenType.TYPE); + c_keywords.set ("long", CodeTokenType.TYPE); + c_keywords.set ("register", CodeTokenType.TYPE); + c_keywords.set ("short", CodeTokenType.TYPE); + c_keywords.set ("signed", CodeTokenType.TYPE); + c_keywords.set ("static", CodeTokenType.TYPE); + c_keywords.set ("unsigned", CodeTokenType.TYPE); + c_keywords.set ("void", CodeTokenType.TYPE); + c_keywords.set ("volatile", CodeTokenType.TYPE); + + c_keywords.set ("gboolean", CodeTokenType.TYPE); + c_keywords.set ("gpointer", CodeTokenType.TYPE); + c_keywords.set ("gconstpointer", CodeTokenType.TYPE); + c_keywords.set ("gchar", CodeTokenType.TYPE); + c_keywords.set ("guchar", CodeTokenType.TYPE); + c_keywords.set ("gint", CodeTokenType.TYPE); + c_keywords.set ("guint", CodeTokenType.TYPE); + c_keywords.set ("gshort", CodeTokenType.TYPE); + c_keywords.set ("gushort", CodeTokenType.TYPE); + c_keywords.set ("glong", CodeTokenType.TYPE); + c_keywords.set ("gulong", CodeTokenType.TYPE); + c_keywords.set ("gint8", CodeTokenType.TYPE); + c_keywords.set ("guint8", CodeTokenType.TYPE); + c_keywords.set ("gint16", CodeTokenType.TYPE); + c_keywords.set ("guint16", CodeTokenType.TYPE); + 
c_keywords.set ("gint32", CodeTokenType.TYPE); + c_keywords.set ("guint32", CodeTokenType.TYPE); + c_keywords.set ("gint64", CodeTokenType.TYPE); + c_keywords.set ("guint64", CodeTokenType.TYPE); + c_keywords.set ("gfloat", CodeTokenType.TYPE); + c_keywords.set ("gdouble", CodeTokenType.TYPE); + c_keywords.set ("gsize", CodeTokenType.TYPE); + c_keywords.set ("gssize", CodeTokenType.TYPE); + c_keywords.set ("goffset", CodeTokenType.TYPE); + c_keywords.set ("gintptr", CodeTokenType.TYPE); + c_keywords.set ("guintptr", CodeTokenType.TYPE); + + + // ** Literals: ** + c_keywords.set ("NULL", CodeTokenType.LITERAL); + c_keywords.set ("TRUE", CodeTokenType.LITERAL); + c_keywords.set ("FALSE", CodeTokenType.LITERAL); + + + // ** Keywords: ** + c_keywords.set ("break", CodeTokenType.KEYWORD); + c_keywords.set ("case", CodeTokenType.KEYWORD); + c_keywords.set ("continue", CodeTokenType.KEYWORD); + c_keywords.set ("default", CodeTokenType.KEYWORD); + c_keywords.set ("do", CodeTokenType.KEYWORD); + c_keywords.set ("else", CodeTokenType.KEYWORD); + c_keywords.set ("enum", CodeTokenType.KEYWORD); + c_keywords.set ("for", CodeTokenType.KEYWORD); + c_keywords.set ("goto", CodeTokenType.KEYWORD); + c_keywords.set ("if", CodeTokenType.KEYWORD); + c_keywords.set ("return", CodeTokenType.KEYWORD); + c_keywords.set ("sizeof", CodeTokenType.KEYWORD); + c_keywords.set ("struct", CodeTokenType.KEYWORD); + c_keywords.set ("switch", CodeTokenType.KEYWORD); + c_keywords.set ("typedef", CodeTokenType.KEYWORD); + c_keywords.set ("union", CodeTokenType.KEYWORD); + c_keywords.set ("while", CodeTokenType.KEYWORD); + c_keywords.set ("assert", CodeTokenType.KEYWORD); + } + + bool enable_string_templates = false; + bool enable_preprocessor_define = true; + bool enable_preprocessor_include = true; + bool enable_keyword_escape = false; + bool enabel_verbatim_string = false; + + CodeScanner scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string, + 
enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape, + c_keywords); + + return highlight_code (scanner); + } + + /** + * Used to highlight XML source code. + * + * @param source_code the XML text to highlight + * @return the Run built by highlight_code () from the tokens of an XmlScanner + */ + public Run highlight_xml (string source_code) { + XmlScanner scanner = new XmlScanner (source_code); + return highlight_code (scanner); + } + + /** + * Used to highlight source code. + * + * Pulls tokens from scanner until a token of type CodeTokenType.EOF is returned. A token whose type + * has a case below is wrapped in a nested Run carrying the matching Run.Style; every other token is + * appended as plain Text. + * + * @param scanner the token source + * @return a Run with Run.Style.MONOSPACED holding the highlighted content + */ + private Run highlight_code (Scanner scanner) { + Run code = new Run (Run.Style.MONOSPACED); + + for (CodeToken token = scanner.next (); token.token_type != CodeTokenType.EOF; token = scanner.next ()) { + switch (token.token_type) { + case CodeTokenType.PREPROCESSOR: + Run run = new Run (Run.Style.LANG_PREPROCESSOR); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.COMMENT: + Run run = new Run (Run.Style.LANG_COMMENT); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.KEYWORD: + Run run = new Run (Run.Style.LANG_KEYWORD); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.LITERAL: + Run run = new Run (Run.Style.LANG_LITERAL); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.TYPE: + Run run = new Run (Run.Style.LANG_BASIC_TYPE); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.ESCAPE: + Run run = new Run (Run.Style.LANG_ESCAPE); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_ESCAPE: + Run run = new Run (Run.Style.XML_ESCAPE); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_ELEMENT: + Run run = new Run (Run.Style.XML_ELEMENT); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_ATTRIBUTE: + Run run = new Run (Run.Style.XML_ATTRIBUTE); + run.content.add (new 
Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_ATTRIBUTE_VALUE: + Run run = new Run (Run.Style.XML_ATTRIBUTE_VALUE); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_COMMENT: + Run run = new Run (Run.Style.XML_COMMENT); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + case CodeTokenType.XML_CDATA: + Run run = new Run (Run.Style.XML_CDATA); + run.content.add (new Text (token.content)); + code.content.add (run); + break; + + default: + code.content.add (new Text (token.content)); + break; + } + } + + return code; + } +} + + diff --git a/libvaladoc/highlighter/scanner.vala b/libvaladoc/highlighter/scanner.vala new file mode 100644 index 000000000..20eedcfb2 --- /dev/null +++ b/libvaladoc/highlighter/scanner.vala @@ -0,0 +1,32 @@ +/* scanner.vala + * + * Copyright (C) 2015 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Florian Brosch <flo.brosch@gmail.com> + */ + +using GLib; + + +/** + * Scanner interface used to highlight source code. 
 + */ +public interface Valadoc.Highlighter.Scanner : Object { + + /** + * Returns the next token of the scanned input. + * + * highlight_code () keeps calling this until the returned token has the type CodeTokenType.EOF. + */ + public abstract CodeToken next (); +} diff --git a/libvaladoc/highlighter/xmlscanner.vala b/libvaladoc/highlighter/xmlscanner.vala new file mode 100644 index 000000000..38b87c5ee --- /dev/null +++ b/libvaladoc/highlighter/xmlscanner.vala @@ -0,0 +1,374 @@ +/* xmlscanner.vala + * + * Copyright (C) 2015 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Florian Brosch <flo.brosch@gmail.com> + */ + +using GLib; + + +/** + * A cheap scanner used to highlight XML.
+ */ +public class Valadoc.Highlighter.XmlScanner : Object, Scanner { + private Queue<CodeToken> token_queue = new Queue<CodeToken> (); + private unowned string content; + private unowned string pos; + + + public XmlScanner (string content) { + this.content = content; + this.pos = content; + } + + public CodeToken next () { + if (!token_queue.is_empty ()) { + return token_queue.pop_head (); + } + + unowned string start; + + for (start = pos; pos[0] != '\0'; pos = pos.next_char ()) { + if (pos[0] == '&') { + unowned string begin = pos; + if (queue_escape ()) { + return dispatch (start, begin); + } + } else if (pos[0] == '<') { + if (pos[1] == '/') { + unowned string end = pos; + if (queue_end_element ()) { + return dispatch (start, end); + } + } else if (pos[1] == '!' && pos[2] == '-' && pos[3] == '-') { + unowned string end = pos; + if (queue_comment ()) { + return dispatch (start, end); + } + } else if (pos[1] == '!' && pos[2] == '[' && pos[3] == 'C' && pos[4] == 'D' && pos[5] == 'A' && pos[6] == 'T' && pos[7] == 'A' && pos[8] == '[') { + unowned string end = pos; + pos = pos.offset (9); + token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "<![CDATA[")); + return dispatch (start, end); + } else { + unowned string end = pos; + if (queue_start_element (start, pos[1] == '?')) { + return dispatch (start, end); + } else { + continue; + } + } + } else if (pos[0] == ']' && pos[1] == ']' && pos[2] == '>') { + unowned string end = pos; + pos = pos.offset (3); + token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "]]>")); + return dispatch (start, end); + } + } + + token_queue.push_tail (new CodeToken (CodeTokenType.EOF, "")); + return dispatch (start, pos); + } + + private bool queue_start_element (string dispatch_start, bool xml_decl) { + assert (token_queue.is_empty ()); + + unowned string element_start = pos; + if (xml_decl) { + pos = pos.offset (2); + } else { + pos = pos.offset (1); + } + + skip_optional_spaces (ref pos); + + if (skip_id (ref 
pos) == false) { + token_queue.clear (); + pos = element_start; + return false; + } + + skip_optional_spaces (ref pos); + + queue_token (element_start, pos, CodeTokenType.XML_ELEMENT); + + if (queue_attributes () == false) { + token_queue.clear (); + pos = element_start; + return false; + } + + unowned string element_end_start = pos; + + if (!xml_decl && pos[0] == '>') { + pos = pos.offset (1); + } else if (!xml_decl && pos[0] == '/' && pos[1] == '>') { + pos = pos.offset (2); + } else if (xml_decl && pos[0] == '?' && pos[1] == '>') { + pos = pos.offset (2); + } else { + token_queue.clear (); + pos = element_start; + return false; + } + + queue_token (element_end_start, pos, CodeTokenType.XML_ELEMENT); + return true; + } + + private bool queue_attributes () { + while (is_id_char (pos[0])) { + unowned string begin = pos; + + if (skip_id (ref pos) == false) { + return false; + } + + skip_optional_spaces (ref pos); + + if (pos[0] == '=') { + pos = pos.offset (1); + } else { + return false; + } + + skip_optional_spaces (ref pos); + + queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE); + begin = pos; + + if (pos[0] == '"') { + pos = pos.offset (1); + } else { + return false; + } + + while (pos[0] != '"' && pos[0] != '\0') { + pos = pos.offset (1); + } + + if (pos[0] == '"') { + pos = pos.offset (1); + } else { + return false; + } + + skip_optional_spaces (ref pos); + + queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE_VALUE); + } + + return true; + } + + private bool queue_end_element () { + unowned string start = pos; + pos = pos.offset (2); + + skip_optional_spaces (ref pos); + + if (skip_id (ref pos) == false) { + pos = start; + return false; + } + + skip_optional_spaces (ref pos); + + if (pos[0] == '>') { + pos = pos.offset (1); + } else { + pos = start; + return false; + } + + queue_token (start, pos, CodeTokenType.XML_ELEMENT); + return true; + } + + private bool queue_escape () { + unowned string start = pos; + pos = pos.offset (1); + + if (skip_id (ref 
pos) == false) { + pos = start; + return false; + } + + if (pos[0] == ';') { + pos = pos.offset (1); + } else { + pos = start; + return false; + } + + queue_token (start, pos, CodeTokenType.XML_ESCAPE); + return true; + } + + private bool queue_comment () { + unowned string start = pos; + pos = pos.offset (4); + + while (pos[0] != '>' && pos[0] != '\0') { + pos = pos.offset (1); + } + + if (pos[0] == '>') { + pos = pos.offset (1); + } else { + pos = start; + return false; + } + + queue_token (start, pos, CodeTokenType.XML_COMMENT); + return true; + } + + private static bool skip_id (ref unowned string pos) { + bool has_next_segment = true; + bool has_id = false; + + while (has_next_segment) { + has_id = false; + + while (is_id_char (pos[0])) { + pos = pos.offset (1); + has_id = true; + } + + if (pos[0] == ':' && has_id) { + has_next_segment = true; + pos = pos.offset (1); + } else { + has_next_segment = false; + } + } + + return has_id; + } + + private static bool skip_optional_spaces (ref unowned string pos) { + bool skipped = false; + + while (pos[0].isspace ()) { + pos = pos.offset (1); + skipped = true; + } + + return skipped; + } + + private CodeToken dispatch (string start, string end) { + assert (token_queue.is_empty () == false); + + if (((char*) start) == ((char*) end)) { + return token_queue.pop_head (); + } + + long length = start.pointer_to_offset (end); + string content = start.substring (0, length); + return new CodeToken (CodeTokenType.PLAIN, content); + } + + private void queue_token (string start, string end, CodeTokenType token_type) { + long length = start.pointer_to_offset (end); + string content = start.substring (0, length); + token_queue.push_tail (new CodeToken (token_type, content)); + } + + private static inline bool is_id_char (char c) { + return c.isalnum () || c == '_' || c == '-'; + } + + internal static bool is_xml (string source) { + unowned string pos = source; + + skip_optional_spaces (ref pos); + + if (pos[0] == '<') { + // 
Comment: + if (pos.has_prefix ("<!--")) { + return true; + } + + // CDATA: + if (pos.has_prefix ("<![CDATA[")) { + return true; + } + + + // Start Tag: + bool proc_instr = false; + pos = pos.offset (1); + + if (pos[0] == '?') { + pos = pos.offset (1); + proc_instr = true; + } + + // ID: + if (skip_id (ref pos) == false) { + return false; + } + + skip_optional_spaces (ref pos); + + while (skip_id (ref pos)) { + if (pos[0] == '=') { + pos = pos.offset (1); + } else { + return false; + } + + skip_optional_spaces (ref pos); + + if (pos[0] == '"') { + pos = pos.offset (1); + } else { + return false; + } + + while (pos[0] != '\0' && pos[0] != '\n' && pos[0] != '"') { + pos = pos.offset (1); + } + + if (pos[0] == '"') { + pos = pos.offset (1); + } else { + return false; + } + + skip_optional_spaces (ref pos); + } + + if (proc_instr && pos[0] == '?' && pos[1] == '>') { + return true; + } + + if (!proc_instr && (pos[0] == '>' || (pos[0] == '/' && pos[1] == '>'))) { + return true; + } + + return false; + } else { + return false; + } + } +} + |