1 files changed, 717 insertions, 0 deletions
diff --git a/gnu/javax/swing/text/html/css/CSSScanner.java b/gnu/javax/swing/text/html/css/CSSScanner.java
new file mode 100644
index 000000000..a402b9522
--- /dev/null
+++ b/gnu/javax/swing/text/html/css/CSSScanner.java
@@ -0,0 +1,717 @@
+/* CSSScanner.java -- A tokenizer (scanner) for CSS stylesheets
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.css;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+/**
+ * A tokenizer for CSS stylesheets. This is based on the scanner definition
+ * from:
+ *
+ * http://www.w3.org/TR/CSS21/syndata.html#tokenization
+ *
+ * Call {@link #nextToken()} repeatedly until it returns {@link #EOF}. After
+ * each call the characters of the token just read are available in
+ * <code>parseBuffer[0 .. tokenEnd - 1]</code> or, as a string, from
+ * {@link #currentTokenString()}.
+ *
+ * @author Roman Kennke (kennke@aicas.com)
+ */
+// TODO: Maybe implement more restrictive scanner:
+// http://www.w3.org/TR/CSS21/grammar.html#q2
+// TODO: INVALID, URI and UNICODE_RANGE are declared but never produced;
+// url(...) currently scans as a FUNCTION token followed by its contents.
+class CSSScanner
+{
+
+  // The tokens. This list is taken from:
+  // http://www.w3.org/TR/CSS21/syndata.html#tokenization
+  static final int IDENT = 1;
+  static final int ATKEYWORD = 2;
+  static final int STRING = 3;
+  static final int INVALID = 4;
+  static final int HASH = 5;
+  static final int NUMBER = 6;
+  static final int PERCENTAGE = 7;
+  static final int DIMENSION = 8;
+  static final int URI = 9;
+  static final int UNICODE_RANGE = 10;
+  static final int CDO = 11;
+  static final int CDC = 12;
+  static final int SEMICOLON = 13;
+  static final int CURLY_LEFT = 14;
+  static final int CURLY_RIGHT = 15;
+  static final int PAREN_LEFT = 16;
+  static final int PAREN_RIGHT = 17;
+  static final int S = 18;
+  static final int COMMENT = 19;
+  static final int FUNCTION = 20;
+  static final int INCLUDES = 21;
+  static final int DASHMATCH = 22;
+  static final int DELIM = 23;
+  // '[' and ']'. These must not share a value with PAREN_LEFT/PAREN_RIGHT,
+  // otherwise a caller cannot tell "[" from "(". They are numbered after
+  // DELIM so that every other token keeps its previous value.
+  static final int BRACE_LEFT = 24;
+  static final int BRACE_RIGHT = 25;
+
+  // Additional tokens defined for convenience.
+  static final int EOF = -1;
+
+  /**
+   * The input source.
+   */
+  private final Reader in;
+
+  /**
+   * The parse buffer. It holds the characters of the current token starting
+   * at index 0. The array is replaced by a larger one when a token does not
+   * fit, so read this field again after each call to {@link #nextToken()}.
+   */
+  char[] parseBuffer;
+
+  /**
+   * The end index (exclusive) in the parseBuffer of the current token.
+   */
+  int tokenEnd;
+
+  /**
+   * The lookahead 'buffer'. Holds up to two pushed back characters, slot 0
+   * is read first. A value of -1 marks an empty slot.
+   */
+  private final int[] lookahead;
+
+  /**
+   * Creates a scanner that reads its characters from the specified reader.
+   *
+   * @param r the input source
+   */
+  CSSScanner(Reader r)
+  {
+    lookahead = new int[2];
+    lookahead[0] = -1;
+    lookahead[1] = -1;
+    parseBuffer = new char[2048];
+    in = r;
+  }
+
+  /**
+   * Fetches the next token. The actual character data is in the parseBuffer
+   * afterwards with the tokenStart at index 0 and the tokenEnd field
+   * pointing to the end of the token.
+   *
+   * @return the next token, or {@link #EOF} when the input is exhausted
+   *
+   * @throws IOException if the input source fails or if the input can't be
+   *         tokenized (the latter is reported as CSSLexicalException)
+   */
+  int nextToken()
+    throws IOException
+  {
+    tokenEnd = 0;
+    int next = read();
+    if (next == -1)
+      return EOF;
+
+    int token;
+    switch (next)
+      {
+      case ';':
+        append(next);
+        token = SEMICOLON;
+        break;
+      case '{':
+        append(next);
+        token = CURLY_LEFT;
+        break;
+      case '}':
+        append(next);
+        token = CURLY_RIGHT;
+        break;
+      case '(':
+        append(next);
+        token = PAREN_LEFT;
+        break;
+      case ')':
+        append(next);
+        token = PAREN_RIGHT;
+        break;
+      case '[':
+        append(next);
+        token = BRACE_LEFT;
+        break;
+      case ']':
+        append(next);
+        token = BRACE_RIGHT;
+        break;
+      case '@':
+        append(next);
+        readIdent();
+        token = ATKEYWORD;
+        break;
+      case '#':
+        append(next);
+        readName();
+        token = HASH;
+        break;
+      case '\'':
+      case '"':
+        unread(next);
+        readString();
+        token = STRING;
+        break;
+      case ' ':
+      case '\t':
+      case '\r':
+      case '\n':
+      case '\f':
+        unread(next);
+        readWhitespace();
+        token = S;
+        break;
+      // FIXME: Detecting an URI involves several characters lookahead,
+      // so 'u' is not special cased here yet.
+      case '<':
+        token = scanCDO();
+        break;
+      case '/':
+        token = scanSlash();
+        break;
+      case '~':
+        token = scanMatchOperator(next, INCLUDES,
+                                  "expected INCLUDES token");
+        break;
+      case '|':
+        token = scanMatchOperator(next, DASHMATCH,
+                                  "expected DASHMATCH token");
+        break;
+      case '-':
+        token = scanMinus();
+        break;
+      case '.':
+        token = scanDot();
+        break;
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+        unread(next);
+        token = scanNumeric();
+        break;
+      default:
+        // Handle IDENT that don't begin with '-'.
+        if (isNameStart(next) || next == '\\')
+          {
+            unread(next);
+            token = scanIdentOrFunction();
+          }
+        else
+          {
+            append(next);
+            token = DELIM;
+          }
+        break;
+      }
+    return token;
+  }
+
+  /**
+   * Returns the characters of the token that was read last.
+   *
+   * @return the text of the current token
+   */
+  String currentTokenString()
+  {
+    return new String(parseBuffer, 0, tokenEnd);
+  }
+
+  /**
+   * Reads one character from the input stream or from the lookahead
+   * buffer, if it contains one character.
+   *
+   * @return the next character, or -1 at the end of the input
+   *
+   * @throws IOException if problems occur on the input source
+   */
+  private int read()
+    throws IOException
+  {
+    int ret;
+    if (lookahead[0] != -1)
+      {
+        ret = lookahead[0];
+        lookahead[0] = -1;
+      }
+    else if (lookahead[1] != -1)
+      {
+        ret = lookahead[1];
+        lookahead[1] = -1;
+      }
+    else
+      {
+        ret = in.read();
+      }
+    return ret;
+  }
+
+  /**
+   * Pushes a character back so that it is returned by the next call to
+   * {@link #read()}. Characters come back in reverse order of pushing.
+   * Pushing back -1 (end of input) does nothing; the following read() then
+   * simply asks the input source again.
+   *
+   * @param ch the character to push back
+   */
+  private void unread(int ch)
+  {
+    if (ch == -1)
+      return;
+    if (lookahead[0] != -1)
+      {
+        // Slot 0 is taken, move its content behind the new character.
+        if (lookahead[1] != -1)
+          throw new IllegalStateException("lookahead buffer overflow");
+        lookahead[1] = lookahead[0];
+      }
+    lookahead[0] = ch;
+  }
+
+  /**
+   * Appends a character to the current token, enlarging the parse buffer
+   * when it is full.
+   *
+   * @param ch the character to append
+   */
+  private void append(int ch)
+  {
+    if (tokenEnd == parseBuffer.length)
+      {
+        char[] grown = new char[parseBuffer.length * 2];
+        System.arraycopy(parseBuffer, 0, grown, 0, tokenEnd);
+        parseBuffer = grown;
+      }
+    parseBuffer[tokenEnd] = (char) ch;
+    tokenEnd++;
+  }
+
+  /**
+   * Checks for a character that may start a name: '_', a-z, A-Z or
+   * non-ASCII. The spec writes the ASCII limit as octal \177, which is 127.
+   * An escape may start a name too, callers test for '\\' separately.
+   */
+  private static boolean isNameStart(int ch)
+  {
+    return ch == '_' || (ch >= 'a' && ch <= 'z')
+           || (ch >= 'A' && ch <= 'Z') || ch > 127;
+  }
+
+  /**
+   * Checks for a character that may continue a name: a name start
+   * character, a digit or '-'.
+   */
+  private static boolean isNameChar(int ch)
+  {
+    return isNameStart(ch) || ch == '-' || (ch >= '0' && ch <= '9');
+  }
+
+  /**
+   * Checks for a hexadecimal digit in either case.
+   */
+  private static boolean isHexDigit(int ch)
+  {
+    return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')
+           || (ch >= 'A' && ch <= 'F');
+  }
+
+  /**
+   * Checks for one of the CSS whitespace characters.
+   */
+  private static boolean isWhitespace(int ch)
+  {
+    return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'
+           || ch == '\f';
+  }
+
+  /**
+   * Scans the rest of a CDO token after a '<' has been consumed.
+   *
+   * @return CDO
+   *
+   * @throws IOException if the input source fails or if the '<' is not
+   *         followed by '!--'
+   */
+  private int scanCDO()
+    throws IOException
+  {
+    if (read() != '!' || read() != '-' || read() != '-')
+      throw new CSSLexicalException("expected CDO token");
+    append('<');
+    append('!');
+    append('-');
+    append('-');
+    return CDO;
+  }
+
+  /**
+   * Scans a two character match operator ('~=' or '|=') after its first
+   * character has been consumed.
+   *
+   * @param first the first character of the operator
+   * @param type the token to return when the operator is complete
+   * @param error the message of the exception when '=' is missing
+   *
+   * @return the value of <code>type</code>
+   *
+   * @throws IOException if the input source fails or if '=' is missing
+   */
+  private int scanMatchOperator(int first, int type, String error)
+    throws IOException
+  {
+    if (read() != '=')
+      throw new CSSLexicalException(error);
+    append(first);
+    append('=');
+    return type;
+  }
+
+  /**
+   * Scans what follows a consumed '/': a comment when the next character is
+   * '*', otherwise the '/' on its own is a DELIM (as in 12px/14px).
+   *
+   * @return COMMENT or DELIM
+   *
+   * @throws IOException if the input source fails or the comment is not
+   *         terminated
+   */
+  private int scanSlash()
+    throws IOException
+  {
+    int second = read();
+    unread(second);
+    if (second == '*')
+      {
+        unread('/');
+        readComment();
+        return COMMENT;
+      }
+    append('/');
+    return DELIM;
+  }
+
+  /**
+   * Scans what follows a consumed '-': the CDC token, an identifier or
+   * function that starts with '-', or the '-' on its own as a DELIM (for
+   * instance in front of a number).
+   *
+   * @return CDC, IDENT, FUNCTION or DELIM
+   *
+   * @throws IOException if the input source fails or if '--' is not
+   *         followed by '>'
+   */
+  private int scanMinus()
+    throws IOException
+  {
+    int second = read();
+    if (second == '-')
+      {
+        if (read() != '>')
+          throw new CSSLexicalException("expected CDC token");
+        append('-');
+        append('-');
+        append('>');
+        return CDC;
+      }
+    if (isNameStart(second) || second == '\\')
+      {
+        // Hand both characters back, readIdent() deals with the '-'.
+        unread(second);
+        unread('-');
+        return scanIdentOrFunction();
+      }
+    unread(second);
+    append('-');
+    return DELIM;
+  }
+
+  /**
+   * Scans what follows a consumed '.': a number like .5 when a digit
+   * follows, otherwise the '.' on its own is a DELIM.
+   *
+   * @return NUMBER, PERCENTAGE, DIMENSION or DELIM
+   *
+   * @throws IOException if the input source fails or the number is invalid
+   */
+  private int scanDot()
+    throws IOException
+  {
+    int second = read();
+    unread(second);
+    if (second >= '0' && second <= '9')
+      {
+        unread('.');
+        return scanNumeric();
+      }
+    append('.');
+    return DELIM;
+  }
+
+  /**
+   * Scans a number and classifies it by what follows: '%' makes it a
+   * PERCENTAGE, an identifier makes it a DIMENSION, anything else leaves a
+   * plain NUMBER.
+   *
+   * @return NUMBER, PERCENTAGE or DIMENSION
+   *
+   * @throws IOException if the input source fails or the number or its
+   *         unit can't be read
+   */
+  private int scanNumeric()
+    throws IOException
+  {
+    readNum();
+    int ch = read();
+    if (ch == '%')
+      {
+        append(ch);
+        return PERCENTAGE;
+      }
+    unread(ch);
+    if (isNameStart(ch) || ch == '\\')
+      {
+        readIdent();
+        return DIMENSION;
+      }
+    return NUMBER;
+  }
+
+  /**
+   * Scans an identifier. When it is directly followed by '(' the
+   * parenthesis becomes part of the token, which is a FUNCTION then.
+   *
+   * @return IDENT or FUNCTION
+   *
+   * @throws IOException if the input source fails or the identifier can't
+   *         be read
+   */
+  private int scanIdentOrFunction()
+    throws IOException
+  {
+    readIdent();
+    int ch = read();
+    if (ch == '(')
+      {
+        append(ch);
+        return FUNCTION;
+      }
+    unread(ch);
+    return IDENT;
+  }
+
+  /**
+   * Reads an identifier and appends it to the current token.
+   *
+   * @throws IOException if something goes wrong in the input source or if
+   *         the lexical analyser fails to read an identifier
+   */
+  private void readIdent()
+    throws IOException
+  {
+    int ch = read();
+    // Read possibly leading '-'.
+    if (ch == '-')
+      {
+        append(ch);
+        ch = read();
+      }
+    // What follows must be '_' or a-z or A-Z or nonascii or an escape.
+    if (isNameStart(ch))
+      append(ch);
+    else if (ch == '\\')
+      {
+        unread(ch);
+        readEscape();
+      }
+    else
+      throw new CSSLexicalException("First character of identifier incorrect");
+
+    // Read any number of name characters and escapes.
+    ch = read();
+    while (isNameChar(ch) || ch == '\\')
+      {
+        if (ch == '\\')
+          {
+            unread(ch);
+            readEscape();
+          }
+        else
+          append(ch);
+        ch = read();
+      }
+
+    // Push back last read character since it doesn't belong to the IDENT.
+    unread(ch);
+  }
+
+  /**
+   * Reads an escape and appends it, as written in the input, to the current
+   * token. An escape is a backslash followed either by one to six hex
+   * digits and one optional whitespace (where \r\n counts as one), or by
+   * any single character except a newline.
+   *
+   * @throws IOException if something goes wrong in the input source or if
+   *         the lexical analyser fails to read an escape
+   */
+  private void readEscape()
+    throws IOException
+  {
+    int ch = read();
+    if (ch != '\\')
+      throw new CSSLexicalException("Escape must start with '\\'");
+    append(ch);
+
+    ch = read();
+    if (isHexDigit(ch))
+      {
+        // Read unicode escape, the digit just read is the first of at
+        // most six.
+        int hexcount = 0;
+        while (isHexDigit(ch) && hexcount < 6)
+          {
+            append(ch);
+            hexcount++;
+            ch = read();
+          }
+        // Now we can have a \r\n or any whitespace character following.
+        if (ch == '\r')
+          {
+            append(ch);
+            ch = read();
+            if (ch == '\n')
+              append(ch);
+            else
+              unread(ch);
+          }
+        else if (ch == ' ' || ch == '\n' || ch == '\f' || ch == '\t')
+          append(ch);
+        else
+          unread(ch);
+      }
+    else if (ch != -1 && ch != '\n' && ch != '\r' && ch != '\f')
+      append(ch);
+    else
+      throw new CSSLexicalException("Can't read escape");
+  }
+
+  /**
+   * Reads a name (as used after '#') and appends it to the current token.
+   * Unlike an identifier a name may start with a digit or '-'.
+   *
+   * @throws IOException if something goes wrong in the input source or if
+   *         no name character follows
+   */
+  private void readName()
+    throws IOException
+  {
+    int ch = read();
+    if (! isNameChar(ch) && ch != '\\')
+      throw new CSSLexicalException("Invalid name");
+
+    // Read one or more name characters and escapes.
+    while (isNameChar(ch) || ch == '\\')
+      {
+        if (ch == '\\')
+          {
+            unread(ch);
+            readEscape();
+          }
+        else
+          append(ch);
+        ch = read();
+      }
+
+    // Push back last read character since it doesn't belong to the name.
+    unread(ch);
+  }
+
+  /**
+   * Reads in a string including its quotes. The opening quote must be the
+   * next character of the input.
+   *
+   * @throws IOException if something goes wrong in the input source, if
+   *         the string contains an unescaped newline or if it is not
+   *         terminated
+   */
+  private void readString()
+    throws IOException
+  {
+    int quote = read();
+    if (quote != '\'' && quote != '"')
+      throw new CSSLexicalException("Invalid string");
+    append(quote);
+
+    // Read any number of chars until we hit the closing quote.
+    // Reject newlines, except if prefixed with \.
+    int ch = read();
+    while (ch != -1 && ch != quote)
+      {
+        if (ch == '\\')
+          {
+            int ch2 = read();
+            if (ch2 == '\n' || ch2 == '\r' || ch2 == '\f')
+              {
+                // Escaped newline, the string goes on in the next line.
+                append(ch);
+                append(ch2);
+                if (ch2 == '\r')
+                  {
+                    // \r\n is one newline.
+                    int ch3 = read();
+                    if (ch3 == '\n')
+                      append(ch3);
+                    else
+                      unread(ch3);
+                  }
+              }
+            else
+              {
+                // Try to parse an escape.
+                unread(ch2);
+                unread(ch);
+                readEscape();
+              }
+          }
+        else if (ch == '\n' || ch == '\r' || ch == '\f')
+          throw new CSSLexicalException("Invalid string");
+        else
+          append(ch);
+
+        ch = read();
+      }
+
+    if (ch == -1)
+      throw new CSSLexicalException("Unterminated string");
+    // Push the closing quote on the buffer.
+    append(ch);
+  }
+
+  /**
+   * Reads a chunk of whitespace.
+   *
+   * @throws IOException if something goes wrong in the input source
+   */
+  private void readWhitespace()
+    throws IOException
+  {
+    int ch = read();
+    while (isWhitespace(ch))
+      {
+        append(ch);
+        ch = read();
+      }
+    // Push back last character read.
+    unread(ch);
+  }
+
+  private void readURI()
+    throws IOException
+  {
+    // FIXME: Implement.
+  }
+
+  /**
+   * Reads a comment block including its delimiters. The next two
+   * characters of the input must be the opening delimiter, nextToken()
+   * checks that before calling.
+   *
+   * @throws IOException if something goes wrong in the input source or if
+   *         the input ends before the comment is closed
+   */
+  private void readComment()
+    throws IOException
+  {
+    // The opening / and *.
+    append(read());
+    append(read());
+
+    // The opening '*' must not count as the start of the closing
+    // delimiter, so there is no previous character to begin with.
+    int lastChar = -1;
+    int ch = read();
+    while (ch != -1)
+      {
+        append(ch);
+        if (lastChar == '*' && ch == '/')
+          return;
+        lastChar = ch;
+        ch = read();
+      }
+    throw new CSSLexicalException("Unterminated comment");
+  }
+
+  /**
+   * Reads a number: digits with at most one dot. The number may start with
+   * the dot but must not end with it.
+   *
+   * @throws IOException if something goes wrong in the input source or if
+   *         no valid number can be read
+   */
+  private void readNum()
+    throws IOException
+  {
+    // First char must be number or .
+    int ch = read();
+    if (! ((ch >= '0' && ch <= '9') || ch == '.'))
+      throw new CSSLexicalException("Invalid number");
+
+    // Read in any number of digits, and maybe one dot.
+    boolean hadDot = false;
+    while ((ch >= '0' && ch <= '9') || (ch == '.' && ! hadDot))
+      {
+        if (ch == '.')
+          hadDot = true;
+        append(ch);
+        ch = read();
+      }
+
+    // Check if we haven't accidentally finished with a dot.
+    if (parseBuffer[tokenEnd - 1] == '.')
+      throw new CSSLexicalException("Invalid number");
+
+    // Push back last character read.
+    unread(ch);
+  }
+
+  /**
+   * For testing, we read in the default.css in javax/swing/text/html
+   * and print every token.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args)
+  {
+    String name = "/javax/swing/text/html/default.css";
+    InputStream in = CSSScanner.class.getResourceAsStream(name);
+    if (in == null)
+      {
+        System.err.println("resource not found: " + name);
+        return;
+      }
+    Reader r = new InputStreamReader(new BufferedInputStream(in));
+    try
+      {
+        CSSScanner s = new CSSScanner(r);
+        int token;
+        do
+          {
+            token = s.nextToken();
+            System.out.println("token: " + token + ": "
+                               + s.currentTokenString());
+          } while (token != EOF);
+      }
+    catch (IOException ex)
+      {
+        ex.printStackTrace();
+      }
+    finally
+      {
+        try
+          {
+            r.close();
+          }
+        catch (IOException ignored)
+          {
+            // Nothing sensible left to do when closing fails.
+          }
+      }
+  }
+}