big tree layout cleanup.

Dodji.
author: Dodji Seketeli <dodji@src.gnome.org> 2003-04-12 16:50:32 +0000
committer: Dodji Seketeli <dodji@src.gnome.org> 2003-04-12 16:50:32 +0000
commit: 17114030e0e37f93682c903dd818da1f0e2e6f6b (patch)
tree: f52286082af39c649dfbc86015ab080ac2a55abd /src/cr-utils.c
parent: 4f5560ef67d35121d1087aee9b5e34dece012d8a (diff)
download: libcroco-17114030e0e37f93682c903dd818da1f0e2e6f6b.tar.gz
1 files changed, 0 insertions, 1449 deletions
diff --git a/src/cr-utils.c b/src/cr-utils.c
deleted file mode 100644
index 78bbe50..0000000
--- a/src/cr-utils.c
+++ /dev/null
@@ -1,1449 +0,0 @@
-/* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
-
-/*
- * This file is part of The Croco Library
- *
- * Copyright (C) 2002-2003 Dodji Seketeli <dodji@seketeli.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2.1 of the GNU Lesser General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- */
-
-/*
- *$Id$
- */
-
-#include "cr-utils.h"
-
-/**
- *@file:
- *Some misc utility functions used
- *in the libcroco.
- *Note that throughout this file I will
- *refer to the CSS SPECIFICATIONS DOCUMENTATION
- *written by the w3c guys. You can find that document
- *at http://www.w3.org/TR/REC-CSS2/ .
- */
-
-
-/****************************
- *Encoding transformations and
- *encoding helpers
- ****************************/
-
-/*
- *Here is the correspondence between the ucs-4 character codes
- *and their matching utf-8 encoding patterns as described by RFC 2279:
- *
- *UCS-4 range (hex.)    UTF-8 octet sequence (binary)
- *------------------    -----------------------------
- *0000 0000-0000 007F   0xxxxxxx
- *0000 0080-0000 07FF   110xxxxx 10xxxxxx
- *0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
- *0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- *0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- *0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
- */
-
-
-
-/**
- *Given an utf8 string buffer, calculates
- *the length this string would have if it was encoded
- *in ucs4, i.e. the number of characters it encodes.
- *Only the byte patterns are checked (lead byte and
- *continuation bytes); the decoded values are not validated.
- *@param a_in_start a pointer to the beginning of
- *the input utf8 string.
- *@param a_in_end a pointer to the end of the input
- *utf8 string (points to the last byte of the buffer).
- *@param a_len out parameter. The calculated length.
- *Left at zero when an encoding error is detected.
- *@return CR_OK upon successful completion,
- *CR_BAD_PARAM_ERROR if a parameter is NULL,
- *CR_ENCODING_ERROR if a byte sequence is malformed or
- *runs past a_in_end.
- */
-enum CRStatus
-cr_utils_utf8_str_len_as_ucs4 (guchar *a_in_start,
-                               guchar *a_in_end,
-                               gulong *a_len)
-{
-        guchar *byte_ptr = NULL ;
-        gulong len = 0 ;
-
-        g_return_val_if_fail (a_in_start && a_in_end && a_len,
-                              CR_BAD_PARAM_ERROR) ;
-        *a_len = 0 ;
-
-        for (byte_ptr = a_in_start ;
-             byte_ptr <= a_in_end ;
-             byte_ptr++)
-        {
-                gint nb_bytes_2_decode = 0 ;
-
-                if (*byte_ptr <= 0x7F)
-                {
-                        /*1 byte: 0xxx xxxx*/
-                        nb_bytes_2_decode = 1 ;
-                }
-                else if ((*byte_ptr & 0xE0) == 0xC0)
-                {
-                        /*2 bytes: 110x xxxx  10xx xxxx*/
-                        nb_bytes_2_decode = 2 ;
-                }
-                else if ((*byte_ptr & 0xF0) == 0xE0)
-                {
-                        /*3 bytes: 1110 xxxx  10xx xxxx  10xx xxxx*/
-                        nb_bytes_2_decode = 3 ;
-                }
-                else if ((*byte_ptr & 0xF8) == 0xF0)
-                {
-                        /*4 bytes: 1111 0xxx followed by 3 10xx xxxx*/
-                        nb_bytes_2_decode = 4 ;
-                }
-                else if ((*byte_ptr & 0xFC) == 0xF8)
-                {
-                        /*5 bytes: 1111 10xx followed by 4 10xx xxxx*/
-                        nb_bytes_2_decode = 5 ;
-                }
-                else if ((*byte_ptr & 0xFE) == 0xFC)
-                {
-                        /*6 bytes: 1111 110x followed by 5 10xx xxxx*/
-                        nb_bytes_2_decode = 6 ;
-                }
-                else
-                {
-                        /*BAD ENCODING*/
-                        return CR_ENCODING_ERROR ;
-                }
-
-                /*
-                 *The whole sequence must lie inside the buffer,
-                 *otherwise the loop below would read past a_in_end.
-                 */
-                if (nb_bytes_2_decode - 1 > a_in_end - byte_ptr)
-                {
-                        return CR_ENCODING_ERROR ;
-                }
-
-                /*
-                 *Check the remaining byte(s) (if any)
-                 *of the current character.
-                 */
-                for ( ;
-                      nb_bytes_2_decode > 1 ;
-                      nb_bytes_2_decode --)
-                {
-                        byte_ptr ++ ;
-
-                        /*byte pattern must be: 10xx xxxx*/
-                        if ((*byte_ptr & 0xC0) != 0x80)
-                        {
-                                return CR_ENCODING_ERROR ;
-                        }
-                }
-
-                len ++ ;
-        }
-
-        *a_len = len ;
-
-        return CR_OK ;
-}
-
-
-
-/**
- *Computes the number of bytes an ucs4 string
- *would occupy once encoded in utf-8.
- *Characters greater than 0x7FFFFFFF cannot be encoded
- *and do not contribute to the computed length.
- *@param a_in_start a pointer to the first character of the input
- *buffer.
- *@param a_in_end a pointer to the last character of the input buffer.
- *@param a_len out parameter. The computed length, in bytes.
- *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
- *one of the parameters is NULL.
- */
-enum CRStatus
-cr_utils_ucs4_str_len_as_utf8 (guint32 *a_in_start, guint32 *a_in_end,
-                               gulong *a_len)
-{
-        guint32 *cur = NULL ;
-        gint nb_bytes = 0 ;
-
-        g_return_val_if_fail (a_in_start && a_in_end && a_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        cur = a_in_start ;
-        while (cur <= a_in_end)
-        {
-                guint32 code = *cur ;
-
-                /*walk the utf-8 ranges from the widest to the narrowest*/
-                if (code > 0x7FFFFFFF)
-                {
-                        /*not encodable: counts for nothing*/
-                }
-                else if (code > 0x3FFFFFF)
-                {
-                        nb_bytes += 6 ;
-                }
-                else if (code > 0x1FFFFF)
-                {
-                        nb_bytes += 5 ;
-                }
-                else if (code > 0xFFFF)
-                {
-                        nb_bytes += 4 ;
-                }
-                else if (code > 0x7FF)
-                {
-                        nb_bytes += 3 ;
-                }
-                else if (code > 0x7F)
-                {
-                        nb_bytes += 2 ;
-                }
-                else
-                {
-                        nb_bytes += 1 ;
-                }
-
-                cur ++ ;
-        }
-
-        *a_len = nb_bytes ;
-        return CR_OK ;
-}
-
-
-/**
- *Computes the number of bytes an ucs1 string
- *would occupy once encoded in utf-8.
- *@param a_in_start a pointer to the first byte of the input
- *buffer.
- *@param a_in_end a pointer to the last byte of the input buffer.
- *@param a_len out parameter. The computed length, in bytes.
- *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
- *one of the parameters is NULL.
- */
-enum CRStatus
-cr_utils_ucs1_str_len_as_utf8 (guchar *a_in_start, guchar *a_in_end,
-                               gulong *a_len)
-{
-        guchar *cur = NULL ;
-        gint nb_bytes = 0 ;
-
-        g_return_val_if_fail (a_in_start && a_in_end && a_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        cur = a_in_start ;
-        while (cur <= a_in_end)
-        {
-                /*
-                 *a byte with its high bit set needs 2 utf-8 bytes,
-                 *any other byte is copied as is.
-                 */
-                nb_bytes += (*cur & 0x80) ? 2 : 1 ;
-                cur ++ ;
-        }
-
-        *a_len = nb_bytes ;
-        return CR_OK ;
-}
-
-/**
- *Converts an utf8 buffer into an ucs4 buffer.
- *
- *@param a_in the input utf8 buffer to convert.
- *@param a_in_len in/out parameter. The size of the
- *input buffer to convert. After return, this parameter contains
- *the number of bytes of the sequences that have been
- *successfully converted.
- *@param a_out the output converted ucs4 buffer. Must be allocated by
- *the caller.
- *@param a_out_len in/out parameter. The size (in characters) of the
- *output buffer. If this size is actually smaller than the real needed
- *size, the function just converts what it can and returns a success
- *status. After return, this param contains the actual number of
- *characters decoded.
- *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
- *a parameter is NULL, CR_ENCODING_ERROR if a sequence is malformed,
- *truncated or encodes a forbidden character. In the latter case the
- *characters decoded before the faulty sequence are kept in a_out.
- */
-enum CRStatus
-cr_utils_utf8_to_ucs4 (guchar * a_in, gulong *a_in_len,
-                       guint32 *a_out, gulong *a_out_len)
-{
-        gulong in_len = 0, out_len = 0, in_index = 0, out_index = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len
-                              && a_out && a_out_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        in_len = *a_in_len ;
-        out_len = *a_out_len ;
-
-        /*
-         *in_index always points to the lead byte of the next
-         *sequence: it is only advanced once a whole character
-         *has been decoded and stored.
-         */
-        while ((in_index < in_len) && (out_index < out_len))
-        {
-                gulong nb_bytes_2_decode = 0, i = 0 ;
-
-                /*to store the decoded unicode char*/
-                guint32 c = 0 ;
-
-                if (a_in[in_index] <= 0x7F)
-                {
-                        /*1 byte: 0xxx xxxx*/
-                        c = a_in[in_index] ;
-                        nb_bytes_2_decode = 1 ;
-                }
-                else if ((a_in[in_index] & 0xE0) == 0xC0)
-                {
-                        /*2 bytes: 110x xxxx  10xx xxxx*/
-                        c = a_in[in_index] & 0x1F ;
-                        nb_bytes_2_decode = 2 ;
-                }
-                else if ((a_in[in_index] & 0xF0) == 0xE0)
-                {
-                        /*3 bytes: 1110 xxxx  10xx xxxx  10xx xxxx*/
-                        c = a_in[in_index] & 0x0F ;
-                        nb_bytes_2_decode = 3 ;
-                }
-                else if ((a_in[in_index] & 0xF8) == 0xF0)
-                {
-                        /*4 bytes: 1111 0xxx followed by 3 10xx xxxx*/
-                        c = a_in[in_index] & 0x7 ;
-                        nb_bytes_2_decode = 4 ;
-                }
-                else if ((a_in[in_index] & 0xFC) == 0xF8)
-                {
-                        /*5 bytes: 1111 10xx followed by 4 10xx xxxx*/
-                        c = a_in[in_index] & 3 ;
-                        nb_bytes_2_decode = 5 ;
-                }
-                else if ((a_in[in_index] & 0xFE) == 0xFC)
-                {
-                        /*6 bytes: 1111 110x followed by 5 10xx xxxx*/
-                        c = a_in[in_index] & 1 ;
-                        nb_bytes_2_decode = 6 ;
-                }
-                else
-                {
-                        /*BAD ENCODING*/
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                /*never read past the end of the input buffer*/
-                if (nb_bytes_2_decode > in_len - in_index)
-                {
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                /*
-                 *Go and decode the remaining byte(s)
-                 *(if any) to get the current character.
-                 */
-                for (i = 1 ; i < nb_bytes_2_decode ; i++)
-                {
-                        /*byte pattern must be: 10xx xxxx*/
-                        if ((a_in[in_index + i] & 0xC0) != 0x80)
-                        {
-                                status = CR_ENCODING_ERROR ;
-                                goto end ;
-                        }
-
-                        c = (c << 6) | (a_in[in_index + i] & 0x3F) ;
-                }
-
-                /************************
-                 *Some security tests
-                 ***********************/
-
-                /*
-                 *c must be a real, non zero character, not greater
-                 *than the max ucs4 char value and must not belong
-                 *to the UTF16 surrogate range.
-                 */
-                if (c == 0
-                    || c == 0xFFFF || c == 0xFFFE
-                    || c > 0x10FFFF
-                    || (c >= 0xD800 && c <= 0xDFFF))
-                {
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                a_out[out_index] = c ;
-                out_index ++ ;
-                in_index += nb_bytes_2_decode ;
-        }
-
- end:
-        *a_out_len = out_index ;
-        *a_in_len = in_index ;
-
-        return status ;
-}
-
-
-/**
- *Reads a character from an utf8 buffer.
- *Actually decodes the next character code (unicode character code)
- *and returns it.
- *@param a_in the starting address of the utf8 buffer.
- *@param a_in_len the length of the utf8 buffer.
- *@param a_out output parameter. The resulting read char.
- *Only written when a character has been successfully decoded.
- *@param a_consumed output parameter. The length (in bytes) of the
- *sequence announced by the first byte of the buffer; zero if the
- *buffer is empty or if that first byte is not a valid lead byte.
- *@return CR_OK upon successful completion (or if a_in_len is zero,
- *in which case nothing is decoded), CR_BAD_PARAM_ERROR if a pointer
- *parameter is NULL, CR_END_OF_INPUT_ERROR if the buffer ends before
- *the end of the sequence, CR_ENCODING_ERROR if the sequence is
- *malformed or encodes a forbidden character.
- */
-enum CRStatus
-cr_utils_read_char_from_utf8_buf (guchar * a_in, gulong a_in_len,
-                                  guint32 *a_out, gulong *a_consumed)
-{
-        gulong in_index = 0, nb_bytes_2_decode = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        /*to store the final decoded unicode char*/
-        guint32 c = 0 ;
-
-        g_return_val_if_fail (a_in && a_out && a_consumed,
-                              CR_BAD_PARAM_ERROR) ;
-
-        if (a_in_len < 1)
-        {
-                /*nothing to read: report zero byte consumed*/
-                status = CR_OK ;
-                goto end ;
-        }
-
-        if (*a_in <= 0x7F)
-        {
-                /*1 byte: 0xxx xxxx*/
-                c = *a_in ;
-                nb_bytes_2_decode = 1 ;
-        }
-        else if ((*a_in & 0xE0) == 0xC0)
-        {
-                /*2 bytes: 110x xxxx  10xx xxxx*/
-                c = *a_in & 0x1F ;
-                nb_bytes_2_decode = 2 ;
-        }
-        else if ((*a_in & 0xF0) == 0xE0)
-        {
-                /*3 bytes: 1110 xxxx  10xx xxxx  10xx xxxx*/
-                c = *a_in & 0x0F ;
-                nb_bytes_2_decode = 3 ;
-        }
-        else if ((*a_in & 0xF8) == 0xF0)
-        {
-                /*4 bytes: 1111 0xxx followed by 3 10xx xxxx*/
-                c = *a_in & 0x7 ;
-                nb_bytes_2_decode = 4 ;
-        }
-        else if ((*a_in & 0xFC) == 0xF8)
-        {
-                /*5 bytes: 1111 10xx followed by 4 10xx xxxx*/
-                c = *a_in & 3 ;
-                nb_bytes_2_decode = 5 ;
-        }
-        else if ((*a_in & 0xFE) == 0xFC)
-        {
-                /*6 bytes: 1111 110x followed by 5 10xx xxxx*/
-                c = *a_in & 1 ;
-                nb_bytes_2_decode = 6 ;
-        }
-        else
-        {
-                /*BAD ENCODING*/
-                status = CR_ENCODING_ERROR ;
-                goto end ;
-        }
-
-        if (nb_bytes_2_decode > a_in_len)
-        {
-                status = CR_END_OF_INPUT_ERROR ;
-                goto end ;
-        }
-
-        /*
-         *Go and decode the remaining byte(s)
-         *(if any) to get the current character.
-         */
-        for (in_index = 1 ;
-             in_index < nb_bytes_2_decode ;
-             in_index ++)
-        {
-                /*byte pattern must be: 10xx xxxx*/
-                if ((a_in[in_index] & 0xC0) != 0x80)
-                {
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                c = (c << 6) | (a_in[in_index] & 0x3F) ;
-        }
-
-        /************************
-         *Some security tests
-         ***********************/
-
-        /*
-         *c must be a real, non zero character, not greater
-         *than the max ucs4 char value and must not belong
-         *to the UTF16 surrogate range.
-         */
-        if (c == 0
-            || c == 0xFFFF || c == 0xFFFE
-            || c > 0x10FFFF
-            || (c >= 0xD800 && c <= 0xDFFF))
-        {
-                status = CR_ENCODING_ERROR ;
-                goto end ;
-        }
-
-        *a_out = c ;
-
- end:
-        *a_consumed = nb_bytes_2_decode ;
-
-        return status ;
-}
-
-
-/**
- *Given an utf8 string buffer, calculates
- *the length this string would have if it was encoded
- *in ucs1, i.e. the number of characters it encodes.
- *@param a_in_start a pointer to the beginning of
- *the input utf8 string.
- *@param a_in_end a pointer to the end of the input
- *utf8 string (points to the last byte of the buffer).
- *@param a_len out parameter. The calculated length.
- *Left at zero when an encoding error is detected.
- *@return CR_OK upon successful completion,
- *CR_BAD_PARAM_ERROR if a parameter is NULL,
- *CR_ENCODING_ERROR if a byte sequence is malformed, runs
- *past a_in_end or encodes a character greater than 0xFF.
- */
-enum CRStatus
-cr_utils_utf8_str_len_as_ucs1 (guchar *a_in_start,
-                               guchar *a_in_end,
-                               gulong *a_len)
-{
-        /*
-         *Note: this function can be made shorter
-         *but it considers all the cases of the utf8 encoding
-         *to ease further extensions ...
-         */
-
-        guchar *byte_ptr = NULL ;
-        gulong len = 0 ;
-
-        /*to store the final decoded unicode char*/
-        guint c = 0 ;
-
-        g_return_val_if_fail (a_in_start && a_in_end && a_len,
-                              CR_BAD_PARAM_ERROR) ;
-        *a_len = 0 ;
-
-        for (byte_ptr = a_in_start ;
-             byte_ptr <= a_in_end ;
-             byte_ptr++)
-        {
-                gint nb_bytes_2_decode = 0 ;
-
-                if (*byte_ptr <= 0x7F)
-                {
-                        /*1 byte: 0xxx xxxx*/
-                        c = *byte_ptr ;
-                        nb_bytes_2_decode = 1 ;
-                }
-                else if ((*byte_ptr & 0xE0) == 0xC0)
-                {
-                        /*2 bytes: 110x xxxx  10xx xxxx*/
-                        c = *byte_ptr & 0x1F ;
-                        nb_bytes_2_decode = 2 ;
-                }
-                else if ((*byte_ptr & 0xF0) == 0xE0)
-                {
-                        /*3 bytes: 1110 xxxx  10xx xxxx  10xx xxxx*/
-                        c = *byte_ptr & 0x0F ;
-                        nb_bytes_2_decode = 3 ;
-                }
-                else if ((*byte_ptr & 0xF8) == 0xF0)
-                {
-                        /*4 bytes: 1111 0xxx followed by 3 10xx xxxx*/
-                        c = *byte_ptr & 0x7 ;
-                        nb_bytes_2_decode = 4 ;
-                }
-                else if ((*byte_ptr & 0xFC) == 0xF8)
-                {
-                        /*5 bytes: 1111 10xx followed by 4 10xx xxxx*/
-                        c = *byte_ptr & 3 ;
-                        nb_bytes_2_decode = 5 ;
-                }
-                else if ((*byte_ptr & 0xFE) == 0xFC)
-                {
-                        /*6 bytes: 1111 110x followed by 5 10xx xxxx*/
-                        c = *byte_ptr & 1 ;
-                        nb_bytes_2_decode = 6 ;
-                }
-                else
-                {
-                        /*BAD ENCODING*/
-                        return CR_ENCODING_ERROR ;
-                }
-
-                /*
-                 *The whole sequence must lie inside the buffer,
-                 *otherwise the loop below would read past a_in_end.
-                 */
-                if (nb_bytes_2_decode - 1 > a_in_end - byte_ptr)
-                {
-                        return CR_ENCODING_ERROR ;
-                }
-
-                /*
-                 *Go and decode the remaining byte(s)
-                 *(if any) to get the current character.
-                 */
-                for ( ;
-                      nb_bytes_2_decode > 1 ;
-                      nb_bytes_2_decode --)
-                {
-                        /*decode the next byte*/
-                        byte_ptr ++ ;
-
-                        /*byte pattern must be: 10xx xxxx*/
-                        if ((*byte_ptr & 0xC0) != 0x80)
-                        {
-                                return CR_ENCODING_ERROR ;
-                        }
-
-                        c = (c << 6) | (*byte_ptr & 0x3F) ;
-                }
-
-                /*
-                 *The decoded char is now in c.
-                 *Add other conditions here to support
-                 *other char sets (ucs2, ucs3, ucs4).
-                 */
-                if (c > 0xFF)
-                {
-                        /*
-                         *the char is too long to fit
-                         *into the supposed charset len.
-                         */
-                        return CR_ENCODING_ERROR ;
-                }
-
-                len ++ ;
-        }
-
-        *a_len = len ;
-
-        return CR_OK ;
-}
-
-/**
- *Converts an utf8 string into an ucs4 string.
- *@param a_in the input string to convert.
- *@param a_in_len in/out parameter. The length of the input
- *string. After return, holds the value reported by
- *cr_utils_utf8_to_ucs4(). This can be useful to debug the
- *input stream in case of encoding error.
- *@param a_out out parameter. Points to the output string. It is allocated
- *by this function and must be freed by the caller. Set to NULL if the
- *input is empty or if its length could not be computed.
- *@param a_out_len out parameter. The length of the output string.
- *@return CR_OK upon successful completion, an error code otherwise.
- *
- */
-enum CRStatus
-cr_utils_utf8_str_to_ucs4 (guchar * a_in, gulong *a_in_len,
-                           guint32 **a_out, gulong *a_out_len)
-{
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len
-                              && a_out && a_out_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        /*
-         *An empty input would make "*a_in_len - 1"
-         *wrap around below.
-         */
-        if (*a_in_len < 1)
-        {
-                *a_out_len = 0 ;
-                *a_out = NULL ;
-                return CR_OK ;
-        }
-
-        status =
-                cr_utils_utf8_str_len_as_ucs4 (a_in,
-                                               &a_in[*a_in_len - 1],
-                                               a_out_len) ;
-
-        /*
-         *This is a runtime error, not a programming error:
-         *it must be checked even when the glib checks
-         *are compiled out.
-         */
-        if (status != CR_OK)
-        {
-                *a_out = NULL ;
-                return status ;
-        }
-
-        *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
-
-        status =
-                cr_utils_utf8_to_ucs4 (a_in, a_in_len,
-                                       *a_out, a_out_len) ;
-
-        return status ;
-}
-
-/**
- *Converts an ucs4 buffer into an utf8 buffer.
- *
- *@param a_in the input ucs4 buffer to convert.
- *@param a_in_len in/out parameter. The size of the
- *input buffer to convert. After return, this parameter contains
- *the actual number of characters consumed.
- *@param a_out the output converted utf8 buffer. Must be allocated by
- *the caller.
- *@param a_out_len in/out parameter. The size of the output buffer.
- *If this size is actually smaller than the real needed size, the function
- *just converts what it can and returns a success status. After return,
- *this param contains the actual number of bytes written in the buffer.
- *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if a
- *parameter is NULL, CR_ENCODING_ERROR if a character is greater
- *than 0x7FFFFFFF.
- */
-enum CRStatus
-cr_utils_ucs4_to_utf8 (guint32 *a_in, gulong *a_in_len,
-                       guchar *a_out, gulong *a_out_len)
-{
-        gulong in_len = 0, out_len = 0, in_index = 0, out_index = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len && a_out && a_out_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        in_len = *a_in_len ;
-        out_len = *a_out_len ;
-
-        while (in_index < in_len)
-        {
-                /*
-                 *FIXME: return whenever we encounter forbidden char values.
-                 */
-                guint32 c = a_in[in_index] ;
-                gulong nb_bytes = 0, i = 0 ;
-
-                /*the marker bits of the first byte of the sequence*/
-                guchar lead = 0 ;
-
-                if (c <= 0x7F)
-                {
-                        nb_bytes = 1 ;
-                        lead = 0 ;
-                }
-                else if (c <= 0x7FF)
-                {
-                        nb_bytes = 2 ;
-                        lead = 0xC0 ;
-                }
-                else if (c <= 0xFFFF)
-                {
-                        nb_bytes = 3 ;
-                        lead = 0xE0 ;
-                }
-                else if (c <= 0x1FFFFF)
-                {
-                        nb_bytes = 4 ;
-                        lead = 0xF0 ;
-                }
-                else if (c <= 0x3FFFFFF)
-                {
-                        nb_bytes = 5 ;
-                        lead = 0xF8 ;
-                }
-                else if (c <= 0x7FFFFFFF)
-                {
-                        nb_bytes = 6 ;
-                        lead = 0xFC ;
-                }
-                else
-                {
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                /*
-                 *output buffer too short for this character:
-                 *stop here, what has been converted so far is kept.
-                 */
-                if (nb_bytes > out_len - out_index)
-                {
-                        break ;
-                }
-
-                /*
-                 *Write the continuation bytes (10xx xxxx) from the
-                 *last one to the second one, 6 bits at a time ...
-                 */
-                for (i = nb_bytes - 1 ; i > 0 ; i--)
-                {
-                        a_out[out_index + i] = (0x80 | (c & 0x3F)) ;
-                        c >>= 6 ;
-                }
-
-                /*... the bits left in c fit into the first byte.*/
-                a_out[out_index] = (lead | c) ;
-
-                out_index += nb_bytes ;
-                in_index ++ ;
-        }
-
- end:
-        *a_in_len = in_index ;
-        *a_out_len = out_index ;
-
-        return status ;
-}
-
-
-/**
- *Converts an ucs4 string into an utf8 string.
- *@param a_in the input string to convert.
- *@param a_in_len in/out parameter. The length (in characters) of the
- *input string. After return, holds the value reported by
- *cr_utils_ucs4_to_utf8(). This can be useful to debug the input
- *string in case of encoding error.
- *@param a_out out parameter. Points to the output string. It is allocated
- *by this function and must be freed by the caller. Set to NULL if the
- *input is empty or if its length could not be computed.
- *@param a_out_len out parameter. The length (in bytes) of the output string.
- *@return CR_OK upon successful completion, an error code otherwise.
- */
-enum CRStatus
-cr_utils_ucs4_str_to_utf8 (guint32 *a_in, gulong *a_in_len,
-                           guchar **a_out, gulong *a_out_len)
-{
-        gulong out_len = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len && a_out
-                              && a_out_len, CR_BAD_PARAM_ERROR) ;
-
-        /*
-         *An empty input would make "*a_in_len - 1"
-         *wrap around below.
-         */
-        if (*a_in_len < 1)
-        {
-                *a_out_len = 0 ;
-                *a_out = NULL ;
-                return CR_OK ;
-        }
-
-        /*the end of the input is given by the input length*/
-        status =
-                cr_utils_ucs4_str_len_as_utf8 (a_in,
-                                               &a_in[*a_in_len - 1],
-                                               &out_len) ;
-
-        if (status != CR_OK)
-        {
-                *a_out = NULL ;
-                return status ;
-        }
-
-        /*the output string is allocated here, as documented*/
-        *a_out = g_malloc0 (out_len) ;
-
-        status =
-                cr_utils_ucs4_to_utf8 (a_in, a_in_len, *a_out, &out_len) ;
-
-        *a_out_len = out_len ;
-
-        return status ;
-}
-
-
-/**
- *Converts an ucs1 buffer into an utf8 buffer.
- *The caller must know the size of the resulting buffer and
- *allocate it prior to calling this function.
- *
- *@param a_in the input ucs1 buffer.
- *
- *@param a_in_len in/out parameter. The length of the input buffer.
- *After return, contains the number of bytes actually consumed.
- *
- *@param a_out out parameter. The output utf8 converted buffer.
- *
- *@param a_out_len in/out parameter. The size of the output buffer.
- *If the output buffer size is shorter than the actual needed size,
- *this function just converts what it can: a character is never
- *partially written. After return, contains the number of bytes
- *actually written.
- *
- *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
- *a parameter is NULL.
- *
- */
-enum CRStatus
-cr_utils_ucs1_to_utf8 (guchar *a_in, gulong *a_in_len,
-                       guchar *a_out, gulong *a_out_len)
-{
-        gulong out_index = 0, in_index = 0, in_len = 0, out_len = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len && a_out
-                              && a_out_len, CR_BAD_PARAM_ERROR) ;
-
-        in_len = *a_in_len ;
-        out_len = *a_out_len ;
-
-        for (in_index = 0, out_index = 0 ;
-             (in_index < in_len) && (out_index < out_len) ;
-             in_index ++)
-        {
-                /*
-                 *FIXME: return whenever we encounter forbidden char values.
-                 */
-
-                if (a_in[in_index] <= 0x7F)
-                {
-                        a_out[out_index] = a_in[in_index] ;
-                        out_index ++ ;
-                }
-                else
-                {
-                        /*
-                         *This char needs 2 bytes. If only one byte
-                         *is left in the output buffer, stop here
-                         *instead of writing past its end.
-                         */
-                        if (out_len - out_index < 2)
-                        {
-                                break ;
-                        }
-
-                        a_out[out_index] = (0xC0 | (a_in[in_index] >> 6)) ;
-                        a_out[out_index + 1] = (0x80 | (a_in[in_index] & 0x3F));
-                        out_index += 2 ;
-                }
-        }/*end for*/
-
-        *a_in_len = in_index  ;
-        *a_out_len = out_index ;
-
-        return status ;
-}
-
-
-/**
- *Converts an ucs1 string into an utf8 string.
- *@param a_in the input ucs1 string to convert.
- *@param a_in_len in/out parameter. The length of the input string.
- *It is handed over to cr_utils_ucs1_to_utf8(), which updates it.
- *@param a_out out parameter. The converted string, allocated by
- *this function with g_malloc0(). Set to NULL if the input is empty.
- *@param a_out_len out parameter. The length (in bytes) of the
- *converted string.
- *@return CR_OK upon successful completion, an error code otherwise.
- *
- */
-enum CRStatus
-cr_utils_ucs1_str_to_utf8 (guchar *a_in, gulong *a_in_len,
-                           guchar **a_out, gulong *a_out_len)
-{
-        enum CRStatus status = CR_OK ;
-        gulong utf8_len = 0 ;
-
-        g_return_val_if_fail (a_in && a_in_len && a_out
-                              && a_out_len, CR_BAD_PARAM_ERROR) ;
-
-        /*nothing to convert: hand back an empty result*/
-        if (*a_in_len < 1)
-        {
-                *a_out_len = 0 ;
-                *a_out = NULL ;
-                return CR_OK ;
-        }
-
-        /*first pass: compute the size of the utf8 string*/
-        status = cr_utils_ucs1_str_len_as_utf8 (a_in,
-                                                &a_in[*a_in_len -1],
-                                                &utf8_len) ;
-        g_return_val_if_fail (status == CR_OK, status) ;
-
-        /*second pass: allocate the result and fill it*/
-        *a_out = g_malloc0 (utf8_len) ;
-        status = cr_utils_ucs1_to_utf8 (a_in, a_in_len,
-                                        *a_out, &utf8_len) ;
-
-        *a_out_len = utf8_len ;
-
-        return status ;
-}
-
-
-/**
- *Converts an utf8 buffer into an ucs1 buffer.
- *The caller must know the size of the resulting
- *converted buffer, and allocate it prior to calling this
- *function.
- *
- *Decoding stops at the first of: end of the input, output buffer
- *full, a multi-byte sequence that is cut short by the end of the
- *input, a malformed byte sequence, or a decoded character that
- *does not fit into one byte (code point greater than 0xFF).
- *
- *@param a_in the input utf8 buffer to convert.
- *
- *@param a_in_len in/out parameter. The size of the input utf8 buffer.
- *After return, holds the index in a_in at which decoding stopped,
- *even in case of encoding error.
- *
- *@param a_out out parameter. Points to the resulting buffer.
- *Must be allocated by the caller. If a_out is shorter than
- *its required size, this function converts what it can and returns
- *a successful status.
- *
- *@param a_out_len in/out parameter. The size of the output buffer.
- *After return, holds the number of bytes written to a_out, even
- *in case of encoding error.
- *
- *@return CR_OK upon successful completion (including the cases where
- *the output buffer is too short or the input ends in the middle of a
- *multi-byte sequence), CR_BAD_PARAM_ERROR if one of the parameters is
- *NULL, CR_ENCODING_ERROR if the input is not well formed utf8 or
- *contains a character that cannot be represented in ucs1.
- */
-enum CRStatus
-cr_utils_utf8_to_ucs1 (guchar * a_in, gulong * a_in_len,
-                       guchar *a_out, gulong *a_out_len)
-{
-        gulong in_index = 0, out_index = 0, in_len = 0, out_len = 0 ;
-        enum CRStatus status = CR_OK ;
-
-        /*
-         *to store the final decoded
-         *unicode char
-         */
-        guint32 c = 0 ;
-
-        g_return_val_if_fail (a_in && a_in_len
-                              && a_out && a_out_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        if (*a_in_len < 1)
-        {
-                /*nothing to convert: report 0 bytes read and written.*/
-                status = CR_OK ;
-                goto end ;
-        }
-
-        in_len = *a_in_len ;
-        out_len = *a_out_len ;
-
-        for (in_index = 0 , out_index = 0 ;
-             (in_index < in_len) && (out_index < out_len) ;
-             in_index ++, out_index++)
-        {
-                gint nb_bytes_2_decode = 0 ;
-
-                if (a_in[in_index] <= 0x7F)
-                {
-                        /*
-                         *7 bits long char
-                         *encoded over 1 byte:
-                         * 0xxx xxxx
-                         */
-                        c = a_in[in_index] ;
-                        nb_bytes_2_decode = 1 ;
-                }
-                else if ((a_in[in_index] & 0xE0) == 0xC0)
-                {
-                        /*
-                         *up to 11 bits long char.
-                         *encoded over 2 bytes:
-                         *110x xxxx  10xx xxxx
-                         */
-                        c = a_in[in_index] & 0x1F ;
-                        nb_bytes_2_decode = 2 ;
-                }
-                else if ((a_in[in_index] & 0xF0) == 0xE0)
-                {
-                        /*
-                         *up to 16 bit long char
-                         *encoded over 3 bytes:
-                         *1110 xxxx  10xx xxxx  10xx xxxx
-                         */
-                        c = a_in[in_index] & 0x0F ;
-                        nb_bytes_2_decode = 3 ;
-                }
-                else if ((a_in[in_index] & 0xF8) == 0xF0)
-                {
-                        /*
-                         *up to 21 bits long char
-                         *encoded over 4 bytes:
-                         *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
-                         */
-                        c = a_in[in_index] & 0x7 ;
-                        nb_bytes_2_decode = 4 ;
-                }
-                else if ((a_in[in_index] & 0xFC) == 0xF8)
-                {
-                        /*
-                         *up to 26 bits long char
-                         *encoded over 5 bytes.
-                         *1111 10xx  10xx xxxx  10xx xxxx
-                         *10xx xxxx  10xx xxxx
-                         */
-                        c = a_in[in_index] & 3 ;
-                        nb_bytes_2_decode = 5 ;
-                }
-                else if ((a_in[in_index] & 0xFE) == 0xFC)
-                {
-                        /*
-                         *up to 31 bits long char
-                         *encoded over 6 bytes:
-                         *1111 110x  10xx xxxx  10xx xxxx
-                         *10xx xxxx  10xx xxxx  10xx xxxx
-                         */
-                        c = a_in[in_index] & 1 ;
-                        nb_bytes_2_decode = 6 ;
-                }
-                else
-                {
-                        /*BAD ENCODING: not a valid leading byte*/
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                /*
-                 *Go and decode the remaining byte(s)
-                 *(if any) to get the current character.
-                 *If the input ends before the sequence is complete,
-                 *stop here without error; in_index still designates
-                 *the leading byte of the unfinished sequence.
-                 */
-                if (in_index + nb_bytes_2_decode - 1 >= in_len)
-                {
-                        status = CR_OK ;
-                        goto end ;
-                }
-
-                for ( ;
-                      nb_bytes_2_decode > 1 ;
-                      nb_bytes_2_decode --)
-                {
-                        /*decode the next byte*/
-                        in_index ++ ;
-
-                        /*byte pattern must be: 10xx xxxx*/
-                        if ((a_in[in_index] & 0xC0) != 0x80)
-                        {
-                                status = CR_ENCODING_ERROR ;
-                                goto end ;
-                        }
-
-                        c = (c << 6) | (a_in[in_index] & 0x3F) ;
-                }
-
-                /*
-                 *The decoded ucs4 char is now
-                 *in c. It must fit into one byte
-                 *to be stored in the ucs1 output.
-                 */
-                if (c > 0xFF)
-                {
-                        status = CR_ENCODING_ERROR ;
-                        goto end ;
-                }
-
-                a_out[out_index] = c ;
-        }
-
- end:
-        *a_out_len = out_index ;
-        *a_in_len = in_index ;
-
-        /*
-         *Propagate the real status: returning CR_OK unconditionally
-         *would hide the encoding errors detected above.
-         */
-        return status ;
-}
-
-
-/**
- *Converts an utf8 buffer into a newly allocated
- *ucs1 buffer.
- *@param a_in the input utf8 buffer to convert.
- *@param a_in_len in/out parameter. The size of the input buffer.
- *It is handed over to cr_utils_utf8_to_ucs1(), which updates it.
- *@param a_out out parameter. The resulting converted ucs1 buffer.
- *Must be freed by the caller. Set to NULL if the input is empty
- *or if the conversion fails.
- *@param a_out_len out parameter. The length of the converted buffer.
- *Set to 0 if the input is empty or if the conversion fails.
- *@return CR_OK upon successful completion, an error code otherwise.
- *Note that out parameters are valid if and only if this function
- *returns CR_OK.
- */
-enum CRStatus
-cr_utils_utf8_str_to_ucs1 (guchar * a_in, gulong * a_in_len,
-                           guchar **a_out, gulong *a_out_len)
-{
-        enum CRStatus status = CR_OK ;
-
-        g_return_val_if_fail (a_in && a_in_len
-                              && a_out && a_out_len,
-                              CR_BAD_PARAM_ERROR) ;
-
-        if (*a_in_len < 1)
-        {
-                *a_out_len = 0 ;
-                *a_out = NULL ;
-                return CR_OK ;
-        }
-
-        /*
-         *Compute the size of the output buffer. The helper takes
-         *pointers to the first and to the last byte of the input.
-         */
-        status =
-                cr_utils_utf8_str_len_as_ucs4 (a_in, &a_in[*a_in_len - 1],
-                                               a_out_len) ;
-
-        g_return_val_if_fail (status == CR_OK, status) ;
-
-        /*
-         *NOTE(review): the buffer is sized as if it held guint32
-         *elements although the output is made of guchar. This looks
-         *like a harmless over-allocation; confirm against
-         *cr_utils_utf8_str_len_as_ucs4() before tightening it.
-         */
-        *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
-
-        status =
-                cr_utils_utf8_to_ucs1 (a_in, a_in_len,
-                                       *a_out, a_out_len) ;
-
-        if (status != CR_OK)
-        {
-                /*
-                 *Out parameters are documented as invalid on failure,
-                 *so callers will not free the buffer: release it here
-                 *instead of leaking it.
-                 */
-                g_free (*a_out) ;
-                *a_out = NULL ;
-                *a_out_len = 0 ;
-        }
-
-        return status ;
-}
-
-
-/*****************************************
- *CSS basic types identification utilities
- *****************************************/
-
-
-/**
- *Tells whether a character is a white space
- *in the sense of the css spec, chap 4.1.1:
- *
- *white-space ::= ' '| \t|\r|\n|\f
- *
- *@param a_char the character to test.
- *@return TRUE if a_char is a space, a tab, a carriage return,
- *a line feed or a form feed, FALSE otherwise.
- */
-gboolean
-cr_utils_is_white_space (guint32 a_char)
-{
-        if (a_char == ' '
-            || a_char == '\t'
-            || a_char == '\r'
-            || a_char == '\n'
-            || a_char == '\f')
-        {
-                return TRUE ;
-        }
-
-        return FALSE ;
-}
-
-/**
- *Tells whether a character is one of the characters
- *that make up a newline in the css spec, chap 4.1.1:
- *
- *nl ::= \n|\r\n|\r|\f
- *
- *Only a single character is examined, so the two-character
- *form \r\n is recognised one character at a time.
- *
- *@param a_char the character to test.
- *@return TRUE if a_char is a line feed, a carriage return
- *or a form feed, FALSE otherwise.
- */
-gboolean
-cr_utils_is_newline (guint32 a_char)
-{
-        if (a_char == '\n' || a_char == '\r' || a_char == '\f')
-        {
-                return TRUE ;
-        }
-
-        return FALSE ;
-}
-
-/**
- *Tells whether a character is an hexadecimal digit:
- *hexa_char ::= [0-9A-F]
- *
- *Only upper case letters are matched; the lower case
- *letters a-f yield FALSE.
- *
- *@param a_char the character to test.
- *@return TRUE if a_char is in [0-9A-F], FALSE otherwise.
- */
-gboolean
-cr_utils_is_hexa_char (guint32 a_char)
-{
-        gboolean is_decimal_digit = (a_char >= '0' && a_char <= '9') ;
-        gboolean is_upper_hex_letter = (a_char >= 'A' && a_char <= 'F') ;
-
-        return (is_decimal_digit || is_upper_hex_letter) ? TRUE : FALSE ;
-}
-
-/**
- *Returns true if the character is a nonascii
- *character (as defined in the css spec chap 4.1.1):
- *
- *nonascii ::= [^\0-\177]
- *
- *The bounds of that range are octal escapes: \177 is
- *0x7F (127 in decimal), the last ASCII code point.
- *
- *@param a_char the character to test.
- *@return TRUE if the character is a nonascii char
- *(i.e. greater than 0x7F), FALSE otherwise.
- */
-gboolean
-cr_utils_is_nonascii (guint32 a_char)
-{
-        /*
-         *Compare against 0x7F (octal 177), not decimal 177:
-         *otherwise code points 128..177 are wrongly taken
-         *for ASCII.
-         */
-        if (a_char <= 0x7F)
-        {
-                return FALSE ;
-        }
-
-        return TRUE ;
-}
-
-/**
- *Writes the same character several times in a row to a file.
- *Nothing is written if a_nb is zero or negative.
- *@param a_char the char to dump
- *@param a_fp the destination file pointer
- *@param a_nb the number of times a_char is to be dumped.
- */
-void
-cr_utils_dump_n_chars (guchar a_char, FILE *a_fp, glong a_nb)
-{
-        glong remaining = a_nb ;
-
-        while (remaining > 0)
-        {
-                fprintf (a_fp, "%c", a_char) ;
-                remaining -- ;
-        }
-}
-
-/**
- *Appends the same character several times in a row to a GString.
- *Nothing is appended if a_nb is zero or negative.
- *@param a_char the char to append.
- *@param a_string the destination string. Must not be NULL.
- *@param a_nb the number of times a_char is to be appended.
- */
-void
-cr_utils_dump_n_chars2 (guchar a_char,
-                        GString *a_string,
-                        glong a_nb)
-{
-        glong remaining = 0 ;
-
-        g_return_if_fail (a_string) ;
-
-        remaining = a_nb ;
-        while (remaining > 0)
-        {
-                g_string_append_printf (a_string, "%c", a_char) ;
-                remaining -- ;
-        }
-}
-
-/**
- *Turns an integer n into the decimal fraction 0.n,
- *e.g. 25 becomes 0.25 and 1 becomes 0.1.
- *The value is divided by 10 until its absolute value
- *is strictly less than 1; the sign is preserved and 0 yields 0.
- *
- *Note that a_n, being an integer, carries no leading zeros:
- *5 always yields 0.5.
- *
- *@param a_n the integer to convert.
- *@return the resulting fraction, in the open interval ]-1, 1[.
- */
-gdouble
-cr_utils_n_to_0_dot_n (glong a_n)
-{
-        gdouble result = a_n ;
-
-        /*
-         *Use >= and not >: with a strict comparison the loop stops
-         *at exactly 1.0, so 1, 10, 100 ... would give 1.0 instead
-         *of 0.1.
-         */
-        while (ABS (result) >= 1)
-        {
-                result = result / 10 ;
-        }
-
-        return result ;
-}
author	Dodji Seketeli <dodji@src.gnome.org>	2003-04-12 16:50:32 +0000
committer	Dodji Seketeli <dodji@src.gnome.org>	2003-04-12 16:50:32 +0000
commit	17114030e0e37f93682c903dd818da1f0e2e6f6b (patch)
tree	f52286082af39c649dfbc86015ab080ac2a55abd /src/cr-utils.c
parent	4f5560ef67d35121d1087aee9b5e34dece012d8a (diff)
download	libcroco-17114030e0e37f93682c903dd818da1f0e2e6f6b.tar.gz