// linenoise_utf8.h
/*
 *    Copyright 2012 10gen Inc.
 *
 *    This program is free software: you can redistribute it and/or  modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects
 *    for all of the code used other than as permitted herein. If you modify
 *    file(s) with this exception, you may extend this exception to your
 *    version of the file(s), but you are not obligated to do so. If you do not
 *    wish to do so, delete this exception statement from your version. If you
 *    delete this exception statement from all source files in the program,
 *    then also delete it in the license file.
 */

#include <boost/smart_ptr/scoped_array.hpp>
#include <string.h>

namespace linenoise_utf8 {

typedef unsigned char   UChar8;     // UTF-8 octet
typedef unsigned int    UChar32;    // Unicode code point

// Error bits (or-ed together) returned from utf8toUChar32string
//
enum BadUTF8 {
    BadUTF8_no_error        = 0x00,
    BadUTF8_invalid_byte    = 0x01,
    BadUTF8_surrogate       = 0x02
};

/**
 * Convert a null terminated UTF-8 std::string from UTF-8 and store it in a UChar32 destination buffer
 * Always null terminates the destination std::string if at least one character position is available
 * Errors in the UTF-8 encoding will be handled in two ways: the erroneous characters will be
 * converted to the Unicode error character U+FFFD and flag bits will be set in the conversionErrorCode
 * int.
 * 
 * @param uchar32output                 Destination UChar32 buffer
 * @param utf8input                     Source UTF-8 string
 * @param outputBufferSizeInCharacters  Destination buffer size in characters
 * @param outputUnicodeCharacterCount   Number of UChar32 characters placed in output buffer
 * @param conversionErrorCode           Flag bits from enum BadUTF8, or zero if no error
 */
void copyString8to32(
        UChar32* uchar32output,
        const UChar8* utf8input,
        size_t outputBufferSizeInCharacters,
        size_t & outputUnicodeCharacterCount,
        int & conversionErrorCode );

/**
 * Copy a null terminated UChar32 std::string to a UChar32 destination buffer
 * Always null terminates the destination std::string if at least one character position is available
 * 
 * @param dest32                    Destination UChar32 buffer
 * @param source32                  Source UChar32 string
 * @param destLengthInCharacters    Destination buffer length in characters
 */
void copyString32( UChar32* dest32, const UChar32* source32, size_t destLengthInCharacters );

/**
 * Convert a specified number of UChar32 characters from a possibly null terminated UChar32 std::string to UTF-8
 * and store it in a UChar8 destination buffer
 * Always null terminates the destination std::string if at least one character position is available
 * 
 * @param dest8                     Destination UChar8 buffer
 * @param source32                  Source UChar32 string
 * @param outputBufferSizeInBytes   Destination buffer size in bytes
 * @param charCount                 Maximum number of UChar32 characters to process
 * @return                          Count of bytes written to output buffer, not including null terminator
 */
size_t copyString32to8counted( UChar8* dest8, const UChar32* source32, size_t outputBufferSizeInBytes, size_t charCount );

/**
 * Convert a null terminated UChar32 std::string to UTF-8 and store it in a UChar8 destination buffer
 * Always null terminates the destination std::string if at least one character position is available
 * 
 * @param dest8                     Destination UChar8 buffer
 * @param source32                  Source UChar32 string
 * @param outputBufferSizeInBytes   Destination buffer size in bytes
 * @return                          Count of bytes written to output buffer, not including null terminator
 */
size_t copyString32to8( UChar8* dest8, const UChar32* source32, size_t outputBufferSizeInBytes );

/**
 * Count characters (i.e. Unicode code points, array elements) in a null terminated UChar32 string
 * 
 * @param str32     Source UChar32 string
 * @return          std::string length in characters
 */
size_t strlen32( const UChar32* str32 );

/**
 * Compare two UChar32 null-terminated strings with length parameter
 *
 * @param first32   First std::string to compare
 * @param second32  Second std::string to compare
 * @param length    Maximum number of characters to compare
 * @return          Negative if first < second, positive if first > second, zero if equal
 */
int strncmp32( UChar32* first32, UChar32* second32, size_t length );

/**
 * Internally convert an array of UChar32 characters of specified length to UTF-8 and write it to fileHandle
 *
 * @param fileHandle                File handle to write to
 * @param string32                  Source UChar32 character array, may not be null terminated
 * @param sourceLengthInCharacters  Number of source characters to convert and write
 * @return                          Number of bytes written, -1 on error
 */
int write32( int fileHandle, const UChar32* string32, unsigned int sourceLengthInCharacters );

/**
 * Template and classes for UChar8 and UChar32 strings
 */
template <typename char_type>
struct UtfStringMixin {
    typedef char_type char_t; // inherited

    UtfStringMixin() : _len( 0 ), _cap( 0 ), _chars( 0 ) {}

    UtfStringMixin( const UtfStringMixin& other ) // copies like std::string
        :_len( other._len ), _cap( other._len+1 ), _chars( other._chars ), _str( new char_t[_cap] )
    {
        memcpy( _str.get(), other._str.get(), _cap * sizeof( char_t ) );
    }

    UtfStringMixin& operator= (UtfStringMixin copy) {
        this->swap( copy );
        return *this;
    }

    char_t* get() const { return _str.get(); }
    char_t& operator[](size_t idx) { return _str[idx]; }
    const char_t& operator[](size_t idx) const { return _str[idx]; }

    size_t length() const { return _len; }
    size_t capacity() const { return _cap; }
    size_t chars() const { return _chars; }

    void swap( UtfStringMixin& other ) {
        std::swap( _len, other._len );
        std::swap( _cap, other._cap );
        std::swap( _chars, other._chars );
        _str.swap( other._str );
    }

protected:
    size_t _len;    // in units of char_t without nul
    size_t _cap;    // size of _str buffer including nul
    size_t _chars;  // number of codepoints
    boost::scoped_array<char_t> _str;
};

struct Utf32String;

struct Utf8String : public UtfStringMixin<UChar8> {
    Utf8String() {}
    explicit Utf8String( const UChar32* s, int chars = -1 ) {
        if ( chars == -1 ) {
            initFrom32( s, strlen32( s ) );
        }
        else {
            initFrom32( s, chars );
        }
    }
    explicit Utf8String( const Utf32String& c ); // defined after utf32String

private:
    void initFrom32( const UChar32* s, int chars ) {
        _chars = chars;
        _cap = _chars * sizeof( UChar32 ) + 1;
        _str.reset( new char_t[_cap] );
        _len = copyString32to8counted( _str.get(), s, _cap, chars );
    }
};

struct Utf32String : public UtfStringMixin<UChar32> {
    Utf32String() {}
    explicit Utf32String( const UChar32* s ) {
        _chars = _len = strlen32( s );
        _cap = _len + 1;
        _str.reset( new UChar32[_cap] );
        memcpy( _str.get(), s, _cap * sizeof( UChar32 ) );
    }
    explicit Utf32String( const UChar32* s, int textLen ) {
        _chars = _len = textLen;
        _cap = _len + 1;
        _str.reset( new UChar32[_cap] );
        memcpy( _str.get(), s, _len * sizeof( UChar32 ) );
        _str[_len] = 0;
    }
    explicit Utf32String( const UChar8* s, int chars = -1 ) {
        initFrom8( s, chars );
    }
    explicit Utf32String( const Utf8String& c ) {
        initFrom8( c.get(), c.chars() );
    }
    explicit Utf32String( size_t reserve ) {
        _len = 0;
        _cap = reserve;
        _chars = 0;
        _str.reset( new UChar32[_cap] );
        _str[0] = 0;
    }
    void initFromBuffer( void ) {
        _chars = _len = strlen32( _str.get() );
    }

private:
    void initFrom8( const UChar8* s, int chars ) {
        Utf32String temp;
        if ( chars == -1 ) {
            temp._cap = strlen( reinterpret_cast<const char*>( s ) ) + 1; // worst case ASCII
        }
        else {
            temp._cap = chars + 1;
        }
        temp._str.reset( new char_t[temp._cap] );
        int error;
        copyString8to32( temp._str.get(), s, temp._cap, temp._chars, error );
        temp._len = temp._chars;
        this->swap( temp );
    }
};

inline Utf8String::Utf8String( const Utf32String& s ) {
    initFrom32( s.get(), s.chars() );
}

} // namespace linenoise_utf8