#ifndef _GLIBMM_USTRING_H #define _GLIBMM_USTRING_H /* Copyright (C) 2002 The gtkmm Development Team * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #include #include #include #include // for std::size_t and optionally std::ptrdiff_t #include // For std::move() #include #include #include #include #include #include /* work around linker error on Visual Studio if we don't have GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS */ #if defined(_MSC_VER) && _MSC_VER >= 1600 && !defined(GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS) const std::basic_string::size_type std::basic_string::npos = (std::basic_string::size_type) -1; #endif namespace Glib { class ustring; //********** Glib::StdStringView and Glib::UStringView ************* // It would be possible to replace StdStringView and UStringView with a // template class BasicStringView + two type aliases defining StdStringView // and UStringView. But Doxygen don't generate links to type aliases. // // It would also be possible to replace StdStringView and UStringView with // a StringView class with 3 constructors, taking const std::string&, // const Glib::ustring& and const char*, respectively. The split into two classes // is by design. Using the wrong string class shall not be as easy as using // the right string class. /** Helper class to avoid unnecessary string copying in function calls. * * A %Glib::StdStringView holds a const char pointer. It can be used as an argument * type in a function that passes a const char pointer to a C function. * * Unlike std::string_view, %Glib::StdStringView shall be used only for * null-terminated strings. * @code * std::string f1(Glib::StdStringView s1, Glib::StdStringView s2); * // can be used instead of * std::string f2(const std::string& s1, const std::string& s2); * @endcode * The strings are not copied when f1() is called with string literals. * @code * auto r1 = f1("string 1", "string 2"); * @endcode * To pass a Glib::ustring to a function taking a %Glib::StdStringView, you may have * to use Glib::ustring::c_str(). * @code * std::string str = "non-UTF8 string"; * Glib::ustring ustr = "UTF8 string"; * auto r1 = f1(str, ustr.c_str()); * @endcode * * @newin{2,64} */ class GLIBMM_API StdStringView { public: StdStringView(const std::string& s) : pstring_(s.c_str()) {} StdStringView(const char* s) : pstring_(s) {} const char* c_str() const { return pstring_; } private: const char* pstring_; }; /** Helper class to avoid unnecessary string copying in function calls. * * A %Glib::UStringView holds a const char pointer. It can be used as an argument * type in a function that passes a const char pointer to a C function. * * Unlike std::string_view, %Glib::UStringView shall be used only for * null-terminated strings. * @code * Glib::ustring f1(Glib::UStringView s1, Glib::UStringView s2); * // can be used instead of * Glib::ustring f2(const Glib::ustring& s1, const Glib::ustring& s2); * @endcode * The strings are not copied when f1() is called with string literals. * @code * auto r1 = f1("string 1", "string 2"); * @endcode * To pass a std::string to a function taking a %Glib::UStringView, you may have * to use std::string::c_str(). * @code * std::string str = "non-UTF8 string"; * Glib::ustring ustr = "UTF8 string"; * auto r1 = f1(str.c_str(), ustr); * @endcode * * @newin{2,64} */ class GLIBMM_API UStringView { public: inline UStringView(const Glib::ustring& s); UStringView(const char* s) : pstring_(s) {} const char* c_str() const { return pstring_; } private: const char* pstring_; }; //*************************************************** #ifndef DOXYGEN_SHOULD_SKIP_THIS #ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS template struct IteratorTraits { using iterator_category = typename T::iterator_category; using value_type = typename T::value_type; using difference_type = typename T::difference_type; using pointer = typename T::pointer; using reference = typename T::reference; }; template struct IteratorTraits { using iterator_category = std::random_access_iterator_tag; using value_type = T; using difference_type = std::ptrdiff_t; using pointer = T*; using reference = T&; }; template struct IteratorTraits { using iterator_category = std::random_access_iterator_tag; using value_type = T; using difference_type = std::ptrdiff_t; using pointer = const T*; using reference = const T&; }; #endif /* GLIBMM_HAVE_STD_ITERATOR_TRAITS */ #endif /* DOXYGEN_SHOULD_SKIP_THIS */ /** The iterator type of Glib::ustring. * Note this is not a random access iterator but a bidirectional one, * since all index operations need to iterate over the UTF-8 data. Use * std::advance() to move to a certain position. However, all of the * relational operators are available: * == != < > <= >= * * A writeable iterator isn't provided because: The number of bytes of * the old UTF-8 character and the new one to write could be different. * Therefore, any write operation would invalidate all other iterators * pointing into the same string. * * The Glib::ustring iterated over must contain only valid UTF-8 data. * If it does not, operator++(), operator-\-() and operator*() may make * accesses outside the bounds of the string. A loop such as the following * one would not stop at the end of the string. * @code * // Bad code! Don't do this! * const char not_utf8[] = { '\x80', '\xef', '\x80', '\x80', '\xef', '\x80' }; * const Glib::ustring s(not_utf8, not_utf8 + sizeof not_utf8); * for (Glib::ustring::const_iterator it = s.begin(); it != s.end(); ++it) * std::cout << *it << std::endl; * @endcode * * @tparam T std::string::iterator or std::string::const_iterator */ template class ustring_Iterator { public: using iterator_category = std::bidirectional_iterator_tag; using value_type = gunichar; using difference_type = std::string::difference_type; using reference = value_type; using pointer = void; inline ustring_Iterator(); inline ustring_Iterator(const ustring_Iterator& other); ustring_Iterator& operator=(const ustring_Iterator& other) = default; inline value_type operator*() const; inline ustring_Iterator& operator++(); inline const ustring_Iterator operator++(int); inline ustring_Iterator& operator--(); inline const ustring_Iterator operator--(int); explicit inline ustring_Iterator(T pos); inline T base() const; private: T pos_; }; /** Extract a UCS-4 character from UTF-8 data. * Convert a single UTF-8 (multibyte) character starting at @p pos to * a UCS-4 wide character. This may read up to 6 bytes after the start * position, depending on the UTF-8 character width. You have to make * sure the source contains at least one valid UTF-8 character. * * This is mainly used by the implementation of Glib::ustring::iterator, * but it might be useful as utility function if you prefer using * std::string even for UTF-8 encoding. */ GLIBMM_API gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) G_GNUC_PURE; /** %Glib::ustring has much the same interface as std::string, but contains * %Unicode characters encoded as UTF-8. * * @par About UTF-8 and ASCII * @par * The standard character set ANSI_X3.4-1968 -- more commonly known as * ASCII -- is a subset of UTF-8. So, if you want to, you can use * %Glib::ustring without even thinking about UTF-8. * @par * Whenever ASCII is mentioned in this manual, we mean the @em real ASCII * (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. * %Glib::ustring can @em not be used with ASCII-compatible extended 8-bit * charsets like ISO-8859-1. It's a good idea to avoid string literals * containing non-ASCII characters (e.g. German umlauts) in source code, * or at least you should use UTF-8 literals. * @par * You can find a detailed UTF-8 and %Unicode FAQ here: * http://www.cl.cam.ac.uk/~mgk25/unicode.html * * @par Glib::ustring vs. std::string * @par * %Glib::ustring has implicit type conversions to and from std::string. * These conversions do @em not convert to/from the current locale (see * Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You * can always use std::string instead of %Glib::ustring -- however, using * std::string with multi-byte characters is quite hard. For instance, * std::string::operator[] might return a byte in the middle of a * character, and std::string::length() returns the number of bytes * rather than characters. So don't do that without a good reason. * @par * You cannot always use %Glib::ustring instead of std::string. * @code * Glib::ustring u("a_string_with_underscores"); * std::replace(u.begin(), u.end(), '_', ' '); // does not compile * @endcode * You can't use a Glib::ustring::iterator for writing to a %Glib::ustring. * See the documentation of Glib::ustring_Iterator for differences between it * and std::string::iterator. * @par * Many member functions and operators of %Glib::ustring and Glib::ustring_Iterator * assume that the string contains only valid UTF-8 data. If it does not, memory * outside the bounds of the string can be accessed. If you're uncertain, use * validate() and/or make_valid(). * @par * In a perfect world the C++ Standard Library would contain a UTF-8 string * class. Unfortunately, the C++98 standard doesn't mention UTF-8 at all. * C++11 has UTF-8 literals but no UTF-8 string class. Note * that std::wstring is not a UTF-8 string class because it contains only * fixed-width characters (where width could be 32, 16, or even 8 bits). * * @par Glib::ustring and stream input/output * @par * The stream I/O operators, that is operator<<() and operator>>(), perform * implicit charset conversion to/from the current locale. If that's not * what you intended (e.g. when writing to a configuration file that should * always be UTF-8 encoded) use ustring::raw() to override this behaviour. * @par * If you're using std::ostringstream to build strings for display in the * user interface, you must convert the result back to UTF-8 as shown below: * @code * std::locale::global(std::locale("")); // Set the global locale to the user's preferred locale. * // Usually unnecessary here, because Glib::init() * // does it for you. * std::ostringstream output; * output << percentage << " % done"; * label->set_text(Glib::locale_to_utf8(output.str())); * @endcode * * @par Formatted output and internationalization * @par * The methods ustring::compose() and ustring::format() provide a convenient * and powerful alternative to string streams, as shown in the example below. * Refer to the method documentation of compose() and format() for details. * @code * using Glib::ustring; * * ustring message = ustring::compose("%1 is lower than 0x%2.", * 12, ustring::format(std::hex, 16)); * @endcode * * @par Implementation notes * @par * %Glib::ustring does not inherit from std::string, because std::string was * intended to be a final class. For instance, it does not have a virtual * destructor. Also, a HAS-A relationship is more appropriate because * ustring can't just enhance the std::string interface. Rather, it has to * reimplement the interface so that all operations are based on characters * instead of bytes. */ class ustring { public: using size_type = std::string::size_type; using difference_type = std::string::difference_type; using value_type = gunichar; using reference = gunichar&; using const_reference = const gunichar&; using iterator = ustring_Iterator; using const_iterator = ustring_Iterator; #ifndef GLIBMM_HAVE_SUN_REVERSE_ITERATOR using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; #else typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; #endif /* GLIBMM_HAVE_SUN_REVERSE_ITERATOR */ #ifdef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS GLIBMM_API static const size_type npos = std::string::npos; #else // The IRIX MipsPro compiler says "The indicated constant value is not known", // so we need to initalize the static member data elsewhere. GLIBMM_API static const size_type npos; #endif /*! Default constructor, which creates an empty string. */ GLIBMM_API ustring(); GLIBMM_API ~ustring() noexcept; /*! Construct a ustring as a copy of another ustring. * @param other A source string. */ GLIBMM_API ustring(const ustring& other); /*! Construct a ustring by moving from another ustring. * @param other A source string. */ GLIBMM_API ustring(ustring&& other); /*! Assign the value of another string by copying to this string. * @param other A source string. */ GLIBMM_API ustring& operator=(const ustring& other); /*! Assign the value of another string by moving to this string. * @param other A source string. */ GLIBMM_API ustring& operator=(ustring&& other); /*! Swap contents with another string. * @param other String to swap with. */ GLIBMM_API void swap(ustring& other); /*! Construct a ustring as a copy of a std::string. * @param src A source std::string containing text encoded as UTF-8. */ GLIBMM_API ustring(const std::string& src); /*! Construct a ustring by moving from a std::string. * @param src A source std::string containing text encoded as UTF-8. */ GLIBMM_API ustring(std::string&& src); /*! Construct a ustring as a copy of a substring. * @param src %Source ustring. * @param i Index of first character to copy from. * @param n Number of UTF-8 characters to copy (defaults to copying the remainder). */ GLIBMM_API ustring(const ustring& src, size_type i, size_type n = npos); /*! Construct a ustring as a partial copy of a C string. * @param src %Source C string encoded as UTF-8. * @param n Number of UTF-8 characters to copy. */ GLIBMM_API ustring(const char* src, size_type n); /*! Construct a ustring as a copy of a C string. * @param src %Source C string encoded as UTF-8. */ GLIBMM_API ustring(const char* src); /*! Construct a ustring as multiple characters. * @param n Number of characters. * @param uc UCS-4 code point to use. */ GLIBMM_API ustring(size_type n, gunichar uc); /*! Construct a ustring as multiple characters. * @param n Number of characters. * @param c ASCII character to use. */ GLIBMM_API ustring(size_type n, char c); /*! Construct a ustring as a copy of a range. * @param pbegin Start of range. * @param pend End of range. */ template ustring(In pbegin, In pend); //! @name Assign new contents. //! @{ GLIBMM_API ustring& operator=(const std::string& src); GLIBMM_API ustring& operator=(std::string&& src); GLIBMM_API ustring& operator=(const char* src); GLIBMM_API ustring& operator=(gunichar uc); GLIBMM_API ustring& operator=(char c); GLIBMM_API ustring& assign(const ustring& src); GLIBMM_API ustring& assign(ustring&& src); GLIBMM_API ustring& assign(const ustring& src, size_type i, size_type n); GLIBMM_API ustring& assign(const char* src, size_type n); GLIBMM_API ustring& assign(const char* src); GLIBMM_API ustring& assign(size_type n, gunichar uc); GLIBMM_API ustring& assign(size_type n, char c); template ustring& assign(In pbegin, In pend); //! @} //! @name Append to the string. //! @{ GLIBMM_API ustring& operator+=(const ustring& src); GLIBMM_API ustring& operator+=(const char* src); GLIBMM_API ustring& operator+=(gunichar uc); GLIBMM_API ustring& operator+=(char c); GLIBMM_API void push_back(gunichar uc); GLIBMM_API void push_back(char c); GLIBMM_API ustring& append(const ustring& src); GLIBMM_API ustring& append(const ustring& src, size_type i, size_type n); GLIBMM_API ustring& append(const char* src, size_type n); GLIBMM_API ustring& append(const char* src); GLIBMM_API ustring& append(size_type n, gunichar uc); GLIBMM_API ustring& append(size_type n, char c); template ustring& append(In pbegin, In pend); //! @} //! @name Insert into the string. //! @{ GLIBMM_API ustring& insert(size_type i, const ustring& src); GLIBMM_API ustring& insert(size_type i, const ustring& src, size_type i2, size_type n); GLIBMM_API ustring& insert(size_type i, const char* src, size_type n); GLIBMM_API ustring& insert(size_type i, const char* src); GLIBMM_API ustring& insert(size_type i, size_type n, gunichar uc); GLIBMM_API ustring& insert(size_type i, size_type n, char c); GLIBMM_API iterator insert(iterator p, gunichar uc); GLIBMM_API iterator insert(iterator p, char c); GLIBMM_API void insert(iterator p, size_type n, gunichar uc); GLIBMM_API void insert(iterator p, size_type n, char c); template void insert(iterator p, In pbegin, In pend); //! @} //! @name Replace sub-strings. //! @{ GLIBMM_API ustring& replace(size_type i, size_type n, const ustring& src); GLIBMM_API ustring& replace(size_type i, size_type n, const ustring& src, size_type i2, size_type n2); GLIBMM_API ustring& replace(size_type i, size_type n, const char* src, size_type n2); GLIBMM_API ustring& replace(size_type i, size_type n, const char* src); GLIBMM_API ustring& replace(size_type i, size_type n, size_type n2, gunichar uc); GLIBMM_API ustring& replace(size_type i, size_type n, size_type n2, char c); GLIBMM_API ustring& replace(iterator pbegin, iterator pend, const ustring& src); GLIBMM_API ustring& replace(iterator pbegin, iterator pend, const char* src, size_type n); GLIBMM_API ustring& replace(iterator pbegin, iterator pend, const char* src); GLIBMM_API ustring& replace(iterator pbegin, iterator pend, size_type n, gunichar uc); GLIBMM_API ustring& replace(iterator pbegin, iterator pend, size_type n, char c); template ustring& replace(iterator pbegin, iterator pend, In pbegin2, In pend2); //! @} //! @name Erase sub-strings. //! @{ GLIBMM_API void clear(); GLIBMM_API ustring& erase(size_type i, size_type n = npos); GLIBMM_API ustring& erase(); GLIBMM_API iterator erase(iterator p); GLIBMM_API iterator erase(iterator pbegin, iterator pend); //! @} //! @name Compare and collate. //! @{ GLIBMM_API int compare(UStringView rhs) const; GLIBMM_API int compare(size_type i, size_type n, UStringView rhs) const; GLIBMM_API int compare(size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const; GLIBMM_API int compare(size_type i, size_type n, const char* rhs, size_type n2) const; /*! Create a unique sorting key for the UTF-8 string. If you need to * compare UTF-8 strings regularly, e.g. for sorted containers such as * std::set<>, you should consider creating a collate key first * and compare this key instead of the actual string. * * The ustring::compare() methods as well as the relational operators * == != < > <= >= are quite costly * because they have to deal with %Unicode and the collation rules defined by * the current locale. Converting both operands to UCS-4 is just the first * of several costly steps involved when comparing ustrings. So be careful. */ GLIBMM_API std::string collate_key() const; /*! Create a unique key for the UTF-8 string that can be used for caseless * sorting. ustr.casefold_collate_key() results in the same string * as ustr.casefold().collate_key(), but the former is likely more * efficient. */ GLIBMM_API std::string casefold_collate_key() const; //! @} //! @name Extract characters and sub-strings. //! @{ /*! No reference return; use replace() to write characters. */ GLIBMM_API value_type operator[](size_type i) const; /*! No reference return; use replace() to write characters. @throw std::out_of_range */ GLIBMM_API value_type at(size_type i) const; GLIBMM_API inline ustring substr(size_type i = 0, size_type n = npos) const; //! @} //! @name Access a sequence of characters. //! @{ GLIBMM_API iterator begin(); GLIBMM_API iterator end(); GLIBMM_API const_iterator begin() const; GLIBMM_API const_iterator end() const; GLIBMM_API reverse_iterator rbegin(); GLIBMM_API reverse_iterator rend(); GLIBMM_API const_reverse_iterator rbegin() const; GLIBMM_API const_reverse_iterator rend() const; /** * @newin{2,52} */ GLIBMM_API const_iterator cbegin() const; /** * @newin{2,52} */ GLIBMM_API const_iterator cend() const; //! @} //! @name Find sub-strings. //! @{ GLIBMM_API size_type find(const ustring& str, size_type i = 0) const; GLIBMM_API size_type find(const char* str, size_type i, size_type n) const; GLIBMM_API size_type find(const char* str, size_type i = 0) const; GLIBMM_API size_type find(gunichar uc, size_type i = 0) const; GLIBMM_API size_type find(char c, size_type i = 0) const; GLIBMM_API size_type rfind(const ustring& str, size_type i = npos) const; GLIBMM_API size_type rfind(const char* str, size_type i, size_type n) const; GLIBMM_API size_type rfind(const char* str, size_type i = npos) const; GLIBMM_API size_type rfind(gunichar uc, size_type i = npos) const; GLIBMM_API size_type rfind(char c, size_type i = npos) const; //! @} //! @name Match against a set of characters. //! @{ GLIBMM_API size_type find_first_of(const ustring& match, size_type i = 0) const; GLIBMM_API size_type find_first_of(const char* match, size_type i, size_type n) const; GLIBMM_API size_type find_first_of(const char* match, size_type i = 0) const; GLIBMM_API size_type find_first_of(gunichar uc, size_type i = 0) const; GLIBMM_API size_type find_first_of(char c, size_type i = 0) const; GLIBMM_API size_type find_last_of(const ustring& match, size_type i = npos) const; GLIBMM_API size_type find_last_of(const char* match, size_type i, size_type n) const; GLIBMM_API size_type find_last_of(const char* match, size_type i = npos) const; GLIBMM_API size_type find_last_of(gunichar uc, size_type i = npos) const; GLIBMM_API size_type find_last_of(char c, size_type i = npos) const; GLIBMM_API size_type find_first_not_of(const ustring& match, size_type i = 0) const; GLIBMM_API size_type find_first_not_of(const char* match, size_type i, size_type n) const; GLIBMM_API size_type find_first_not_of(const char* match, size_type i = 0) const; GLIBMM_API size_type find_first_not_of(gunichar uc, size_type i = 0) const; GLIBMM_API size_type find_first_not_of(char c, size_type i = 0) const; GLIBMM_API size_type find_last_not_of(const ustring& match, size_type i = npos) const; GLIBMM_API size_type find_last_not_of(const char* match, size_type i, size_type n) const; GLIBMM_API size_type find_last_not_of(const char* match, size_type i = npos) const; GLIBMM_API size_type find_last_not_of(gunichar uc, size_type i = npos) const; GLIBMM_API size_type find_last_not_of(char c, size_type i = npos) const; //! @} //! @name Retrieve the string's size. //! @{ /** Returns true if the string is empty. Equivalent to *this == "". * @result Whether the string is empty. */ GLIBMM_API bool empty() const; /** Returns the number of characters in the string, not including any null-termination. * @result The number of UTF-8 characters. * * @see bytes(), empty() */ GLIBMM_API size_type size() const; // We have length() as well as size(), because std::string has both. /** This is the same as size(). */ GLIBMM_API size_type length() const; /** Returns the number of bytes in the string, not including any null-termination. * @result The number of bytes. * * @see size(), empty() */ GLIBMM_API size_type bytes() const; //! @} //! @name Change the string's size. //! @{ GLIBMM_API void resize(size_type n, gunichar uc); GLIBMM_API void resize(size_type n, char c = '\0'); //! @} //! @name Control the allocated memory. //! @{ GLIBMM_API size_type capacity() const; GLIBMM_API size_type max_size() const; GLIBMM_API void reserve(size_type n = 0); //! @} //! @name Get a per-byte representation of the string. //! @{ GLIBMM_API inline operator std::string() const; // e.g. std::string str = ustring(); GLIBMM_API inline const std::string& raw() const; /*! Return the stored string, moved from the %ustring. * @newin{2,74} */ GLIBMM_API inline std::string release(); // Not necessarily an ASCII char*. Use g_utf8_*() where necessary. GLIBMM_API const char* data() const; GLIBMM_API const char* c_str() const; /*! @return Number of copied @em bytes, not characters. */ GLIBMM_API size_type copy(char* dest, size_type n, size_type i = 0) const; //! @} //! @name UTF-8 utilities. //! @{ /*! Check whether the string is valid UTF-8. */ GLIBMM_API bool validate() const; /*! Check whether the string is valid UTF-8. */ GLIBMM_API bool validate(iterator& first_invalid); /*! Check whether the string is valid UTF-8. */ GLIBMM_API bool validate(const_iterator& first_invalid) const; /*! Return a copy that is a valid UTF-8 string replacing invalid bytes in the * original with %Unicode replacement character (U+FFFD). * If the string is valid, return a copy of it. */ GLIBMM_API ustring make_valid() const; /*! Check whether the string is plain 7-bit ASCII. @par * Unlike any other ustring method, is_ascii() is safe to use on invalid * UTF-8 strings. If the string isn't valid UTF-8, it cannot be valid * ASCII either, therefore is_ascii() will just return @c false then. * @return Whether the string contains only ASCII characters. */ GLIBMM_API bool is_ascii() const; /*! "Normalize" the %Unicode character representation of the string. */ GLIBMM_API ustring normalize(NormalizeMode mode = NormalizeMode::DEFAULT_COMPOSE) const; //! @} //! @name Character case conversion. //! @{ /*! Returns a new UTF-8 string with all characters characters converted to * their uppercase equivalent, while honoring the current locale. The * resulting string may change in the number of bytes as well as in the * number of characters. For instance, the German sharp s * "ß" will be replaced by two characters * "SS" because there is no capital "ß". */ GLIBMM_API ustring uppercase() const; /*! Returns a new UTF-8 string with all characters characters converted to * their lowercase equivalent, while honoring the current locale. The * resulting string may change in the number of bytes as well as in the * number of characters. */ GLIBMM_API ustring lowercase() const; /*! Returns a caseless representation of the UTF-8 string. The resulting * string doesn't correspond to any particular case, therefore the result * is only useful to compare strings and should never be displayed to the * user. */ GLIBMM_API ustring casefold() const; //! @} //! @name Message formatting. //! @{ /* Returns fmt as is, but checks for invalid references in the format string. * @newin{2,18} */ GLIBMM_API static inline ustring compose(const ustring& fmt); /*! Substitute placeholders in a format string with the referenced arguments. * * The template string uses a similar format to Qt’s QString class, in that * %1, %2, and so on to %9 are used as placeholders * to be substituted with the string representation of the @a args 1–9, while * %% inserts a literal % in the output. Placeholders do not * have to appear in the same order as their corresponding function arguments. * * @par Example: * @code * using Glib::ustring; * const int percentage = 50; * const ustring text = ustring::compose("%1%% done", percentage); * @endcode * * @param fmt The template string, in the format described above. * @param args 1 to 9 arguments to substitute for %1 to %9 * respectively. * * @return The substituted message string. * * @throw Glib::ConvertError * * @newin{2,58} */ template static inline ustring compose(const ustring& fmt, const Ts&... args); /*! Format the argument(s) to a string representation. * * Applies the arguments in order to an std::wostringstream and returns the * resulting string. I/O manipulators may also be used as arguments. This * greatly simplifies the common task of converting a number to a string, as * demonstrated by the example below. The format() methods can also be used * in conjunction with compose() to facilitate localization of user-visible * messages. * * @code * using Glib::ustring; * double value = 22.0 / 7.0; * ustring text = ustring::format(std::fixed, std::setprecision(2), value); * @endcode * * @note The use of a wide character stream in the implementation of format() * is almost completely transparent. However, one of the instances where the * use of wide streams becomes visible is when the std::setfill() stream * manipulator is used. In order for std::setfill() to work the argument * must be of type wchar_t. This can be achieved by using the * L prefix with a character literal, as shown in the example. * * @code * using Glib::ustring; * // Insert leading zeroes to fill in at least six digits * ustring text = ustring::format(std::setfill(L'0'), std::setw(6), 123); * @endcode * * @param args One or more streamable values or I/O manipulators. * * @return The string representation of the argument stream. * * @throw Glib::ConvertError * * @newin{2,58} */ template static inline ustring format(const Ts&... args); /*! Substitute placeholders in a format string with the referenced arguments. * * This function takes a template string in the format used by C’s * printf() family of functions and an arbitrary number of arguments, * replaces each placeholder in the template with the formatted version of its * corresponding argument at the same ordinal position in the list of * subsequent arguments, and returns the result in a new Glib::ustring. * * Note: You must pass the correct count/types/order of arguments to match * the format string, as when calling printf() directly. glibmm does * not check this for you. Breaking this contract invokes undefined behavior * and is a security risk. * * The exception is that glibmm special-cases std::string and Glib::ustring, * so you can pass them in positions corresponding to %s placeholders * without having to call their .c_str() functions; glibmm does that for you. * glibmm also overloads sprintf() with @p fmt but no @p args to avoid risks. * * Said restriction also makes sprintf() unsuitable for translatable strings, * as translators cannot reorder the placeholders to suit their language. If * you wish to support translation, you should instead use compose(), as its * placeholders are numbered rather than ordinal, so they can be moved freely. * * @par Example: * @code * * const auto greeting = std::string{"Hi"}; * const auto name = Glib::ustring{"Dennis"}; * const auto your_cows = 3; * const auto my_cows = 11; * const auto cow_percentage = 100.0 * your_cows / my_cows; * * const auto text = Glib::ustring::sprintf( * "%s, %s! You have %d cows. That's about %0.2f%% of the %d cows I have.", * greeting, name, your_cows, cow_percentage, my_cows); * * std::cout << text; * // Hi, Dennis! You have 3 cows. That's about 27.27% of the 11 cows I have. * @endcode * * @param fmt The template string, in the format used by printf() et al. * @param args A set of arguments having the count/types/order required by @a fmt. * * @return The substituted string. * * @newin{2,62} */ template static inline ustring sprintf(const ustring& fmt, const Ts&... args); /*! Overload of sprintf() taking a string literal. * * The main benefit of this is not constructing a temporary ustring if @p fmt * is a string literal. A secondary effect is that it might encourage compilers * to check if the given format @p fmt matches the variadic arguments @p args. * The latter effect is a convenience at best; you must not rely on it to find * errors in your code, as your compiler might not always be able to do so. * * @param fmt The template string, in the format used by printf() et al. * @param args A set of arguments having the count/types/order required by @a fmt. * * @return The substituted string. * * @newin{2,62} */ template static inline ustring sprintf(const char* fmt, const Ts&... args); /*! Overload of sprintf() for a format string only, which returns it unchanged. * * If no @p args to be substituted are given, there is nothing to do, so the * @p fmt string is returned as-is without substitution. This is an obvious * case of mismatched format/args that we can check. Not doing so causes * warnings/errors with common compiler options, as it is a security risk. * * @param fmt The string * @return The same string. * * @newin{2,62} */ GLIBMM_API static inline ustring sprintf(const ustring& fmt); /*! Overload of sprintf() for a format string only, which returns it unchanged * and avoids creating a temporary ustring as the argument. * * @param fmt The string * @return The same string, as a ustring. * * @newin{2,62} */ GLIBMM_API static inline ustring sprintf(const char* fmt); //! @} private: #ifndef DOXYGEN_SHOULD_SKIP_THIS #ifdef GLIBMM_HAVE_STD_ITERATOR_TRAITS template ::value_type> #else template ::value_type> #endif struct SequenceToString; // The Tru64 compiler needs these partial specializations to be declared here, // as well as defined later. That's probably correct. murrayc. template struct SequenceToString; template struct SequenceToString; template class Stringify; GLIBMM_API static ustring compose_private(const ustring& fmt, std::initializer_list ilist); class FormatStream; template static inline const T& sprintify(const T& arg); GLIBMM_API static inline const char* sprintify(const ustring& arg); GLIBMM_API static inline const char* sprintify(const std::string& arg); #endif /* DOXYGEN_SHOULD_SKIP_THIS */ std::string string_; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS template struct ustring::SequenceToString { }; template struct ustring::SequenceToString : public std::string { SequenceToString(In pbegin, In pend); }; template struct ustring::SequenceToString : public std::string { SequenceToString(In pbegin, In pend); }; template <> struct ustring::SequenceToString : public std::string { GLIBMM_API SequenceToString(Glib::ustring::iterator pbegin, Glib::ustring::iterator pend); }; template <> struct ustring::SequenceToString : public std::string { GLIBMM_API SequenceToString(Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend); }; class ustring::FormatStream { public: // noncopyable FormatStream(const ustring::FormatStream&) = delete; FormatStream& operator=(const ustring::FormatStream&) = delete; private: #ifdef GLIBMM_HAVE_WIDE_STREAM using StreamType = std::wostringstream; #else using StreamType = std::ostringstream; #endif StreamType stream_; public: GLIBMM_API FormatStream(); GLIBMM_API ~FormatStream() noexcept; template inline void stream(const T& value); GLIBMM_API inline void stream(const char* value); // This overload exists to avoid the templated stream() being called for non-const char*. GLIBMM_API inline void stream(char* value); GLIBMM_API ustring to_string() const; }; #endif /* DOXYGEN_SHOULD_SKIP_THIS */ /** Stream input operator. * @relates Glib::ustring * @throw Glib::ConvertError */ GLIBMM_API std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string); /** Stream output operator. * @relates Glib::ustring * @throw Glib::ConvertError */ GLIBMM_API std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string); #ifdef GLIBMM_HAVE_WIDE_STREAM /** Wide stream input operator. * @relates Glib::ustring * @throw Glib::ConvertError GLIBMM_API */ std::wistream& operator>>(std::wistream& is, ustring& utf8_string); /** Wide stream output operator. * @relates Glib::ustring * @throw Glib::ConvertError */ GLIBMM_API std::wostream& operator<<(std::wostream& os, const ustring& utf8_string); #endif /* GLIBMM_HAVE_WIDE_STREAM */ /***************************************************************************/ /* Inline implementation */ /***************************************************************************/ #ifndef DOXYGEN_SHOULD_SKIP_THIS /**** Glib::ustring_Iterator<> *********************************************/ template inline ustring_Iterator::ustring_Iterator(T pos) : pos_(pos) { } template inline T ustring_Iterator::base() const { return pos_; } template inline ustring_Iterator::ustring_Iterator() : pos_() { } template inline ustring_Iterator::ustring_Iterator(const ustring_Iterator& other) : pos_(other.base()) { } template inline typename ustring_Iterator::value_type ustring_Iterator::operator*() const { return Glib::get_unichar_from_std_iterator(pos_); } template inline ustring_Iterator& ustring_Iterator::operator++() { pos_ += g_utf8_skip[static_cast(*pos_)]; return *this; } template inline const ustring_Iterator ustring_Iterator::operator++(int) { const ustring_Iterator temp(*this); this->operator++(); return temp; } template inline ustring_Iterator& ustring_Iterator::operator--() { while ((static_cast(*--pos_) & 0xC0u) == 0x80) { ; } return *this; } template inline const ustring_Iterator ustring_Iterator::operator--(int) { const ustring_Iterator temp(*this); this->operator--(); return temp; } #endif /* DOXYGEN_SHOULD_SKIP_THIS */ /** @relates Glib::ustring_Iterator */ inline bool operator==(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() == rhs.base()); } /** @relates Glib::ustring_Iterator */ inline bool operator!=(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() != rhs.base()); } /** @relates Glib::ustring_Iterator */ inline bool operator<(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() < rhs.base()); } /** @relates Glib::ustring_Iterator */ inline bool operator>(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() > rhs.base()); } /** @relates Glib::ustring_Iterator */ inline bool operator<=(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() <= rhs.base()); } /** @relates Glib::ustring_Iterator */ inline bool operator>=(const Glib::ustring::const_iterator& lhs, const Glib::ustring::const_iterator& rhs) { return (lhs.base() >= rhs.base()); } #ifndef DOXYGEN_SHOULD_SKIP_THIS /**** Glib::ustring::SequenceToString **************************************/ template ustring::SequenceToString::SequenceToString(In pbegin, In pend) : std::string(pbegin, pend) { } template ustring::SequenceToString::SequenceToString(In pbegin, In pend) { char utf8_buf[6]; // stores a single UTF-8 character for (; pbegin != pend; ++pbegin) { const std::string::size_type utf8_len = g_unichar_to_utf8(*pbegin, utf8_buf); this->append(utf8_buf, utf8_len); } } /**** Glib::ustring::FormatStream ******************************************/ template inline void ustring::FormatStream::stream(const T& value) { stream_ << value; } inline void ustring::FormatStream::stream(const char* value) { stream_ << ustring(value); } inline void ustring::FormatStream::stream(char* value) { stream_ << ustring(value); } /**** Glib::ustring ********************************************************/ template ustring::ustring(In pbegin, In pend) : string_(Glib::ustring::SequenceToString(pbegin, pend)) { } template ustring& ustring::assign(In pbegin, In pend) { Glib::ustring::SequenceToString temp_string(pbegin, pend); string_.swap(temp_string); // constant-time operation return *this; } template ustring& ustring::append(In pbegin, In pend) { string_.append(Glib::ustring::SequenceToString(pbegin, pend)); return *this; } template void ustring::insert(ustring::iterator p, In pbegin, In pend) { size_type pos = p.base() - string_.begin(); string_.insert(pos, Glib::ustring::SequenceToString(pbegin, pend)); } template ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, In pbegin2, In pend2) { string_.replace(pbegin.base(), pend.base(), Glib::ustring::SequenceToString(pbegin2, pend2)); return *this; } // The ustring methods substr() and operator std::string() are inline, // so that the compiler has a fair chance to optimize the copy ctor away. inline ustring ustring::substr(ustring::size_type i, ustring::size_type n) const { return ustring(*this, i, n); } inline ustring::operator std::string() const { return string_; } inline const std::string& ustring::raw() const { return string_; } inline std::string ustring::release() { return std::move(string_); } template inline // static ustring ustring::format(const Ts&... args) { ustring::FormatStream buf; (buf.stream(args), ...); return buf.to_string(); } /** An inner class used by ustring. */ template class ustring::Stringify { private: const ustring string_; public: explicit inline Stringify(const T& arg) : string_(ustring::format(arg)) {} // noncopyable Stringify(const ustring::Stringify&) = delete; Stringify& operator=(const ustring::Stringify&) = delete; inline const ustring& ref() const { return string_; } }; /// A template specialization for Stringify: template <> class ustring::Stringify { private: const ustring& string_; public: explicit inline Stringify(const ustring& arg) : string_(arg) {} // noncopyable Stringify(const ustring::Stringify&) = delete; Stringify& operator=(const ustring::Stringify&) = delete; inline const ustring& ref() const { return string_; } }; /** A template specialization for Stringify, * because the regular template has ambiguous constructor overloads for char*. */ template <> class ustring::Stringify { private: const ustring string_; public: explicit inline Stringify(const char* arg) : string_(arg) {} // noncopyable Stringify(const ustring::Stringify&) = delete; Stringify& operator=(const ustring::Stringify&) = delete; inline const ustring& ref() const { return string_; } }; /** A template specialization for Stringify (for string literals), * because the regular template has ambiguous constructor overloads for char*. */ template class ustring::Stringify { private: const ustring string_; public: explicit inline Stringify(const char arg[N]) : string_(arg) {} // noncopyable Stringify(const ustring::Stringify&) = delete; Stringify& operator=(const ustring::Stringify&) = delete; inline const ustring& ref() const { return string_; } }; /** A template specialization for Stringify (for string literals), * because the regular template has ambiguous constructor overloads for char* * on later versions of Visual C++ (2008 and later at least). */ template class ustring::Stringify { private: const ustring string_; public: explicit inline Stringify(const char arg[N]) : string_(arg) {} // noncopyable Stringify(const ustring::Stringify&) = delete; Stringify& operator=(const ustring::Stringify&) = delete; inline const ustring& ref() const { return string_; } }; /* These helper functions used by ustring::sprintf() let users pass C++ strings * to match %s placeholders, without the hassle of writing .c_str() in user code */ template inline // static const T& ustring::sprintify(const T& arg) { return arg; } inline // static const char* ustring::sprintify(const ustring& arg) { return arg.c_str(); } inline // static const char* ustring::sprintify(const std::string& arg) { return arg.c_str(); } // Public methods inline // static ustring ustring::compose(const ustring& fmt) { return ustring::compose_private(fmt, {}); } template inline // static ustring ustring::compose(const ustring& fmt, const Ts&... args) { static_assert(sizeof...(Ts) <= 9, "ustring::compose only supports up to 9 placeholders."); return compose_private(fmt, {&Stringify(args).ref()...}); } template inline // static ustring ustring::sprintf(const ustring& fmt, const Ts&... args) { return sprintf(fmt.c_str(), args...); } template inline // static ustring ustring::sprintf(const char* fmt, const Ts&... args) { auto c_str = g_strdup_printf(fmt, sprintify(args)...); Glib::ustring ustr(c_str); g_free(c_str); return ustr; } inline // static ustring ustring::sprintf(const ustring& fmt) { return fmt; } inline // static ustring ustring::sprintf(const char* fmt) { return ustring(fmt); } #endif /* DOXYGEN_SHOULD_SKIP_THIS */ /** @relates Glib::ustring */ inline void swap(ustring& lhs, ustring& rhs) { lhs.swap(rhs); } /**** Glib::ustring -- comparison operators ********************************/ /** @relates Glib::ustring */ template >> inline bool operator==(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) == 0); } /** @relates Glib::ustring */ inline bool operator==(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) == 0); } /** @relates Glib::ustring */ template >> inline bool operator!=(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) != 0); } /** @relates Glib::ustring */ inline bool operator!=(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) != 0); } /** @relates Glib::ustring */ template >> inline bool operator<(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) < 0); } /** @relates Glib::ustring */ inline bool operator<(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) > 0); } /** @relates Glib::ustring */ template >> inline bool operator>(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) > 0); } /** @relates Glib::ustring */ inline bool operator>(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) < 0); } /** @relates Glib::ustring */ template >> inline bool operator<=(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) <= 0); } /** @relates Glib::ustring */ inline bool operator<=(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) >= 0); } /** @relates Glib::ustring */ template >> inline bool operator>=(const ustring& lhs, const T& rhs) { return (lhs.compare(rhs) >= 0); } /** @relates Glib::ustring */ inline bool operator>=(UStringView lhs, const ustring& rhs) { return (rhs.compare(lhs) <= 0); } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Don't allow implicit conversion of integer 0 to nullptr in the relational operators. // If the int versions of the relational operators are not deleted, attempts to // compare with other integer values than 0 can result in really unexpected behaviour. // See https://bugzilla.gnome.org/show_bug.cgi?id=572978#c10 bool operator==(const ustring& lhs, int rhs) = delete; bool operator==(int lhs, const ustring& rhs) = delete; bool operator!=(const ustring& lhs, int rhs) = delete; bool operator!=(int lhs, const ustring& rhs) = delete; bool operator<(const ustring& lhs, int rhs) = delete; bool operator<(int lhs, const ustring& rhs) = delete; bool operator>(const ustring& lhs, int rhs) = delete; bool operator>(int lhs, const ustring& rhs) = delete; bool operator<=(const ustring& lhs, int rhs) = delete; bool operator<=(int lhs, const ustring& rhs) = delete; bool operator>=(const ustring& lhs, int rhs) = delete; bool operator>=(int lhs, const ustring& rhs) = delete; #endif // DOXYGEN_SHOULD_SKIP_THIS /**** Glib::ustring -- concatenation operators *****************************/ /** @relates Glib::ustring */ inline ustring operator+(const ustring& lhs, const ustring& rhs) { ustring temp(lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(const ustring& lhs, const char* rhs) { ustring temp(lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(const char* lhs, const ustring& rhs) { ustring temp(lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(const ustring& lhs, gunichar rhs) { ustring temp(lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(gunichar lhs, const ustring& rhs) { ustring temp(1, lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(const ustring& lhs, char rhs) { ustring temp(lhs); temp += rhs; return temp; } /** @relates Glib::ustring */ inline ustring operator+(char lhs, const ustring& rhs) { ustring temp(1, lhs); temp += rhs; return temp; } //********** Glib::StdStringView and Glib::UStringView ************* inline UStringView::UStringView(const ustring& s) : pstring_(s.c_str()) {} } // namespace Glib #endif /* _GLIBMM_USTRING_H */