summaryrefslogtreecommitdiff
path: root/include/ruby/internal/core/rstring.h
diff options
context:
space:
mode:
author卜部昌平 <shyouhei@ruby-lang.org>2021-02-01 12:10:21 +0900
committer卜部昌平 <shyouhei@ruby-lang.org>2021-09-10 20:00:06 +0900
commit1bd133949295be3b50439c956f951f7b1bfe7d6e (patch)
treedc85850d90bd5ff59eec958f60a84ab8b5354378 /include/ruby/internal/core/rstring.h
parent746996e6c97d65f9b34021c2481719270e0941b6 (diff)
downloadruby-1bd133949295be3b50439c956f951f7b1bfe7d6e.tar.gz
include/ruby/internal/core/rstring.h: add doxygen
Must not be a bad idea to improve documents. [ci skip]
Diffstat (limited to 'include/ruby/internal/core/rstring.h')
-rw-r--r--include/ruby/internal/core/rstring.h355
1 files changed, 347 insertions, 8 deletions
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index a616ebac23..e14753ab55 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -32,14 +32,20 @@
#include "ruby/internal/warning_push.h"
#include "ruby/assert.h"
+/**
+ * Convenient casting macro.
+ *
+ * @param obj An object, which is in fact an ::RString.
+ * @return The passed object casted to ::RString.
+ */
#define RSTRING(obj) RBIMPL_CAST((struct RString *)(obj))
+
+/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK
#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX
#define RSTRING_FSTR RSTRING_FSTR
-
-/** @cond INTERNAL_MACRO */
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
#define RSTRING_LENINT RSTRING_LENINT
@@ -47,59 +53,343 @@
#define RSTRING_END RSTRING_END
/** @endcond */
+/**
+ * @name Conversion of Ruby strings into C's
+ *
+ * @{
+ */
+
+/**
+ * Ensures that the parameter object is a String. This is done by calling its
+ * `to_str` method.
+ *
+ * @param[in,out] v Arbitrary Ruby object.
+ * @exception rb_eTypeError No implicit conversion defined.
+ * @post `v` is a String.
+ */
#define StringValue(v) rb_string_value(&(v))
+
+/**
+ * Identical to #StringValue, except it returns a `char*`.
+ *
+ * @param[in,out] v Arbitrary Ruby object.
+ * @exception rb_eTypeError No implicit conversion defined.
+ * @return Converted Ruby string's backend C string.
+ * @post `v` is a String.
+ */
#define StringValuePtr(v) rb_string_value_ptr(&(v))
+
+/**
+ * Identical to #StringValuePtr, except it additionally checks for the contents
+ * for viability as a C string. Ruby can accept wider range of contents as
+ * strings, compared to C. This function is to check that.
+ *
+ * @param[in,out] v Arbitrary Ruby object.
+ * @exception rb_eTypeError No implicit conversion defined.
+ * @exception rb_eArgError String is not C-compatible.
+ * @return Converted Ruby string's backend C string.
+ * @post `v` is a String.
+ */
#define StringValueCStr(v) rb_string_value_cstr(&(v))
+
+/**
+ * @private
+ *
+ * @deprecated This macro once was a thing in the old days, but makes no sense
+ * any longer today. Exists here for backwards compatibility
+ * only. You can safely forget about it.
+ */
#define SafeStringValue(v) StringValue(v)
+
+/**
+ * Identical to #StringValue, except it additionally converts the string's
+ * encoding to default external encoding. Ruby has a concept called encodings.
+ * A string can have different encoding than the environment expects. Someone
+ * has to make sure its contents be converted to something suitable. This is
+ * that routine. Call it when necessary.
+ *
+ * @param[in,out] v Arbitrary Ruby object.
+ * @exception rb_eTypeError No implicit conversion defined.
+ * @return Converted Ruby string's backend C string.
+ * @post `v` is a String.
+ *
+ * @internal
+ *
+ * Not sure but it seems this macro does not raise on encoding
+ * incompatibilities? Doesn't sound right to @shyouhei.
+ */
#define ExportStringValue(v) do { \
StringValue(v); \
(v) = rb_str_export(v); \
} while (0)
+/** @} */
+
+/**
+ * @private
+ *
+ * Bits that you can set to ::RBasic::flags.
+ *
+ * @warning These enums are not the only bits we use for strings.
+ *
+ * @internal
+ *
+ * Actually all bits through FL_USER1 to FL_USER19 are used for strings. Why
+ * only this tiny part of them are made public here? @shyouhei can find no
+ * reason.
+ */
enum ruby_rstring_flags {
+
+ /**
+ * This flag has something to do with memory footprint. If the string is
+ * short enough, ruby tries to be creative to abuse padding bits of struct
+ * ::RString for storing contents. If this flag is set that string does
+ * _not_ do that, to resort to good old fashioned external allocation
+ * strategy instead.
+ *
+ * @warning This bit has to be considered read-only. Setting/clearing
+ * this bit without corresponding fix up must cause immediate
+ * SEGV. Also, internal structures of a string change
+ * dynamically and transparently throughout of its lifetime.
+ * Don't assume it being persistent.
+ *
+ * @internal
+ *
+ * 3rd parties must not be aware that there even is more than one way to
+ * store a string. Might better be hidden.
+ */
RSTRING_NOEMBED = RUBY_FL_USER1,
+
+ /**
+ * When a string employs embedded strategy (see ::RSTRING_NOEMBED), these
+ * bits are used to store the number of bytes actually filled into
+ * ::RString::ary.
+ *
+ * @internal
+ *
+ * 3rd parties must not be aware that there even is more than one way to
+ * store a string. Might better be hidden.
+ */
RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
RUBY_FL_USER5 | RUBY_FL_USER6,
+
/* Actually, string encodings are also encoded into the flags, using
* remaining bits.*/
+
+ /**
+ * This flag has something to do with infamous "f"string. What is a
+ * fstring? Well it is a special subkind of strings that is immutable,
+ * deduped globally, and managed by our GC. It is much like a Symbol (in
+ * fact Symbols are dynamic these days and are backended using fstrings).
+ * This concept has been silently introduced at some point in 2.x era.
+ * Since then it gained wider acceptance in the core. But extension
+ * libraries could not know that until very recently. Strings of this flag
+ * live in a special Limbo deep inside of the interpreter. Never try to
+ * manipulate it by hand.
+ *
+ * @internal
+ *
+ * Fstrings are not the only variant strings that we implement today.
+ * Other things are behind-the-scene. This is the only one that is visible
+ * from extension library. There is no clear reason why it has to be.
+ * Given there are more "polite" ways to create fstrings, it seems this bit
+ * need not be exposed to extension libraries. Might better be hidden.
+ */
RSTRING_FSTR = RUBY_FL_USER17
};
+/**
+ * This is an enum because GDB wants it (rather than a macro). People need not
+ * bother.
+ */
enum ruby_rstring_consts {
+ /** Where ::RSTRING_EMBED_LEN_MASK resides. */
RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2,
+
+ /** Max possible number of characters that can be embedded. */
RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
};
+/**
+ * Ruby's String. A string in ruby conceptually has these information:
+ *
+ * - Encoding of the string.
+ * - Length of the string.
+ * - Contents of the string.
+ *
+ * It is worth noting that a string is _not_ an array of characters in ruby.
+ * It has never been. In 1.x a string was an array of integers. Since 2.x a
+ * string is no longer an array of anything. A string is a string -- just like
+ * a Time is not an integer.
+ */
struct RString {
+
+ /** Basic part, including flags and class. */
struct RBasic basic;
+
+ /** String's specific fields. */
union {
+
+ /**
+ * Strings that use separated memory region for contents use this
+ * pattern.
+ */
struct {
+
+ /**
+ * Length of the string, not including terminating NUL character.
+ *
+ * @note This is in bytes.
+ */
long len;
+
+ /**
+ * Pointer to the contents of the string. In the old days each
+ * string had dedicated memory regions. That is no longer true
+ * today, but there still are strings of such properties. This
+ * field could be used to point such things.
+ */
char *ptr;
+
+ /** Auxiliary info. */
union {
+
+ /**
+ * Capacity of `*ptr`. A continuous memory region of at least
+ * `capa` bytes is expected to exist at `*ptr`. This can be
+ * bigger than `len`.
+ */
long capa;
+
+ /**
+ * Parent of the string. Nowadays strings can share their
+ * contents each other, constructing gigantic nest of objects.
+ * This situation is called "shared", and this is the field to
+ * control such properties.
+ */
VALUE shared;
} aux;
} heap;
+
+ /**
+ * Embedded contents. When a string is short enough, it uses this area
+ * to store the contents themselves. This was impractical in the 20th
+ * century, but these days 64 bit machines can typically hold 48 bytes
+ * here. Could be sufficiently large. In this case the length is
+ * encoded into the flags.
+ */
char ary[RSTRING_EMBED_LEN_MAX + 1];
} as;
};
RBIMPL_SYMBOL_EXPORT_BEGIN()
-VALUE rb_str_to_str(VALUE);
-VALUE rb_string_value(volatile VALUE*);
-char *rb_string_value_ptr(volatile VALUE*);
-char *rb_string_value_cstr(volatile VALUE*);
-VALUE rb_str_export(VALUE);
-VALUE rb_str_export_locale(VALUE);
+/**
+ * Identical to rb_check_string_type(), except it raises exceptions in case of
+ * conversion failures.
+ *
+ * @param[in] obj Target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @return Return value of `obj.to_str`.
+ * @see rb_io_get_io
+ * @see rb_ary_to_ary
+ */
+VALUE rb_str_to_str(VALUE obj);
+
+/**
+ * Identical to rb_str_to_str(), except it fills the passed pointer with the
+ * converted object.
+ *
+ * @param[in,out] ptr Pointer to a variable of target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @return Return value of `obj.to_str`.
+ * @post `*ptr` is the return value.
+ */
+VALUE rb_string_value(volatile VALUE *ptr);
+
+/**
+ * Identical to rb_str_to_str(), except it returns the converted string's
+ * backend memory region.
+ *
+ * @param[in,out] ptr Pointer to a variable of target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @post `*ptr` is the return value of `obj.to_str`.
+ * @return Pointer to the contents of the return value.
+ */
+char *rb_string_value_ptr(volatile VALUE *ptr);
+
+/**
+ * Identical to rb_string_value_ptr(), except it additionally checks for the
+ * contents for viability as a C string. Ruby can accept wider range of
+ * contents as strings, compared to C. This function is to check that.
+ *
+ * @param[in,out] ptr Pointer to a variable of target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @exception rb_eArgError String is not C-compatible.
+ * @post `*ptr` is the return value of `obj.to_str`.
+ * @return Pointer to the contents of the return value.
+ */
+char *rb_string_value_cstr(volatile VALUE *ptr);
+
+/**
+ * Identical to rb_str_to_str(), except it additionally converts the string
+ * into default external encoding. Ruby has a concept called encodings. A
+ * string can have different encoding than the environment expects. Someone
+ * has to make sure its contents be converted to something suitable. This is
+ * that routine. Call it when necessary.
+ *
+ * @param[in] obj Target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @return Converted ruby string of default external encoding.
+ */
+VALUE rb_str_export(VALUE obj);
+
+/**
+ * Identical to rb_str_export(), except it converts into the locale encoding
+ * instead.
+ *
+ * @param[in] obj Target object.
+ * @exception rb_eTypeError No implicit conversion to String.
+ * @return Converted ruby string of locale encoding.
+ */
+VALUE rb_str_export_locale(VALUE obj);
RBIMPL_ATTR_ERROR(("rb_check_safe_str() and Check_SafeStr() are obsolete; use StringValue() instead"))
+/**
+ * @private
+ *
+ * @deprecated This function once was a thing in the old days, but makes no
+ * sense any longer today. Exists here for backwards
+ * compatibility only. You can safely forget about it.
+ */
void rb_check_safe_str(VALUE);
+
+/**
+ * @private
+ *
+ * @deprecated This macro once was a thing in the old days, but makes no sense
+ * any longer today. Exists here for backwards compatibility
+ * only. You can safely forget about it.
+ */
#define Check_SafeStr(v) rb_check_safe_str(RBIMPL_CAST((VALUE)(v)))
RBIMPL_SYMBOL_EXPORT_END()
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Queries the length of the string.
+ *
+ * @param[in] str String in question.
+ * @return Its length, in bytes.
+ * @pre `str` must be an instance of ::RString, and must has its
+ * ::RSTRING_NOEMBED flag off.
+ *
+ * @internal
+ *
+ * This was a macro before. It was inevitable to be public, since macros are
+ * global constructs. But should it be forever? Now that it is a function,
+ * @shyouhei thinks it could just be eliminated, hidden into implementation
+ * details.
+ */
static inline long
RSTRING_EMBED_LEN(VALUE str)
{
@@ -119,6 +409,15 @@ RBIMPL_WARNING_IGNORED(413)
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * @private
+ *
+ * "Expands" an embedded string into an ordinal one. This is a function that
+ * returns aggregated type. The returned struct always has its `as.heap.len`
+ * an `as.heap.ptr` fields set appropriately.
+ *
+ * This is an implementation detail that 3rd parties should never bother.
+ */
static inline struct RString
rbimpl_rstring_getmem(VALUE str)
{
@@ -140,6 +439,13 @@ RBIMPL_WARNING_POP()
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Queries the length of the string.
+ *
+ * @param[in] str String in question.
+ * @return Its length, in bytes.
+ * @pre `str` must be an instance of ::RString.
+ */
static inline long
RSTRING_LEN(VALUE str)
{
@@ -147,6 +453,13 @@ RSTRING_LEN(VALUE str)
}
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Queries the contents pointer of the string.
+ *
+ * @param[in] str String in question.
+ * @return Pointer to its contents.
+ * @pre `str` must be an instance of ::RString.
+ */
static inline char *
RSTRING_PTR(VALUE str)
{
@@ -175,6 +488,13 @@ RSTRING_PTR(VALUE str)
}
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Queries the end of the contents pointer of the string.
+ *
+ * @param[in] str String in question.
+ * @return Pointer to its end of contents.
+ * @pre `str` must be an instance of ::RString.
+ */
static inline char *
RSTRING_END(VALUE str)
{
@@ -194,12 +514,31 @@ RSTRING_END(VALUE str)
}
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Identical to RSTRING_LEN(), except it differs for the return type.
+ *
+ * @param[in] str String in question.
+ * @exception rb_eRangeError Too long.
+ * @return Its length, in bytes.
+ * @pre `str` must be an instance of ::RString.
+ *
+ * @internal
+ *
+ * This API seems redundant but has actual usages.
+ */
static inline int
RSTRING_LENINT(VALUE str)
{
return rb_long2int(RSTRING_LEN(str));
}
+/**
+ * Convenient macro to obtain the contents and length at once.
+ *
+ * @param str String in question.
+ * @param ptrvar Variable where its contents is stored.
+ * @param lenvar Variable where its length is stored.
+ */
#ifdef HAVE_STMT_AND_DECL_IN_EXPR
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
__extension__ ({ \