//===-- Format string parser implementation for scanf ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // #define LIBC_COPT_SCANF_DISABLE_INDEX_MODE 1 // This will be a compile flag. #include "src/stdio/scanf_core/parser.h" #include "src/__support/arg_list.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/bitset.h" #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/ctype_utils.h" #include "src/__support/str_to_integer.h" namespace __llvm_libc { namespace scanf_core { #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value(index) #else #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value() #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE FormatSection Parser::get_next_section() { FormatSection section; size_t starting_pos = cur_pos; if (str[cur_pos] == '%') { // format section section.has_conv = true; ++cur_pos; [[maybe_unused]] size_t conv_index = 0; #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE conv_index = parse_index(&cur_pos); #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE if (str[cur_pos] == '*') { ++cur_pos; section.flags = FormatFlags::NO_WRITE; } // handle width section.max_width = -1; if (internal::isdigit(str[cur_pos])) { auto result = internal::strtointeger(str + cur_pos, 10); section.max_width = result.value; cur_pos = cur_pos + result.parsed_len; } // TODO(michaelrj): add posix allocate flag support. // if (str[cur_pos] == 'm') { // ++cur_pos; // section.flags = FormatFlags::ALLOCATE; // } LengthModifier lm = parse_length_modifier(&cur_pos); section.length_modifier = lm; section.conv_name = str[cur_pos]; // If NO_WRITE is not set, then read the next arg as the output pointer. if ((section.flags & FormatFlags::NO_WRITE) == 0) { // Since all outputs are pointers, there's no need to distinguish when // reading from va_args. They're all the same size and stored the same. section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); } // If the end of the format section is on the '\0'. This means we need to // not advance the cur_pos and we should not count this has having a // conversion. if (str[cur_pos] != '\0') { ++cur_pos; } else { section.has_conv = false; } // If the format is a bracketed one, then we need to parse out the insides // of the brackets. if (section.conv_name == '[') { constexpr char CLOSING_BRACKET = ']'; constexpr char INVERT_FLAG = '^'; constexpr char RANGE_OPERATOR = '-'; cpp::bitset<256> scan_set; bool invert = false; // The circumflex in the first position represents the inversion flag, but // it's easier to apply that at the end so we just store it for now. if (str[cur_pos] == INVERT_FLAG) { invert = true; ++cur_pos; } // This is used to determine if a hyphen is being used as a literal or as // a range operator. size_t set_start_pos = cur_pos; // Normally the right bracket closes the set, but if it's the first // character (possibly after the inversion flag) then it's instead // included as a character in the set and the second right bracket closes // the set. if (str[cur_pos] == CLOSING_BRACKET) { scan_set.set(CLOSING_BRACKET); ++cur_pos; } while (str[cur_pos] != '\0' && str[cur_pos] != CLOSING_BRACKET) { // If a hyphen is being used as a range operator, since it's neither at // the beginning nor end of the set. if (str[cur_pos] == RANGE_OPERATOR && cur_pos != set_start_pos && str[cur_pos + 1] != CLOSING_BRACKET && str[cur_pos + 1] != '\0') { // Technically there is no requirement to correct the ordering of the // range, but since the range operator is entirely implementation // defined it seems like a good convenience. char a = str[cur_pos - 1]; char b = str[cur_pos + 1]; char start = (a < b ? a : b); char end = (a < b ? b : a); scan_set.set_range(start, end); cur_pos += 2; } else { scan_set.set(str[cur_pos]); ++cur_pos; } } if (invert) scan_set.flip(); if (str[cur_pos] == CLOSING_BRACKET) { ++cur_pos; section.scan_set = scan_set; } else { // if the end of the string was encountered, this is not a valid set. section.has_conv = false; } } } else { // raw section section.has_conv = false; while (str[cur_pos] != '%' && str[cur_pos] != '\0') ++cur_pos; } section.raw_string = {str + starting_pos, cur_pos - starting_pos}; return section; } LengthModifier Parser::parse_length_modifier(size_t *local_pos) { switch (str[*local_pos]) { case ('l'): if (str[*local_pos + 1] == 'l') { *local_pos += 2; return LengthModifier::ll; } else { ++*local_pos; return LengthModifier::l; } case ('h'): if (str[*local_pos + 1] == 'h') { *local_pos += 2; return LengthModifier::hh; } else { ++*local_pos; return LengthModifier::h; } case ('L'): ++*local_pos; return LengthModifier::L; case ('j'): ++*local_pos; return LengthModifier::j; case ('z'): ++*local_pos; return LengthModifier::z; case ('t'): ++*local_pos; return LengthModifier::t; default: return LengthModifier::NONE; } } //---------------------------------------------------- // INDEX MODE ONLY FUNCTIONS AFTER HERE: //---------------------------------------------------- #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE size_t Parser::parse_index(size_t *local_pos) { if (internal::isdigit(str[*local_pos])) { auto result = internal::strtointeger(str + *local_pos, 10); size_t index = result.value; if (str[*local_pos + result.parsed_len] != '$') return 0; *local_pos = 1 + result.parsed_len + *local_pos; return index; } return 0; } void Parser::args_to_index(size_t index) { if (args_index > index) { args_index = 1; args_cur = args_start; } while (args_index < index) { // Since all arguments must be pointers, we can just read all of them as // void * and not worry about type issues. args_cur.next_var(); ++args_index; } } #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE } // namespace scanf_core } // namespace __llvm_libc