diff options
Diffstat (limited to 'tools/quickbook/src/id_generation.cpp')
-rw-r--r-- | tools/quickbook/src/id_generation.cpp | 379 |
1 files changed, 379 insertions, 0 deletions
diff --git a/tools/quickbook/src/id_generation.cpp b/tools/quickbook/src/id_generation.cpp new file mode 100644 index 000000000..b6445a581 --- /dev/null +++ b/tools/quickbook/src/id_generation.cpp @@ -0,0 +1,379 @@ +/*============================================================================= + Copyright (c) 2011, 2013 Daniel James + + Use, modification and distribution is subject to the Boost Software + License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +=============================================================================*/ + +#include <cctype> +#include "document_state_impl.hpp" +#include <boost/make_shared.hpp> +#include <boost/unordered_map.hpp> +#include <boost/lexical_cast.hpp> +#include <boost/foreach.hpp> +#include <boost/range/algorithm.hpp> + +// TODO: This should possibly try to always generate valid XML ids: +// http://www.w3.org/TR/REC-xml/#NT-NameStartChar + +namespace quickbook { + // + // The maximum size of a generated part of an id. + // + // Not a strict maximum, sometimes broken because the user + // explicitly uses a longer id, or for backwards compatibility. + + static const std::size_t max_size = 32; + + typedef std::vector<id_placeholder const*> placeholder_index; + placeholder_index index_placeholders(document_state_impl const&, boost::string_ref); + + void generate_id_block( + placeholder_index::iterator, placeholder_index::iterator, + std::vector<std::string>& generated_ids); + + std::vector<std::string> generate_ids(document_state_impl const& state, boost::string_ref xml) + { + std::vector<std::string> generated_ids(state.placeholders.size()); + + // Get a list of the placeholders in the order that we wish to + // process them. + placeholder_index placeholders = index_placeholders(state, xml); + + typedef std::vector<id_placeholder const*>::iterator iterator; + iterator it = placeholders.begin(), end = placeholders.end(); + + while (it != end) { + // We process all the ids that have the same number of dots + // together. Note that ids with different parents can clash, e.g. + // because of old fashioned id generation or anchors containing + // multiple dots. + // + // So find the group of placeholders with the same number of dots. + iterator group_begin = it, group_end = it; + while (group_end != end && (*group_end)->num_dots == (*it)->num_dots) + ++group_end; + + generate_id_block(group_begin, group_end, generated_ids); + it = group_end; + } + + return generated_ids; + } + + // + // index_placeholders + // + // Create a sorted index of the placeholders, in order + // to make numbering duplicates easy. A total order. + // + + struct placeholder_compare + { + std::vector<unsigned>& order; + + placeholder_compare(std::vector<unsigned>& order) : order(order) {} + + bool operator()(id_placeholder const* x, id_placeholder const* y) const + { + bool x_explicit = x->category.c >= id_category::explicit_id; + bool y_explicit = y->category.c >= id_category::explicit_id; + + return + x->num_dots < y->num_dots ? true : + x->num_dots > y->num_dots ? false : + x_explicit > y_explicit ? true : + x_explicit < y_explicit ? false : + order[x->index] < order[y->index]; + } + }; + + struct get_placeholder_order_callback : xml_processor::callback + { + document_state_impl const& state; + std::vector<unsigned>& order; + unsigned count; + + get_placeholder_order_callback(document_state_impl const& state, + std::vector<unsigned>& order) + : state(state), + order(order), + count(0) + {} + + void id_value(boost::string_ref value) + { + set_placeholder_order(state.get_placeholder(value)); + } + + void set_placeholder_order(id_placeholder const* p) + { + if (p && !order[p->index]) { + set_placeholder_order(p->parent); + order[p->index] = ++count; + } + } + }; + + placeholder_index index_placeholders( + document_state_impl const& state, + boost::string_ref xml) + { + // The order that the placeholder appear in the xml source. + std::vector<unsigned> order(state.placeholders.size()); + + xml_processor processor; + get_placeholder_order_callback callback(state, order); + processor.parse(xml, callback); + + placeholder_index sorted_placeholders; + sorted_placeholders.reserve(state.placeholders.size()); + BOOST_FOREACH(id_placeholder const& p, state.placeholders) + if (order[p.index]) sorted_placeholders.push_back(&p); + boost::sort(sorted_placeholders, placeholder_compare(order)); + + return sorted_placeholders; + } + + // Resolve and generate ids. + + struct generate_id_block_type + { + // The ids which won't require duplicate handling. + typedef boost::unordered_map<std::string, id_placeholder const*> + chosen_id_map; + chosen_id_map chosen_ids; + std::vector<std::string>& generated_ids; + + generate_id_block_type(std::vector<std::string>& generated_ids) : + generated_ids(generated_ids) {} + + void generate(placeholder_index::iterator begin, + placeholder_index::iterator end); + + std::string resolve_id(id_placeholder const*); + std::string generate_id(id_placeholder const*, std::string const&); + }; + + void generate_id_block(placeholder_index::iterator begin, + placeholder_index::iterator end, + std::vector<std::string>& generated_ids) + { + generate_id_block_type impl(generated_ids); + impl.generate(begin, end); + } + + void generate_id_block_type::generate(placeholder_index::iterator begin, + placeholder_index::iterator end) + { + std::vector<std::string> resolved_ids; + + for (placeholder_index::iterator i = begin; i != end; ++i) + resolved_ids.push_back(resolve_id(*i)); + + unsigned index = 0; + for (placeholder_index::iterator i = begin; i != end; ++i, ++index) + { + generated_ids[(**i).index] = + generate_id(*i, resolved_ids[index]); + } + } + + std::string generate_id_block_type::resolve_id(id_placeholder const* p) + { + std::string id = p->parent ? + generated_ids[p->parent->index] + "." + p->id : + p->id; + + if (p->category.c > id_category::numbered) { + // Reserve the id if it isn't already reserved. + chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first; + + // If it was reserved by a placeholder with a lower category, + // then overwrite it. + if (p->category.c > pos->second->category.c) + pos->second = p; + } + + return id; + } + + std::string generate_id_block_type::generate_id(id_placeholder const* p, + std::string const& resolved_id) + { + if (p->category.c > id_category::numbered && + chosen_ids.at(resolved_id) == p) + { + return resolved_id; + } + + // Split the id into its parent part and child part. + // + // Note: can't just use the placeholder's parent, as the + // placeholder id might contain dots. + std::size_t child_start = resolved_id.rfind('.'); + std::string parent_id, base_id; + + if (child_start == std::string::npos) { + base_id = normalize_id(resolved_id, max_size - 1); + } + else { + parent_id = resolved_id.substr(0, child_start + 1); + base_id = normalize_id(resolved_id.substr(child_start + 1), + max_size - 1); + } + + // Since we're adding digits, don't want an id that ends in + // a digit. + + unsigned int length = base_id.size(); + + if (length > 0 && std::isdigit(base_id[length - 1])) { + if (length < max_size - 1) { + base_id += '_'; + ++length; + } + else { + while (length > 0 && std::isdigit(base_id[length -1])) + --length; + base_id.erase(length); + } + } + + unsigned count = 0; + + while (true) + { + std::string postfix = + boost::lexical_cast<std::string>(count++); + + if ((base_id.size() + postfix.size()) > max_size) { + // The id is now too long, so reduce the length and + // start again. + + // Would need a lot of ids to get this far.... + if (length == 0) throw std::runtime_error("Too many ids"); + + // Trim a character. + --length; + + // Trim any trailing digits. + while (length > 0 && std::isdigit(base_id[length -1])) + --length; + + base_id.erase(length); + count = 0; + } + else { + // Try to reserve this id. + std::string generated_id = parent_id + base_id + postfix; + + if (chosen_ids.emplace(generated_id, p).second) { + return generated_id; + } + } + } + } + + // + // replace_ids + // + // Return a copy of the xml with all the placeholders replaced by + // generated_ids. + // + + struct replace_ids_callback : xml_processor::callback + { + document_state_impl const& state; + std::vector<std::string> const* ids; + boost::string_ref::const_iterator source_pos; + std::string result; + + replace_ids_callback(document_state_impl const& state, + std::vector<std::string> const* ids) + : state(state), + ids(ids), + source_pos(), + result() + {} + + void start(boost::string_ref xml) + { + source_pos = xml.begin(); + } + + void id_value(boost::string_ref value) + { + if (id_placeholder const* p = state.get_placeholder(value)) + { + boost::string_ref id = ids ? + (*ids)[p->index] : p->unresolved_id; + + result.append(source_pos, value.begin()); + result.append(id.begin(), id.end()); + source_pos = value.end(); + } + } + + void finish(boost::string_ref xml) + { + result.append(source_pos, xml.end()); + source_pos = xml.end(); + } + }; + + std::string replace_ids(document_state_impl const& state, boost::string_ref xml, + std::vector<std::string> const* ids) + { + xml_processor processor; + replace_ids_callback callback(state, ids); + processor.parse(xml, callback); + return callback.result; + } + + // + // normalize_id + // + // Normalizes generated ids. + // + + std::string normalize_id(boost::string_ref src_id) + { + return normalize_id(src_id, max_size); + } + + std::string normalize_id(boost::string_ref src_id, std::size_t size) + { + std::string id(src_id.begin(), src_id.end()); + + std::size_t src = 0; + std::size_t dst = 0; + + while (src < id.length() && id[src] == '_') { + ++src; + } + + if (src == id.length()) { + id = "_"; + } + else { + while (src < id.length() && dst < size) { + if (id[src] == '_') { + do { + ++src; + } while(src < id.length() && id[src] == '_'); + + if (src < id.length()) id[dst++] = '_'; + } + else { + id[dst++] = id[src++]; + } + } + + id.erase(dst); + } + + return id; + } +} |