From 97ae5b7026a7225d6a541fcf885a1a9a3fc060d2 Mon Sep 17 00:00:00 2001 From: Bertrand Garrigues Date: Thu, 9 Nov 2017 00:43:17 +0100 Subject: Add class 'paragraph' for Knuth-Plass formatting. To be used in `troff' in the future, but not connected to the rest of the `troff' code yet. * src/include/trace.h: macro to trace and debug. * src/roff/troff/paragraph.h: * src/roff/troff/paragraph_l.h: * src/roff/troff/paragraph.cpp: * src/roff/troff/paragraph_word.h: New files. The class `paragraph' exposes some public methods to build the paragraph (`add_box', `add_glue', `add_optional_hyphen', `add_explicit_hyphen'). It use an interface `paragraph_word' that the caller must implement to pass the words width and the glue values to the `paragraph' class, and also a `write' method. The caller must also implement `paragraph_writer_interface', another interface used by `paragraph' to control the writing process. * test/test_paragraph.cpp: A simple example to demonstrate the usage of `paragraph', including the original example from Knuth's book `Digital Typography'. * test/test.am: add `test_paragraph' to `make check'. --- src/include/trace.h | 80 +++ src/roff/troff/paragraph.cpp | 1374 +++++++++++++++++++++++++++++++++++++++ src/roff/troff/paragraph.h | 107 +++ src/roff/troff/paragraph_l.h | 208 ++++++ src/roff/troff/paragraph_word.h | 36 + test/test.am | 9 + test/test_paragraph.cpp | 971 +++++++++++++++++++++++++++ 7 files changed, 2785 insertions(+) create mode 100644 src/include/trace.h create mode 100644 src/roff/troff/paragraph.cpp create mode 100644 src/roff/troff/paragraph.h create mode 100644 src/roff/troff/paragraph_l.h create mode 100644 src/roff/troff/paragraph_word.h create mode 100644 test/test_paragraph.cpp diff --git a/src/include/trace.h b/src/include/trace.h new file mode 100644 index 000000000..79e6ba8ef --- /dev/null +++ b/src/include/trace.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#ifndef TRACE_H +#define TRACE_H + +#define LEVEL_DEBUG 3 +#define LEVEL_INFO 2 +#define LEVEL_ERROR 1 +#define LEVEL_QUIET 0 + +#include + +#ifndef TRACE_ALIGN_LENGTH +#define TRACE_ALIGN_LENGTH -1 +#endif + +// __FILE__ gives the full path of the file, we just want the file name. +#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) + +#ifndef DEACTIVATE_TRACES + +#define trace_define_category(module_name, default_level) \ + int trace_category_level_##module_name = default_level + +#define trace_use_category(module_name) \ + extern int trace_category_level_##module_name + +#define trace_set_level(module_name, level) \ + do { \ + trace_category_level_##module_name = level; \ + } while(0) + +#define trace_print(module_name, min_level, fmt, args...) \ + do { \ + if ((trace_category_level_##module_name) >= (min_level)) { \ + int res##__LINE__; \ + int length_align##__LINE__ = TRACE_ALIGN_LENGTH; \ + res##__LINE__ = fprintf(stderr, "[%s:%d:%s]", __FILENAME__, __LINE__, __FUNCTION__); \ + if (length_align##__LINE__ == -1) \ + fprintf(stderr, fmt "\n", ##args); \ + else \ + fprintf(stderr, "%*s" fmt "\n", TRACE_ALIGN_LENGTH - res##__LINE__, #module_name": ", ##args); \ + fflush(stderr); \ + } \ + } while(0) + +#define trace_debug(module_name, fmt, args...) \ + trace_print(module_name, LEVEL_DEBUG, fmt, ##args) + +#define trace_info(module_name, fmt, args...) \ + trace_print(module_name, LEVEL_INFO, fmt, ##args) + +#define trace_error(module_name, fmt, args...) \ + trace_print(module_name, LEVEL_ERROR, fmt, ##args) + +#else /* DEACTIVATE_TRACES */ +#define trace_debug(args...) +#define trace_info(args...) +#define trace_error(args...) +#define trace_define_category(args...) +#define trace_set_level(args...) +#endif /* DEACTIVATE_TRACES */ + +#endif diff --git a/src/roff/troff/paragraph.cpp b/src/roff/troff/paragraph.cpp new file mode 100644 index 000000000..eeb5932f4 --- /dev/null +++ b/src/roff/troff/paragraph.cpp @@ -0,0 +1,1374 @@ +// -*- C++ -*- + +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#include "paragraph_l.h" +#include "paragraph.h" + +#include "trace.h" +trace_use_category(item); +trace_use_category(breakpoint); +trace_use_category(paragraph); + +#define item_debug(args...) \ + trace_debug(item, ##args) +#define item_info(args...) \ + trace_info(item, ##args) +#define item_error(args...) \ + trace_error(item, item, ##args) +#define breakpoint_debug(args...) \ + trace_debug(breakpoint, ##args) +#define breakpoint_info(args...) \ + trace_info(breakpoint, ##args) +#define breakpoint_error(args...) \ + trace_error(breakpoint, ##args) +#define paragraph_debug(args...) \ + trace_debug(paragraph, ##args) +#define paragraph_info(args...) \ + trace_info(paragraph, ##args) +#define paragraph_error(args...) \ + trace_error(paragraph, ##args) + +#define PLUS_INFINITY (INT_MAX / 4) +#define MINUS_INFINITY (INT_MIN / 4) + +/* strings used for debug */ +#define STRING_MAX_SIZE 256 + +/* + * Abstract Class item + */ +item::item() +{ + INIT_LIST_HEAD(&list_, this); + is_breakpoint_ = false; +} + +item::~item() +{ + if (word_ != NULL) + delete word_; +} + +unsigned int +item::get_width() +{ + return width_; +} + +unsigned int +item::get_stretchability() +{ + return stretchability_; +} + +unsigned int +item::get_shrinkability() +{ + return shrinkability_; +} + +int +item::get_penalty() +{ + return penalty_; +} + +item * +item::get_previous_item() +{ + return list_entry(list_.prev, item); +} + +paragraph_word * +item::get_word() +{ + return word_; +} + +int +item::sprint_word(char *str) +{ + int res = 0; + + if (is_box() == true) { + if (word_ != NULL) + res = word_->snprint(str, STRING_MAX_SIZE); + } else if (is_glue() == true) { + res = sprintf(str, "glue: width %u strech %u shrink %u", + width_, stretchability_, shrinkability_); + } else { + res = sprintf(str, "penalty: width %u value %u flag %d", + width_, penalty_, flagged_penalty_); + } + item_debug("sprint_word:%s:", str); + + return res; +} + +/* + * Class box + */ +box_item::box_item(paragraph_word *word) + : item() +{ + char sz[STRING_MAX_SIZE] = ""; + + word_ = word; + stretchability_ = 0; + shrinkability_ = 0; + penalty_ = 0; + flagged_penalty_ = false; + width_ = word->get_width(); + word_->snprint(sz, STRING_MAX_SIZE); + item_debug("new box:%s: width %u", sz, width_); +} + +box_item::~box_item() +{ +} + + +bool +box_item::is_box() +{ + return true; +} + +bool +box_item::is_glue() +{ + return false; +} + +bool +box_item::is_legal_breakpoint() +{ + return false; +} + +bool +box_item::is_forced_break() +{ + return false; +} + +bool +box_item::is_flagged_penalty() +{ + return false; +} + +int +box_item::sprint_info(char *str) +{ + char sz[STRING_MAX_SIZE] = ""; + word_->snprint(sz, STRING_MAX_SIZE); + return sprintf(str, "box '%s' (width %u)", sz, width_); +} + + +/** + * Class glue + */ +glue_item::glue_item (unsigned int width, + unsigned int stretchability, + unsigned int shrinkability) + : item() +{ + width_ = width; + stretchability_ = stretchability; + shrinkability_ = shrinkability; + penalty_ = 0; + flagged_penalty_ = false; + word_ = NULL; +} + +glue_item::~glue_item() +{ +} + +bool +glue_item::is_box() +{ + return false; +} + +bool +glue_item::is_glue() +{ + return true; +} + +bool +glue_item::is_legal_breakpoint() +{ + bool ret; + item *previous_item; + + previous_item = get_previous_item(); + if (previous_item->is_box()) + ret = true; + else + ret = false; + + return ret; +} + +bool +glue_item::is_forced_break() +{ + return false; +} + +bool +glue_item::is_flagged_penalty() +{ + return false; +} + +int +glue_item::sprint_info(char *str) +{ + char tmp[32]; + + if (stretchability_ >= PLUS_INFINITY) + sprintf(tmp, "infinity"); + else + sprintf(tmp, "%u", stretchability_); + + return sprintf(str, "glue width:%u strecth:%s shrink:%u", + width_, tmp, shrinkability_); +} + +/** + * Class penalty + * + */ +penalty_item::penalty_item(int penalty, + bool flag, + paragraph_word *optional_word = NULL) + : item() +{ + stretchability_ = 0; + shrinkability_ = 0; + penalty_ = penalty; + flagged_penalty_ = flag; + word_ = optional_word; + if (word_) + width_ = word_->get_width(); + else + width_ = 0; +} + +penalty_item::~penalty_item() +{ +} + +bool +penalty_item::is_box() +{ + return false; +} + +bool +penalty_item::is_glue() +{ + return false; +} + +bool +penalty_item::is_legal_breakpoint() +{ + bool ret; + + /* +infinity is a disallowed break */ + if (penalty_ < PLUS_INFINITY) + ret = true; + else + ret = false; + + return ret; +} + +bool +penalty_item::is_forced_break() +{ + bool ret = false; + + if (penalty_ == MINUS_INFINITY) + ret = true; + + return ret; +} + +bool +penalty_item::is_flagged_penalty() +{ + return flagged_penalty_; +} + +int +penalty_item::sprint_info(char *str) +{ + char tmp[32]; + + if (penalty_ >= PLUS_INFINITY) + sprintf(tmp, "infinity"); + else if (penalty_ <= MINUS_INFINITY) + sprintf(tmp, "-infinity"); + else + sprintf(tmp, "%d", penalty_); + + return sprintf(str, + "penalty width:%u value:%s flag:%d", + width_, + tmp, + flagged_penalty_); +} + +/** + * Class breakpoint + */ +breakpoint::breakpoint(item *break_item, + unsigned int total_width, + unsigned int total_stretch, + unsigned int total_shrink) +{ + breakpoint_debug("New breakpoint, total_width %u", total_width); + break_item_ = break_item; + line_number_ = 0; + fitness_class_ = FITNESS_CLASS_MAX; + total_width_ = total_width; + total_stretch_ = total_stretch; + total_shrink_ = total_shrink; + total_demerits_ = 0; + previous_best_ = NULL; + sz_print_ = NULL; + INIT_LIST_HEAD(&list_, this); +} + +breakpoint::~breakpoint() +{ + if (sz_print_) + free(sz_print_); +} + +unsigned int +breakpoint::get_total_width() +{ + return total_width_; +} + +unsigned int +breakpoint::get_total_stretch() +{ + return total_stretch_; +} + +unsigned int +breakpoint::get_total_shrink() +{ + return total_shrink_; +} + +unsigned int +breakpoint::get_total_width_after() +{ + unsigned int total_width_after = total_width_; + + /* We must not add the width if the item just after is, for example, an + * optional penalty, otherwise the computation of next line length will be + * incorrect. */ + if (break_item_ != NULL && break_item_->get_penalty() == 0) + total_width_after += break_item_->get_width(); + + return total_width_after; +} + +unsigned int +breakpoint::get_total_stretch_after() +{ + unsigned int total_stretch_after = total_stretch_; + + if (break_item_ != NULL) + total_stretch_after += break_item_->get_stretchability(); + + return total_stretch_after; +} + +unsigned int +breakpoint::get_total_shrink_after() +{ + unsigned int total_shrink_after = total_shrink_; + + if (break_item_ != NULL) + total_shrink_after += break_item_->get_shrinkability(); + + return total_shrink_after; +} + +item * +breakpoint::get_item() +{ + return break_item_; +} + +item * +breakpoint::get_previous_box() +{ + item *i = break_item_; + + while (i != NULL) { + if (i->is_box()) + break; + i = list_entry(i->list_.prev, item); + } + + return i; +} + +int +breakpoint::get_line_number() +{ + return line_number_; +} + +float +breakpoint::get_adjust_ratio() +{ + return adjust_ratio_; +} + +void +breakpoint::set_adjust_ratio(float adjust_ratio) +{ + adjust_ratio_ = adjust_ratio; +} + +unsigned int +breakpoint::get_total_demerits() +{ + return total_demerits_; +} + +void +breakpoint::set_total_demerits(unsigned int total_demerits) +{ + total_demerits_ = total_demerits; +} + +fitness_class_t +breakpoint::get_fitness_class() +{ + return fitness_class_; +} + +void +breakpoint::set_fitness_class(fitness_class_t fitness_class) +{ + fitness_class_ = fitness_class; +} + +breakpoint * +breakpoint::get_previous_best() +{ + return previous_best_; +} + +void +breakpoint::set_previous_best(breakpoint *previous) +{ + previous_best_ = previous; + line_number_ = previous->get_line_number() + 1; +} + + +/** + * Compute adjustement ratio between this breakpoint against the current item, + * given the 3 values of total width, stretch and shrink. candidate is used + * only to get the penalty. + */ +float +breakpoint::compute_adjust_ratio(int desired_line_length, + unsigned int total_width, + unsigned int total_stretch, + unsigned int total_shrink, + item *candidate) +{ + float ratio = 0; + int line_length; + unsigned int line_stretch; + unsigned int line_shrink; + + if (candidate == NULL) { + breakpoint_error("candidate item null"); + return -1; + } + + line_length = total_width - get_total_width_after(); +#if TRACE_BREAKPOINT >= LEVEL_DEBUG + char tmp[256]; + char tmp1[256]; + sprint(tmp); + //TODO: penalties should be printed directly + candidate->get_previous_item()->sprint_word(tmp1); + breakpoint_debug("From active breakpoint after '%s' to candidate after '%s'", + tmp, tmp1); + breakpoint_debug(" previous total width: %u", + get_total_width_after()); + breakpoint_debug(" line_length: %d", + line_length); +#endif + +/* If the candidate break is a penalty item, its width should be added (think of + * an optional hyphen) */ + if (candidate->get_penalty() > 0) { + line_length += candidate->get_width(); + } + + if (line_length < desired_line_length) { + line_stretch = total_stretch - get_total_stretch_after(); + breakpoint_debug(" line_stretch %u", line_stretch); + if (line_stretch > 0) + ratio = ((float)desired_line_length + - (float)line_length) / (float)line_stretch; + else + ratio = FLT_MAX; + } else if (line_length > desired_line_length) { + line_shrink = total_shrink - get_total_shrink_after(); + breakpoint_debug(" line_shrink %u", line_shrink); + if (line_shrink > 0) + ratio = ((float)desired_line_length + - (float)line_length) / (float)line_shrink; + else + ratio = FLT_MIN; + } + + if (ratio >= PLUS_INFINITY) + breakpoint_debug(" ratio: infinity"); + else + breakpoint_debug(" ratio: %.3f", ratio); + + return ratio; +} + +float +breakpoint::compute_badness(float adjust_ratio) +{ + float badness; + + if (adjust_ratio < -1) + badness = FLT_MAX; + else + badness = 100 * fabsf(pow(adjust_ratio, 3)); + + breakpoint_debug("badness %.3f", badness); + + return badness; +} + +unsigned int +breakpoint::compute_demerits(float badness, + item *candidate, + bool use_old_demerits_formula) +{ + unsigned int demerits; + int penalty; + int additional_penalty; + + if (break_item_ + && break_item_->is_flagged_penalty() + && candidate->is_flagged_penalty()) + additional_penalty = PLUS_INFINITY; //TODO maybe other value; + else + additional_penalty = 0; + + penalty = candidate->get_penalty(); + + /* Badness is always positive so we can simply convert float to int by adding + * 0.5 */ + if (penalty >= 0) { + if (use_old_demerits_formula) { + demerits = pow(1 + (unsigned int)(badness + 0.5) + penalty, 2) + + additional_penalty; + } + else { + demerits = pow(1 + (unsigned int)(badness + 0.5), 2) + + pow(penalty, 2) + + additional_penalty; + } + } else if (penalty <= MINUS_INFINITY) { + demerits = pow(1 + (unsigned int)(badness + 0.5), 2) + additional_penalty; + } else { + demerits = + powf(1 + (unsigned int)(badness + 0.5), 2) + - pow(penalty, 2) + + additional_penalty; + } + + breakpoint_debug("badness %.3f penalty %d demerits %u", + badness, penalty, demerits); + return demerits; +} + + +unsigned int +breakpoint::compute_adj_extra_demerits(fitness_class_t candidate_fitness) +{ + unsigned int extra_demerits = 0; + + switch (candidate_fitness) { + case FITNESS_CLASS_TIGHT: + if (fitness_class_ >= FITNESS_CLASS_LOOSE) + extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS; + break; + case FITNESS_CLASS_NORMAL: + if (fitness_class_ >= FITNESS_CLASS_VERY_LOOSE) + extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS; + break; + case FITNESS_CLASS_LOOSE: + if (fitness_class_ == FITNESS_CLASS_TIGHT) + extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS; + break; + case FITNESS_CLASS_VERY_LOOSE: + if (fitness_class_ <= FITNESS_CLASS_NORMAL) + extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS; + default: + break; //NOTE: intial breakpoint has a FITNESS_CLASS of MAX, + } + + return extra_demerits; +} + + +fitness_class_t +breakpoint::compute_fitness_class(float adjust_ratio) +{ + fitness_class_t fitness_class; + + if (adjust_ratio < (float) -0.5) + fitness_class = FITNESS_CLASS_TIGHT; + else if (adjust_ratio <= (float) 0.5) + fitness_class = FITNESS_CLASS_NORMAL; + else if (adjust_ratio <= (float) 1) + fitness_class = FITNESS_CLASS_LOOSE; + else + fitness_class = FITNESS_CLASS_VERY_LOOSE; + + return fitness_class; +} + +int +breakpoint::sprint(char *str) +{ + item *box; + int res; + + if (break_item_ != NULL) + { + box = get_previous_box(); + res = box->sprint_word(str); + if (break_item_->is_glue() == false) { + char tmp[32]; + sprintf(tmp, " (penalty: %d)", break_item_->get_penalty()); + strcat(str, tmp); + } + } else { + res = sprintf(str, "initial breakpoint"); + } + + return res; +} + + +const char * +breakpoint::print_breakpoint_info() +{ + item *box; + item *previous_breakpoint_box; + char tmp[256] = ""; + char tmp1[256] = ""; + + if (sz_print_ == NULL) { + sz_print_ = (char *)calloc(512, sizeof(char)); + + if (break_item_ != NULL) + { + box = get_previous_box(); + box->sprint_word(tmp); + } else { + sprintf(tmp, "initial breakpoint"); + } + + if (previous_best_ != NULL) { + previous_breakpoint_box = previous_best_->get_previous_box(); + if (previous_breakpoint_box != NULL) + previous_breakpoint_box->sprint_word(tmp1); + else + sprintf(tmp1, "initial breakpoint"); + snprintf(sz_print_, 511, "From '%s' to '%s'\n" + " line: %d\n" + " ratio: %.3f\n" + " total_demerits: %u\n" + " fitness class: %d\n", + tmp1, tmp, + line_number_, + adjust_ratio_, + total_demerits_, + fitness_class_); + } else { + sprintf(sz_print_, "Initial breakpoint\n"); + } + } + + return sz_print_; +} + +/** + * Class paragraph + */ +paragraph::paragraph() +{ + breakpoint *b; + + error_item_ = NULL; + tolerance_ = PARAGRAPH_DEFAULT_TOLERANCE; + line_length_ = PARAGRAPH_DEFAULT_LINE_WIDTH; + use_old_demerits_formula_ = false; + use_fitness_class_ = true; + hyphenation_penalty_ = PARAGRAPH_DEFAULT_HYPHENATION_PENALTY; + INIT_LIST_HEAD(&item_list_head_, NULL); + INIT_LIST_HEAD(&active_breaks_list_head_, NULL); + INIT_LIST_HEAD(&passive_breaks_list_head_, NULL); + array_best_breaks_ = NULL; + /* Add the 1st breakpoint */ + b = new breakpoint(NULL, 0, 0, 0); + list_add_tail(&b->list_, &active_breaks_list_head_); +} + +paragraph::~paragraph() +{ + item *pos, *n; + breakpoint *break_pos, *break_n; + /* Walk through the list and remove and delete 1 by 1 all items */ + list_for_each_entry_safe(pos, n, &item_list_head_, list_, item) { + list_del_init(&pos->list_); + delete pos; + } + + list_for_each_entry_safe( + break_pos, break_n, &passive_breaks_list_head_, list_, breakpoint) { + list_del_init(&break_pos->list_); + delete break_pos; + } + + list_for_each_entry_safe( + break_pos, break_n, &active_breaks_list_head_, list_, breakpoint) { + list_del_init(&break_pos->list_); + delete break_pos; + } + + if (array_best_breaks_) + free(array_best_breaks_); +} + +void +paragraph::config_use_old_demerits_formula() +{ + use_old_demerits_formula_ = true; +} + +void +paragraph::config_no_fitness_class() +{ + use_fitness_class_ = false; +} + +void +paragraph::config_hyphenation_penalty(unsigned int value) +{ + hyphenation_penalty_ = value; +} + +void +paragraph::add_box(paragraph_word *word) +{ + box_item *b; + + b = new box_item(word); + list_add_tail(&b->list_, &item_list_head_); +} + +void +paragraph::add_glue() +{ + item *previous; + unsigned int width = 0; + unsigned int stretchability = 0; + unsigned int shrinkability = 0; + glue_item *g; + previous = list_entry(item_list_head_.prev, item); + while (previous->is_box() == false && previous != list_entry(item_list_head_.next, item)) + previous = previous->get_previous_item(); + if (previous != NULL) { + paragraph_word *word; + word = previous->get_word(); + if (word != NULL) + word->get_next_glue_values(&width, &stretchability, &shrinkability); + } + g = new glue_item(width, stretchability, shrinkability); + list_add_tail(&g->list_, &item_list_head_); +} + +void +paragraph::add_optional_hyphen(paragraph_word *hyphen_sign) +{ + //hyphen sign have a width of 6 + penalty_item *p = new penalty_item(hyphenation_penalty_, true, hyphen_sign); + list_add_tail(&p->list_, &item_list_head_); +} + +void +paragraph::add_explicit_hyphen() +{ + penalty_item *p = new penalty_item(hyphenation_penalty_, true); + list_add_tail(&p->list_, &item_list_head_); +} + +/* + * If the last item is glue, remove it and add the finishing pattern: + * - disallowed break: penalty(0, PLUS_INFINITY, 0) + * - finishing glue: glue (0, PLUS_INFINITY, 0) + * - forced break: penalty(0, MINUS_INFINITY, 0) + */ +void +paragraph::finish() +{ + glue_item *last_glue; + penalty_item *disallowed_break_penalty = new penalty_item(PLUS_INFINITY, false); + glue_item *finishing_glue = new glue_item(0, PLUS_INFINITY, 0); + penalty_item *forced_break_penalty = new penalty_item(MINUS_INFINITY, false); + + /* FIXME we assume here that we always finish with glue so we always remove + * it. This should be more robust. */ + last_glue = list_entry(item_list_head_.prev, glue_item); + list_del_init(item_list_head_.prev); + delete(last_glue); + list_add_tail(&disallowed_break_penalty->list_, &item_list_head_); + list_add_tail(&finishing_glue->list_, &item_list_head_); + list_add_tail(&forced_break_penalty->list_, &item_list_head_); +} + + +/** + * Move a breakpoint from active list to passive list. + */ +void +paragraph::deactivate_breakpoint(breakpoint *active) +{ +#if TRACE_PARAGRAPH >= LEVEL_DEBUG + char str[256]; + active->sprint(str); + paragraph_debug(" deactivating '%s'", str); +#endif + + list_del_init(&active->list_); + list_add_tail(&active->list_, &passive_breaks_list_head_); +} + +void +paragraph::record_feasible_break(breakpoint *active, + breakpoint *candidate) +{ +#if TRACE_PARAGRAPH >= LEVEL_DEBUG + char str[256]; + active->sprint(str); + paragraph_debug(" record feasible break '%s'", str); +#endif + + candidate->set_previous_best(active); + if (list_empty(&candidate->list_)) + list_add_tail(&candidate->list_, &active_breaks_list_head_); +} + +/* + * Format the paragraph with Knuth-Plass algorithm. Algorithm general outline: + + for (all items 'b' of the paragraph) { + if (item 'b' is a legal breakpoint) { + for (all active breakpoint 'a') { + compute the adjustment ratio 'r' from 'a' to 'b' + if (r < -1) or (b is a forced break) { + deactivate 'a' (a is now too far behind) + } + if ( -r <= r < tolerance threshold) { + record feasible break from 'a' to 'b' + comptude demerits and fitness class. + } + } + if ('b' is a feasible breakpoint) { + append the best such break as active node (chose the best active node) + } + } + } +*/ + +// return 0 if OK +int +paragraph::format_knuth_plass(float tolerance, + unsigned int line_length) +{ + item *k_item; + float adjust_ratio; + breakpoint *active, *candidate, *n, *initial_breakpoint; + unsigned int total_width = 0, total_stretch = 0, total_shrink = 0; + float badness; + unsigned int demerits; + fitness_class_t fitness_class = FITNESS_CLASS_TIGHT; + int k; + breakpoint **tab_best_previous; + unsigned int *tab_total_best_demerits; + float *tab_ajust_ratio; // This is merely used for printing + int n_fitness_class; + unsigned int min_total_best_demerits; +#if TRACE_PARAGRAPH >= LEVEL_INFO + char tmp[128] = ""; + char tmp1[128] = ""; +#endif + + error_item_ = NULL; + if (use_fitness_class_) + n_fitness_class = FITNESS_CLASS_MAX; + else + n_fitness_class = 1; + tab_best_previous = (breakpoint **)calloc(n_fitness_class, + sizeof(breakpoint *)); + tab_total_best_demerits = (unsigned int *)calloc(n_fitness_class, + sizeof(unsigned int)); + tab_ajust_ratio = (float *)calloc(n_fitness_class, sizeof(float)); + tolerance_ = tolerance; + line_length_ = line_length; + + /* Walk through all the items of the paragraph */ + list_for_each_entry(k_item, &item_list_head_, list_, item) { +#if TRACE_PARAGRAPH >= LEVEL_INFO + k_item->sprint_info(tmp); + paragraph_debug("Loop: total width %u total strecth %u total shrink %u", + total_width, total_stretch, total_shrink); + paragraph_debug(" New item: %s", tmp); +#endif + if (k_item->is_legal_breakpoint()) { + min_total_best_demerits = PLUS_INFINITY; + for (k = 0; k < n_fitness_class; k++) { + tab_best_previous[k] = NULL; + tab_total_best_demerits[k] = PLUS_INFINITY; + tab_ajust_ratio[k] = PLUS_INFINITY; + } + + /* Check the candidate breakpoint against each active breakpoint */ + list_for_each_entry_safe( + active, n, &active_breaks_list_head_, list_, breakpoint) { + adjust_ratio = active->compute_adjust_ratio(line_length_, + total_width, + total_stretch, + total_shrink, + k_item); + if (k_item->is_forced_break() || adjust_ratio < -1) { + deactivate_breakpoint(active); + } + if (adjust_ratio >= -1 && adjust_ratio < tolerance_) { + /* there is a feasible break, remember of this breakpoint if the total + * demerits from the active to the candidate is less than the previous + * possible break. */ + badness = active->compute_badness(adjust_ratio); + demerits = active->compute_demerits(badness, + k_item, + use_old_demerits_formula_); + if (use_fitness_class_) { + fitness_class = active->compute_fitness_class(adjust_ratio); + demerits += + active->compute_adj_extra_demerits(fitness_class); + } + +#if TRACE_PARAGRAPH >= LEVEL_INFO + paragraph_info(" From %s to %s:", tmp1, tmp); + paragraph_info(" ratio : %.3f", adjust_ratio); + paragraph_info(" badness : %.3f", badness); + paragraph_info(" demerits : %u", demerits); + paragraph_info(" total_demerits : %u", + active->get_total_demerits() + demerits); + paragraph_info(" fitness_class: %d", fitness_class); +#endif + if (active->get_total_demerits() + demerits + < tab_total_best_demerits[fitness_class]) { + // possible break is best than previously + tab_best_previous[fitness_class] = active; + tab_total_best_demerits[fitness_class] = + active->get_total_demerits() + demerits; + tab_ajust_ratio[fitness_class] = adjust_ratio; + if (tab_total_best_demerits[fitness_class] < min_total_best_demerits) + min_total_best_demerits = tab_total_best_demerits[fitness_class]; + } + paragraph_debug(" tab_best_previous[%d]:%p", + fitness_class, + tab_best_previous[fitness_class]); + } + } + + if (min_total_best_demerits < PLUS_INFINITY) { + for (k = 0; + k < n_fitness_class; + k++) { + paragraph_debug(" tab_total_best_demerits[%d]: %d min_total_best_demerits: %u", + k, + tab_total_best_demerits[k], + min_total_best_demerits); + paragraph_debug(" tab_best_previous[%d]:%p", + k, + tab_best_previous[k]); + + if (tab_total_best_demerits[k] <= min_total_best_demerits + + PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS) { + candidate = new breakpoint(k_item, + total_width, total_stretch, total_shrink); + candidate->set_total_demerits(tab_total_best_demerits[k]); + candidate->set_adjust_ratio(tab_ajust_ratio[k]); + candidate->set_fitness_class((fitness_class_t)k); + record_feasible_break(tab_best_previous[k], + candidate); + } + } + } + + /* No more active breakpoint, leave with error */ + if (list_empty(&active_breaks_list_head_)) { + paragraph_error("Could nor format paragraph"); + error_item_ = k_item; + break; + } + } + /* compute total width */ + if (k_item->get_penalty() <= 0) + total_width += k_item->get_width(); + total_stretch += k_item->get_stretchability(); + total_shrink += k_item->get_shrinkability(); + } + + /* There should be one breakpoint left, the final one; move it to the passive + list */ + list_for_each_entry_safe( + active, n, &active_breaks_list_head_, list_, breakpoint) { + deactivate_breakpoint(active); + } + + /* Create the list of best breakpoints by starting by the end of the passive + * list */ + initial_breakpoint = list_entry(passive_breaks_list_head_.next, breakpoint); + n = list_entry(passive_breaks_list_head_.prev, breakpoint); + number_lines_ = n->get_line_number(); + array_best_breaks_ = (breakpoint **)calloc(number_lines_, + sizeof(breakpoint *)); + k = 0; + while (n != NULL && n != initial_breakpoint) { + array_best_breaks_[number_lines_ - k - 1] = n; + n = n->get_previous_best(); + k++; + } + + if (tab_total_best_demerits) + free(tab_total_best_demerits); + if (tab_best_previous) + free(tab_best_previous); + if (tab_ajust_ratio) + free(tab_ajust_ratio); + + if (error_item_ != NULL) + return -1; + else + return 0; +} + + +int +paragraph::write_text(paragraph_writer_interface *pwi, int *number_lines) +{ + int res = 0; + item *pos; + breakpoint *next_feasible_breakpoint; + breakpoint *next_best_breakpoint; + int k, j_current_line; + float ratio; + float space_width; + paragraph_word *word; + + if (pwi == NULL) { + paragraph_error("Incorrect input"); + res = -1; + if (number_lines) + *number_lines = 0; + goto end; + } + + k = 0; + j_current_line = 1; + next_best_breakpoint = array_best_breaks_[k]; + /* passive_breaks_list_head_.next is the initial breakpoint */ + next_feasible_breakpoint = list_entry(passive_breaks_list_head_.next->next, + breakpoint); + list_for_each_entry(pos, &item_list_head_, list_, item) { + /* The item 'pos' correspond to a breakpoint */ + if (next_best_breakpoint && pos == next_best_breakpoint->get_item()) { + word = pos->get_word(); + if (word != NULL) // case of a hyphen + pwi->write_word_cbk(word); + pwi->break_here_cbk(j_current_line); + j_current_line++; + if (k < number_lines_ - 1) { + k++; + next_best_breakpoint = array_best_breaks_[k]; + } + } else if (pos == error_item_) { + /* nothing else can be printed, exit */ + res = -1; + goto end; + } else { + if (pos->is_box()) { // case of a word + word = pos->get_word(); // FIXME maybe + /* if (strcmp(word, "") == 0) // a space box used for indentation + pwi->write_space_cbk((float)pos->get_width()); + else*/ + pwi->write_word_cbk(word); + } else if (pos->is_glue()) { + /* Case of glue, that is, space: we must compute the space width */ + if (next_best_breakpoint != NULL) { + ratio = next_best_breakpoint->get_adjust_ratio(); + if (ratio >=0) + space_width = pos->get_width() + (pos->get_stretchability() * ratio); + else + space_width = pos->get_width() - (pos->get_shrinkability() * ratio); + pwi->write_space_cbk(space_width); + } else { + /* case where the format_knuth function failed, we set the space width + * to 1 so as this can still be used by the paragraph_printer class */ + pwi->write_space_cbk(1); + } + } + } + } + +end: + if (number_lines) + *number_lines = k + 1; + + return res; +} + +int +paragraph::get_number_of_lines() +{ + return number_lines_; +} + +// return FLT_MAX if line not found +float +paragraph::get_adjust_ratio(int line_number) +{ + float ratio = FLT_MAX; + + if (line_number <= number_lines_ && line_number >= 0) + ratio = array_best_breaks_[line_number - 1]->get_adjust_ratio(); + + return ratio; +} + +/* Returns fitness class of a line, or FITNESS_CLASS_MAX if incorrect line was + * given as parameter */ +fitness_class_t +paragraph::get_fitness_class(int line_number) +{ + fitness_class_t fitness_class = FITNESS_CLASS_MAX; + + if (line_number <= number_lines_ && line_number >= 0) + fitness_class = array_best_breaks_[line_number - 1]->get_fitness_class(); + + return fitness_class; +} + + +unsigned int +paragraph::get_total_demerits(int line_number) +{ + unsigned int total_demerits = PLUS_INFINITY; + + if (line_number <= number_lines_ && line_number >= 0) + total_demerits = array_best_breaks_[line_number - 1]->get_total_demerits(); + + return total_demerits; +} + +void paragraph::print_breakpoints() +{ + breakpoint *pos; + + list_for_each_entry(pos, &passive_breaks_list_head_, list_, breakpoint) { + printf("%s", pos->print_breakpoint_info()); + } +} + +struct breakpoint_mark { + struct list_head list_; + int offset; +}; + +paragraph_printer::paragraph_printer(paragraph *par) +{ + par_ = par; + current_index_ = 0; + size_to_print_ = 0; + max_line_length_ = 0; + n_lines_ = par_->get_number_of_lines(); + /* We reserve for 1 more lines than the actual number of lines of the + * paragraph: in case format_knuth failed to format the paragraph the last + * lines (where the failure arose) would not be counted in the total number of + * lines, but we still want to print it.*/ + tab_lines_ = (char **) calloc(n_lines_ + 1, sizeof(char *)); + tab_marks_ = (char **) calloc(n_lines_ + 1, sizeof(char *)); + tab_lines_[current_index_] = (char *) calloc(512, sizeof(char)); + tab_marks_[current_index_] = (char *) calloc(512, sizeof(char)); + /* passive_breaks_list_head_.next is the initial breakpoint */ + next_feasible_breakpoint_ = + list_entry(par->passive_breaks_list_head_.next->next, breakpoint); +} + +paragraph_printer::~paragraph_printer() +{ + int k; + for (k = 0; k < par_->get_number_of_lines() + 1; k++) { + free (tab_lines_[k]); + free (tab_marks_[k]); + } + free(tab_lines_); + free(tab_marks_); +} + +void +paragraph_printer::new_line(void) +{ + if (current_index_ < par_->get_number_of_lines() - 1) { + current_index_++; + tab_lines_[current_index_] = (char *) calloc(512, sizeof(char)); + tab_marks_[current_index_] = (char *) calloc(512, sizeof(char)); + size_to_print_ = 0; + } +} + +void +paragraph_printer::write_word_cbk(paragraph_word *word) +{ + char sz_word[256] = ""; + int len; + int k; + + word->snprint(sz_word, 256); + len = strlen(sz_word); + strcat(tab_lines_[current_index_], sz_word); + for (k = size_to_print_; k < size_to_print_ + len; k++) + tab_marks_[current_index_][k] = ' '; + size_to_print_ += len; + if (next_feasible_breakpoint_ + && word == next_feasible_breakpoint_->get_previous_box()->get_word()) { + tab_marks_[current_index_][k - 1] = '^'; + next_feasible_breakpoint_ = + list_entry(next_feasible_breakpoint_->list_.next, breakpoint); + } +} + +int +paragraph_printer::write_space_cbk(float space_width) +{ + paragraph_debug("space: %.3f", space_width); + strcat(tab_lines_[current_index_], " "); + tab_marks_[current_index_][size_to_print_] = ' '; + size_to_print_++; + + return 0; +} + +int +paragraph_printer::break_here_cbk(int line_number) +{ + if (size_to_print_ > max_line_length_) + max_line_length_ = size_to_print_; + new_line(); + + return 0; +} + +/* Works properly with ascii text only */ +int +paragraph_printer::print() +{ + int first_column_width; + int k, res; + int column1, column2, column3; + int number_lines = 0; + + res = par_->write_text(this, &number_lines); + first_column_width = printf("Number of lines: %d", + par_->get_number_of_lines()); + first_column_width += printf("%*s", + max_line_length_ + 5 - first_column_width, "|"); + column1 = printf(" adj. ratio |"); + column2 = printf(" total demerits |"); + column3 = printf(" fitness class |"); + printf("\n\n"); + + for (k = 0; k < number_lines; k++) { + printf("%-*s", first_column_width, tab_lines_[k]); + printf("%*.3f ", + column1 - 2, + par_->array_best_breaks_[k]->get_adjust_ratio()); + printf("%*u ", + column2 - 2, + par_->array_best_breaks_[k]->get_total_demerits()); + printf("%*u \n", + column3 - 2, + par_->array_best_breaks_[k]->get_fitness_class()); + printf("%s\n", tab_marks_[k]); + } + + /* if write_text failed we must print the very last line that is not part of + * the lines of formatted the paragraph */ + if (res != 0) { + printf("\nCould not finish the formatting after line:\n\n"); + printf("%s\n", tab_lines_[number_lines]); + } + return 0; +} diff --git a/src/roff/troff/paragraph.h b/src/roff/troff/paragraph.h new file mode 100644 index 000000000..ddf5f2fe7 --- /dev/null +++ b/src/roff/troff/paragraph.h @@ -0,0 +1,107 @@ +// -*- C++ -*- + +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#include "list.h" +#include "paragraph_word.h" + +class breakpoint; + +/** + * Class paragraph + */ +class paragraph_writer_interface { +public: + virtual void write_word_cbk(paragraph_word *word) { word->write(); }; + virtual int write_space_cbk(float space_width) = 0; + virtual int break_here_cbk(int line_number) = 0; //TODO : would be + //more useful to + //return the last + //word +}; + +class paragraph { + friend class test_paragraph; + friend class paragraph_printer; + float tolerance_; + unsigned int line_length_; + // Only for test, using an original example from Knuth/Plass. + bool use_old_demerits_formula_; + bool use_fitness_class_; + unsigned int hyphenation_penalty_; + void deactivate_breakpoint(breakpoint *active); + void record_feasible_break(breakpoint *active, + breakpoint *candidate); + // Head of list of all the items of the paragraph + struct list_head item_list_head_; + // Head of lists of breakpoints + struct list_head active_breaks_list_head_; + struct list_head passive_breaks_list_head_; + breakpoint **array_best_breaks_; + int number_lines_; + item *error_item_; // last item before exiting in error + +public: + paragraph(); + ~paragraph(); + // Configuration + void config_use_old_demerits_formula(); + void config_no_fitness_class(); + void config_hyphenation_penalty(unsigned int value); + + // To build the paragraph + // TODO: enable to add a custom item + void add_box(paragraph_word *word); + void add_glue(); + void add_optional_hyphen(paragraph_word *hyphen_sign); + void add_explicit_hyphen(); + void finish(); + int + format_knuth_plass(float tolerance = PARAGRAPH_DEFAULT_TOLERANCE, + unsigned int line_length = PARAGRAPH_DEFAULT_LINE_WIDTH); + int write_text(paragraph_writer_interface *pwi, int *number_lines); + // For stats + int get_number_of_lines(); + float get_adjust_ratio(int line_number); // for stats + unsigned int get_total_demerits(int line_number); + fitness_class_t get_fitness_class(int line_number); + void print_breakpoints(); // for debug only +}; + +/* A simple helper class to print a paragraph with the main information and + * feasible breakpoints */ +class paragraph_printer : public paragraph_writer_interface { + paragraph *par_; + char **tab_lines_; + int n_lines_; + char **tab_marks_; + int size_to_print_; + int max_line_length_; + int current_index_; + breakpoint *next_feasible_breakpoint_; + void new_line(void); + +public: + paragraph_printer(paragraph *par); + ~paragraph_printer(); + void write_word_cbk(paragraph_word *word); + int write_space_cbk(float space_width); + int break_here_cbk(int line_number); + int print(); +}; diff --git a/src/roff/troff/paragraph_l.h b/src/roff/troff/paragraph_l.h new file mode 100644 index 000000000..5eb98b2ee --- /dev/null +++ b/src/roff/troff/paragraph_l.h @@ -0,0 +1,208 @@ +// -*- C++ -*- + +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of othe License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#include +#include +#include "list.h" +#include "paragraph_word.h" + +#define PARAGRAPH_DEFAULT_TOLERANCE 1 +#define PARAGRAPH_DEFAULT_HYPHENATION_PENALTY 50 +#define PARAGRAPH_DEFAULT_LINE_WIDTH 500 +#define PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS 10000 + +/* Classes local to paragraph.cpp */ + +/** + * Abstract Class item + * An item is either: + * - a box_item (contains a word). + * - a glue item (space between words). + * - a penalty item (used to put extra charges on less desirable way to break + * the paragraph). + */ +class item { + +protected: + paragraph_word *word_; + unsigned int width_; + unsigned int stretchability_; + unsigned int shrinkability_; + int penalty_; + bool flagged_penalty_; + bool is_breakpoint_; + +public: + struct list_head list_; + item(); + virtual ~item(); + unsigned int get_width(); + unsigned int get_stretchability(); + unsigned int get_shrinkability(); + int get_penalty(); + item *get_previous_item(); + paragraph_word *get_word(); + int sprint_word(char *str); + virtual bool is_box() = 0; + virtual bool is_glue() = 0; + virtual bool is_legal_breakpoint() = 0; + virtual bool is_forced_break() = 0; + virtual bool is_flagged_penalty() = 0; + virtual int sprint_info(char *str) = 0; +}; + +/** + * Class box_item + * + * A box contains a word and is not a legal breakpoint. + */ +class box_item : public item { + +public: + box_item(paragraph_word *word); + ~box_item(); + bool is_box(); + bool is_glue(); + bool is_legal_breakpoint(); + bool is_forced_break(); + bool is_flagged_penalty(); + int sprint_info(char *str); +}; + +/** + * Class glue_item + * + * The glue is the space between words. It is a legal breakpoint if the + * previous item is a box. + */ +class glue_item : public item { +public: + glue_item (unsigned int width, unsigned int stretchability, unsigned int shrinkability); + ~glue_item(); + bool is_box(); + bool is_glue(); + bool is_legal_breakpoint(); + bool is_forced_break(); + bool is_flagged_penalty(); + int sprint_info(char *str); +}; + +/** + * Class penalty_item + * + * Penalties can have special values: + * - minus infinity: it is a legaal and forced break point. + * - plus infinity: it is a disabled (forbidden) break point. + * Other values constitute a legal break point. + */ +class penalty_item : public item { + paragraph_word *optional_word_; +public: + penalty_item(int penalty, + bool flagged_penalty, + paragraph_word *optional_word); + ~penalty_item(); + unsigned int get_width(); + bool is_box(); + bool is_glue(); + bool is_legal_breakpoint(); + bool is_forced_break(); + bool is_flagged_penalty(); + int sprint_info(char *str); +}; + +/** + * Class breakpoint + * + * A breakpoint: + * - points to an item (the place where to break) + * - points to the previous best breakpoint. + * - Stores the the total width, stretch, shrink from the start of the + * paragraph. + */ + +/* Fitness class is used to avoid having, for example, a tight line + * following a loose line.*/ +typedef enum { + FITNESS_CLASS_TIGHT, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_LOOSE, + FITNESS_CLASS_VERY_LOOSE, + FITNESS_CLASS_MAX +} fitness_class_t; + +class breakpoint { + int line_number_; + float adjust_ratio_; + fitness_class_t fitness_class_; + unsigned int total_width_; + unsigned int total_stretch_; + unsigned int total_shrink_; + unsigned int total_demerits_; + item *break_item_; + breakpoint *previous_best_; + char *sz_print_; + +protected: + /* simple getters */ + unsigned int get_total_width(); + unsigned int get_total_stretch(); + unsigned int get_total_shrink(); + /* add the glue to the total */ + unsigned int get_total_width_after(); + unsigned int get_total_stretch_after(); + unsigned int get_total_shrink_after(); + +public: + struct list_head list_; + breakpoint(item *break_item, + unsigned int total_width, + unsigned int total_stretch, + unsigned int total_shrink); + ~breakpoint(); + /* simple getters and setters */ + item *get_item(); + item *get_previous_box(); + int get_line_number(); + float get_adjust_ratio(); + void set_adjust_ratio(float adjust_ratio); + unsigned int get_total_demerits(); + void set_total_demerits(unsigned int total_demerits); + fitness_class_t get_fitness_class(); + void set_fitness_class(fitness_class_t fitness_class); + breakpoint *get_previous_best(); + void set_previous_best(breakpoint *previous); + + /* compute various values */ + float compute_adjust_ratio(int desired_line_length, + unsigned int total_width, + unsigned int total_stretch, + unsigned int total_shrink, + item *candidate); + float compute_badness(float adjust_ratio); + unsigned int compute_demerits(float badness, + item *candidate, + bool use_old_demerits_formula = false); + unsigned int compute_adj_extra_demerits(fitness_class_t candidate_fitness); + fitness_class_t compute_fitness_class(float adjust_ratio); + + int sprint(char *str); + const char *print_breakpoint_info(); +}; diff --git a/src/roff/troff/paragraph_word.h b/src/roff/troff/paragraph_word.h new file mode 100644 index 000000000..91e2e2ad8 --- /dev/null +++ b/src/roff/troff/paragraph_word.h @@ -0,0 +1,36 @@ +// -*- C++ -*- + +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#ifndef PARAGRAPH_WORD_H +#define PARAGRAPH_WORD_H + +class paragraph_word { +public: + virtual ~paragraph_word() {}; + virtual unsigned int get_width() = 0; + virtual int get_next_glue_values(unsigned int *width, + unsigned int *stretchability, + unsigned int *shrinkability) = 0; + virtual void write() = 0; + // for debug only + virtual int snprint(char *str, size_t size) { return 0; }; +}; + +#endif diff --git a/test/test.am b/test/test.am index 1bf5e392c..7fec26cd5 100644 --- a/test/test.am +++ b/test/test.am @@ -23,3 +23,12 @@ utest_list_SOURCES = test/utest_list.cpp utest_list_CXXFLAGS = $(CPPUNIT_CFLAGS) -I$(top_srcdir)/src/roff/troff utest_list_LDFLAGS = $(CPPUNIT_LIBS) endif + +TESTS += test_paragraph +check_PROGRAMS += test_paragraph +test_paragraph_SOURCES = \ + src/roff/troff/paragraph.cpp \ + test/test_paragraph.cpp + +test_paragraph_CXXFLAGS = -I$(top_srcdir)/src/roff/troff + diff --git a/test/test_paragraph.cpp b/test/test_paragraph.cpp new file mode 100644 index 000000000..2736c44fb --- /dev/null +++ b/test/test_paragraph.cpp @@ -0,0 +1,971 @@ +/* Copyright (C) 2016- Free Software Foundation, Inc. + Written by Bertrand Garrigues (bertrand.garrigues@laposte.net) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "paragraph_l.h" +#include "paragraph.h" + +#define TRACE_ALIGN_LENGTH 40 +#include "trace.h" +trace_define_category(item, LEVEL_ERROR); +trace_define_category(breakpoint, LEVEL_ERROR); +trace_define_category(paragraph, LEVEL_ERROR); + +#define TEST_FAIL 1 +#define TEST_SUCCESS 0 +#define test_assert(expression, fmt, args...) \ + (((expression) == 0) ? \ + printf(" * FAIL [%s:%d]: " fmt "\n", \ + __FUNCTION__, __LINE__, ##args ), TEST_FAIL : TEST_SUCCESS) + +/* A simple implementation of paragraph_word, for ascii only. */ +class ascii_paragraph_word : public paragraph_word { +private: + /* Length of each letter as set in Knuth's original example in his + * article "Breaking Paragraph Into Lines" */ + static unsigned int char_len_[256]; + static void init_char_len(); + static bool is_class_initialized_; + char *sz_word_; + unsigned int width_; +public: + // width_ is set by the strlen of sz_word + ascii_paragraph_word(const char *sz_word); + ~ascii_paragraph_word(); + // overwrite the width calculated by the constructor, this is useful + // to create a box for indentation (which are empty but with a width + // > 0 + void set_width(unsigned int width); + unsigned int get_width(); + int get_next_glue_values(unsigned int *width, + unsigned int *stretchability, + unsigned int *shrinkability); + int snprint(char *str, size_t size); + void write(); +}; + +unsigned int ascii_paragraph_word::char_len_[256]; +bool ascii_paragraph_word::is_class_initialized_; + +void +ascii_paragraph_word::init_char_len() +{ + int k; + // Unknown characters width is set to 10 + for (k = 0; k < 256; k++) + char_len_[k] = 10; + char_len_[' '] = 0; // we set the space width to 0 here to be able to quickly + // compute the length of a line without having to deal with + // the last space + char_len_['a'] = 9; + char_len_['b'] = 10; + char_len_['c'] = 8; + char_len_['d'] = 10; + char_len_['e'] = 8; + char_len_['f'] = 6; + char_len_['g'] = 9; + char_len_['h'] = 10; + char_len_['i'] = 5; + char_len_['j'] = 6; + char_len_['k'] = 10; + char_len_['l'] = 5; + char_len_['m'] = 15; + char_len_['n'] = 10; + char_len_['o'] = 9; + char_len_['p'] = 10; + char_len_['q'] = 10; + char_len_['r'] = 7; + char_len_['s'] = 7; + char_len_['t'] = 7; + char_len_['u'] = 10; + char_len_['v'] = 9; + char_len_['w'] = 13; + char_len_['x'] = 10; + char_len_['y'] = 10; + char_len_['z'] = 8; + char_len_['C'] = 13; + char_len_['I'] = 6; + + char_len_['-'] = 6; + char_len_[','] = 5; + char_len_[';'] = 5; + char_len_['.'] = 5; + char_len_['\''] = 5; + + is_class_initialized_ = true ; +} + +ascii_paragraph_word::ascii_paragraph_word(const char *sz_word) +{ + if (is_class_initialized_ == false) + init_char_len(); + + width_ = 0; + if (sz_word) { + size_t len; + int k = 0; + len = strlen(sz_word); + sz_word_ = (char *)malloc(len + 1); + strcpy(sz_word_, sz_word); + while (sz_word_[k] != '\0') { + if (sz_word_[k] >= 0) + width_ += char_len_[(unsigned char) sz_word_[k]]; + k++; + } + } +} + +ascii_paragraph_word::~ascii_paragraph_word() +{ + if (sz_word_) + free(sz_word_); +} + +void +ascii_paragraph_word::set_width(unsigned int width) +{ + width_ = width; +} + +unsigned int +ascii_paragraph_word::get_width() +{ + return width_; +} + +int +ascii_paragraph_word::get_next_glue_values(unsigned int *width, + unsigned int *stretchability, + unsigned int *shrinkability) +{ + size_t len; + char last_character; + + if (sz_word_) { + len = strlen(sz_word_); + last_character = sz_word_[len - 1]; + } else { + last_character = 0; + } + switch (last_character) { + case ',': + *width = 6; + *stretchability = 4; + *shrinkability = 2; + break; + case ';': + *width = 6; + *stretchability = 4; + *shrinkability = 1; + break; + case '.': + *width = 8; + *stretchability = 6; + *shrinkability = 1; + break; + default: + *width = 6; + *stretchability = 3; + *shrinkability = 2; + break; + } +} + +int +ascii_paragraph_word::snprint(char *str, size_t size) +{ + return snprintf(str, size, "%s", sz_word_); +} + +void +ascii_paragraph_word::write() +{ + printf("%s", sz_word_); +} + +/* Return -1 if the word cannot be hyphenate. If it can, return the position in + the word were the hyphenation can be done. We just take all the words in the + example paragraph that can be hyphenated. */ +typedef enum { + NO_HYPHEN, + EXPLICIT_HYPHEN, + OPTIONAL_HYPHEN +} hyphen_type_t; + +class text_loader { + char *text_; // Complete paragraph text in one string + hyphen_type_t simulate_hyphenate (const char *word, + unsigned int *first_part_len); +public: + text_loader(char *text, const char *path = NULL); + ~text_loader(); + void process_text(paragraph *par, bool with_indentation); +}; + +text_loader::text_loader(char *text, const char *path) +{ + if (text) { + text_ = (char *)calloc(strlen(text) +1, sizeof (char)); + strcpy(text_, text); + } else if (path) { + FILE *fp; + char *c; + size_t text_size; + struct stat buf; + + fp = fopen (path, "r"); + if (fp == NULL) { + printf("Error:%s\n", strerror(errno)); + text_ = NULL; + } else { + stat(path, &buf); + text_ = (char *) calloc((buf.st_size) + 1, sizeof(char)); + text_size = fread(text_, 1, buf.st_size, fp); + for (c = text_; c < text_ + text_size; c++) { + if (*c == '\n') + *c = ' '; + } + fclose(fp); + } + } +} + +text_loader::~text_loader() +{ + if (text_) + free(text_); +} + +hyphen_type_t +text_loader::simulate_hyphenate (const char *word, unsigned int *first_part_len) +{ + hyphen_type_t ret = OPTIONAL_HYPHEN; + + if (word == NULL || first_part_len == NULL) + goto end; + + if (strncmp(word, "lime-tree", 9) == 0) { + *first_part_len = 5; + ret = EXPLICIT_HYPHEN; + goto end; + } + + if (strncmp(word, "wishing", 7) == 0) + *first_part_len = 4; + else if (strncmp(word, "daughters", 9) == 0) + *first_part_len = 5; + /* for the sake of simplicity we don't consider the break after beauti */ + else if (strncmp(word, "beautiful", 9) == 0) + *first_part_len = 4; + else if (strncmp(word, "youngest", 8) == 0) + *first_part_len = 5; + else if (strncmp(word, "itself", 6) == 0) + *first_part_len = 2; + else if (strncmp(word, "astonished", 10) == 0) + *first_part_len = 5; + else if (strncmp(word, "whenever", 8) == 0) + *first_part_len = 4; + else if (strncmp(word, "forest", 6) == 0) + *first_part_len = 3; + else if (strncmp(word, "under", 5) == 0) + *first_part_len = 2; + else if (strncmp(word, "lime-tree", 9) == 0) + *first_part_len = 5; + else if (strncmp(word, "fountain", 8) == 0) + *first_part_len = 4; + else if (strncmp(word, "favorite", 8) == 0) + *first_part_len = 5; + else if (strncmp(word, "plaything", 9) == 0) + *first_part_len = 4; + else if (strncmp(word, "hyphenationtest", 15) == 0) + *first_part_len = 11; + else + ret = NO_HYPHEN; + +end: + return ret; +} + +void +text_loader::process_text(paragraph *par, bool with_indentation) +{ + char *cur, *tmp; + ascii_paragraph_word *cur_word, *indentation; + unsigned int width; + hyphen_type_t type; + unsigned int hyphenate = 0; + char previous_char; + char *tmp_text; + + /* Add indentation width 18 */ + if (with_indentation) { + indentation = new ascii_paragraph_word(" "); + indentation->set_width(18); + par->add_box(indentation); + } + + /* Build the paragraph: for each word of 'text' we check if there is an + * explicit hyphen (here only the word "lime-tree"), otherwise we add an + * optional hyphen, and add the corresponding items. For example 'whenever' + * has an optional hyphen (when-ever), so we: + * - add a box of width 4 ('when'). + * - add an optional hyphen, which is a penalty item (penalty added only if + the hyphen is chosen). + * - add a box of width 4 ('ever'). + */ + tmp_text = (char *) calloc(strlen(text_) + 1, sizeof(char)); + strcpy(tmp_text, text_); + cur = strtok(tmp_text, " "); + while (cur != NULL) { + type = simulate_hyphenate(cur, &hyphenate); + if (type != NO_HYPHEN) { + tmp = (char *) malloc(hyphenate + 1); + strncpy(tmp, cur, hyphenate); + tmp[hyphenate] = '\0'; + cur_word = new ascii_paragraph_word(tmp); + par->add_box(cur_word); + free(tmp); + if (type == EXPLICIT_HYPHEN) { + par->add_explicit_hyphen(); + } else { + ascii_paragraph_word *hyphen_sign = new ascii_paragraph_word("-"); + par->add_optional_hyphen(hyphen_sign); + } + cur_word = new ascii_paragraph_word(cur + hyphenate); + par->add_box(cur_word); + par->add_glue(); + } else { + cur_word = new ascii_paragraph_word(cur); + par->add_box(cur_word); + par->add_glue(); + } + cur = strtok(NULL, " "); + } + /* Add final glue */ + par->finish(); + free(tmp_text); +} + +struct expected_break_info { + const char word[64]; + unsigned int demerit; + unsigned int total_demerits; +}; + + +#define PRINT_RESULT(n_failures) \ + do { \ + if (n_failures == 0) \ + printf("-- Test %s PASSED\n\n", __FUNCTION__); \ + else \ + fprintf(stderr, "** Test %s FAILED, %d failures\n", __FUNCTION__, n_failures); \ + } while(0) + +class test_paragraph { + +private: + text_loader *text_loader_; + int check_all_breakpoint(struct expected_break_info tab_expected[], + paragraph *par); + int check_best_breakpoint(struct expected_break_info tab_expected[], + paragraph *par); + int check_best_breakpoint(const char*tab_expected[], paragraph *par); +public: + test_paragraph(); + ~test_paragraph(); + void suite1_init(); + int test11_original_example(); + int test12_example_with_default_demerits_formula(); + int test13_example_with_larger_tolerance(); + + void suite2_init(); + int test21_hyphen_flagged_penalty(); + + void suite3_init(); + int test31_fitness_class(); +}; + +test_paragraph::test_paragraph() +{ + text_loader_ = NULL; +} + +/* Check all breakpoints list, we start at passive_breaks_list_head_.next + * because the first breakpoint is the initial breakpoint */ +int +test_paragraph::check_all_breakpoint(struct expected_break_info tab_expected[], + paragraph *par) +{ + int res = 0; + breakpoint *pos; + unsigned int total_demerits; + char word[256]; + int k = 0; + item *i; + + list_for_each_entry(pos, + par->passive_breaks_list_head_.next, + list_, + breakpoint) { + total_demerits = pos->get_total_demerits(); + res += test_assert(tab_expected[k].total_demerits == total_demerits, + "total demerits: expected: %u, actual: %u", + tab_expected[k].total_demerits, + total_demerits); + i = pos->get_previous_box(); + if (i != NULL) { + paragraph_word *pw = i->get_word(); + pw->snprint(word, 256); + i = pos->get_item(); + res += test_assert(strcmp(tab_expected[k].word, word) == 0, + "expected: '%s' actual: '%s'", + tab_expected[k].word, + word); + } + k++; + } + + return res; +} + +/* Check best breakpoints list, the first breakpoint of best_breaks_list_head_ + is the breakpoint at the end of the 1st line, not the initial breakpoint + Return the number of assert that failed or 0 if OK */ +int +test_paragraph::check_best_breakpoint(struct expected_break_info tab_expected[], + paragraph *par) +{ + int res = 0; + unsigned int total_demerits; + char word[256]; + int k = 0; + item *i; + + for (k = 0; k < par->get_number_of_lines(); k++) { + total_demerits = par->array_best_breaks_[k]->get_total_demerits(); + res += test_assert(tab_expected[k].total_demerits == total_demerits, + "total demerits: expected: %u, actual: %u", + tab_expected[k].total_demerits, + total_demerits); + i = par->array_best_breaks_[k]->get_previous_box(); + if (i == NULL) { + res += test_assert(false, "cannot find breakpoint's previous box"); + } else { + paragraph_word *pw = i->get_word(); + pw->snprint(word, 256); + i = par->array_best_breaks_[k]->get_item(); + res += test_assert(strcmp(tab_expected[k].word, word) == 0, + "expected: '%s' actual: '%s'", + tab_expected[k].word, + word); + } + } + + return res; +} + +int +test_paragraph::check_best_breakpoint(const char*tab_expected[], paragraph *par) +{ + int res = 0; + char word[256]; + int k = 0; + item *i; + + for (k = 0; k < par->get_number_of_lines(); k++) { + i = par->array_best_breaks_[k]->get_previous_box(); + if (i == NULL) { + res += test_assert(false, "cannot find breakpoint's previous box"); + } else { + paragraph_word *pw; + pw = i->get_word(); + pw->snprint(word, 256); + i = par->array_best_breaks_[k]->get_item(); + res += test_assert(strcmp(tab_expected[k], word) == 0, + "expected: '%s' actual: '%s'", + tab_expected[k], + word); + } + } + + return res; +} + +test_paragraph::~test_paragraph() +{ + if (text_loader_) + delete text_loader_; +} + +/* Load the text and add to par the corresponding box, glue, penalty */ + +/** + * Test 1 + * + * Check each line ratio. FIXME understand why the last one should be 0.004 and + * not 0.000. + */ +void +test_paragraph::suite1_init() +{ + text_loader *tl; + char text[] = + "In olden times when wishing still helped one, there lived a " + "king whose daughters were all beautiful; and the youngest was " + "so beautiful that the sun itself, which has seen so much, was " + "astonished whenever it shone in her face. Close by the king's " + "castle lay a great dark forest, and under an old lime-tree in the " + "forest was a well, and when the day was very warm, the king's " + "child went out into the forest and sat down by the side of the " + "cool fountain; and when she was bored she took a golden ball, " + "and threw it up on high and caught it; and this ball was her " + "favorite plaything."; + + printf("-- Suite 1 Initialisation\n"); + tl = new text_loader(text); + text_loader_ = tl; +} + +int +test_paragraph::test11_original_example() +{ + paragraph *par = new paragraph; + paragraph_printer *printer; + int res = 0; + float expected_line_ratio[10] = {0.774, 0.179, 0.629, 0.545, 0.000, + 0.079, 0.282, 0.294, 0.575, 0.000}; + + int n_lines; + int k; + float ratio; + struct expected_break_info all_expected[] = { + { "a", 2209, 2209 }, + { "king", 1521, 1521 }, + { "was", 4, 2213 }, + { "so", 3136, 4657 }, + { "was", 676, 2889 }, + { "aston", 3600, 8257 }, + { "king's", 289, 3178 }, + { "castle", 9, 8266 }, + { "lay", 4489, 12746 }, + { "in", 5929, 9107 }, + { "the", 1, 3179 }, + { "for", 3481, 11747 }, + { "est", 1, 8267 }, + { "was", 4, 12750 }, + { "a", 2209, 14955 }, + { "the", 676, 9783 }, + { "king's", 1, 3180 }, + { "child", 4, 8271 }, + { "went", 1, 12751 }, + { "out", 1369, 16324 }, + { "side", 16, 9799 }, + { "of", 49, 9832 }, + { "the", 9, 3189 }, + { "cool", 121, 8392 }, + { "foun", 3249, 16000 }, + { "tain;", 400, 13151 }, + { "and", 1444, 17768 }, + { "golden", 1, 9800 }, + { "ball,", 16, 3205 }, + { "and", 25, 8417 }, + { "threw", 4, 16004 }, + { "it", 289, 13440 }, + { "up", 4, 13155 }, + { "on", 1, 17769 }, + { "was", 25, 9825 }, + { "her", 400, 3605 }, + { "favor", 2601, 11018 }, + { "ite", 16, 8433 }, + { "play", 3364, 16804 }, + { "thing.", 1, 3606 } + }; + + struct expected_break_info best_expected[] = { + { "a", 2209, 2209 }, + { "was", 4, 2213 }, + { "was", 676, 2889 }, + { "king's", 289, 3178 }, + { "the", 1, 3179 }, + { "king's", 1, 3180 }, + { "the", 9, 3189 }, + { "ball,", 16, 3205 }, + { "her", 400, 3605 }, + { "thing.", 1, 3606 } + }; + + printf("-- Test11...\n"); + + par->config_use_old_demerits_formula(); + par->config_no_fitness_class(); + text_loader_->process_text(par, true); + par->format_knuth_plass(); + + /* There should be 10 lines */ + printf(" Checking the number of lines\n"); + n_lines = par->get_number_of_lines(); + if (test_assert(n_lines == 10, + "actual number of lines:%d expected: 10", + n_lines) == TEST_FAIL) { + res++; + } + + /* test ratio */ + printf(" Checking the lines adjustement ratio\n"); + for (k = 0; k < 10; k++) { + ratio = par->get_adjust_ratio(k + 1); + if (test_assert(fabs(ratio - expected_line_ratio[k]) < 0.001, + "line number %d expected: %.3f actual ratio: %.3f", + k + 1, expected_line_ratio[k], ratio) == TEST_FAIL) { + res++; + } + } + + printf(" Checking all breakpoints demerits\n"); + res += check_all_breakpoint(all_expected, par); + + printf(" Checking the best breakpoints array\n"); + res += check_best_breakpoint(best_expected, par); + printer = new paragraph_printer(par); + printer->print(); + delete printer; + PRINT_RESULT(res); + delete par; + + return res; +} + +int +test_paragraph::test12_example_with_default_demerits_formula() +{ + int res = 0; + paragraph *par = new paragraph; + fitness_class_t fitness_class; + struct expected_break_info best_expected[] = { + { "a", 2209, 2209 }, + { "was", 4, 2213 }, + { "was", 676, 2889 }, + { "king's", 289, 3178 }, + { "the", 1, 3179 }, + { "king's", 1, 3180 }, + { "the", 9, 3189 }, + { "ball,", 16, 3205 }, + { "her", 400, 3605 }, + { "thing.", 1, 3606 } + }; + fitness_class_t expected_fitness_class[10] = { + FITNESS_CLASS_LOOSE, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_LOOSE, + FITNESS_CLASS_LOOSE, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_NORMAL, + FITNESS_CLASS_LOOSE, + FITNESS_CLASS_NORMAL + }; + + int k = 0; + + printf("-- Test12...\n"); + text_loader_->process_text(par, true); + par->format_knuth_plass(); + + printf(" Checking the best breakpoints array\n"); + res += check_best_breakpoint(best_expected, par); + + printf(" Checking the lines fitness class\n"); + for (k = 0; k < 10; k++) { + fitness_class = par->get_fitness_class(k + 1); + if (test_assert( + (fitness_class == expected_fitness_class[k]), + "line number: %d expected: %d actual fitness class: %d", + k + 1, + expected_fitness_class[k], + fitness_class) == TEST_FAIL) + res++; + } + + PRINT_RESULT(res); + delete par; + + return res; +} + +int +test_paragraph::test13_example_with_larger_tolerance() +{ + int res = 0; + paragraph *par = new paragraph; + struct expected_break_info best_expected[] = { + { "a", 2209, 2209 }, + { "was", 4, 2213 }, + { "was", 676, 2889 }, + { "king's", 289, 3178 }, + { "the", 1, 3179 }, + { "king's", 1, 3180 }, + { "the", 9, 3189 }, + { "ball,", 16, 3205 }, + { "her", 400, 3605 }, + { "thing.", 1, 3606 } + }; + + printf("-- Test13...\n"); + text_loader_->process_text(par, true); + par->format_knuth_plass(10); + + printf(" Checking the best breakpoints array\n"); + res += check_best_breakpoint(best_expected, par); + + PRINT_RESULT(res); + delete par; + + return res; +} + +/* In this test the word 'hyphenationtest' can be hyphenated after + * 'hyphenation'. Letters 'A', 'B' and 'D' have a width of 10; 'C' has a width + * of 13, which means that the algorithm would naturally try to cut like this: + + AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAA hyphenation- + test BBBBBBBBBB BBBBBBBBBB BBBBBBBBBB jlC hyphenation- + test DDDDDDDDDD DDDDDDDDDD"; + + However the additional penalty for two hyphenation in a row should make the + algorithm chose the break before the first DDDDDDDDDD. + */ +void +test_paragraph::suite2_init() +{ + text_loader *tl; + char text[] = + "AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAA hyphenationtest " + "BBBBBBBBBB BBBBBBBBBB BBBBBBBBBB jlC hyphenationtest " + "DDDDDDDDDD DDDDDDDDDD"; + + printf("-- Suite 2 Initialisation\n"); + tl = new text_loader(text); + text_loader_ = tl; +} + +int +test_paragraph::test21_hyphen_flagged_penalty() +{ + int res = 0; + paragraph *par = new paragraph; + paragraph_printer *printer; + const char *best_expected[] = { + "hyphenation", + "test", //FIXME actually is should be hyphenationtest + "DDDDDDDDDD" + }; + + printf("-- Test21...\n"); + text_loader_->process_text(par, false); + par->format_knuth_plass(2); + + printf(" Checking the best breakpoints array\n"); + res += check_best_breakpoint(best_expected, par); + + printer = new paragraph_printer(par); + printer->print(); + delete printer; + PRINT_RESULT(res); + delete par; + + return res; +} + +/* There is only 1 feasible breakpoint at the first line, and the line is of +class 0. At the second line, there are two feasible breaks, a class and a +class 2; the class 2 is better but the class 1 should be chosen because of the +first line. */ +void +test_paragraph::suite3_init() +{ + text_loader *tl; + char text[] = + "The first line's best break makes it very veryyyyyy tiiiiiiiiiiiiiight, " + "the second line's best break is of class two but another break will " + "have to be preferred; it will give another line of class 0."; + + printf("-- Suite 3 Initialisation\n"); + tl = new text_loader(text); + text_loader_ = tl; +} + +int +test_paragraph::test31_fitness_class() +{ + int res = 0; + paragraph *par = new paragraph; + paragraph_printer *printer; + int k = 0; + item *i; + + const char *best_expected[] = { + "tiiiiiiiiiiiiiight,", + "will", + "0." + }; + char word[256]; + + printf("-- Test31...\n"); + text_loader_->process_text(par, false); + par->format_knuth_plass(2); + + printf(" Checking the best breakpoints array\n"); + for (k = 0; k < par->get_number_of_lines(); k++) { + i = par->array_best_breaks_[k]->get_previous_box(); + if (i == NULL) { + res += test_assert(false, "cannot find breakpoint's previous box"); + } else { + i->sprint_word(word); + i = par->array_best_breaks_[k]->get_item(); + res += test_assert(strcmp(best_expected[k], word) == 0, + "expected: '%s' actual: '%s'", + best_expected[k], + word); + } + } + + printer = new paragraph_printer(par); + printer->print(); + delete printer; + PRINT_RESULT(res); + delete par; + + return res; +} + + +int main (int argc, char **argv) +{ + int res = 0; + int c; + test_paragraph *tp; + char *file_path = NULL; + unsigned int line_length = 500; + float tolerance = 1; + long int suite_to_launch = -1; + long int test_to_launch = -1; + + while ((c = getopt (argc, argv, "f:l:s:t:T:")) != -1) { + switch (c) + { + case 'f': + file_path = optarg; + break; + case 'l': + line_length = strtol(optarg, NULL, 10); + break; + case 's': + suite_to_launch = strtol(optarg, NULL, 10); + break; + case 't': + test_to_launch = strtol(optarg, NULL, 10); + break; + case 'T': + tolerance = strtof(optarg, NULL); + break; + case '?': + if (optopt == 'f' || optopt == 's' || optopt == 't') + fprintf (stderr, "Option -%c requires an argument.\n", optopt); + else if (isprint(optopt)) + fprintf (stderr, "Unknown option `-%c'.\n", optopt); + else + fprintf (stderr, + "Unknown option character `\\x%x'.\n", + optopt); + return 1; + default: + abort(); + } + } + + if (suite_to_launch == 0 && test_to_launch > 0) { + fprintf (stderr, + "Passing test number without test suite, please use option -s"); + res = -1; + goto end; + } + + if (file_path != NULL) { + text_loader *tl = new text_loader(NULL, file_path); + paragraph *par = new paragraph(); + paragraph_printer *printer; + printf("Processing content of %s with tolerance:%.3f line length:%u\n\n", + file_path, tolerance, line_length); + tl->process_text(par, true); + par->format_knuth_plass(tolerance, line_length); + printer = new paragraph_printer(par); + printer->print(); + delete printer; + delete par; + delete tl; + } else { + if (suite_to_launch == -1 || suite_to_launch == 1) { + tp = new test_paragraph(); + tp->suite1_init(); + if (test_to_launch == -1 || test_to_launch == 1) + res += tp->test11_original_example(); + if (test_to_launch == -1 || test_to_launch == 2) + res += tp->test12_example_with_default_demerits_formula(); + if (test_to_launch == -1 || test_to_launch == 3) + res += tp->test13_example_with_larger_tolerance(); + delete tp; + } + + if (suite_to_launch == -1 || suite_to_launch == 2) { + tp = new test_paragraph(); + tp->suite2_init(); + if (test_to_launch == -1 || test_to_launch == 1) + res += tp->test21_hyphen_flagged_penalty(); + delete tp; + } + + if (suite_to_launch == -1 || suite_to_launch == 3) { + tp = new test_paragraph(); + tp->suite3_init(); + if (test_to_launch == -1 || test_to_launch == 1) + res += tp->test31_fitness_class(); + delete tp; + } + + if (res == 0) + printf("All tests passed\n"); + else + fprintf(stderr, "%d tests failed\n", res); + } + +end: + return res; +} -- cgit v1.2.1