summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBertrand Garrigues <bertrand.garrigues@laposte.net>2017-11-09 00:43:17 +0100
committerBertrand Garrigues <bertrand.garrigues@laposte.net>2017-11-09 00:43:17 +0100
commit97ae5b7026a7225d6a541fcf885a1a9a3fc060d2 (patch)
tree0ad4603b3b2ceee4208990c452ba5314aeb983ad
parent894dfbfa69e2d4bc7446818d6415d3e77f94cd85 (diff)
downloadgroff-git-format_knuth_plass.tar.gz
Add class 'paragraph' for Knuth-Plass formatting.format_knuth_plass
To be used in `troff' in the future, but not connected to the rest of the `troff' code yet. * src/include/trace.h: macro to trace and debug. * src/roff/troff/paragraph.h: * src/roff/troff/paragraph_l.h: * src/roff/troff/paragraph.cpp: * src/roff/troff/paragraph_word.h: New files. The class `paragraph' exposes some public methods to build the paragraph (`add_box', `add_glue', `add_optional_hyphen', `add_explicit_hyphen'). It use an interface `paragraph_word' that the caller must implement to pass the words width and the glue values to the `paragraph' class, and also a `write' method. The caller must also implement `paragraph_writer_interface', another interface used by `paragraph' to control the writing process. * test/test_paragraph.cpp: A simple example to demonstrate the usage of `paragraph', including the original example from Knuth's book `Digital Typography'. * test/test.am: add `test_paragraph' to `make check'.
-rw-r--r--src/include/trace.h80
-rw-r--r--src/roff/troff/paragraph.cpp1374
-rw-r--r--src/roff/troff/paragraph.h107
-rw-r--r--src/roff/troff/paragraph_l.h208
-rw-r--r--src/roff/troff/paragraph_word.h36
-rw-r--r--test/test.am9
-rw-r--r--test/test_paragraph.cpp971
7 files changed, 2785 insertions, 0 deletions
diff --git a/src/include/trace.h b/src/include/trace.h
new file mode 100644
index 000000000..79e6ba8ef
--- /dev/null
+++ b/src/include/trace.h
@@ -0,0 +1,80 @@
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef TRACE_H
+#define TRACE_H
+
+#define LEVEL_DEBUG 3
+#define LEVEL_INFO 2
+#define LEVEL_ERROR 1
+#define LEVEL_QUIET 0
+
+#include <stdio.h>
+
+#ifndef TRACE_ALIGN_LENGTH
+#define TRACE_ALIGN_LENGTH -1
+#endif
+
+// __FILE__ gives the full path of the file, we just want the file name.
+#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+#ifndef DEACTIVATE_TRACES
+
+#define trace_define_category(module_name, default_level) \
+ int trace_category_level_##module_name = default_level
+
+#define trace_use_category(module_name) \
+ extern int trace_category_level_##module_name
+
+#define trace_set_level(module_name, level) \
+ do { \
+ trace_category_level_##module_name = level; \
+ } while(0)
+
+#define trace_print(module_name, min_level, fmt, args...) \
+ do { \
+ if ((trace_category_level_##module_name) >= (min_level)) { \
+ int res##__LINE__; \
+ int length_align##__LINE__ = TRACE_ALIGN_LENGTH; \
+ res##__LINE__ = fprintf(stderr, "[%s:%d:%s]", __FILENAME__, __LINE__, __FUNCTION__); \
+ if (length_align##__LINE__ == -1) \
+ fprintf(stderr, fmt "\n", ##args); \
+ else \
+ fprintf(stderr, "%*s" fmt "\n", TRACE_ALIGN_LENGTH - res##__LINE__, #module_name": ", ##args); \
+ fflush(stderr); \
+ } \
+ } while(0)
+
+#define trace_debug(module_name, fmt, args...) \
+ trace_print(module_name, LEVEL_DEBUG, fmt, ##args)
+
+#define trace_info(module_name, fmt, args...) \
+ trace_print(module_name, LEVEL_INFO, fmt, ##args)
+
+#define trace_error(module_name, fmt, args...) \
+ trace_print(module_name, LEVEL_ERROR, fmt, ##args)
+
+#else /* DEACTIVATE_TRACES */
+#define trace_debug(args...)
+#define trace_info(args...)
+#define trace_error(args...)
+#define trace_define_category(args...)
+#define trace_set_level(args...)
+#endif /* DEACTIVATE_TRACES */
+
+#endif
diff --git a/src/roff/troff/paragraph.cpp b/src/roff/troff/paragraph.cpp
new file mode 100644
index 000000000..eeb5932f4
--- /dev/null
+++ b/src/roff/troff/paragraph.cpp
@@ -0,0 +1,1374 @@
+// -*- C++ -*-
+
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+#include "paragraph_l.h"
+#include "paragraph.h"
+
+#include "trace.h"
+trace_use_category(item);
+trace_use_category(breakpoint);
+trace_use_category(paragraph);
+
+#define item_debug(args...) \
+ trace_debug(item, ##args)
+#define item_info(args...) \
+ trace_info(item, ##args)
+#define item_error(args...) \
+ trace_error(item, item, ##args)
+#define breakpoint_debug(args...) \
+ trace_debug(breakpoint, ##args)
+#define breakpoint_info(args...) \
+ trace_info(breakpoint, ##args)
+#define breakpoint_error(args...) \
+ trace_error(breakpoint, ##args)
+#define paragraph_debug(args...) \
+ trace_debug(paragraph, ##args)
+#define paragraph_info(args...) \
+ trace_info(paragraph, ##args)
+#define paragraph_error(args...) \
+ trace_error(paragraph, ##args)
+
+#define PLUS_INFINITY (INT_MAX / 4)
+#define MINUS_INFINITY (INT_MIN / 4)
+
+/* strings used for debug */
+#define STRING_MAX_SIZE 256
+
+/*
+ * Abstract Class item
+ */
+item::item()
+{
+ INIT_LIST_HEAD(&list_, this);
+ is_breakpoint_ = false;
+}
+
+item::~item()
+{
+ if (word_ != NULL)
+ delete word_;
+}
+
+unsigned int
+item::get_width()
+{
+ return width_;
+}
+
+unsigned int
+item::get_stretchability()
+{
+ return stretchability_;
+}
+
+unsigned int
+item::get_shrinkability()
+{
+ return shrinkability_;
+}
+
+int
+item::get_penalty()
+{
+ return penalty_;
+}
+
+item *
+item::get_previous_item()
+{
+ return list_entry(list_.prev, item);
+}
+
+paragraph_word *
+item::get_word()
+{
+ return word_;
+}
+
+int
+item::sprint_word(char *str)
+{
+ int res = 0;
+
+ if (is_box() == true) {
+ if (word_ != NULL)
+ res = word_->snprint(str, STRING_MAX_SIZE);
+ } else if (is_glue() == true) {
+ res = sprintf(str, "glue: width %u strech %u shrink %u",
+ width_, stretchability_, shrinkability_);
+ } else {
+ res = sprintf(str, "penalty: width %u value %u flag %d",
+ width_, penalty_, flagged_penalty_);
+ }
+ item_debug("sprint_word:%s:", str);
+
+ return res;
+}
+
+/*
+ * Class box
+ */
+box_item::box_item(paragraph_word *word)
+ : item()
+{
+ char sz[STRING_MAX_SIZE] = "";
+
+ word_ = word;
+ stretchability_ = 0;
+ shrinkability_ = 0;
+ penalty_ = 0;
+ flagged_penalty_ = false;
+ width_ = word->get_width();
+ word_->snprint(sz, STRING_MAX_SIZE);
+ item_debug("new box:%s: width %u", sz, width_);
+}
+
+box_item::~box_item()
+{
+}
+
+
+bool
+box_item::is_box()
+{
+ return true;
+}
+
+bool
+box_item::is_glue()
+{
+ return false;
+}
+
+bool
+box_item::is_legal_breakpoint()
+{
+ return false;
+}
+
+bool
+box_item::is_forced_break()
+{
+ return false;
+}
+
+bool
+box_item::is_flagged_penalty()
+{
+ return false;
+}
+
+int
+box_item::sprint_info(char *str)
+{
+ char sz[STRING_MAX_SIZE] = "";
+ word_->snprint(sz, STRING_MAX_SIZE);
+ return sprintf(str, "box '%s' (width %u)", sz, width_);
+}
+
+
+/**
+ * Class glue
+ */
+glue_item::glue_item (unsigned int width,
+ unsigned int stretchability,
+ unsigned int shrinkability)
+ : item()
+{
+ width_ = width;
+ stretchability_ = stretchability;
+ shrinkability_ = shrinkability;
+ penalty_ = 0;
+ flagged_penalty_ = false;
+ word_ = NULL;
+}
+
+glue_item::~glue_item()
+{
+}
+
+bool
+glue_item::is_box()
+{
+ return false;
+}
+
+bool
+glue_item::is_glue()
+{
+ return true;
+}
+
+bool
+glue_item::is_legal_breakpoint()
+{
+ bool ret;
+ item *previous_item;
+
+ previous_item = get_previous_item();
+ if (previous_item->is_box())
+ ret = true;
+ else
+ ret = false;
+
+ return ret;
+}
+
+bool
+glue_item::is_forced_break()
+{
+ return false;
+}
+
+bool
+glue_item::is_flagged_penalty()
+{
+ return false;
+}
+
+int
+glue_item::sprint_info(char *str)
+{
+ char tmp[32];
+
+ if (stretchability_ >= PLUS_INFINITY)
+ sprintf(tmp, "infinity");
+ else
+ sprintf(tmp, "%u", stretchability_);
+
+ return sprintf(str, "glue width:%u strecth:%s shrink:%u",
+ width_, tmp, shrinkability_);
+}
+
+/**
+ * Class penalty
+ *
+ */
+penalty_item::penalty_item(int penalty,
+ bool flag,
+ paragraph_word *optional_word = NULL)
+ : item()
+{
+ stretchability_ = 0;
+ shrinkability_ = 0;
+ penalty_ = penalty;
+ flagged_penalty_ = flag;
+ word_ = optional_word;
+ if (word_)
+ width_ = word_->get_width();
+ else
+ width_ = 0;
+}
+
+penalty_item::~penalty_item()
+{
+}
+
+bool
+penalty_item::is_box()
+{
+ return false;
+}
+
+bool
+penalty_item::is_glue()
+{
+ return false;
+}
+
+bool
+penalty_item::is_legal_breakpoint()
+{
+ bool ret;
+
+ /* +infinity is a disallowed break */
+ if (penalty_ < PLUS_INFINITY)
+ ret = true;
+ else
+ ret = false;
+
+ return ret;
+}
+
+bool
+penalty_item::is_forced_break()
+{
+ bool ret = false;
+
+ if (penalty_ == MINUS_INFINITY)
+ ret = true;
+
+ return ret;
+}
+
+bool
+penalty_item::is_flagged_penalty()
+{
+ return flagged_penalty_;
+}
+
+int
+penalty_item::sprint_info(char *str)
+{
+ char tmp[32];
+
+ if (penalty_ >= PLUS_INFINITY)
+ sprintf(tmp, "infinity");
+ else if (penalty_ <= MINUS_INFINITY)
+ sprintf(tmp, "-infinity");
+ else
+ sprintf(tmp, "%d", penalty_);
+
+ return sprintf(str,
+ "penalty width:%u value:%s flag:%d",
+ width_,
+ tmp,
+ flagged_penalty_);
+}
+
+/**
+ * Class breakpoint
+ */
+breakpoint::breakpoint(item *break_item,
+ unsigned int total_width,
+ unsigned int total_stretch,
+ unsigned int total_shrink)
+{
+ breakpoint_debug("New breakpoint, total_width %u", total_width);
+ break_item_ = break_item;
+ line_number_ = 0;
+ fitness_class_ = FITNESS_CLASS_MAX;
+ total_width_ = total_width;
+ total_stretch_ = total_stretch;
+ total_shrink_ = total_shrink;
+ total_demerits_ = 0;
+ previous_best_ = NULL;
+ sz_print_ = NULL;
+ INIT_LIST_HEAD(&list_, this);
+}
+
+breakpoint::~breakpoint()
+{
+ if (sz_print_)
+ free(sz_print_);
+}
+
+unsigned int
+breakpoint::get_total_width()
+{
+ return total_width_;
+}
+
+unsigned int
+breakpoint::get_total_stretch()
+{
+ return total_stretch_;
+}
+
+unsigned int
+breakpoint::get_total_shrink()
+{
+ return total_shrink_;
+}
+
+unsigned int
+breakpoint::get_total_width_after()
+{
+ unsigned int total_width_after = total_width_;
+
+ /* We must not add the width if the item just after is, for example, an
+ * optional penalty, otherwise the computation of next line length will be
+ * incorrect. */
+ if (break_item_ != NULL && break_item_->get_penalty() == 0)
+ total_width_after += break_item_->get_width();
+
+ return total_width_after;
+}
+
+unsigned int
+breakpoint::get_total_stretch_after()
+{
+ unsigned int total_stretch_after = total_stretch_;
+
+ if (break_item_ != NULL)
+ total_stretch_after += break_item_->get_stretchability();
+
+ return total_stretch_after;
+}
+
+unsigned int
+breakpoint::get_total_shrink_after()
+{
+ unsigned int total_shrink_after = total_shrink_;
+
+ if (break_item_ != NULL)
+ total_shrink_after += break_item_->get_shrinkability();
+
+ return total_shrink_after;
+}
+
+item *
+breakpoint::get_item()
+{
+ return break_item_;
+}
+
+item *
+breakpoint::get_previous_box()
+{
+ item *i = break_item_;
+
+ while (i != NULL) {
+ if (i->is_box())
+ break;
+ i = list_entry(i->list_.prev, item);
+ }
+
+ return i;
+}
+
+int
+breakpoint::get_line_number()
+{
+ return line_number_;
+}
+
+float
+breakpoint::get_adjust_ratio()
+{
+ return adjust_ratio_;
+}
+
+void
+breakpoint::set_adjust_ratio(float adjust_ratio)
+{
+ adjust_ratio_ = adjust_ratio;
+}
+
+unsigned int
+breakpoint::get_total_demerits()
+{
+ return total_demerits_;
+}
+
+void
+breakpoint::set_total_demerits(unsigned int total_demerits)
+{
+ total_demerits_ = total_demerits;
+}
+
+fitness_class_t
+breakpoint::get_fitness_class()
+{
+ return fitness_class_;
+}
+
+void
+breakpoint::set_fitness_class(fitness_class_t fitness_class)
+{
+ fitness_class_ = fitness_class;
+}
+
+breakpoint *
+breakpoint::get_previous_best()
+{
+ return previous_best_;
+}
+
+void
+breakpoint::set_previous_best(breakpoint *previous)
+{
+ previous_best_ = previous;
+ line_number_ = previous->get_line_number() + 1;
+}
+
+
+/**
+ * Compute adjustement ratio between this breakpoint against the current item,
+ * given the 3 values of total width, stretch and shrink. candidate is used
+ * only to get the penalty.
+ */
+float
+breakpoint::compute_adjust_ratio(int desired_line_length,
+ unsigned int total_width,
+ unsigned int total_stretch,
+ unsigned int total_shrink,
+ item *candidate)
+{
+ float ratio = 0;
+ int line_length;
+ unsigned int line_stretch;
+ unsigned int line_shrink;
+
+ if (candidate == NULL) {
+ breakpoint_error("candidate item null");
+ return -1;
+ }
+
+ line_length = total_width - get_total_width_after();
+#if TRACE_BREAKPOINT >= LEVEL_DEBUG
+ char tmp[256];
+ char tmp1[256];
+ sprint(tmp);
+ //TODO: penalties should be printed directly
+ candidate->get_previous_item()->sprint_word(tmp1);
+ breakpoint_debug("From active breakpoint after '%s' to candidate after '%s'",
+ tmp, tmp1);
+ breakpoint_debug(" previous total width: %u",
+ get_total_width_after());
+ breakpoint_debug(" line_length: %d",
+ line_length);
+#endif
+
+/* If the candidate break is a penalty item, its width should be added (think of
+ * an optional hyphen) */
+ if (candidate->get_penalty() > 0) {
+ line_length += candidate->get_width();
+ }
+
+ if (line_length < desired_line_length) {
+ line_stretch = total_stretch - get_total_stretch_after();
+ breakpoint_debug(" line_stretch %u", line_stretch);
+ if (line_stretch > 0)
+ ratio = ((float)desired_line_length
+ - (float)line_length) / (float)line_stretch;
+ else
+ ratio = FLT_MAX;
+ } else if (line_length > desired_line_length) {
+ line_shrink = total_shrink - get_total_shrink_after();
+ breakpoint_debug(" line_shrink %u", line_shrink);
+ if (line_shrink > 0)
+ ratio = ((float)desired_line_length
+ - (float)line_length) / (float)line_shrink;
+ else
+ ratio = FLT_MIN;
+ }
+
+ if (ratio >= PLUS_INFINITY)
+ breakpoint_debug(" ratio: infinity");
+ else
+ breakpoint_debug(" ratio: %.3f", ratio);
+
+ return ratio;
+}
+
+float
+breakpoint::compute_badness(float adjust_ratio)
+{
+ float badness;
+
+ if (adjust_ratio < -1)
+ badness = FLT_MAX;
+ else
+ badness = 100 * fabsf(pow(adjust_ratio, 3));
+
+ breakpoint_debug("badness %.3f", badness);
+
+ return badness;
+}
+
+unsigned int
+breakpoint::compute_demerits(float badness,
+ item *candidate,
+ bool use_old_demerits_formula)
+{
+ unsigned int demerits;
+ int penalty;
+ int additional_penalty;
+
+ if (break_item_
+ && break_item_->is_flagged_penalty()
+ && candidate->is_flagged_penalty())
+ additional_penalty = PLUS_INFINITY; //TODO maybe other value;
+ else
+ additional_penalty = 0;
+
+ penalty = candidate->get_penalty();
+
+ /* Badness is always positive so we can simply convert float to int by adding
+ * 0.5 */
+ if (penalty >= 0) {
+ if (use_old_demerits_formula) {
+ demerits = pow(1 + (unsigned int)(badness + 0.5) + penalty, 2)
+ + additional_penalty;
+ }
+ else {
+ demerits = pow(1 + (unsigned int)(badness + 0.5), 2)
+ + pow(penalty, 2)
+ + additional_penalty;
+ }
+ } else if (penalty <= MINUS_INFINITY) {
+ demerits = pow(1 + (unsigned int)(badness + 0.5), 2) + additional_penalty;
+ } else {
+ demerits =
+ powf(1 + (unsigned int)(badness + 0.5), 2)
+ - pow(penalty, 2)
+ + additional_penalty;
+ }
+
+ breakpoint_debug("badness %.3f penalty %d demerits %u",
+ badness, penalty, demerits);
+ return demerits;
+}
+
+
+unsigned int
+breakpoint::compute_adj_extra_demerits(fitness_class_t candidate_fitness)
+{
+ unsigned int extra_demerits = 0;
+
+ switch (candidate_fitness) {
+ case FITNESS_CLASS_TIGHT:
+ if (fitness_class_ >= FITNESS_CLASS_LOOSE)
+ extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS;
+ break;
+ case FITNESS_CLASS_NORMAL:
+ if (fitness_class_ >= FITNESS_CLASS_VERY_LOOSE)
+ extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS;
+ break;
+ case FITNESS_CLASS_LOOSE:
+ if (fitness_class_ == FITNESS_CLASS_TIGHT)
+ extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS;
+ break;
+ case FITNESS_CLASS_VERY_LOOSE:
+ if (fitness_class_ <= FITNESS_CLASS_NORMAL)
+ extra_demerits = PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS;
+ default:
+ break; //NOTE: intial breakpoint has a FITNESS_CLASS of MAX,
+ }
+
+ return extra_demerits;
+}
+
+
+fitness_class_t
+breakpoint::compute_fitness_class(float adjust_ratio)
+{
+ fitness_class_t fitness_class;
+
+ if (adjust_ratio < (float) -0.5)
+ fitness_class = FITNESS_CLASS_TIGHT;
+ else if (adjust_ratio <= (float) 0.5)
+ fitness_class = FITNESS_CLASS_NORMAL;
+ else if (adjust_ratio <= (float) 1)
+ fitness_class = FITNESS_CLASS_LOOSE;
+ else
+ fitness_class = FITNESS_CLASS_VERY_LOOSE;
+
+ return fitness_class;
+}
+
+int
+breakpoint::sprint(char *str)
+{
+ item *box;
+ int res;
+
+ if (break_item_ != NULL)
+ {
+ box = get_previous_box();
+ res = box->sprint_word(str);
+ if (break_item_->is_glue() == false) {
+ char tmp[32];
+ sprintf(tmp, " (penalty: %d)", break_item_->get_penalty());
+ strcat(str, tmp);
+ }
+ } else {
+ res = sprintf(str, "initial breakpoint");
+ }
+
+ return res;
+}
+
+
+const char *
+breakpoint::print_breakpoint_info()
+{
+ item *box;
+ item *previous_breakpoint_box;
+ char tmp[256] = "";
+ char tmp1[256] = "";
+
+ if (sz_print_ == NULL) {
+ sz_print_ = (char *)calloc(512, sizeof(char));
+
+ if (break_item_ != NULL)
+ {
+ box = get_previous_box();
+ box->sprint_word(tmp);
+ } else {
+ sprintf(tmp, "initial breakpoint");
+ }
+
+ if (previous_best_ != NULL) {
+ previous_breakpoint_box = previous_best_->get_previous_box();
+ if (previous_breakpoint_box != NULL)
+ previous_breakpoint_box->sprint_word(tmp1);
+ else
+ sprintf(tmp1, "initial breakpoint");
+ snprintf(sz_print_, 511, "From '%s' to '%s'\n"
+ " line: %d\n"
+ " ratio: %.3f\n"
+ " total_demerits: %u\n"
+ " fitness class: %d\n",
+ tmp1, tmp,
+ line_number_,
+ adjust_ratio_,
+ total_demerits_,
+ fitness_class_);
+ } else {
+ sprintf(sz_print_, "Initial breakpoint\n");
+ }
+ }
+
+ return sz_print_;
+}
+
+/**
+ * Class paragraph
+ */
+paragraph::paragraph()
+{
+ breakpoint *b;
+
+ error_item_ = NULL;
+ tolerance_ = PARAGRAPH_DEFAULT_TOLERANCE;
+ line_length_ = PARAGRAPH_DEFAULT_LINE_WIDTH;
+ use_old_demerits_formula_ = false;
+ use_fitness_class_ = true;
+ hyphenation_penalty_ = PARAGRAPH_DEFAULT_HYPHENATION_PENALTY;
+ INIT_LIST_HEAD(&item_list_head_, NULL);
+ INIT_LIST_HEAD(&active_breaks_list_head_, NULL);
+ INIT_LIST_HEAD(&passive_breaks_list_head_, NULL);
+ array_best_breaks_ = NULL;
+ /* Add the 1st breakpoint */
+ b = new breakpoint(NULL, 0, 0, 0);
+ list_add_tail(&b->list_, &active_breaks_list_head_);
+}
+
+paragraph::~paragraph()
+{
+ item *pos, *n;
+ breakpoint *break_pos, *break_n;
+ /* Walk through the list and remove and delete 1 by 1 all items */
+ list_for_each_entry_safe(pos, n, &item_list_head_, list_, item) {
+ list_del_init(&pos->list_);
+ delete pos;
+ }
+
+ list_for_each_entry_safe(
+ break_pos, break_n, &passive_breaks_list_head_, list_, breakpoint) {
+ list_del_init(&break_pos->list_);
+ delete break_pos;
+ }
+
+ list_for_each_entry_safe(
+ break_pos, break_n, &active_breaks_list_head_, list_, breakpoint) {
+ list_del_init(&break_pos->list_);
+ delete break_pos;
+ }
+
+ if (array_best_breaks_)
+ free(array_best_breaks_);
+}
+
+void
+paragraph::config_use_old_demerits_formula()
+{
+ use_old_demerits_formula_ = true;
+}
+
+void
+paragraph::config_no_fitness_class()
+{
+ use_fitness_class_ = false;
+}
+
+void
+paragraph::config_hyphenation_penalty(unsigned int value)
+{
+ hyphenation_penalty_ = value;
+}
+
+void
+paragraph::add_box(paragraph_word *word)
+{
+ box_item *b;
+
+ b = new box_item(word);
+ list_add_tail(&b->list_, &item_list_head_);
+}
+
+void
+paragraph::add_glue()
+{
+ item *previous;
+ unsigned int width = 0;
+ unsigned int stretchability = 0;
+ unsigned int shrinkability = 0;
+ glue_item *g;
+ previous = list_entry(item_list_head_.prev, item);
+ while (previous->is_box() == false && previous != list_entry(item_list_head_.next, item))
+ previous = previous->get_previous_item();
+ if (previous != NULL) {
+ paragraph_word *word;
+ word = previous->get_word();
+ if (word != NULL)
+ word->get_next_glue_values(&width, &stretchability, &shrinkability);
+ }
+ g = new glue_item(width, stretchability, shrinkability);
+ list_add_tail(&g->list_, &item_list_head_);
+}
+
+void
+paragraph::add_optional_hyphen(paragraph_word *hyphen_sign)
+{
+ //hyphen sign have a width of 6
+ penalty_item *p = new penalty_item(hyphenation_penalty_, true, hyphen_sign);
+ list_add_tail(&p->list_, &item_list_head_);
+}
+
+void
+paragraph::add_explicit_hyphen()
+{
+ penalty_item *p = new penalty_item(hyphenation_penalty_, true);
+ list_add_tail(&p->list_, &item_list_head_);
+}
+
+/*
+ * If the last item is glue, remove it and add the finishing pattern:
+ * - disallowed break: penalty(0, PLUS_INFINITY, 0)
+ * - finishing glue: glue (0, PLUS_INFINITY, 0)
+ * - forced break: penalty(0, MINUS_INFINITY, 0)
+ */
+void
+paragraph::finish()
+{
+ glue_item *last_glue;
+ penalty_item *disallowed_break_penalty = new penalty_item(PLUS_INFINITY, false);
+ glue_item *finishing_glue = new glue_item(0, PLUS_INFINITY, 0);
+ penalty_item *forced_break_penalty = new penalty_item(MINUS_INFINITY, false);
+
+ /* FIXME we assume here that we always finish with glue so we always remove
+ * it. This should be more robust. */
+ last_glue = list_entry(item_list_head_.prev, glue_item);
+ list_del_init(item_list_head_.prev);
+ delete(last_glue);
+ list_add_tail(&disallowed_break_penalty->list_, &item_list_head_);
+ list_add_tail(&finishing_glue->list_, &item_list_head_);
+ list_add_tail(&forced_break_penalty->list_, &item_list_head_);
+}
+
+
+/**
+ * Move a breakpoint from active list to passive list.
+ */
+void
+paragraph::deactivate_breakpoint(breakpoint *active)
+{
+#if TRACE_PARAGRAPH >= LEVEL_DEBUG
+ char str[256];
+ active->sprint(str);
+ paragraph_debug(" deactivating '%s'", str);
+#endif
+
+ list_del_init(&active->list_);
+ list_add_tail(&active->list_, &passive_breaks_list_head_);
+}
+
+void
+paragraph::record_feasible_break(breakpoint *active,
+ breakpoint *candidate)
+{
+#if TRACE_PARAGRAPH >= LEVEL_DEBUG
+ char str[256];
+ active->sprint(str);
+ paragraph_debug(" record feasible break '%s'", str);
+#endif
+
+ candidate->set_previous_best(active);
+ if (list_empty(&candidate->list_))
+ list_add_tail(&candidate->list_, &active_breaks_list_head_);
+}
+
+/*
+ * Format the paragraph with Knuth-Plass algorithm. Algorithm general outline:
+
+ for (all items 'b' of the paragraph) {
+ if (item 'b' is a legal breakpoint) {
+ for (all active breakpoint 'a') {
+ compute the adjustment ratio 'r' from 'a' to 'b'
+ if (r < -1) or (b is a forced break) {
+ deactivate 'a' (a is now too far behind)
+ }
+ if ( -r <= r < tolerance threshold) {
+ record feasible break from 'a' to 'b'
+ comptude demerits and fitness class.
+ }
+ }
+ if ('b' is a feasible breakpoint) {
+ append the best such break as active node (chose the best active node)
+ }
+ }
+ }
+*/
+
+// return 0 if OK
+int
+paragraph::format_knuth_plass(float tolerance,
+ unsigned int line_length)
+{
+ item *k_item;
+ float adjust_ratio;
+ breakpoint *active, *candidate, *n, *initial_breakpoint;
+ unsigned int total_width = 0, total_stretch = 0, total_shrink = 0;
+ float badness;
+ unsigned int demerits;
+ fitness_class_t fitness_class = FITNESS_CLASS_TIGHT;
+ int k;
+ breakpoint **tab_best_previous;
+ unsigned int *tab_total_best_demerits;
+ float *tab_ajust_ratio; // This is merely used for printing
+ int n_fitness_class;
+ unsigned int min_total_best_demerits;
+#if TRACE_PARAGRAPH >= LEVEL_INFO
+ char tmp[128] = "";
+ char tmp1[128] = "";
+#endif
+
+ error_item_ = NULL;
+ if (use_fitness_class_)
+ n_fitness_class = FITNESS_CLASS_MAX;
+ else
+ n_fitness_class = 1;
+ tab_best_previous = (breakpoint **)calloc(n_fitness_class,
+ sizeof(breakpoint *));
+ tab_total_best_demerits = (unsigned int *)calloc(n_fitness_class,
+ sizeof(unsigned int));
+ tab_ajust_ratio = (float *)calloc(n_fitness_class, sizeof(float));
+ tolerance_ = tolerance;
+ line_length_ = line_length;
+
+ /* Walk through all the items of the paragraph */
+ list_for_each_entry(k_item, &item_list_head_, list_, item) {
+#if TRACE_PARAGRAPH >= LEVEL_INFO
+ k_item->sprint_info(tmp);
+ paragraph_debug("Loop: total width %u total strecth %u total shrink %u",
+ total_width, total_stretch, total_shrink);
+ paragraph_debug(" New item: %s", tmp);
+#endif
+ if (k_item->is_legal_breakpoint()) {
+ min_total_best_demerits = PLUS_INFINITY;
+ for (k = 0; k < n_fitness_class; k++) {
+ tab_best_previous[k] = NULL;
+ tab_total_best_demerits[k] = PLUS_INFINITY;
+ tab_ajust_ratio[k] = PLUS_INFINITY;
+ }
+
+ /* Check the candidate breakpoint against each active breakpoint */
+ list_for_each_entry_safe(
+ active, n, &active_breaks_list_head_, list_, breakpoint) {
+ adjust_ratio = active->compute_adjust_ratio(line_length_,
+ total_width,
+ total_stretch,
+ total_shrink,
+ k_item);
+ if (k_item->is_forced_break() || adjust_ratio < -1) {
+ deactivate_breakpoint(active);
+ }
+ if (adjust_ratio >= -1 && adjust_ratio < tolerance_) {
+ /* there is a feasible break, remember of this breakpoint if the total
+ * demerits from the active to the candidate is less than the previous
+ * possible break. */
+ badness = active->compute_badness(adjust_ratio);
+ demerits = active->compute_demerits(badness,
+ k_item,
+ use_old_demerits_formula_);
+ if (use_fitness_class_) {
+ fitness_class = active->compute_fitness_class(adjust_ratio);
+ demerits +=
+ active->compute_adj_extra_demerits(fitness_class);
+ }
+
+#if TRACE_PARAGRAPH >= LEVEL_INFO
+ paragraph_info(" From %s to %s:", tmp1, tmp);
+ paragraph_info(" ratio : %.3f", adjust_ratio);
+ paragraph_info(" badness : %.3f", badness);
+ paragraph_info(" demerits : %u", demerits);
+ paragraph_info(" total_demerits : %u",
+ active->get_total_demerits() + demerits);
+ paragraph_info(" fitness_class: %d", fitness_class);
+#endif
+ if (active->get_total_demerits() + demerits
+ < tab_total_best_demerits[fitness_class]) {
+ // possible break is best than previously
+ tab_best_previous[fitness_class] = active;
+ tab_total_best_demerits[fitness_class] =
+ active->get_total_demerits() + demerits;
+ tab_ajust_ratio[fitness_class] = adjust_ratio;
+ if (tab_total_best_demerits[fitness_class] < min_total_best_demerits)
+ min_total_best_demerits = tab_total_best_demerits[fitness_class];
+ }
+ paragraph_debug(" tab_best_previous[%d]:%p",
+ fitness_class,
+ tab_best_previous[fitness_class]);
+ }
+ }
+
+ if (min_total_best_demerits < PLUS_INFINITY) {
+ for (k = 0;
+ k < n_fitness_class;
+ k++) {
+ paragraph_debug(" tab_total_best_demerits[%d]: %d min_total_best_demerits: %u",
+ k,
+ tab_total_best_demerits[k],
+ min_total_best_demerits);
+ paragraph_debug(" tab_best_previous[%d]:%p",
+ k,
+ tab_best_previous[k]);
+
+ if (tab_total_best_demerits[k] <= min_total_best_demerits
+ + PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS) {
+ candidate = new breakpoint(k_item,
+ total_width, total_stretch, total_shrink);
+ candidate->set_total_demerits(tab_total_best_demerits[k]);
+ candidate->set_adjust_ratio(tab_ajust_ratio[k]);
+ candidate->set_fitness_class((fitness_class_t)k);
+ record_feasible_break(tab_best_previous[k],
+ candidate);
+ }
+ }
+ }
+
+ /* No more active breakpoint, leave with error */
+ if (list_empty(&active_breaks_list_head_)) {
+ paragraph_error("Could nor format paragraph");
+ error_item_ = k_item;
+ break;
+ }
+ }
+ /* compute total width */
+ if (k_item->get_penalty() <= 0)
+ total_width += k_item->get_width();
+ total_stretch += k_item->get_stretchability();
+ total_shrink += k_item->get_shrinkability();
+ }
+
+ /* There should be one breakpoint left, the final one; move it to the passive
+ list */
+ list_for_each_entry_safe(
+ active, n, &active_breaks_list_head_, list_, breakpoint) {
+ deactivate_breakpoint(active);
+ }
+
+ /* Create the list of best breakpoints by starting by the end of the passive
+ * list */
+ initial_breakpoint = list_entry(passive_breaks_list_head_.next, breakpoint);
+ n = list_entry(passive_breaks_list_head_.prev, breakpoint);
+ number_lines_ = n->get_line_number();
+ array_best_breaks_ = (breakpoint **)calloc(number_lines_,
+ sizeof(breakpoint *));
+ k = 0;
+ while (n != NULL && n != initial_breakpoint) {
+ array_best_breaks_[number_lines_ - k - 1] = n;
+ n = n->get_previous_best();
+ k++;
+ }
+
+ if (tab_total_best_demerits)
+ free(tab_total_best_demerits);
+ if (tab_best_previous)
+ free(tab_best_previous);
+ if (tab_ajust_ratio)
+ free(tab_ajust_ratio);
+
+ if (error_item_ != NULL)
+ return -1;
+ else
+ return 0;
+}
+
+
+int
+paragraph::write_text(paragraph_writer_interface *pwi, int *number_lines)
+{
+ int res = 0;
+ item *pos;
+ breakpoint *next_feasible_breakpoint;
+ breakpoint *next_best_breakpoint;
+ int k, j_current_line;
+ float ratio;
+ float space_width;
+ paragraph_word *word;
+
+ if (pwi == NULL) {
+ paragraph_error("Incorrect input");
+ res = -1;
+ if (number_lines)
+ *number_lines = 0;
+ goto end;
+ }
+
+ k = 0;
+ j_current_line = 1;
+ next_best_breakpoint = array_best_breaks_[k];
+ /* passive_breaks_list_head_.next is the initial breakpoint */
+ next_feasible_breakpoint = list_entry(passive_breaks_list_head_.next->next,
+ breakpoint);
+ list_for_each_entry(pos, &item_list_head_, list_, item) {
+ /* The item 'pos' correspond to a breakpoint */
+ if (next_best_breakpoint && pos == next_best_breakpoint->get_item()) {
+ word = pos->get_word();
+ if (word != NULL) // case of a hyphen
+ pwi->write_word_cbk(word);
+ pwi->break_here_cbk(j_current_line);
+ j_current_line++;
+ if (k < number_lines_ - 1) {
+ k++;
+ next_best_breakpoint = array_best_breaks_[k];
+ }
+ } else if (pos == error_item_) {
+ /* nothing else can be printed, exit */
+ res = -1;
+ goto end;
+ } else {
+ if (pos->is_box()) { // case of a word
+ word = pos->get_word(); // FIXME maybe
+ /* if (strcmp(word, "") == 0) // a space box used for indentation
+ pwi->write_space_cbk((float)pos->get_width());
+ else*/
+ pwi->write_word_cbk(word);
+ } else if (pos->is_glue()) {
+ /* Case of glue, that is, space: we must compute the space width */
+ if (next_best_breakpoint != NULL) {
+ ratio = next_best_breakpoint->get_adjust_ratio();
+ if (ratio >=0)
+ space_width = pos->get_width() + (pos->get_stretchability() * ratio);
+ else
+ space_width = pos->get_width() - (pos->get_shrinkability() * ratio);
+ pwi->write_space_cbk(space_width);
+ } else {
+ /* case where the format_knuth function failed, we set the space width
+ * to 1 so as this can still be used by the paragraph_printer class */
+ pwi->write_space_cbk(1);
+ }
+ }
+ }
+ }
+
+end:
+ if (number_lines)
+ *number_lines = k + 1;
+
+ return res;
+}
+
+int
+paragraph::get_number_of_lines()
+{
+ return number_lines_;
+}
+
+// return FLT_MAX if line not found
+float
+paragraph::get_adjust_ratio(int line_number)
+{
+ float ratio = FLT_MAX;
+
+ if (line_number <= number_lines_ && line_number >= 0)
+ ratio = array_best_breaks_[line_number - 1]->get_adjust_ratio();
+
+ return ratio;
+}
+
+/* Returns fitness class of a line, or FITNESS_CLASS_MAX if incorrect line was
+ * given as parameter */
+fitness_class_t
+paragraph::get_fitness_class(int line_number)
+{
+ fitness_class_t fitness_class = FITNESS_CLASS_MAX;
+
+ if (line_number <= number_lines_ && line_number >= 0)
+ fitness_class = array_best_breaks_[line_number - 1]->get_fitness_class();
+
+ return fitness_class;
+}
+
+
+unsigned int
+paragraph::get_total_demerits(int line_number)
+{
+ unsigned int total_demerits = PLUS_INFINITY;
+
+ if (line_number <= number_lines_ && line_number >= 0)
+ total_demerits = array_best_breaks_[line_number - 1]->get_total_demerits();
+
+ return total_demerits;
+}
+
+void paragraph::print_breakpoints()
+{
+ breakpoint *pos;
+
+ list_for_each_entry(pos, &passive_breaks_list_head_, list_, breakpoint) {
+ printf("%s", pos->print_breakpoint_info());
+ }
+}
+
+struct breakpoint_mark {
+ struct list_head list_;
+ int offset;
+};
+
+paragraph_printer::paragraph_printer(paragraph *par)
+{
+ par_ = par;
+ current_index_ = 0;
+ size_to_print_ = 0;
+ max_line_length_ = 0;
+ n_lines_ = par_->get_number_of_lines();
+ /* We reserve for 1 more lines than the actual number of lines of the
+ * paragraph: in case format_knuth failed to format the paragraph the last
+ * lines (where the failure arose) would not be counted in the total number of
+ * lines, but we still want to print it.*/
+ tab_lines_ = (char **) calloc(n_lines_ + 1, sizeof(char *));
+ tab_marks_ = (char **) calloc(n_lines_ + 1, sizeof(char *));
+ tab_lines_[current_index_] = (char *) calloc(512, sizeof(char));
+ tab_marks_[current_index_] = (char *) calloc(512, sizeof(char));
+ /* passive_breaks_list_head_.next is the initial breakpoint */
+ next_feasible_breakpoint_ =
+ list_entry(par->passive_breaks_list_head_.next->next, breakpoint);
+}
+
+paragraph_printer::~paragraph_printer()
+{
+ int k;
+ for (k = 0; k < par_->get_number_of_lines() + 1; k++) {
+ free (tab_lines_[k]);
+ free (tab_marks_[k]);
+ }
+ free(tab_lines_);
+ free(tab_marks_);
+}
+
+void
+paragraph_printer::new_line(void)
+{
+ if (current_index_ < par_->get_number_of_lines() - 1) {
+ current_index_++;
+ tab_lines_[current_index_] = (char *) calloc(512, sizeof(char));
+ tab_marks_[current_index_] = (char *) calloc(512, sizeof(char));
+ size_to_print_ = 0;
+ }
+}
+
+void
+paragraph_printer::write_word_cbk(paragraph_word *word)
+{
+ char sz_word[256] = "";
+ int len;
+ int k;
+
+ word->snprint(sz_word, 256);
+ len = strlen(sz_word);
+ strcat(tab_lines_[current_index_], sz_word);
+ for (k = size_to_print_; k < size_to_print_ + len; k++)
+ tab_marks_[current_index_][k] = ' ';
+ size_to_print_ += len;
+ if (next_feasible_breakpoint_
+ && word == next_feasible_breakpoint_->get_previous_box()->get_word()) {
+ tab_marks_[current_index_][k - 1] = '^';
+ next_feasible_breakpoint_ =
+ list_entry(next_feasible_breakpoint_->list_.next, breakpoint);
+ }
+}
+
+int
+paragraph_printer::write_space_cbk(float space_width)
+{
+ paragraph_debug("space: %.3f", space_width);
+ strcat(tab_lines_[current_index_], " ");
+ tab_marks_[current_index_][size_to_print_] = ' ';
+ size_to_print_++;
+
+ return 0;
+}
+
+int
+paragraph_printer::break_here_cbk(int line_number)
+{
+ if (size_to_print_ > max_line_length_)
+ max_line_length_ = size_to_print_;
+ new_line();
+
+ return 0;
+}
+
+/* Works properly with ascii text only */
+int
+paragraph_printer::print()
+{
+ int first_column_width;
+ int k, res;
+ int column1, column2, column3;
+ int number_lines = 0;
+
+ res = par_->write_text(this, &number_lines);
+ first_column_width = printf("Number of lines: %d",
+ par_->get_number_of_lines());
+ first_column_width += printf("%*s",
+ max_line_length_ + 5 - first_column_width, "|");
+ column1 = printf(" adj. ratio |");
+ column2 = printf(" total demerits |");
+ column3 = printf(" fitness class |");
+ printf("\n\n");
+
+ for (k = 0; k < number_lines; k++) {
+ printf("%-*s", first_column_width, tab_lines_[k]);
+ printf("%*.3f ",
+ column1 - 2,
+ par_->array_best_breaks_[k]->get_adjust_ratio());
+ printf("%*u ",
+ column2 - 2,
+ par_->array_best_breaks_[k]->get_total_demerits());
+ printf("%*u \n",
+ column3 - 2,
+ par_->array_best_breaks_[k]->get_fitness_class());
+ printf("%s\n", tab_marks_[k]);
+ }
+
+ /* if write_text failed we must print the very last line that is not part of
+ * the lines of formatted the paragraph */
+ if (res != 0) {
+ printf("\nCould not finish the formatting after line:\n\n");
+ printf("%s\n", tab_lines_[number_lines]);
+ }
+ return 0;
+}
diff --git a/src/roff/troff/paragraph.h b/src/roff/troff/paragraph.h
new file mode 100644
index 000000000..ddf5f2fe7
--- /dev/null
+++ b/src/roff/troff/paragraph.h
@@ -0,0 +1,107 @@
+// -*- C++ -*-
+
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "list.h"
+#include "paragraph_word.h"
+
+class breakpoint;
+
+/**
+ * Class paragraph
+ */
+class paragraph_writer_interface {
+public:
+ virtual void write_word_cbk(paragraph_word *word) { word->write(); };
+ virtual int write_space_cbk(float space_width) = 0;
+ virtual int break_here_cbk(int line_number) = 0; //TODO : would be
+ //more useful to
+ //return the last
+ //word
+};
+
+class paragraph {
+ friend class test_paragraph;
+ friend class paragraph_printer;
+ float tolerance_;
+ unsigned int line_length_;
+ // Only for test, using an original example from Knuth/Plass.
+ bool use_old_demerits_formula_;
+ bool use_fitness_class_;
+ unsigned int hyphenation_penalty_;
+ void deactivate_breakpoint(breakpoint *active);
+ void record_feasible_break(breakpoint *active,
+ breakpoint *candidate);
+ // Head of list of all the items of the paragraph
+ struct list_head item_list_head_;
+ // Head of lists of breakpoints
+ struct list_head active_breaks_list_head_;
+ struct list_head passive_breaks_list_head_;
+ breakpoint **array_best_breaks_;
+ int number_lines_;
+ item *error_item_; // last item before exiting in error
+
+public:
+ paragraph();
+ ~paragraph();
+ // Configuration
+ void config_use_old_demerits_formula();
+ void config_no_fitness_class();
+ void config_hyphenation_penalty(unsigned int value);
+
+ // To build the paragraph
+ // TODO: enable to add a custom item
+ void add_box(paragraph_word *word);
+ void add_glue();
+ void add_optional_hyphen(paragraph_word *hyphen_sign);
+ void add_explicit_hyphen();
+ void finish();
+ int
+ format_knuth_plass(float tolerance = PARAGRAPH_DEFAULT_TOLERANCE,
+ unsigned int line_length = PARAGRAPH_DEFAULT_LINE_WIDTH);
+ int write_text(paragraph_writer_interface *pwi, int *number_lines);
+ // For stats
+ int get_number_of_lines();
+ float get_adjust_ratio(int line_number); // for stats
+ unsigned int get_total_demerits(int line_number);
+ fitness_class_t get_fitness_class(int line_number);
+ void print_breakpoints(); // for debug only
+};
+
+/* A simple helper class to print a paragraph with the main information and
+ * feasible breakpoints */
+class paragraph_printer : public paragraph_writer_interface {
+ paragraph *par_;
+ char **tab_lines_;
+ int n_lines_;
+ char **tab_marks_;
+ int size_to_print_;
+ int max_line_length_;
+ int current_index_;
+ breakpoint *next_feasible_breakpoint_;
+ void new_line(void);
+
+public:
+ paragraph_printer(paragraph *par);
+ ~paragraph_printer();
+ void write_word_cbk(paragraph_word *word);
+ int write_space_cbk(float space_width);
+ int break_here_cbk(int line_number);
+ int print();
+};
diff --git a/src/roff/troff/paragraph_l.h b/src/roff/troff/paragraph_l.h
new file mode 100644
index 000000000..5eb98b2ee
--- /dev/null
+++ b/src/roff/troff/paragraph_l.h
@@ -0,0 +1,208 @@
+// -*- C++ -*-
+
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of othe License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+#include <limits.h>
+#include "list.h"
+#include "paragraph_word.h"
+
+#define PARAGRAPH_DEFAULT_TOLERANCE 1
+#define PARAGRAPH_DEFAULT_HYPHENATION_PENALTY 50
+#define PARAGRAPH_DEFAULT_LINE_WIDTH 500
+#define PARAGRAPH_DEFAULT_NON_ADJACENT_FITNESS_DEMERITS 10000
+
+/* Classes local to paragraph.cpp */
+
+/**
+ * Abstract Class item
+ * An item is either:
+ * - a box_item (contains a word).
+ * - a glue item (space between words).
+ * - a penalty item (used to put extra charges on less desirable way to break
+ * the paragraph).
+ */
+class item {
+
+protected:
+ paragraph_word *word_;
+ unsigned int width_;
+ unsigned int stretchability_;
+ unsigned int shrinkability_;
+ int penalty_;
+ bool flagged_penalty_;
+ bool is_breakpoint_;
+
+public:
+ struct list_head list_;
+ item();
+ virtual ~item();
+ unsigned int get_width();
+ unsigned int get_stretchability();
+ unsigned int get_shrinkability();
+ int get_penalty();
+ item *get_previous_item();
+ paragraph_word *get_word();
+ int sprint_word(char *str);
+ virtual bool is_box() = 0;
+ virtual bool is_glue() = 0;
+ virtual bool is_legal_breakpoint() = 0;
+ virtual bool is_forced_break() = 0;
+ virtual bool is_flagged_penalty() = 0;
+ virtual int sprint_info(char *str) = 0;
+};
+
+/**
+ * Class box_item
+ *
+ * A box contains a word and is not a legal breakpoint.
+ */
+class box_item : public item {
+
+public:
+ box_item(paragraph_word *word);
+ ~box_item();
+ bool is_box();
+ bool is_glue();
+ bool is_legal_breakpoint();
+ bool is_forced_break();
+ bool is_flagged_penalty();
+ int sprint_info(char *str);
+};
+
+/**
+ * Class glue_item
+ *
+ * The glue is the space between words. It is a legal breakpoint if the
+ * previous item is a box.
+ */
+class glue_item : public item {
+public:
+ glue_item (unsigned int width, unsigned int stretchability, unsigned int shrinkability);
+ ~glue_item();
+ bool is_box();
+ bool is_glue();
+ bool is_legal_breakpoint();
+ bool is_forced_break();
+ bool is_flagged_penalty();
+ int sprint_info(char *str);
+};
+
+/**
+ * Class penalty_item
+ *
+ * Penalties can have special values:
+ * - minus infinity: it is a legaal and forced break point.
+ * - plus infinity: it is a disabled (forbidden) break point.
+ * Other values constitute a legal break point.
+ */
+class penalty_item : public item {
+ paragraph_word *optional_word_;
+public:
+ penalty_item(int penalty,
+ bool flagged_penalty,
+ paragraph_word *optional_word);
+ ~penalty_item();
+ unsigned int get_width();
+ bool is_box();
+ bool is_glue();
+ bool is_legal_breakpoint();
+ bool is_forced_break();
+ bool is_flagged_penalty();
+ int sprint_info(char *str);
+};
+
+/**
+ * Class breakpoint
+ *
+ * A breakpoint:
+ * - points to an item (the place where to break)
+ * - points to the previous best breakpoint.
+ * - Stores the the total width, stretch, shrink from the start of the
+ * paragraph.
+ */
+
+/* Fitness class is used to avoid having, for example, a tight line
+ * following a loose line.*/
+typedef enum {
+ FITNESS_CLASS_TIGHT,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_LOOSE,
+ FITNESS_CLASS_VERY_LOOSE,
+ FITNESS_CLASS_MAX
+} fitness_class_t;
+
+class breakpoint {
+ int line_number_;
+ float adjust_ratio_;
+ fitness_class_t fitness_class_;
+ unsigned int total_width_;
+ unsigned int total_stretch_;
+ unsigned int total_shrink_;
+ unsigned int total_demerits_;
+ item *break_item_;
+ breakpoint *previous_best_;
+ char *sz_print_;
+
+protected:
+ /* simple getters */
+ unsigned int get_total_width();
+ unsigned int get_total_stretch();
+ unsigned int get_total_shrink();
+ /* add the glue to the total */
+ unsigned int get_total_width_after();
+ unsigned int get_total_stretch_after();
+ unsigned int get_total_shrink_after();
+
+public:
+ struct list_head list_;
+ breakpoint(item *break_item,
+ unsigned int total_width,
+ unsigned int total_stretch,
+ unsigned int total_shrink);
+ ~breakpoint();
+ /* simple getters and setters */
+ item *get_item();
+ item *get_previous_box();
+ int get_line_number();
+ float get_adjust_ratio();
+ void set_adjust_ratio(float adjust_ratio);
+ unsigned int get_total_demerits();
+ void set_total_demerits(unsigned int total_demerits);
+ fitness_class_t get_fitness_class();
+ void set_fitness_class(fitness_class_t fitness_class);
+ breakpoint *get_previous_best();
+ void set_previous_best(breakpoint *previous);
+
+ /* compute various values */
+ float compute_adjust_ratio(int desired_line_length,
+ unsigned int total_width,
+ unsigned int total_stretch,
+ unsigned int total_shrink,
+ item *candidate);
+ float compute_badness(float adjust_ratio);
+ unsigned int compute_demerits(float badness,
+ item *candidate,
+ bool use_old_demerits_formula = false);
+ unsigned int compute_adj_extra_demerits(fitness_class_t candidate_fitness);
+ fitness_class_t compute_fitness_class(float adjust_ratio);
+
+ int sprint(char *str);
+ const char *print_breakpoint_info();
+};
diff --git a/src/roff/troff/paragraph_word.h b/src/roff/troff/paragraph_word.h
new file mode 100644
index 000000000..91e2e2ad8
--- /dev/null
+++ b/src/roff/troff/paragraph_word.h
@@ -0,0 +1,36 @@
+// -*- C++ -*-
+
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef PARAGRAPH_WORD_H
+#define PARAGRAPH_WORD_H
+
+class paragraph_word {
+public:
+ virtual ~paragraph_word() {};
+ virtual unsigned int get_width() = 0;
+ virtual int get_next_glue_values(unsigned int *width,
+ unsigned int *stretchability,
+ unsigned int *shrinkability) = 0;
+ virtual void write() = 0;
+ // for debug only
+ virtual int snprint(char *str, size_t size) { return 0; };
+};
+
+#endif
diff --git a/test/test.am b/test/test.am
index 1bf5e392c..7fec26cd5 100644
--- a/test/test.am
+++ b/test/test.am
@@ -23,3 +23,12 @@ utest_list_SOURCES = test/utest_list.cpp
utest_list_CXXFLAGS = $(CPPUNIT_CFLAGS) -I$(top_srcdir)/src/roff/troff
utest_list_LDFLAGS = $(CPPUNIT_LIBS)
endif
+
+TESTS += test_paragraph
+check_PROGRAMS += test_paragraph
+test_paragraph_SOURCES = \
+ src/roff/troff/paragraph.cpp \
+ test/test_paragraph.cpp
+
+test_paragraph_CXXFLAGS = -I$(top_srcdir)/src/roff/troff
+
diff --git a/test/test_paragraph.cpp b/test/test_paragraph.cpp
new file mode 100644
index 000000000..2736c44fb
--- /dev/null
+++ b/test/test_paragraph.cpp
@@ -0,0 +1,971 @@
+/* Copyright (C) 2016- Free Software Foundation, Inc.
+ Written by Bertrand Garrigues (bertrand.garrigues@laposte.net)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "paragraph_l.h"
+#include "paragraph.h"
+
+#define TRACE_ALIGN_LENGTH 40
+#include "trace.h"
+trace_define_category(item, LEVEL_ERROR);
+trace_define_category(breakpoint, LEVEL_ERROR);
+trace_define_category(paragraph, LEVEL_ERROR);
+
+#define TEST_FAIL 1
+#define TEST_SUCCESS 0
+#define test_assert(expression, fmt, args...) \
+ (((expression) == 0) ? \
+ printf(" * FAIL [%s:%d]: " fmt "\n", \
+ __FUNCTION__, __LINE__, ##args ), TEST_FAIL : TEST_SUCCESS)
+
+/* A simple implementation of paragraph_word, for ascii only. */
+class ascii_paragraph_word : public paragraph_word {
+private:
+ /* Length of each letter as set in Knuth's original example in his
+ * article "Breaking Paragraph Into Lines" */
+ static unsigned int char_len_[256];
+ static void init_char_len();
+ static bool is_class_initialized_;
+ char *sz_word_;
+ unsigned int width_;
+public:
+ // width_ is set by the strlen of sz_word
+ ascii_paragraph_word(const char *sz_word);
+ ~ascii_paragraph_word();
+ // overwrite the width calculated by the constructor, this is useful
+ // to create a box for indentation (which are empty but with a width
+ // > 0
+ void set_width(unsigned int width);
+ unsigned int get_width();
+ int get_next_glue_values(unsigned int *width,
+ unsigned int *stretchability,
+ unsigned int *shrinkability);
+ int snprint(char *str, size_t size);
+ void write();
+};
+
+unsigned int ascii_paragraph_word::char_len_[256];
+bool ascii_paragraph_word::is_class_initialized_;
+
+void
+ascii_paragraph_word::init_char_len()
+{
+ int k;
+ // Unknown characters width is set to 10
+ for (k = 0; k < 256; k++)
+ char_len_[k] = 10;
+ char_len_[' '] = 0; // we set the space width to 0 here to be able to quickly
+ // compute the length of a line without having to deal with
+ // the last space
+ char_len_['a'] = 9;
+ char_len_['b'] = 10;
+ char_len_['c'] = 8;
+ char_len_['d'] = 10;
+ char_len_['e'] = 8;
+ char_len_['f'] = 6;
+ char_len_['g'] = 9;
+ char_len_['h'] = 10;
+ char_len_['i'] = 5;
+ char_len_['j'] = 6;
+ char_len_['k'] = 10;
+ char_len_['l'] = 5;
+ char_len_['m'] = 15;
+ char_len_['n'] = 10;
+ char_len_['o'] = 9;
+ char_len_['p'] = 10;
+ char_len_['q'] = 10;
+ char_len_['r'] = 7;
+ char_len_['s'] = 7;
+ char_len_['t'] = 7;
+ char_len_['u'] = 10;
+ char_len_['v'] = 9;
+ char_len_['w'] = 13;
+ char_len_['x'] = 10;
+ char_len_['y'] = 10;
+ char_len_['z'] = 8;
+ char_len_['C'] = 13;
+ char_len_['I'] = 6;
+
+ char_len_['-'] = 6;
+ char_len_[','] = 5;
+ char_len_[';'] = 5;
+ char_len_['.'] = 5;
+ char_len_['\''] = 5;
+
+ is_class_initialized_ = true ;
+}
+
+ascii_paragraph_word::ascii_paragraph_word(const char *sz_word)
+{
+ if (is_class_initialized_ == false)
+ init_char_len();
+
+ width_ = 0;
+ if (sz_word) {
+ size_t len;
+ int k = 0;
+ len = strlen(sz_word);
+ sz_word_ = (char *)malloc(len + 1);
+ strcpy(sz_word_, sz_word);
+ while (sz_word_[k] != '\0') {
+ if (sz_word_[k] >= 0)
+ width_ += char_len_[(unsigned char) sz_word_[k]];
+ k++;
+ }
+ }
+}
+
+ascii_paragraph_word::~ascii_paragraph_word()
+{
+ if (sz_word_)
+ free(sz_word_);
+}
+
+void
+ascii_paragraph_word::set_width(unsigned int width)
+{
+ width_ = width;
+}
+
+unsigned int
+ascii_paragraph_word::get_width()
+{
+ return width_;
+}
+
+int
+ascii_paragraph_word::get_next_glue_values(unsigned int *width,
+ unsigned int *stretchability,
+ unsigned int *shrinkability)
+{
+ size_t len;
+ char last_character;
+
+ if (sz_word_) {
+ len = strlen(sz_word_);
+ last_character = sz_word_[len - 1];
+ } else {
+ last_character = 0;
+ }
+ switch (last_character) {
+ case ',':
+ *width = 6;
+ *stretchability = 4;
+ *shrinkability = 2;
+ break;
+ case ';':
+ *width = 6;
+ *stretchability = 4;
+ *shrinkability = 1;
+ break;
+ case '.':
+ *width = 8;
+ *stretchability = 6;
+ *shrinkability = 1;
+ break;
+ default:
+ *width = 6;
+ *stretchability = 3;
+ *shrinkability = 2;
+ break;
+ }
+}
+
+int
+ascii_paragraph_word::snprint(char *str, size_t size)
+{
+ return snprintf(str, size, "%s", sz_word_);
+}
+
+void
+ascii_paragraph_word::write()
+{
+ printf("%s", sz_word_);
+}
+
+/* Return -1 if the word cannot be hyphenate. If it can, return the position in
+ the word were the hyphenation can be done. We just take all the words in the
+ example paragraph that can be hyphenated. */
+typedef enum {
+ NO_HYPHEN,
+ EXPLICIT_HYPHEN,
+ OPTIONAL_HYPHEN
+} hyphen_type_t;
+
+class text_loader {
+ char *text_; // Complete paragraph text in one string
+ hyphen_type_t simulate_hyphenate (const char *word,
+ unsigned int *first_part_len);
+public:
+ text_loader(char *text, const char *path = NULL);
+ ~text_loader();
+ void process_text(paragraph *par, bool with_indentation);
+};
+
+text_loader::text_loader(char *text, const char *path)
+{
+ if (text) {
+ text_ = (char *)calloc(strlen(text) +1, sizeof (char));
+ strcpy(text_, text);
+ } else if (path) {
+ FILE *fp;
+ char *c;
+ size_t text_size;
+ struct stat buf;
+
+ fp = fopen (path, "r");
+ if (fp == NULL) {
+ printf("Error:%s\n", strerror(errno));
+ text_ = NULL;
+ } else {
+ stat(path, &buf);
+ text_ = (char *) calloc((buf.st_size) + 1, sizeof(char));
+ text_size = fread(text_, 1, buf.st_size, fp);
+ for (c = text_; c < text_ + text_size; c++) {
+ if (*c == '\n')
+ *c = ' ';
+ }
+ fclose(fp);
+ }
+ }
+}
+
+text_loader::~text_loader()
+{
+ if (text_)
+ free(text_);
+}
+
+hyphen_type_t
+text_loader::simulate_hyphenate (const char *word, unsigned int *first_part_len)
+{
+ hyphen_type_t ret = OPTIONAL_HYPHEN;
+
+ if (word == NULL || first_part_len == NULL)
+ goto end;
+
+ if (strncmp(word, "lime-tree", 9) == 0) {
+ *first_part_len = 5;
+ ret = EXPLICIT_HYPHEN;
+ goto end;
+ }
+
+ if (strncmp(word, "wishing", 7) == 0)
+ *first_part_len = 4;
+ else if (strncmp(word, "daughters", 9) == 0)
+ *first_part_len = 5;
+ /* for the sake of simplicity we don't consider the break after beauti */
+ else if (strncmp(word, "beautiful", 9) == 0)
+ *first_part_len = 4;
+ else if (strncmp(word, "youngest", 8) == 0)
+ *first_part_len = 5;
+ else if (strncmp(word, "itself", 6) == 0)
+ *first_part_len = 2;
+ else if (strncmp(word, "astonished", 10) == 0)
+ *first_part_len = 5;
+ else if (strncmp(word, "whenever", 8) == 0)
+ *first_part_len = 4;
+ else if (strncmp(word, "forest", 6) == 0)
+ *first_part_len = 3;
+ else if (strncmp(word, "under", 5) == 0)
+ *first_part_len = 2;
+ else if (strncmp(word, "lime-tree", 9) == 0)
+ *first_part_len = 5;
+ else if (strncmp(word, "fountain", 8) == 0)
+ *first_part_len = 4;
+ else if (strncmp(word, "favorite", 8) == 0)
+ *first_part_len = 5;
+ else if (strncmp(word, "plaything", 9) == 0)
+ *first_part_len = 4;
+ else if (strncmp(word, "hyphenationtest", 15) == 0)
+ *first_part_len = 11;
+ else
+ ret = NO_HYPHEN;
+
+end:
+ return ret;
+}
+
+void
+text_loader::process_text(paragraph *par, bool with_indentation)
+{
+ char *cur, *tmp;
+ ascii_paragraph_word *cur_word, *indentation;
+ unsigned int width;
+ hyphen_type_t type;
+ unsigned int hyphenate = 0;
+ char previous_char;
+ char *tmp_text;
+
+ /* Add indentation width 18 */
+ if (with_indentation) {
+ indentation = new ascii_paragraph_word(" ");
+ indentation->set_width(18);
+ par->add_box(indentation);
+ }
+
+ /* Build the paragraph: for each word of 'text' we check if there is an
+ * explicit hyphen (here only the word "lime-tree"), otherwise we add an
+ * optional hyphen, and add the corresponding items. For example 'whenever'
+ * has an optional hyphen (when-ever), so we:
+ * - add a box of width 4 ('when').
+ * - add an optional hyphen, which is a penalty item (penalty added only if
+ the hyphen is chosen).
+ * - add a box of width 4 ('ever').
+ */
+ tmp_text = (char *) calloc(strlen(text_) + 1, sizeof(char));
+ strcpy(tmp_text, text_);
+ cur = strtok(tmp_text, " ");
+ while (cur != NULL) {
+ type = simulate_hyphenate(cur, &hyphenate);
+ if (type != NO_HYPHEN) {
+ tmp = (char *) malloc(hyphenate + 1);
+ strncpy(tmp, cur, hyphenate);
+ tmp[hyphenate] = '\0';
+ cur_word = new ascii_paragraph_word(tmp);
+ par->add_box(cur_word);
+ free(tmp);
+ if (type == EXPLICIT_HYPHEN) {
+ par->add_explicit_hyphen();
+ } else {
+ ascii_paragraph_word *hyphen_sign = new ascii_paragraph_word("-");
+ par->add_optional_hyphen(hyphen_sign);
+ }
+ cur_word = new ascii_paragraph_word(cur + hyphenate);
+ par->add_box(cur_word);
+ par->add_glue();
+ } else {
+ cur_word = new ascii_paragraph_word(cur);
+ par->add_box(cur_word);
+ par->add_glue();
+ }
+ cur = strtok(NULL, " ");
+ }
+ /* Add final glue */
+ par->finish();
+ free(tmp_text);
+}
+
+struct expected_break_info {
+ const char word[64];
+ unsigned int demerit;
+ unsigned int total_demerits;
+};
+
+
+#define PRINT_RESULT(n_failures) \
+ do { \
+ if (n_failures == 0) \
+ printf("-- Test %s PASSED\n\n", __FUNCTION__); \
+ else \
+ fprintf(stderr, "** Test %s FAILED, %d failures\n", __FUNCTION__, n_failures); \
+ } while(0)
+
+class test_paragraph {
+
+private:
+ text_loader *text_loader_;
+ int check_all_breakpoint(struct expected_break_info tab_expected[],
+ paragraph *par);
+ int check_best_breakpoint(struct expected_break_info tab_expected[],
+ paragraph *par);
+ int check_best_breakpoint(const char*tab_expected[], paragraph *par);
+public:
+ test_paragraph();
+ ~test_paragraph();
+ void suite1_init();
+ int test11_original_example();
+ int test12_example_with_default_demerits_formula();
+ int test13_example_with_larger_tolerance();
+
+ void suite2_init();
+ int test21_hyphen_flagged_penalty();
+
+ void suite3_init();
+ int test31_fitness_class();
+};
+
+test_paragraph::test_paragraph()
+{
+ text_loader_ = NULL;
+}
+
+/* Check all breakpoints list, we start at passive_breaks_list_head_.next
+ * because the first breakpoint is the initial breakpoint */
+int
+test_paragraph::check_all_breakpoint(struct expected_break_info tab_expected[],
+ paragraph *par)
+{
+ int res = 0;
+ breakpoint *pos;
+ unsigned int total_demerits;
+ char word[256];
+ int k = 0;
+ item *i;
+
+ list_for_each_entry(pos,
+ par->passive_breaks_list_head_.next,
+ list_,
+ breakpoint) {
+ total_demerits = pos->get_total_demerits();
+ res += test_assert(tab_expected[k].total_demerits == total_demerits,
+ "total demerits: expected: %u, actual: %u",
+ tab_expected[k].total_demerits,
+ total_demerits);
+ i = pos->get_previous_box();
+ if (i != NULL) {
+ paragraph_word *pw = i->get_word();
+ pw->snprint(word, 256);
+ i = pos->get_item();
+ res += test_assert(strcmp(tab_expected[k].word, word) == 0,
+ "expected: '%s' actual: '%s'",
+ tab_expected[k].word,
+ word);
+ }
+ k++;
+ }
+
+ return res;
+}
+
+/* Check best breakpoints list, the first breakpoint of best_breaks_list_head_
+ is the breakpoint at the end of the 1st line, not the initial breakpoint
+ Return the number of assert that failed or 0 if OK */
+int
+test_paragraph::check_best_breakpoint(struct expected_break_info tab_expected[],
+ paragraph *par)
+{
+ int res = 0;
+ unsigned int total_demerits;
+ char word[256];
+ int k = 0;
+ item *i;
+
+ for (k = 0; k < par->get_number_of_lines(); k++) {
+ total_demerits = par->array_best_breaks_[k]->get_total_demerits();
+ res += test_assert(tab_expected[k].total_demerits == total_demerits,
+ "total demerits: expected: %u, actual: %u",
+ tab_expected[k].total_demerits,
+ total_demerits);
+ i = par->array_best_breaks_[k]->get_previous_box();
+ if (i == NULL) {
+ res += test_assert(false, "cannot find breakpoint's previous box");
+ } else {
+ paragraph_word *pw = i->get_word();
+ pw->snprint(word, 256);
+ i = par->array_best_breaks_[k]->get_item();
+ res += test_assert(strcmp(tab_expected[k].word, word) == 0,
+ "expected: '%s' actual: '%s'",
+ tab_expected[k].word,
+ word);
+ }
+ }
+
+ return res;
+}
+
+int
+test_paragraph::check_best_breakpoint(const char*tab_expected[], paragraph *par)
+{
+ int res = 0;
+ char word[256];
+ int k = 0;
+ item *i;
+
+ for (k = 0; k < par->get_number_of_lines(); k++) {
+ i = par->array_best_breaks_[k]->get_previous_box();
+ if (i == NULL) {
+ res += test_assert(false, "cannot find breakpoint's previous box");
+ } else {
+ paragraph_word *pw;
+ pw = i->get_word();
+ pw->snprint(word, 256);
+ i = par->array_best_breaks_[k]->get_item();
+ res += test_assert(strcmp(tab_expected[k], word) == 0,
+ "expected: '%s' actual: '%s'",
+ tab_expected[k],
+ word);
+ }
+ }
+
+ return res;
+}
+
+test_paragraph::~test_paragraph()
+{
+ if (text_loader_)
+ delete text_loader_;
+}
+
+/* Load the text and add to par the corresponding box, glue, penalty */
+
+/**
+ * Test 1
+ *
+ * Check each line ratio. FIXME understand why the last one should be 0.004 and
+ * not 0.000.
+ */
+void
+test_paragraph::suite1_init()
+{
+ text_loader *tl;
+ char text[] =
+ "In olden times when wishing still helped one, there lived a "
+ "king whose daughters were all beautiful; and the youngest was "
+ "so beautiful that the sun itself, which has seen so much, was "
+ "astonished whenever it shone in her face. Close by the king's "
+ "castle lay a great dark forest, and under an old lime-tree in the "
+ "forest was a well, and when the day was very warm, the king's "
+ "child went out into the forest and sat down by the side of the "
+ "cool fountain; and when she was bored she took a golden ball, "
+ "and threw it up on high and caught it; and this ball was her "
+ "favorite plaything.";
+
+ printf("-- Suite 1 Initialisation\n");
+ tl = new text_loader(text);
+ text_loader_ = tl;
+}
+
+int
+test_paragraph::test11_original_example()
+{
+ paragraph *par = new paragraph;
+ paragraph_printer *printer;
+ int res = 0;
+ float expected_line_ratio[10] = {0.774, 0.179, 0.629, 0.545, 0.000,
+ 0.079, 0.282, 0.294, 0.575, 0.000};
+
+ int n_lines;
+ int k;
+ float ratio;
+ struct expected_break_info all_expected[] = {
+ { "a", 2209, 2209 },
+ { "king", 1521, 1521 },
+ { "was", 4, 2213 },
+ { "so", 3136, 4657 },
+ { "was", 676, 2889 },
+ { "aston", 3600, 8257 },
+ { "king's", 289, 3178 },
+ { "castle", 9, 8266 },
+ { "lay", 4489, 12746 },
+ { "in", 5929, 9107 },
+ { "the", 1, 3179 },
+ { "for", 3481, 11747 },
+ { "est", 1, 8267 },
+ { "was", 4, 12750 },
+ { "a", 2209, 14955 },
+ { "the", 676, 9783 },
+ { "king's", 1, 3180 },
+ { "child", 4, 8271 },
+ { "went", 1, 12751 },
+ { "out", 1369, 16324 },
+ { "side", 16, 9799 },
+ { "of", 49, 9832 },
+ { "the", 9, 3189 },
+ { "cool", 121, 8392 },
+ { "foun", 3249, 16000 },
+ { "tain;", 400, 13151 },
+ { "and", 1444, 17768 },
+ { "golden", 1, 9800 },
+ { "ball,", 16, 3205 },
+ { "and", 25, 8417 },
+ { "threw", 4, 16004 },
+ { "it", 289, 13440 },
+ { "up", 4, 13155 },
+ { "on", 1, 17769 },
+ { "was", 25, 9825 },
+ { "her", 400, 3605 },
+ { "favor", 2601, 11018 },
+ { "ite", 16, 8433 },
+ { "play", 3364, 16804 },
+ { "thing.", 1, 3606 }
+ };
+
+ struct expected_break_info best_expected[] = {
+ { "a", 2209, 2209 },
+ { "was", 4, 2213 },
+ { "was", 676, 2889 },
+ { "king's", 289, 3178 },
+ { "the", 1, 3179 },
+ { "king's", 1, 3180 },
+ { "the", 9, 3189 },
+ { "ball,", 16, 3205 },
+ { "her", 400, 3605 },
+ { "thing.", 1, 3606 }
+ };
+
+ printf("-- Test11...\n");
+
+ par->config_use_old_demerits_formula();
+ par->config_no_fitness_class();
+ text_loader_->process_text(par, true);
+ par->format_knuth_plass();
+
+ /* There should be 10 lines */
+ printf(" Checking the number of lines\n");
+ n_lines = par->get_number_of_lines();
+ if (test_assert(n_lines == 10,
+ "actual number of lines:%d expected: 10",
+ n_lines) == TEST_FAIL) {
+ res++;
+ }
+
+ /* test ratio */
+ printf(" Checking the lines adjustement ratio\n");
+ for (k = 0; k < 10; k++) {
+ ratio = par->get_adjust_ratio(k + 1);
+ if (test_assert(fabs(ratio - expected_line_ratio[k]) < 0.001,
+ "line number %d expected: %.3f actual ratio: %.3f",
+ k + 1, expected_line_ratio[k], ratio) == TEST_FAIL) {
+ res++;
+ }
+ }
+
+ printf(" Checking all breakpoints demerits\n");
+ res += check_all_breakpoint(all_expected, par);
+
+ printf(" Checking the best breakpoints array\n");
+ res += check_best_breakpoint(best_expected, par);
+ printer = new paragraph_printer(par);
+ printer->print();
+ delete printer;
+ PRINT_RESULT(res);
+ delete par;
+
+ return res;
+}
+
+int
+test_paragraph::test12_example_with_default_demerits_formula()
+{
+ int res = 0;
+ paragraph *par = new paragraph;
+ fitness_class_t fitness_class;
+ struct expected_break_info best_expected[] = {
+ { "a", 2209, 2209 },
+ { "was", 4, 2213 },
+ { "was", 676, 2889 },
+ { "king's", 289, 3178 },
+ { "the", 1, 3179 },
+ { "king's", 1, 3180 },
+ { "the", 9, 3189 },
+ { "ball,", 16, 3205 },
+ { "her", 400, 3605 },
+ { "thing.", 1, 3606 }
+ };
+ fitness_class_t expected_fitness_class[10] = {
+ FITNESS_CLASS_LOOSE,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_LOOSE,
+ FITNESS_CLASS_LOOSE,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_NORMAL,
+ FITNESS_CLASS_LOOSE,
+ FITNESS_CLASS_NORMAL
+ };
+
+ int k = 0;
+
+ printf("-- Test12...\n");
+ text_loader_->process_text(par, true);
+ par->format_knuth_plass();
+
+ printf(" Checking the best breakpoints array\n");
+ res += check_best_breakpoint(best_expected, par);
+
+ printf(" Checking the lines fitness class\n");
+ for (k = 0; k < 10; k++) {
+ fitness_class = par->get_fitness_class(k + 1);
+ if (test_assert(
+ (fitness_class == expected_fitness_class[k]),
+ "line number: %d expected: %d actual fitness class: %d",
+ k + 1,
+ expected_fitness_class[k],
+ fitness_class) == TEST_FAIL)
+ res++;
+ }
+
+ PRINT_RESULT(res);
+ delete par;
+
+ return res;
+}
+
+int
+test_paragraph::test13_example_with_larger_tolerance()
+{
+ int res = 0;
+ paragraph *par = new paragraph;
+ struct expected_break_info best_expected[] = {
+ { "a", 2209, 2209 },
+ { "was", 4, 2213 },
+ { "was", 676, 2889 },
+ { "king's", 289, 3178 },
+ { "the", 1, 3179 },
+ { "king's", 1, 3180 },
+ { "the", 9, 3189 },
+ { "ball,", 16, 3205 },
+ { "her", 400, 3605 },
+ { "thing.", 1, 3606 }
+ };
+
+ printf("-- Test13...\n");
+ text_loader_->process_text(par, true);
+ par->format_knuth_plass(10);
+
+ printf(" Checking the best breakpoints array\n");
+ res += check_best_breakpoint(best_expected, par);
+
+ PRINT_RESULT(res);
+ delete par;
+
+ return res;
+}
+
+/* In this test the word 'hyphenationtest' can be hyphenated after
+ * 'hyphenation'. Letters 'A', 'B' and 'D' have a width of 10; 'C' has a width
+ * of 13, which means that the algorithm would naturally try to cut like this:
+
+ AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAA hyphenation-
+ test BBBBBBBBBB BBBBBBBBBB BBBBBBBBBB jlC hyphenation-
+ test DDDDDDDDDD DDDDDDDDDD";
+
+ However the additional penalty for two hyphenation in a row should make the
+ algorithm chose the break before the first DDDDDDDDDD.
+ */
+void
+test_paragraph::suite2_init()
+{
+ text_loader *tl;
+ char text[] =
+ "AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAA hyphenationtest "
+ "BBBBBBBBBB BBBBBBBBBB BBBBBBBBBB jlC hyphenationtest "
+ "DDDDDDDDDD DDDDDDDDDD";
+
+ printf("-- Suite 2 Initialisation\n");
+ tl = new text_loader(text);
+ text_loader_ = tl;
+}
+
+int
+test_paragraph::test21_hyphen_flagged_penalty()
+{
+ int res = 0;
+ paragraph *par = new paragraph;
+ paragraph_printer *printer;
+ const char *best_expected[] = {
+ "hyphenation",
+ "test", //FIXME actually is should be hyphenationtest
+ "DDDDDDDDDD"
+ };
+
+ printf("-- Test21...\n");
+ text_loader_->process_text(par, false);
+ par->format_knuth_plass(2);
+
+ printf(" Checking the best breakpoints array\n");
+ res += check_best_breakpoint(best_expected, par);
+
+ printer = new paragraph_printer(par);
+ printer->print();
+ delete printer;
+ PRINT_RESULT(res);
+ delete par;
+
+ return res;
+}
+
+/* There is only 1 feasible breakpoint at the first line, and the line is of
+class 0. At the second line, there are two feasible breaks, a class and a
+class 2; the class 2 is better but the class 1 should be chosen because of the
+first line. */
+void
+test_paragraph::suite3_init()
+{
+ text_loader *tl;
+ char text[] =
+ "The first line's best break makes it very veryyyyyy tiiiiiiiiiiiiiight, "
+ "the second line's best break is of class two but another break will "
+ "have to be preferred; it will give another line of class 0.";
+
+ printf("-- Suite 3 Initialisation\n");
+ tl = new text_loader(text);
+ text_loader_ = tl;
+}
+
+int
+test_paragraph::test31_fitness_class()
+{
+ int res = 0;
+ paragraph *par = new paragraph;
+ paragraph_printer *printer;
+ int k = 0;
+ item *i;
+
+ const char *best_expected[] = {
+ "tiiiiiiiiiiiiiight,",
+ "will",
+ "0."
+ };
+ char word[256];
+
+ printf("-- Test31...\n");
+ text_loader_->process_text(par, false);
+ par->format_knuth_plass(2);
+
+ printf(" Checking the best breakpoints array\n");
+ for (k = 0; k < par->get_number_of_lines(); k++) {
+ i = par->array_best_breaks_[k]->get_previous_box();
+ if (i == NULL) {
+ res += test_assert(false, "cannot find breakpoint's previous box");
+ } else {
+ i->sprint_word(word);
+ i = par->array_best_breaks_[k]->get_item();
+ res += test_assert(strcmp(best_expected[k], word) == 0,
+ "expected: '%s' actual: '%s'",
+ best_expected[k],
+ word);
+ }
+ }
+
+ printer = new paragraph_printer(par);
+ printer->print();
+ delete printer;
+ PRINT_RESULT(res);
+ delete par;
+
+ return res;
+}
+
+
+int main (int argc, char **argv)
+{
+ int res = 0;
+ int c;
+ test_paragraph *tp;
+ char *file_path = NULL;
+ unsigned int line_length = 500;
+ float tolerance = 1;
+ long int suite_to_launch = -1;
+ long int test_to_launch = -1;
+
+ while ((c = getopt (argc, argv, "f:l:s:t:T:")) != -1) {
+ switch (c)
+ {
+ case 'f':
+ file_path = optarg;
+ break;
+ case 'l':
+ line_length = strtol(optarg, NULL, 10);
+ break;
+ case 's':
+ suite_to_launch = strtol(optarg, NULL, 10);
+ break;
+ case 't':
+ test_to_launch = strtol(optarg, NULL, 10);
+ break;
+ case 'T':
+ tolerance = strtof(optarg, NULL);
+ break;
+ case '?':
+ if (optopt == 'f' || optopt == 's' || optopt == 't')
+ fprintf (stderr, "Option -%c requires an argument.\n", optopt);
+ else if (isprint(optopt))
+ fprintf (stderr, "Unknown option `-%c'.\n", optopt);
+ else
+ fprintf (stderr,
+ "Unknown option character `\\x%x'.\n",
+ optopt);
+ return 1;
+ default:
+ abort();
+ }
+ }
+
+ if (suite_to_launch == 0 && test_to_launch > 0) {
+ fprintf (stderr,
+ "Passing test number without test suite, please use option -s");
+ res = -1;
+ goto end;
+ }
+
+ if (file_path != NULL) {
+ text_loader *tl = new text_loader(NULL, file_path);
+ paragraph *par = new paragraph();
+ paragraph_printer *printer;
+ printf("Processing content of %s with tolerance:%.3f line length:%u\n\n",
+ file_path, tolerance, line_length);
+ tl->process_text(par, true);
+ par->format_knuth_plass(tolerance, line_length);
+ printer = new paragraph_printer(par);
+ printer->print();
+ delete printer;
+ delete par;
+ delete tl;
+ } else {
+ if (suite_to_launch == -1 || suite_to_launch == 1) {
+ tp = new test_paragraph();
+ tp->suite1_init();
+ if (test_to_launch == -1 || test_to_launch == 1)
+ res += tp->test11_original_example();
+ if (test_to_launch == -1 || test_to_launch == 2)
+ res += tp->test12_example_with_default_demerits_formula();
+ if (test_to_launch == -1 || test_to_launch == 3)
+ res += tp->test13_example_with_larger_tolerance();
+ delete tp;
+ }
+
+ if (suite_to_launch == -1 || suite_to_launch == 2) {
+ tp = new test_paragraph();
+ tp->suite2_init();
+ if (test_to_launch == -1 || test_to_launch == 1)
+ res += tp->test21_hyphen_flagged_penalty();
+ delete tp;
+ }
+
+ if (suite_to_launch == -1 || suite_to_launch == 3) {
+ tp = new test_paragraph();
+ tp->suite3_init();
+ if (test_to_launch == -1 || test_to_launch == 1)
+ res += tp->test31_fitness_class();
+ delete tp;
+ }
+
+ if (res == 0)
+ printf("All tests passed\n");
+ else
+ fprintf(stderr, "%d tests failed\n", res);
+ }
+
+end:
+ return res;
+}