From fd9bd05d191b3baa16c074197913c9ef0a9478b8 Mon Sep 17 00:00:00 2001 From: dmg Date: Mon, 1 Jul 2013 19:26:30 -0400 Subject: tighten some definitions to avoid false positives without hurting precision. In particular, removed "subject" as a "legal" term. --- ChangeLog | 4 ++++ filter/Makefile | 2 +- filter/criticalword.dict | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index c58547b..6b2c2f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2013-07-01 dmg + * filter/criticalword.dict: tighten some definitions to avoid + false positives without hurting precision. In particular, removed + "subject" as a "legal" term. + * senttok/licensesentence.dict (publicDomain): Added another public domain. 2011-02-08 diff --git a/filter/Makefile b/filter/Makefile index eb1ffea..8c0d42b 100644 --- a/filter/Makefile +++ b/filter/Makefile @@ -1,4 +1,4 @@ default: cp ../senttok/licensesentence.dict /tmp/test.sentences ./filter.pl /tmp/test.sentences - diff -w -B /dev/null /tmp/test.badsent + egrep -v '^#' /tmp/test.badsent diff --git a/filter/criticalword.dict b/filter/criticalword.dict index a2228bf..b58a586 100755 --- a/filter/criticalword.dict +++ b/filter/criticalword.dict @@ -107,7 +107,7 @@ for details all intellectual property rights sale sell -subject +subject to terms #under# too common to be useful warranties @@ -119,6 +119,8 @@ meet some day notices legal accompanying +included with this distribution for more information +See the file public domain special exception notwithstanding @@ -128,3 +130,4 @@ suitability computer program whose purpose disclaims copyright software is covered +Copyright -- cgit v1.2.1