summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordmgerman <dmg@uvic.ca>2012-12-30 03:00:32 -0800
committerdmgerman <dmg@uvic.ca>2012-12-30 03:00:32 -0800
commitb84eee21cb5510f18d07aca937c126f22ba76134 (patch)
tree94da5deb64c6d6e20c859726a4f66a448a4c6882
parentb8f5e1e20acb69bfe4d16ad9edc05ffe4550b708 (diff)
parente35481d208a0225623450d37fa45ff95cb753a18 (diff)
downloadninka-b84eee21cb5510f18d07aca937c126f22ba76134.tar.gz
Merge pull request #2 from dktrkranz/master
Expand comments extractor and clean up.
-rw-r--r--comments/CHANGES7
-rw-r--r--comments/Makefile48
-rw-r--r--comments/README13
-rw-r--r--comments/comments.1148
-rw-r--r--comments/comments.tar.gzbin38905 -> 0 bytes
-rw-r--r--comments/dformat.cpp593
-rw-r--r--comments/dformat.h64
-rw-r--r--comments/func.cpp103
-rw-r--r--comments/func.h27
-rw-r--r--comments/io.cpp303
-rw-r--r--comments/io.h66
-rw-r--r--comments/main.cpp62
-rw-r--r--comments/main.h42
13 files changed, 1473 insertions, 3 deletions
diff --git a/comments/CHANGES b/comments/CHANGES
new file mode 100644
index 0000000..c87b288
--- /dev/null
+++ b/comments/CHANGES
@@ -0,0 +1,7 @@
+Version 1.1.
+
+* Limit number of comments output
+
+Version 1.0
+
+* First release
diff --git a/comments/Makefile b/comments/Makefile
new file mode 100644
index 0000000..3c024a3
--- /dev/null
+++ b/comments/Makefile
@@ -0,0 +1,48 @@
+# Build, install and packaging rules for the "comments" extractor.
+VERSION = 4
+NAME = comments
+BETA = no
+
+# Base name of the directory and tarball produced by the "distro" target.
+DIR = $(NAME)-v$(VERSION)
+
+# VERSION and NAME reach the sources as string macros; -DBETA is commented out here.
+DEFINES = -DVERSION=\"$(VERSION)\" -DNAME=\"$(NAME)\" #-DBETA
+DISTROFILES = Makefile README CHANGES main.cpp main.h io.cpp io.h dformat.cpp dformat.h func.cpp func.h $(NAME).1
+OPTIONS = -O2 -Wall -Wno-deprecated
+
+# Uncomment to build with debug symbols and the DEBUG/IODEBUG trace output.
+#DEBUG = -g -DDEBUG -DIODEBUG
+
+
+# Compiles the four translation units and links them into $(NAME).
+all:
+ g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c io.cpp
+ g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c dformat.cpp
+ g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c func.cpp
+ g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c main.cpp
+ g++ -o $(NAME) main.o io.o dformat.o func.o
+
+# Copies the binary to /usr/local/bin and the man page to /usr/local/man/man1.
+install:
+ @echo -n "Installing binary: "
+ @cp $(NAME) /usr/local/bin
+ @chmod 755 /usr/local/bin/$(NAME)
+ @echo "Done."
+ @echo -n "Installing man page: "
+ @cp $(NAME).1 /usr/local/man/man1
+ @echo -e "Done.\n"
+ @echo -n `$(NAME)`
+ @echo -e "\nPlease read the README if you have not already done so."
+
+# Packs DISTROFILES into a tarball; the directory and tarball get a "b" suffix when BETA is "yes".
+distro:
+ rm -rf $(DIR)*.tar.gz $(NAME) $(DIR)
+ @if [ "$(BETA)" = "yes" ]; then \
+ mkdir $(DIR)b; \
+ cp $(DISTROFILES) $(DIR)b; \
+ tar -cvzf $(DIR)b.tar.gz $(DIR)b; \
+ rm -rf $(DIR)b; \
+ else \
+ mkdir $(DIR); \
+ cp $(DISTROFILES) $(DIR); \
+ tar -cvzf $(DIR).tar.gz $(DIR); \
+ rm -rf $(DIR); \
+ fi
+
+# Removes the object files and the "comments" binary.
+clean:
+ rm -f *.o comments
+
diff --git a/comments/README b/comments/README
index 7a0b689..ef5ff68 100644
--- a/comments/README
+++ b/comments/README
@@ -1,9 +1,16 @@
-In order to use Ninka you must install the comments extractor found in this directory.
+In order to use Ninka you must install the comments extractor found
+in this directory.
If Ninka does not find it in the executable path, it will use the
-first 400 lines from source code, and is likely to reduce the precision of Ninka.
-
+first 400 lines from source code, and is likely to reduce the precision
+of Ninka.
+Code and Comments separator version 1.1 written by Daniel M. German
+(dmg@uvic.ca) based on Mangle v3.0.7 by Jon Newman <mangle@biz0r.biz>
+http://biz0r.biz. Unfortunately the original URL does not exist any
+more.
+This program is distributed under the GNU General Public License
+Version 2 or (at your option) any later version.
diff --git a/comments/comments.1 b/comments/comments.1
new file mode 100644
index 0000000..3b78686
--- /dev/null
+++ b/comments/comments.1
@@ -0,0 +1,148 @@
+.TH comments 1
+.SH NAME
+comments - comments is an open source, C/C++ de-commentor. It will
+remove all comments and output them to a different file.
+
+.SH SYNOPSIS
+comments <options> [file1] <options> <file2> <etc>
+ -r Don't remove CR/LF
+ -c Only remove comments
+ -o Output to stdout code and stderr comments
+ -n Append newline to source
+ -x Null out options
+ -d Keep spaces in preprocessor entries
+ -l Do no mangling
+ -m Do not place markers where code or comments are
+ -t Print summary in tab delimited form
+ -C Print summary in comma delimited form
+ -v Print version
+.SH DESCRIPTION
+.B Comments
+is a programming utility used to strip all comments and/or formatting from C/C++ code. It has a variety of command line
+options that make it a powerful utility.
+.SH NOTES
+.B Options
+
+Enabling options in comments is simple and straightforward. Here is an
+example of how to just remove the comments from a source:
+
+.B ~# comments -c foo.cpp
+
+More advanced features are like the ability to change the options on each
+file, here is an example of how to remove the comments from one source,
+output to stdout, but remove everything from the second source.
+
+.B ~# comments -co foo.cpp -x bar.cpp
+
+The '-x' option makes comments remove all options. Here is an example similar
+to the one above, but it instead leaves the CR/LF's in the second source.
+
+.B ~# comments -co foo.cpp -xr bar.cpp
+
+As you can see, the '-x' option was used first to remove all current
+options, and then the '-r' option was appended to make comments keep the
+CR/LF's.
+
+If you don't specify any options to a file, it will use the previous files
+options.
+
+.B ~# comments -co foo.cpp bar.cpp
+
+That command will use the '-co' options for both foo.cpp and bar.cpp
+
+.SH OPTIONS
+
+.TP
+.B -r Don't remove CR/LF
+
+With this option enabled, and no others, it will remove everything but line feeds.
+
+.TP
+.B -c Only remove comments
+
+With this option enabled, and no others, it will remove only comments from the source.
+
+.TP
+.B -o Output to stdout
+
+This option specifies the output to be printed to STDOUT. This is used primarily if you wish to redirect the data.
+
+.TP
+.B -n Append newline to source
+
+This option will make comments append a newline to the end of the source file. This is used by some people using C source code because some C compilers complain about not having a newline at the end of the source.
+
+.TP
+.B -x Null out options
+
+This option NULLs out all previous options so that others may be specified.
+
+.TP
+.B -d Keep spaces in preprocessor entries
+
+This option makes comments not do any reformatting of preprocessor entries.
+
+.TP
+.B -l Do no mangling
+
+This option tells comments not to modify the input at all. This option was only included for completeness.
+
+.TP
+.B -m Do not place markers in output
+
+By default comments places CODE and /****/ where code or
+comments are supposed to be in their corresponding outputs. This option turns those markers off.
+
+.TP
+.B -t Print summary in tab delimited form
+.TP
+.B -C Print summary in comma delimited form
+
+This option makes comments print the summary information in tabular form for easier input into other programs.
+
+Example output:
+
+root@biz:~/comments# comments -t dformat.h
+.RS
+1 dformat.h dformat.h.commentsd 1147 565 50.7411
+
+Explanation of each column:
+.RS
+.RE
+1: The number of the file in order (1, 2, 3, etc).
+.RS
+.RE
+2: Name of the input file.
+.RS
+.RE
+3: Name of the output file.
+.RS
+.RE
+4: Original size of the input (bytes).
+.RS
+.RE
+5: Output size (bytes).
+.RS
+.RE
+6: Percentage of reduction calculated by (100 - 100(output_bytes/input_bytes))
+.RS
+.RE
+
+.I NOTE:
+If the '-o' option (output to stdout) is given with '-t', the third column will be NULL.
+.RS
+.RE
+root@biz:~/comments# comments -to main.cpp >output_file
+.RS
+.RE
+1 main.cpp 844 368 56.3981
+.RE
+
+.TP
+.B -v Print version
+
+This option causes comments to print the version info and exit immediately.
+
+.SH BUGS
+None known. Email bug reports to
+.B biz@biz0r.biz
diff --git a/comments/comments.tar.gz b/comments/comments.tar.gz
deleted file mode 100644
index 0921713..0000000
--- a/comments/comments.tar.gz
+++ /dev/null
Binary files differ
diff --git a/comments/dformat.cpp b/comments/dformat.cpp
new file mode 100644
index 0000000..7108dbd
--- /dev/null
+++ b/comments/dformat.cpp
@@ -0,0 +1,593 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+ * mangle.cpp - Removes ALL comments and/or formatting from C/C++ code while
+ * keeping what is needed so that the program still operates
+ * the same exact way as before the conversion.
+ *
+ */
+
+#include "main.h"
+#define CLASS_ERROR_PRE "dformat"
+
+/*
+ * Builds the formatter from the program's command line.
+ *
+ * The argument list is walked once with test_args set to true (next() then
+ * does not count files), after which the argument cursor is rewound and the
+ * option set is reset again so the real pass starts from a clean state.
+ *
+ *   a  - argument count as received by main()
+ *   av - argument vector as received by main()
+ */
+dformat::dformat(int a, char** av)
+    : current_arg(1),   // start at argument 1
+      current_file(0),
+      argc(a),
+      argv(av),
+      ready(true)       // we're ok unless otherwise changed
+{
+    io.regionsCount = 0;
+    io.doneOutput = 0;
+
+    // Start from the "-x" (reset everything) option set.
+    if(!load_arguments((char *)"-x"))
+        cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl;
+
+    // Let's make sure the argument syntax is good first.
+    test_args = true;
+    while(next())
+        ;
+
+#ifdef DEBUG
+    cerr << CLASS_ERROR_PRE << "::dformat() argument syntax ok." << endl;
+#endif
+
+    // Rewind for the real pass.
+    current_arg = 1;
+    io.done(test_args);
+    test_args = false;
+
+    // Clear out the settings picked up during the test pass.
+    if(!load_arguments((char *)"-x"))
+        cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl;
+}
+
+// Empty destructor body; no explicit cleanup is performed here.
+dformat::~dformat()
+{
+}
+
+/*
+ * Advances to the next input file named on the command line.
+ *
+ * When the current argument starts with '-', it is handed to
+ * load_arguments() and the argument after it is used as the file name.
+ * The two output names passed to io.init() are the input name followed by
+ * DEFAULT_MANGLED_POSTFIX and DEFAULT_COMMENTS_POSTFIX respectively.
+ *
+ * Returns true when a file has been set up. Returns false when the object
+ * is not ready, the arguments are exhausted or malformed, a file name does
+ * not fit in FILE_NAME_LENGTH, or the io object reports a problem.
+ */
+bool dformat::next()
+{
+    char temp[FILE_NAME_LENGTH]={"\0"};
+    char temp2[FILE_NAME_LENGTH]={"\0"};
+
+    if(!ready) // Can't work if I'm not ready.
+        return false;
+
+#ifdef DEBUG
+    // argv[argc] is a null pointer, so only trace while still inside argv.
+    if(test_args && current_arg<argc)
+        cerr << CLASS_ERROR_PRE << "::next() testing argument ["
+             << argv[current_arg] << "]" << endl;
+#endif
+
+    if(current_arg>=argc)
+    {
+        if(argc==1) // no arguments at all: at least a file name is needed
+        {
+            usage();
+            ready=false;
+        }
+        return false;
+    }
+
+    if(argv[current_arg][0]=='-') // we have options waiting
+    {
+        // An option must be followed by a file, except "-v" which exits.
+        if((current_arg+1)<argc || argv[current_arg][1]=='v')
+        {
+            // load options and move to the next argument
+            if(!load_arguments(argv[current_arg++]))
+            {
+                usage();
+                return false;
+            }
+        }
+        else
+        {
+#ifdef DEBUG
+            cerr << CLASS_ERROR_PRE << "::next() Argument included without a file." << endl;
+#endif
+            ready=false;
+            usage();
+            return false;
+        }
+    }
+
+    io.done(test_args); // Finish the previous file off if needed.
+
+    // Refuse names that, with their postfix and terminator, would not fit
+    // in temp/temp2; the copies below are otherwise unbounded.
+    char* name=argv[current_arg];
+    const size_t name_len=strlen(name);
+    if((name_len+strlen(DEFAULT_MANGLED_POSTFIX))>=(size_t)FILE_NAME_LENGTH ||
+       (name_len+strlen(DEFAULT_COMMENTS_POSTFIX))>=(size_t)FILE_NAME_LENGTH)
+    {
+        cerr << "File name too long: " << name << endl;
+        ready=false;
+        return false;
+    }
+
+    strcpy(temp,name);
+    strcat(temp,DEFAULT_MANGLED_POSTFIX);
+    strcpy(temp2,name);
+    strcat(temp2,DEFAULT_COMMENTS_POSTFIX);
+    io.init(name,temp,temp2);
+
+    if(!io.ok())
+    {
+#ifdef DEBUG
+        cerr << CLASS_ERROR_PRE << "::next() io object not ok." << endl;
+#endif
+        ready=false;
+        return false;
+    }
+    current_arg++; // Done with this one, move to next
+
+    if(!test_args) // files are only counted on the real pass
+        current_file++;
+
+    return true; // all is good
+}
+
+/*
+ * Finishes the current file: optionally appends a newline to the output,
+ * prints the per-file summary to stderr and then calls io.done().
+ *
+ * The summary is tab delimited when tabular_delimited_result is set, comma
+ * delimited when comma_delimited_result is set (tab wins if both are set),
+ * and otherwise a human readable line that is suppressed when the output
+ * goes to stdout.
+ */
+void dformat::done()
+{
+    if(append_newline)
+        io.out('\n');
+
+    // Percentage by which the output is smaller than the input. An empty
+    // input is reported as 0% instead of dividing by zero.
+    const double in_bytes=io.input_bytes();
+    const double out_bytes=io.output_bytes();
+    const double reduced=(in_bytes>0.0 ? (100.0-(100.0*(out_bytes/in_bytes))) : 0.0);
+
+    if(tabular_delimited_result) // print in tabular form
+    {
+        cerr << current_file
+             << "\t" << io.i_name
+             << "\t" << io.o_name
+             << "\t" << io.input_bytes()
+             << "\t" << io.output_bytes()
+             << "\t" << reduced
+             << endl;
+    }
+    else if(comma_delimited_result)
+    {
+        cerr << current_file
+             << "," << io.i_name
+             << "," << io.o_name
+             << "," << io.input_bytes()
+             << "," << io.output_bytes()
+             << "," << reduced
+             << endl;
+    }
+    else if(!io.output_to_stdout)
+    {
+        cerr << "[" << current_file << "] \"" << io.i_name << "\" (" << io.input_bytes() << "b) ";
+        cerr << ">> \"" << io.o_name << "\" (" << io.output_bytes() << "b) (" << reduced << "% reduced)";
+        cerr << endl;
+    }
+
+    io.done();
+}
+
+/*
+ * Prints the command line synopsis and the list of options to stderr.
+ */
+void dformat::usage()
+{
+    // One entry per option line, printed in this order.
+    static const char* const option_help[]={
+        " -r Leave CR/LF's",
+        " -o output to STDOUT",
+        " -n Append newline",
+        " -x Use default options (nulls all previous ones)",
+        " -d Leave in preprocessor whitespace",
+        " -l Do no mangling",
+        " -m Do not add markers to output",
+        " -t Print summary in tab delimited form",
+        " -C Print summary in comma delimited form",
+        " -v Print version",
+        " -c<Number> Number of comment regions"
+    };
+    const unsigned int count=sizeof(option_help)/sizeof(option_help[0]);
+
+    cerr << "Usage: " << NAME << " <options> [file1] <options> <file2> <etc>";
+    for(unsigned int n=0; n<count; n++)
+        cerr << endl << option_help[n];
+    cerr << endl;
+}
+
+// Reports whether the object is still usable (the `ready` flag).
+bool dformat::ok()
+{
+    return ready;
+}
+
+/*
+ * Applies one option argument (for example "-ro" or "-c3") to the settings.
+ *
+ * The first character of str is skipped; every following character is an
+ * option letter. 'c' must be followed directly by a decimal number, which
+ * becomes io.regionsCount. 'v' prints the version and exits. A missing or
+ * unrepresentable number after 'c' prints an error and exits with status 1.
+ *
+ * Returns false for an empty string or an unknown option letter (usage() is
+ * printed in the latter case), true otherwise.
+ */
+bool dformat::load_arguments(char* str)
+{
+    const size_t len=strlen(str); // str is not modified, so measure it once
+    if(len==0)
+        return false;
+
+    for(size_t x=1; x<len; x++)
+    {
+        switch(str[x])
+        {
+            case 'x': // back to the default option set
+                io.input_from_stdin=false;
+                io.output_to_stdout=false;
+                io.addMarkers=true;
+                io.regionsCount = 0;
+                append_newline=false;
+                comments_only=true;
+                keep_preprocessor_whitespace=false;
+                tabular_delimited_result=false;
+                comma_delimited_result=false;
+                leave_newline=false;
+                no_modify=false;
+                break;
+
+#ifdef asdfasd
+            case 'w':
+                io.write_over_original=true;
+                break;
+
+#endif
+            case 'm':
+                io.addMarkers=false;
+                break;
+
+            case 'n':
+                append_newline=true;
+                break;
+
+            case 'c':
+                // the characters right after 'c' must form an integer
+                {
+                    char* digits_end=str+x+1;
+                    if(*digits_end<'0' || *digits_end>'9')
+                    {
+                        cerr << "Illegal number of comment regions for -c option" << endl;
+                        exit(1);
+                    }
+                    // strtol never overflows (it saturates), unlike atoi
+                    // whose behaviour is undefined for huge digit runs.
+                    const long parsed=strtol(str+x+1,&digits_end,10);
+                    const int regions=(int)parsed;
+                    if(regions!=parsed) // does not fit in an int
+                    {
+                        cerr << "Illegal number of comment regions for -c option" << endl;
+                        exit(1);
+                    }
+                    io.regionsCount = regions;
+                    // leave x on the last digit; the loop's x++ steps past it
+                    x=(size_t)(digits_end-str)-1;
+                }
+                break;
+
+            case 'r':
+                leave_newline=true;
+                break;
+
+            case 'd':
+                keep_preprocessor_whitespace=true;
+                break;
+
+            case 't':
+                tabular_delimited_result=true;
+                break;
+
+            case 'C':
+                comma_delimited_result=true;
+                break;
+
+            case 'v':
+                version();
+                exit(0);
+                break;
+
+            case 'l':
+                no_modify=true;
+                break;
+
+            case 'o':
+                io.output_to_stdout=true;
+                break;
+
+//          case 'i':
+//              io.input_from_stdin=true;
+//              break;
+
+            default: // Unknown option
+                usage();
+                return false;
+        }
+    }
+    return true;
+}
+
+/*
+ * The main separation loop.
+ *
+ * Pulls input through io.in() and dispatches on io.buf[0], with io.buf[1]
+ * and io.buf[2] consulted as look-ahead. Depending on the quote / comment /
+ * preprocessor / hex state kept in flag_history[0], each character is sent
+ * to io.out() (code side) or io.commentOut() (comment side), or replaced by
+ * a single space or newline. flag_history[1..] hold the states of previous
+ * iterations.
+ *
+ * The loop stops when the input is exhausted or io.doneOutput becomes set.
+ * With no_modify set every character is passed to io.out() untouched.
+ *
+ * Always returns true.
+ */
+bool dformat::format()
+{
+ int x=0;
+ char c='\0';
+ bool tbool=false;
+
+#ifdef DEBUG
+ cerr << CLASS_ERROR_PRE << "::format() Now formatting [" << io.i_name << "]"
+ << endl;
+#endif
+
+ // Reset the variables
+ for(x=0; x<FLAG_HISTORY_MAX; x++)
+ {
+ flag_history[x].in_single_quote=false;
+ flag_history[x].in_double_quote=false;
+ flag_history[x].in_line_comment=false;
+ flag_history[x].in_star_comment=false;
+ flag_history[x].in_preprocessor=false;
+ flag_history[x].in_hex=false;
+ flag_history[x].num_backslashes=0;
+ }
+
+ // keep grabbing data as long as its there
+ while(io.in() && io.data_waiting())
+ {
+ if (io.doneOutput) {
+ break;
+ }
+ if(no_modify)
+ {
+ io.out();
+ continue;
+ }
+
+ switch(io.buf[0])
+ {
+ case '\'':
+ case '\"':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ if(io.buf[0]=='\'')
+ flag_history[0].in_single_quote=true;
+ else if(io.buf[0]=='\"')
+ flag_history[0].in_double_quote=true;
+ }
+ else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote)
+ {
+ if((flag_history[0].num_backslashes%2)) ; // just an escaped quote, reset number of backslashes
+ else
+ {
+ if(io.buf[0]=='\'' && flag_history[0].in_single_quote)
+ flag_history[0].in_single_quote=false;
+ if(io.buf[0]=='\"' && flag_history[0].in_double_quote)
+ flag_history[0].in_double_quote=false;
+ }
+ }
+ io.out();
+
+ flag_history[0].num_backslashes=0; // null out number of backslashes
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ } else {
+ io.commentOut(); //dmg
+ }
+
+ break;
+
+ case '/':
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ if(io.buf[1]=='/' && !flag_history[0].in_line_comment && !flag_history[0].in_star_comment) {
+ flag_history[0].in_line_comment=true;
+ io.commentOut(); //dmg
+ }
+ else if(io.buf[1]=='*' && !flag_history[0].in_star_comment) {
+ flag_history[0].in_star_comment=true;
+ io.commentOut(); //dmg
+ }
+ else if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ io.out();
+ else
+ io.commentOut(); //dmg
+ }
+ else
+ io.out();
+
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ break;
+
+ case '*':
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ // The comment must already have been open two iterations ago
+ // (flag_history[2]); presumably this keeps the '*' of an opening
+ // "/*" from being taken as the start of "*/" -- TODO confirm.
+ if(io.buf[1]=='/' && flag_history[0].in_star_comment &&
+ flag_history[2].in_star_comment)
+ {
+ io.commentOut();
+ /* We need to write a \n after the comment... otherwise it gets very, very messy */
+
+ flag_history[0].in_star_comment=false;
+ io.in(); // Jump ahead one, we dont want the '/' used
+ io.commentOut();
+ io.commentOut('\n');
+ continue;
+ } else if(!flag_history[0].in_star_comment && !flag_history[0].in_line_comment) {
+ io.out();
+ } else {
+ io.commentOut();
+ }
+ }
+ else
+ io.out();
+
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ break;
+
+ case '#':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ tbool=contain_preprocessor((io.buf+1));
+ // Make sure we have the required carriage return before the preprocessor (if it really is a preprocessor)
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)] &&
+ (c=last_non_whitespace(io.last_written,(LAST_WRITTEN_LENGTH-1)))!='\r' && c!='\n' &&
+ tbool)
+ io.out('\n');
+
+ if(tbool)
+ flag_history[0].in_preprocessor=true;
+ }
+ io.out();
+ flag_history[0].num_backslashes=0; // null out number of backslashes
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ } else {
+ io.commentOut();
+ }
+ break;
+
+ case '\n':
+ case '\r':
+ if(!flag_history[0].in_star_comment)
+ {
+ if((((is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)]) || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)]) ||
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_') &&
+ (is_letter(io.buf[1]) || is_number(io.buf[1]) || io.buf[1]=='_'))
+ || flag_history[0].in_preprocessor || io.buf[1]=='#') && !flag_history[0].in_single_quote &&
+ !flag_history[0].in_double_quote && !comments_only)
+ {
+ if(flag_history[0].in_preprocessor)
+ {
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\\') // make sure its not multi-line
+ {
+ flag_history[0].in_preprocessor=false;
+ io.out();
+ }
+ else if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!=' ' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\t' &&
+ !leave_newline)
+ io.out(' '); // need atleast one space inbetween preprocessor items
+ else
+ io.out();
+ }
+ else if(io.buf[1]=='#')
+ {
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\0' && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' &&
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n'))
+ io.out();
+ }
+ else
+ io.out(' ');
+ }
+ else if(comments_only && !flag_history[0].in_line_comment)
+ io.out();
+ else if(leave_newline && !flag_history[0].in_line_comment)
+ io.out();
+ else if(flag_history[0].in_hex) // hex values need a space after them, so put a space in place of the crlf
+ {
+ io.out(' ');
+ flag_history[0].in_hex=false; // not in the hex value anymore
+ }
+ else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote)
+ io.out();
+
+ if(flag_history[0].in_line_comment && io.last_read[0]!='\\')
+ {
+ // NOTE(review): "!= '\r' || != '\n'" is true for every character, so
+ // this reduces to "comments_only"; '&&' may have been intended.
+ // Left unchanged because it affects the emitted output -- confirm.
+ if(comments_only && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' ||
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n')) {
+ io.out();
+ io.commentOut(); //dmg print end of line also
+ }
+ flag_history[0].in_line_comment=false;
+ }
+
+ if(!flag_history[0].in_line_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ } else { // we are inside a star comment
+ io.commentOut();
+#ifdef DUMPSPACES
+ io.out();
+#endif
+ }
+ break;
+
+ case ' ':
+ case '\t':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(flag_history[0].in_single_quote ||
+ flag_history[0].in_double_quote ||
+ comments_only) { // the only cases where we always output all of them
+ io.out();
+ }
+ else if( (flag_history[0].in_preprocessor || flag_history[0].in_hex) &&
+ ((io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r') ||
+ (keep_preprocessor_whitespace && flag_history[0].in_preprocessor)))
+ {
+ /* preprocessors require atleast a single whitespace char preserved both in front and behind non-whitespace
+ characters. Hex values require a space afterwards. */
+ if((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && !is_whitespace(io.buf[1]) &&
+ !(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='#' && contain_preprocessor(io.buf)))
+ || keep_preprocessor_whitespace) {
+ io.out();
+ } else {
+ io.commentOut();
+ }
+
+ if(flag_history[0].in_hex) // get out of hex if we are in one
+ flag_history[0].in_hex=false;
+ }
+ else if(((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && (is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)])
+ || io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_')) || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)]))
+ &&
+ (!is_whitespace(io.buf[1]) && ((is_letter(io.buf[1])
+ || io.buf[1]=='_') || is_number(io.buf[1]))) )
+ io.out();
+ else if(!strncmp((io.buf+1),"...",3) || !strncmp((io.last_written+(LAST_WRITTEN_LENGTH-3)),"...",3))
+ io.out(); // need space before (and after) these if already there
+ else if(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='/' && io.buf[1]=='*')
+ io.out(); // preserve whitespace so that if the file is mangled again it isn't construed as the start of a comment
+ else {
+ io.commentOut();
+ }
+ } else {
+ io.commentOut();
+ }
+ break;
+
+ default:
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ // increase num_backslashes if this is a backslash, else set to 0
+ flag_history[0].num_backslashes=(io.buf[0]=='\\' ? (flag_history[0].num_backslashes+1) : 0);
+
+ // colons pre-separated by whitespace or a cr/lf still need separation (c++ specific)
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]==':' &&
+ (is_whitespace(io.last_read[0]) || io.last_read[0]=='\r' || io.last_read[0]=='\n')
+ && io.buf[0]==':')
+ io.out(' ');
+
+ if(io.buf[0]=='\\' && (io.buf[1]=='\n' || io.buf[1]=='\r') && !comments_only && !leave_newline)
+ {
+ // Index 0 is the current state. The previous index "x" was a leftover
+ // loop counter that equals FLAG_HISTORY_MAX until the first history
+ // shift below has run, i.e. one element past the end of the array.
+ flag_history[0].num_backslashes=0;
+ io.in(); // skip over newline
+ }
+ else
+ {
+ // check to see if we are getting into a hex value as it needs a space after it
+ if((!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) &&
+ io.buf[0]=='0' && (io.buf[1]=='x' || io.buf[1]=='X') && (is_letter(io.buf[2]) || is_number(io.buf[2])))
+ flag_history[0].in_hex=true;
+
+ io.out();
+ }
+
+ if(flag_history[0].in_hex && (!is_letter(io.buf[0]) && !is_number(io.buf[0])))
+ flag_history[0].in_hex=false;
+ } else {
+ io.commentOut();
+ }
+ break;
+ }
+
+ // Remember the flags of previous iterations so we may reference them
+ // (the "continue" statements above skip this shift)
+ for(x=(FLAG_HISTORY_MAX-1); x; x--)
+ flag_history[x]=flag_history[(x-1)];
+ }
+
+ return true;
+}
+
+/*
+ * Prints the program name, version and credits to stderr. A "b" is
+ * appended to the version number when built with BETA defined.
+ */
+void dformat::version()
+{
+    cerr << NAME << " v" << VERSION;
+#ifdef BETA
+    cerr << "b";
+#endif
+    cerr << " by Jon Newman, and adapted by Daniel M. German, based on Mangle" << endl;
+}
diff --git a/comments/dformat.h b/comments/dformat.h
new file mode 100644
index 0000000..a1ae714
--- /dev/null
+++ b/comments/dformat.h
@@ -0,0 +1,64 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+/*
+ * mangle.cpp - Removes ALL comments and/or formatting from C/C++ code while
+ * keeping what is needed so that the program still operates
+ * the same exact way as before the conversion.
+ *
+ * This program has been vigorously tested, if you find any logic errors
+ * where something should have been taken out that wasn't, please email me
+ * - mangle@biz0r.biz
+ *
+ */
+
+#ifndef DFORMAT_H
+#define DFORMAT_H
+#include "main.h"
+
+/*
+ * Command line driver for the code/comment separator: walks the program
+ * arguments, sets up one input file at a time on its io member and runs the
+ * character-level separation over it.
+ */
+class dformat
+{
+  public:
+    dformat(int argc, char** argv);
+    ~dformat();
+
+    bool next();    // set up the next file named on the command line
+    void done();    // print the summary for the current file and finish it
+    bool ok();      // value of the ready flag
+    bool format();  // run the separation loop over the current file
+
+  private:
+    ::io io;                            // input/output handling
+    int current_arg;                    // index into argv
+    int current_file;                   // number of files set up so far
+    int argc;
+    char** argv;
+    bool ready;                         // false once an error has occurred
+    bool test_args;                     // true during the constructor's test pass
+    bool tabular_delimited_result;      // -t
+    bool comma_delimited_result;        // -C
+
+    // boolean variables used in the deformatting process
+    bool append_newline;                // -n
+    bool leave_newline;                 // -r
+    bool comments_only;                 // set by -x
+    bool keep_preprocessor_whitespace;  // -d
+    bool no_modify;                     // -l
+
+    // Scanner state; index 0 is the current state, higher indexes are the
+    // states of previous iterations.
+    struct fhist {
+        bool in_line_comment;
+        bool in_star_comment;
+        bool in_single_quote;
+        bool in_double_quote;
+        bool in_preprocessor;
+        bool in_hex;
+        int num_backslashes;
+    } flag_history[FLAG_HISTORY_MAX];
+
+    // Private functions
+    bool load_arguments(char* str);
+    void usage();
+    void version();
+};
+
+#endif
diff --git a/comments/func.cpp b/comments/func.cpp
new file mode 100644
index 0000000..51c6ae9
--- /dev/null
+++ b/comments/func.cpp
@@ -0,0 +1,103 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "main.h"
+
+// Words that contain_preprocessor() accepts after a '#'.
+char *preprocessors[]={
+    (char *)"define",
+    (char *)"include",
+    (char *)"undef",
+    (char *)"pragma",
+    (char *)"if",
+    (char *)"error",
+    (char *)"warning",
+    (char *)"else",
+    (char *)"elseif",
+    (char *)"elif",
+    (char *)"endif",
+    (char *)"ifdef",
+    (char *)"ifndef",
+    (char *)"ifdefine"
+};
+
+// Number of entries in the table above (14).
+#define NUM_PREPROCESSOR_STR ((int)(sizeof(preprocessors)/sizeof(preprocessors[0])))
+
+// True for the ASCII letters 'a'..'z' and 'A'..'Z'.
+bool is_letter(char ch)
+{
+    const bool lower=(ch>='a' && ch<='z');
+    const bool upper=(ch>='A' && ch<='Z');
+    return lower || upper;
+}
+
+// True when ch is a decimal digit, or when ch is '-' and next passes the
+// same test.
+bool is_number(char ch, char next)
+{
+    if(ch>='0' && ch<='9')
+        return true;
+    return (ch=='-' && is_number(next));
+}
+
+// Renames file `from` to `to`; returns true when rename() reports success.
+bool move(char* from, char* to)
+{
+    return rename(from,to)==0;
+}
+
+// True for a space or a tab; line breaks do not count as whitespace here.
+bool is_whitespace(char c)
+{
+    switch(c)
+    {
+        case ' ':
+        case '\t':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/*
+ * Reports whether str starts, after optional spaces/tabs, with one of the
+ * words in preprocessors[] (compared case-insensitively).
+ *
+ * Only the first strlen(str)-DEFINE_SEARCH_PRECISION positions are tried.
+ * When fewer characters remain than a word is long, only the remaining
+ * characters are compared. The scan gives up at the first character that
+ * is neither whitespace nor the start of a match.
+ */
+bool contain_preprocessor(char* str)
+{
+    const size_t len=strlen(str);
+
+    for(int pos=0; pos<((signed)len-DEFINE_SEARCH_PRECISION); pos++)
+    {
+        const size_t remaining=len-pos;
+
+        for(int k=0; k<NUM_PREPROCESSOR_STR; k++)
+        {
+            const size_t word_len=strlen(preprocessors[k]);
+            const size_t cmp_len=(remaining>word_len ? word_len : remaining);
+
+            if(strncasecmp(str+pos,preprocessors[k],cmp_len)==0)
+                return true; // identified as a preprocessor
+        }
+
+        // no match here and not whitespace either, so this isn't a preprocessor
+        if(!is_whitespace(str[pos]))
+            return false;
+    }
+
+    return false;
+}
+
+/*
+ * Returns the first character of string that is not a space or tab.
+ *
+ * With start > 0 the search begins at index start and moves backwards down
+ * to and including index 0. Otherwise it begins at index 0 and moves
+ * forwards up to the terminating NUL.
+ *
+ * Returns 'x' when every character examined is whitespace.
+ */
+char last_non_whitespace(char* string, int start)
+{
+    if(start>0) // start from inside array and work backwards
+    {
+        // x>=0 so that string[0] is examined too; the earlier "x" test
+        // stopped one element short.
+        for(int x=start; x>=0; x--)
+        {
+            if(!is_whitespace(string[x]))
+                return string[x];
+        }
+    }
+    else // start from beginning and work forwards
+    {
+        const int len=(signed)strlen(string);
+        for(int x=0; x<len; x++)
+        {
+            if(!is_whitespace(string[x]))
+                return string[x];
+        }
+    }
+    return 'x';
+}
diff --git a/comments/func.h b/comments/func.h
new file mode 100644
index 0000000..cb7e8d5
--- /dev/null
+++ b/comments/func.h
@@ -0,0 +1,27 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#ifndef FUNC_H
+
+bool is_letter(char);
+bool is_number(char, char='a');
+bool move(char*, char*);
+bool is_whitespace(char);
+bool contain_preprocessor(char*);
+char last_non_whitespace(char*,int=-1);
+
+#endif
diff --git a/comments/io.cpp b/comments/io.cpp
new file mode 100644
index 0000000..6f89340
--- /dev/null
+++ b/comments/io.cpp
@@ -0,0 +1,303 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#include "main.h"
+#define CLASS_ERROR_PRE "io"
+
+/*
+** Construct an io object and immediately open the given files via init().
+**
+** in       - path of the input file
+** out      - path of the code output file
+** comments - path of the comments output file
+** testing  - forwarded to init()
+**
+** Use ok() afterwards to find out whether every file could be opened.
+*/
+io::io(char* in, char* out, char *comments, bool testing)
+{
+    file_open=false;
+    first_init=true;
+    // init() branches on output_to_stdout, so both source/destination flags
+    // need a defined value here instead of being read uninitialized. A
+    // constructor that receives output file names writes to those files.
+    input_from_stdin=false;
+    output_to_stdout=false;
+    init(in,out,comments, testing);
+}
+
+// Default constructor: opens nothing, so ok() reports false until init()
+// has been called.
+io::io()
+    : ready(false),      // no file was initialised yet
+      first_init(true),  // the next init() is the first one
+      file_open(false)   // nothing for done() to close
+{
+}
+
+// Destructor: nothing to free besides the three file streams.
+io::~io()
+{
+    i.close();   // input
+    o.close();   // code output
+    co.close();  // comments output
+}
+
+/*
+** Initialize variables and set everything up for a new file.
+**
+** in       - path of the input file; always opened
+** out      - path of the code output file (opened unless output_to_stdout)
+** comments - path of the comments output file (opened unless output_to_stdout)
+** testing  - only forwarded to done() when a previous file is still open
+**
+** When a file cannot be opened, `ready` is cleared (see ok()) and a message
+** is written to cerr; the function still runs to the end and sets file_open.
+** The global byte counters, addMarkers and regionsCount are reset only on
+** the first call.
+*/
+void io::init(char* in, char* out, char * comments, bool testing)
+{
+    if(file_open) // the previous file was never closed with done()
+        done(testing);
+
+    ready=true; // we are ready unless an open below fails
+
+    // (re)init variables
+    memset(buf,'\0',BUF_LENGTH);
+    memset(last_written,'\0',LAST_WRITTEN_LENGTH);
+    memset(last_read,'\0',LAST_READ_LENGTH);
+    memset(o_name,'\0',FILE_NAME_LENGTH);
+    memset(i_name,'\0',FILE_NAME_LENGTH);
+    memset(c_name,'\0',FILE_NAME_LENGTH);
+    icounter=0.0; ocounter=0.0;
+    buf_count=0; // reset # of bytes in buffer
+    if(first_init) // state shared across files is reset only once
+    {
+        first_init=false;
+        iglobal_counter=0.0;
+        oglobal_counter=0.0;
+        addMarkers = 0;
+        regionsCount = 0;
+    }
+    outputRegions = 0;
+    input_line=0; input_column=0;
+    inComment = -1;
+    doneOutput = 0;
+    inCode = -1;
+    output_line=0; output_column=0;
+
+    // The input is always opened as a file: the input_from_stdin check was
+    // removed because of a bug that left the flag always set to TRUE.
+    i.open(in);
+    if(!i)
+    {
+        ready=false;
+        cerr << "Could not open (input) [" << in << "]" << endl;
+    }
+#ifdef DEBUG
+    else
+        cerr << CLASS_ERROR_PRE << "::init() Opened (input) ["
+             << in << "]" << endl;
+#endif
+
+    if(!output_to_stdout)
+    {
+        o.open(out);
+        if(!o)
+        {
+            ready=false;
+            cerr << CLASS_ERROR_PRE << "::init() Could not open (output) \""
+                 << out << "\"" << endl;
+        }
+        co.open(comments);
+        if(!co)
+        {
+            ready=false;
+            cerr << CLASS_ERROR_PRE << "::init() Could not open (comments) \""
+                 << comments << "\"" << endl;
+        }
+    }
+
+    // Remember the file names. The name buffers were zero-filled above, so
+    // copying at most FILE_NAME_LENGTH-1 bytes always leaves them terminated;
+    // an over-long path is truncated instead of overflowing the array.
+    // In stdout mode o_name and c_name simply stay empty.
+    strncpy(i_name,in,FILE_NAME_LENGTH-1);
+    if(!output_to_stdout)
+    {
+        strncpy(c_name,comments,FILE_NAME_LENGTH-1);
+        strncpy(o_name,out,FILE_NAME_LENGTH-1);
+    }
+
+    file_open=true;
+}
+
+/*
+** Close the files opened by init(); a call without an open file is a no-op.
+** The `testing` argument is accepted but not used.
+*/
+void io::done(bool testing)
+{
+    if(file_open)
+    {
+        // The input is closed unconditionally: the input_from_stdin check
+        // was removed because of a bug that set the flag unexpectedly.
+        i.close();
+
+        if(!output_to_stdout)
+        {
+            o.close();
+            co.close();
+        }
+
+        file_open=false;
+    }
+}
+
+// Number of line breaks in() has moved to the front of the buffer so far.
+int io::get_input_line()
+{
+    return this->input_line;
+}
+
+// Column of the input position; in() resets it to 0 at every line break.
+int io::get_input_column()
+{
+    return this->input_column;
+}
+
+// Number of line breaks passed to out() so far.
+int io::get_output_line()
+{
+    return this->output_line;
+}
+
+// Column of the output position; out() resets it to 0 at every line break.
+int io::get_output_column()
+{
+    return this->output_column;
+}
+
+/* Get data: advance the sliding input window by one character.
+**
+** The character at the front of buf is saved into last_read, buf is shifted
+** one position towards the front, and the next character of the input
+** stream is stored in the last slot of buf. buf_count, the input
+** line/column and the input byte counters are updated along the way.
+**
+** Always returns 1; use data_waiting() to find out whether the buffer
+** still holds characters.
+*/
+int io::in()
+{
+ memmove((last_read+1),last_read,(LAST_READ_LENGTH-1)); // age the history: newest entry lives at index 0
+ last_read[0]=buf[0]; // remember the character about to leave the buffer
+ memmove(buf,(buf+1),(BUF_LENGTH-1)); // shift the window one position towards the front
+
+ i.get(buf[(BUF_LENGTH-1)]); // get the next char
+ if(i.eof()) // EOF found, cancel that last read
+ {
+ buf[(BUF_LENGTH-1)]='\0'; // empty the slot the failed read was aimed at
+ if(buf[0] || buf_count==1) // if there is data at the front, then we erased some
+ buf_count--;
+ }
+ else
+ buf_count+=(buf_count<BUF_LENGTH ? (buf_count ? 1 : 2) : 0); // grows by 2 from zero, then by 1, and stops once it is no longer below BUF_LENGTH
+
+#ifdef IODEBUG
+ if(!i.eof())
+ cout << i_name << " >> \"" << buf[(BUF_LENGTH-1)] << "\"" << endl;
+ else
+ cout << CLASS_ERROR_PRE << "::in() " << i_name << " [EOF] " << buf[(BUF_LENGTH-1)] << endl;
+#endif
+
+ if(buf[0]=='\n' || buf[0]=='\r') // position tracking follows the character now at the front
+ {
+ input_column=0;
+ input_line++;
+ }
+ else
+ input_column++;
+
+ if(buf[(BUF_LENGTH-1)]!='\0') // count the byte only when the newest slot holds a character
+ {
+ icounter++;
+ iglobal_counter++;
+ }
+
+ return 1;
+};
+
+// Tell the caller whether the buffer still holds data (buf_count is non-zero).
+bool io::data_waiting()
+{
+    return buf_count != 0;
+}
+
+
+/*
+** Output one character of code.
+**
+** c - the character to write; '\0' means "use buf[0]". When that is '\0'
+**     too, nothing is written, but the column and last_written bookkeeping
+**     below still runs.
+**
+** The character goes to cout when output_to_stdout is set, otherwise to the
+** code output file. With addMarkers set, "/****/" is written first whenever
+** the previous output went through commentOut().
+*/
+void io::out(char c)
+{
+    if(c == '\0') // replace '\0' with the value of buf[0]
+        c = buf[0];
+
+    if(c != '\0') // make sure we have something to write
+    {
+        ostream &sink = output_to_stdout ? static_cast<ostream&>(cout)
+                                         : static_cast<ostream&>(o);
+
+        const bool comingFromComment = (inCode == 0) && addMarkers;
+        if(comingFromComment)
+            sink << "/****/";
+        inCode = 1;
+
+        // whitespace alone does not end the current comment region
+        switch(c)
+        {
+            case ' ':
+            case '\t':
+            case '\n':
+            case '\r':
+                break;
+            default:
+                inComment = 0;
+                break;
+        }
+
+        sink << c;
+
+        // keep track of sizes
+        ocounter += 1.0;        // this file
+        oglobal_counter += 1.0; // all the files
+    }
+
+#ifdef IODEBUG
+    cout << o_name << " << \"" << c << "\"" << endl;
+#endif
+
+    const bool lineBreak = (c == '\n') || (c == '\r');
+    if(!lineBreak)
+        output_column++;
+    else
+    {
+        // new line, return column to 0
+        output_line++;
+        output_column=0;
+    }
+
+    // newest written character is kept at the end of last_written
+    memmove(last_written,(last_written+1),(LAST_WRITTEN_LENGTH-1));
+    last_written[(LAST_WRITTEN_LENGTH-1)]=c;
+}
+
+/*
+** Output one character of comment text.
+**
+** c - the character to write; '\0' means "use buf[0]".
+**
+** The character goes to cerr when output_to_stdout is set, otherwise to the
+** comments output file. When the previous output was code (inComment == 0)
+** a new region starts: if regionsCount is positive and this region would
+** reach it, doneOutput is set and nothing is written; otherwise the region
+** counter is incremented and, with addMarkers set, a CODE marker is written
+** first.
+*/
+void io::commentOut(char c)
+{
+    ostream &sink = output_to_stdout ? static_cast<ostream&>(cerr)
+                                     : static_cast<ostream&>(co);
+
+    if(inComment == 0)
+    {
+        const bool limitReached =
+            (regionsCount > 0) && (outputRegions + 1 == regionsCount);
+        if(limitReached)
+        {
+            doneOutput = 1;
+            return;
+        }
+
+        // the two destinations use slightly different marker texts
+        if(addMarkers)
+            sink << (output_to_stdout ? "\nCODE\n " : "\nCODE\n");
+
+        outputRegions += 1;
+    }
+
+    inCode = 0;
+    inComment = 1;
+
+    // replace '\0' with the value of buf[0]
+    const char toWrite = (c != '\0') ? c : buf[0];
+    sink << toWrite;
+}
+
+
+
+// True when the last init() opened every file it needed.
+bool io::ok()
+{
+    return ready;
+}
+
+// Bytes read from the current input file (reset by every init()).
+double io::input_bytes()
+{
+    return this->icounter;
+}
+
+// Bytes written through out() for the current file (reset by every init()).
+double io::output_bytes()
+{
+    return this->ocounter;
+}
+
+// Bytes read across all files handled by this object (reset on first init() only).
+double io::global_input_bytes()
+{
+    return this->iglobal_counter;
+}
+
+// Bytes written through out() across all files (reset on first init() only).
+double io::global_output_bytes()
+{
+    return this->oglobal_counter;
+}
diff --git a/comments/io.h b/comments/io.h
new file mode 100644
index 0000000..d7e4be0
--- /dev/null
+++ b/comments/io.h
@@ -0,0 +1,66 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef IO_H
+#define IO_H
+
+#include "main.h"
+
+class io // Reads an input file through a sliding character buffer and writes code and comment text to separate destinations.
+{
+ private:
+ int input_line, input_column, output_line, output_column, buf_count; // positions maintained by in()/out(); buf_count is what data_waiting() tests
+ int inComment; // 1 after commentOut(), 0 after out() wrote a non-whitespace char, -1 after init()
+ int inCode; // 1 after out() wrote a char, 0 after commentOut(), -1 after init()
+ int outputRegions; // regions started by commentOut() since the last init()
+ double icounter, iglobal_counter, ocounter, oglobal_counter; // byte counts: per file (i/o) and across files (global)
+ bool ready; // cleared by init() when a file cannot be opened; returned by ok()
+ ifstream i; // input file
+ ofstream o; // code output file
+ ofstream co; // comments output file
+
+ public:
+ char i_name[FILE_NAME_LENGTH], o_name[FILE_NAME_LENGTH],c_name[FILE_NAME_LENGTH]; // file names stored by init(); o_name/c_name are empty in stdout mode
+ char buf[BUF_LENGTH], last_written[LAST_WRITTEN_LENGTH], // buf: input window, in() appends at the end; last_written: newest char at the end
+ last_read[LAST_READ_LENGTH]; // chars that left the front of buf, newest at index 0
+ // io source/destination modifications
+ bool input_from_stdin, output_to_stdout, // output_to_stdout: code goes to cout, comments to cerr; NOTE(review): the input_from_stdin checks in io.cpp are commented out
+ first_init, file_open; // first_init: true until the first init(); file_open: true between init() and done()
+ int addMarkers; // non-zero: out() and commentOut() write marker text when switching between code and comments
+ int regionsCount; // when > 0, limits how many regions commentOut() will start
+ int doneOutput; // set to 1 by commentOut() once the region limit is reached; reset by init()
+
+ io(char* in, char* out, char *comments, bool testing=false); // construct and call init() right away
+ io(); // construct without opening anything; ok() is false until init()
+ ~io(); // closes all three streams
+ void init(char* in, char* out, char *comments, bool testing=false); // reset state and open the files
+ void done(bool testing=false); // close the files opened by init()
+ int get_input_line();
+ int get_input_column();
+ int get_output_line();
+ int get_output_column();
+ int in(); // advance the input window by one character; always returns 1
+ bool data_waiting(); // true while buf_count is non-zero
+ double input_bytes();
+ double output_bytes();
+ double global_input_bytes();
+ double global_output_bytes();
+ void out(char c='\0'); // write a code character; '\0' means buf[0]
+ void commentOut(char c='\0'); // write a comment character; '\0' means buf[0]
+ bool ok(); // returns `ready`
+};
+
+#endif
diff --git a/comments/main.cpp b/comments/main.cpp
new file mode 100644
index 0000000..50035c4
--- /dev/null
+++ b/comments/main.cpp
@@ -0,0 +1,62 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#include "main.h"
+#include "dformat.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define MSG_PRE "main()"
+
+/*
+** Program entry point.
+**
+** Builds the formatter from the command line, caps the CPU time of the
+** process, then processes every file the formatter hands out through next().
+**
+** Returns 1 when the CPU limit cannot be set or the formatter is not usable
+** after construction. Returns 0 otherwise -- including when errors were
+** reported while processing; that exit status is kept unchanged.
+*/
+int main(int argc, char** argv)
+{
+    dformat dformat(argc,argv);
+    struct rlimit Limit;
+
+    // Limit the process to 10 seconds of CPU time (soft and hard limit).
+    Limit.rlim_cur = 10;
+    Limit.rlim_max = 10;
+    if (setrlimit(RLIMIT_CPU, &Limit) == -1) {
+        perror("setrlimit(RLIMIT_CPU)"); // name the call that failed
+        // return instead of exit(): exit() would skip the destructor of the
+        // automatic dformat object
+        return 1;
+    }
+
+    if(!dformat.ok())
+    {
+#ifdef DEBUG
+        cerr << "main() - dformat not ok." << endl;
+#endif
+        return 1;
+    }
+
+    while(dformat.next())
+    {
+        dformat.format();
+        dformat.done();
+    }
+
+    if(!dformat.ok())
+    {
+        cerr << MSG_PRE << " Errors occured while trying to complete requests."
+             << endl;
+    }
+
+    return 0;
+}
diff --git a/comments/main.h b/comments/main.h
new file mode 100644
index 0000000..a88e465
--- /dev/null
+++ b/comments/main.h
@@ -0,0 +1,42 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#ifndef MAIN_H
+#define MAIN_H
+
+// Suffixes for generated file names.
+// NOTE(review): presumably appended to the input name for the code and
+// comments outputs -- confirm against dformat.cpp.
+#define DEFAULT_MANGLED_POSTFIX ".code"
+#define DEFAULT_COMMENTS_POSTFIX ".comments"
+
+/* DO NOT EDIT BELOW THIS LINE */
+// Sizes of the fixed arrays in class io (see io.h).
+#define BUF_LENGTH 512
+#define FILE_NAME_LENGTH 200
+#define LAST_READ_LENGTH 3
+#define LAST_WRITTEN_LENGTH 3
+// Not used by io; consumed elsewhere in the project.
+#define BYTES_PER_K 1024.0
+#define FLAG_HISTORY_MAX 5
+#define DEFINE_SEARCH_PRECISION 2
+
+#include <fstream>
+#include <iostream>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// The project headers below use unqualified standard names (ifstream,
+// ofstream, ...). The using-directive therefore has to come after the
+// standard headers, which declare namespace std, and before the project
+// headers that depend on it.
+using namespace std;
+
+#include "func.h"
+#include "io.h"
+#include "dformat.h"
+#endif