summaryrefslogtreecommitdiff
path: root/comments/dformat.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'comments/dformat.cpp')
-rw-r--r--comments/dformat.cpp593
1 files changed, 593 insertions, 0 deletions
diff --git a/comments/dformat.cpp b/comments/dformat.cpp
new file mode 100644
index 0000000..3dbef34
--- /dev/null
+++ b/comments/dformat.cpp
@@ -0,0 +1,593 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+ * mangle.cpp - Removes ALL comments and/or formatting from C/C++ code while
+ * keeping what is needed so that the program still operates
+ * the same exact way as before the conversion.
+ *
+ */
+
+#include "main.h"
+#define CLASS_ERROR_PRE "dformat"
+
+dformat::dformat(int a, char** av)
+{
+ ready=true; // We're ok unless otherwise changed
+ current_arg=1; // Start at argument 1
+ current_file=0;
+ argc=a; argv=av;
+ io.regionsCount = 0;
+ io.doneOutput = 0;
+
+ if(!load_arguments("-x")) cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl;
+ test_args=true; // Lets make sure the arg syntax is good first
+ while(next()) ;
+
+#ifdef DEBUG
+ cerr << CLASS_ERROR_PRE << "::dformat() argument syntax ok." << endl;
+#endif
+
+ current_arg=1;
+ io.done(test_args);
+ test_args=false;
+ // Clear out the settings
+ if(!load_arguments("-x")) cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl;
+};
+
+dformat::~dformat() {};
+
+bool dformat::next()
+{
+ char temp[FILE_NAME_LENGTH]={"\0"};
+ char temp2[FILE_NAME_LENGTH]={"\0"};
+
+ if(!ready) // Can't work if I'm not ready.
+ return false;
+
+#ifdef DEBUG
+ if(test_args)
+ cerr << CLASS_ERROR_PRE << "::next() testing argument ["
+ << argv[current_arg] << "]" << endl;
+#endif
+
+ if(current_arg<argc)
+ {
+ if(argv[current_arg][0]=='-') // we have args waiting
+ {
+ if((current_arg+1)<argc || argv[current_arg][1]=='v')
+ {
+ // load args and move to next
+ if(!load_arguments(argv[current_arg++]))
+ {
+ usage();
+ return false;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ cerr << CLASS_ERROR_PRE << "::next() Argument included without a file." << endl;
+#endif
+ ready=false;
+ usage();
+ return false;
+ }
+ }
+
+ io.done(test_args); // Finish it off if needed.
+
+ strcpy(temp,argv[current_arg]);
+ strcat(temp,DEFAULT_MANGLED_POSTFIX);
+ strcpy(temp2,argv[current_arg]);
+ strcat(temp2,DEFAULT_COMMENTS_POSTFIX);
+ io.init(argv[current_arg],temp, temp2);
+
+ if(!io.ok())
+ {
+#ifdef DEBUG
+ cerr << CLASS_ERROR_PRE << "::next() io object not ok." << endl;
+#endif
+ ready=false;
+ return false;
+ }
+ current_arg++; // Done messing with this one, move to next
+ }
+ else if(argc==1) // tisk tisk...you need atleast 2 arguments
+ {
+ usage();
+ ready=false;
+ return false;
+ }
+ else
+ return false;
+
+ if(!test_args)
+ current_file++;
+
+ return true; // all is good
+};
+
+void dformat::done()
+{
+ if(append_newline)
+ io.out('\n');
+
+ if(!tabular_delimited_result && !comma_delimited_result)
+ {
+ if (!io.output_to_stdout) {
+ cerr << "[" << current_file << "] \"" << io.i_name << "\" (" << io.input_bytes() << "b) ";
+
+ cerr << ">> \"" << io.o_name << "\" (" << io.output_bytes() << "b) (" << (100.0-(100.0*(io.output_bytes()/io.input_bytes()))) << "% reduced)";
+ cerr << endl;
+ }
+ }
+ else if(tabular_delimited_result) // print in tabular form
+ cerr << current_file
+ << "\t" << io.i_name
+ << "\t" << io.o_name
+ << "\t" << io.input_bytes()
+ << "\t" << io.output_bytes()
+ << "\t" << (100.0-(100.0*(io.output_bytes()/io.input_bytes())))
+ << endl;
+ else if(comma_delimited_result)
+ cerr << current_file
+ << "," << io.i_name
+ << "," << io.o_name
+ << "," << io.input_bytes()
+ << "," << io.output_bytes()
+ << "," << (100.0-(100.0*(io.output_bytes()/io.input_bytes())))
+ << endl;
+
+ io.done();
+};
+
+void dformat::usage()
+{
+ cerr << "Usage: " << NAME << " <options> [file1] <options> <file2> <etc>"
+ << endl << " -r Leave CR/LF's"
+ << endl << " -o output to STDOUT"
+ << endl << " -n Append newline"
+ << endl << " -x Use default options (nulls all previous ones)"
+ << endl << " -d Leave in preprocessor whitespace"
+ // << endl << " -w Write over original"
+// << endl << " -i input from STDIN"
+ << endl << " -l Do no mangling"
+ << endl << " -m Do not add markers to output"
+ << endl << " -t Print summary in tab delimited form"
+ << endl << " -C Print summary in comma delimited form"
+ << endl << " -v Print version"
+ << endl << " -c<Number> Number of comment regions"
+ << endl;
+}
+
+bool dformat::ok()
+{
+ return (ready ? true : false);
+};
+
+bool dformat::load_arguments(char* str)
+{
+ if(strlen(str)==0)
+ return false;
+
+ for(int x=1; x<(signed)strlen(str); x++)
+ {
+ switch(str[x])
+ {
+ case 'x':
+ io.input_from_stdin=false;
+ io.output_to_stdout=false;
+ io.addMarkers=true;
+ io.regionsCount = 0;
+ append_newline=false;
+ comments_only=true;
+ keep_preprocessor_whitespace=false;
+ tabular_delimited_result=false;
+ comma_delimited_result=false;
+ leave_newline=false;
+ no_modify=false;
+ break;
+
+#ifdef asdfasd
+ case 'w':
+ io.write_over_original=true;
+ break;
+
+#endif
+ case 'm':
+ io.addMarkers=false;
+ break;
+
+ case 'n':
+ append_newline=true;
+ break;
+ case 'c':
+ // next token should be an integer..
+ {
+ int i=1;
+ char temp[256];
+ while ((x+i < strlen(str)) &&
+ str[x+i] >= '0' &&
+ str[x+i] <= '9' &&
+ x+i < 256
+ ) {
+ temp[i-1] = str[x+i];
+ i++;
+ }
+ temp[i-1] =0;
+ if (i == 1 || i > 255) {
+ cerr << "Illegal number of comment regions for -c option" << i
+ << endl;
+ exit(1);
+ }
+ io.regionsCount = atoi(temp);
+ // cerr << "Number of regions [" << io.regionsCount << "]" << endl;
+ x+=i-1;
+ }
+ break;
+
+ case 'r':
+ leave_newline=true;
+ break;
+
+ case 'd':
+ keep_preprocessor_whitespace=true;
+ break;
+
+ case 't':
+ tabular_delimited_result=true;
+ break;
+
+ case 'C':
+ comma_delimited_result=true;
+ break;
+
+ case 'v':
+ version();
+ exit(0);
+ break;
+
+ case 'l':
+ no_modify=true;
+ break;
+
+ case 'o':
+ io.output_to_stdout=true;
+ break;
+
+// case 'i':
+// io.input_from_stdin=true;
+// break;
+
+ default: // Unknown option
+ usage();
+ return false;
+ break;
+ }
+ }
+ return true;
+};
+
+// And now...the meat and potatos
+bool dformat::format()
+{
+ int x=0;
+ char c='\0';
+ bool tbool=false;
+
+#ifdef DEBUG
+ cerr << CLASS_ERROR_PRE << "::format() Now formatting [" << io.i_name << "]"
+ << endl;
+#endif
+
+ // Reset the variables
+ for(x=0; x<FLAG_HISTORY_MAX; x++)
+ {
+ flag_history[x].in_single_quote=false;
+ flag_history[x].in_double_quote=false;
+ flag_history[x].in_line_comment=false;
+ flag_history[x].in_star_comment=false;
+ flag_history[x].in_preprocessor=false;
+ flag_history[x].in_hex=false;
+ flag_history[x].num_backslashes=0;
+ }
+
+ // keep grabbing data as long as its there
+ while(io.in() && io.data_waiting())
+ {
+ if (io.doneOutput) {
+ break;
+ }
+ if(no_modify)
+ {
+ io.out();
+ continue;
+ }
+
+ switch(io.buf[0])
+ {
+ case '\'':
+ case '\"':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ if(io.buf[0]=='\'')
+ flag_history[0].in_single_quote=true;
+ else if(io.buf[0]=='\"')
+ flag_history[0].in_double_quote=true;
+ }
+ else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote)
+ {
+ if((flag_history[0].num_backslashes%2)) ; // just an escaped quote, reset number of backslashes
+ else
+ {
+ if(io.buf[0]=='\'' && flag_history[0].in_single_quote)
+ flag_history[0].in_single_quote=false;
+ if(io.buf[0]=='\"' && flag_history[0].in_double_quote)
+ flag_history[0].in_double_quote=false;
+ }
+ }
+ io.out();
+
+ flag_history[0].num_backslashes=0; // null out number of backslashes
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ } else {
+ io.commentOut(); //dmg
+ }
+
+ break;
+
+ case '/':
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ if(io.buf[1]=='/' && !flag_history[0].in_line_comment && !flag_history[0].in_star_comment) {
+ flag_history[0].in_line_comment=true;
+ io.commentOut(); //dmg
+ }
+ else if(io.buf[1]=='*' && !flag_history[0].in_star_comment) {
+ flag_history[0].in_star_comment=true;
+ io.commentOut(); //dmg
+ }
+ else if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ io.out();
+ else
+ io.commentOut(); //dmg
+ }
+ else
+ io.out();
+
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ break;
+
+ case '*':
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ if(io.buf[1]=='/' && flag_history[0].in_star_comment &&
+ flag_history[2].in_star_comment)
+ {
+ io.commentOut();
+ /* We need to write a \n after the comment... otherwise it gets very, very messy */
+
+ flag_history[0].in_star_comment=false;
+ io.in(); // Jump ahead one, we dont want the '/' used
+ io.commentOut();
+ io.commentOut('\n');
+ continue;
+ } else if(!flag_history[0].in_star_comment && !flag_history[0].in_line_comment) {
+ io.out();
+ } else {
+ io.commentOut();
+ }
+ }
+ else
+ io.out();
+
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ break;
+
+ case '#':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote)
+ {
+ tbool=contain_preprocessor((io.buf+1));
+ // Make sure we have the required carriage return before the preprocessor (if it really is a preprocessor)
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)] &&
+ (c=last_non_whitespace(io.last_written,(LAST_WRITTEN_LENGTH-1)))!='\r' && c!='\n' &&
+ tbool)
+ io.out('\n');
+
+ if(tbool)
+ flag_history[0].in_preprocessor=true;
+ }
+ io.out();
+ flag_history[0].num_backslashes=0; // null out number of backslashes
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ } else {
+ io.commentOut();
+ }
+ break;
+
+ case '\n':
+ case '\r':
+ if(!flag_history[0].in_star_comment)
+ {
+ if((((is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)]) || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)]) ||
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_') &&
+ (is_letter(io.buf[1]) || is_number(io.buf[1]) || io.buf[1]=='_'))
+ || flag_history[0].in_preprocessor || io.buf[1]=='#') && !flag_history[0].in_single_quote &&
+ !flag_history[0].in_double_quote && !comments_only)
+ {
+ if(flag_history[0].in_preprocessor)
+ {
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\\') // make sure its not multi-line
+ {
+ flag_history[0].in_preprocessor=false;
+ io.out();
+ }
+ else if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!=' ' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\t' &&
+ !leave_newline)
+ io.out(' '); // need atleast one space inbetween preprocessor items
+ else
+ io.out();
+ }
+ else if(io.buf[1]=='#')
+ {
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\0' && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' &&
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n'))
+ io.out();
+ }
+ else
+ io.out(' ');
+ }
+ else if(comments_only && !flag_history[0].in_line_comment)
+ io.out();
+ else if(leave_newline && !flag_history[0].in_line_comment)
+ io.out();
+ else if(flag_history[0].in_hex) // hex values need a space after them, so put a space in place of the crlf
+ {
+ io.out(' ');
+ flag_history[0].in_hex=false; // not in the hex value anymore
+ }
+ else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote)
+ io.out();
+
+ if(flag_history[0].in_line_comment && io.last_read[0]!='\\')
+ {
+ if(comments_only && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' ||
+ io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n')) {
+ io.out();
+ io.commentOut(); //dmg print end of line also
+ }
+ flag_history[0].in_line_comment=false;
+ }
+
+ if(!flag_history[0].in_line_comment)
+ {
+ flag_history[0].in_hex=false; // we're not in a hex value anymore
+ flag_history[0].num_backslashes=0; // Null out number of backslashes
+ }
+ } else { // we are in a start ca
+ io.commentOut();
+#ifdef DUMPSPACES
+ io.out();
+#endif
+ }
+ break;
+
+ case ' ':
+ case '\t':
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ if(flag_history[0].in_single_quote ||
+ flag_history[0].in_double_quote ||
+ comments_only) { // the only cases where we always output all of them
+ io.out();
+ }
+ else if( (flag_history[0].in_preprocessor || flag_history[0].in_hex) &&
+ ((io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r') ||
+ (keep_preprocessor_whitespace && flag_history[0].in_preprocessor)))
+ {
+ /* preprocessors require atleast a single whitespace char preserved both in front and behind non-whitespace
+ characters. Hex values require a space afterwards. */
+ if((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && !is_whitespace(io.buf[1]) &&
+ !(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='#' && contain_preprocessor(io.buf)))
+ || keep_preprocessor_whitespace) {
+ io.out();
+ } else {
+ io.commentOut();
+ }
+
+ if(flag_history[0].in_hex) // get out of hex if we are in one
+ flag_history[0].in_hex=false;
+ }
+ else if((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && (is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)])
+ || io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_') || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)]))
+ &&
+ (!is_whitespace(io.buf[1]) && (is_letter(io.buf[1])
+ || io.buf[1]=='_') || is_number(io.buf[1])) )
+ io.out();
+ else if(!strncmp((io.buf+1),"...",3) || !strncmp((io.last_written+(LAST_WRITTEN_LENGTH-3)),"...",3))
+ io.out(); // need space before (and after) these if already there
+ else if(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='/' && io.buf[1]=='*')
+ io.out(); // preserve whitespace so that if the file is mangled again it isn't construed as the start of a comment
+ else {
+ io.commentOut();
+ }
+ } else {
+ io.commentOut();
+ }
+ break;
+
+ default:
+ if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment)
+ {
+ // increase num_backslashes if this is a backslash, else set to 0
+ flag_history[0].num_backslashes=(io.buf[0]=='\\' ? (flag_history[0].num_backslashes+1) : 0);
+
+ // colons pre-separated by whitespace or a cr/lf still need separation (c++ specific)
+ if(io.last_written[(LAST_WRITTEN_LENGTH-1)]==':' &&
+ (is_whitespace(io.last_read[0]) || io.last_read[0]=='\r' || io.last_read[0]=='\n')
+ && io.buf[0]==':')
+ io.out(' ');
+
+ if(io.buf[0]=='\\' && (io.buf[1]=='\n' || io.buf[1]=='\r') && !comments_only && !leave_newline)
+ {
+ flag_history[x].num_backslashes=0;
+ io.in(); // skip over newline
+ }
+ else
+ {
+ // check to see if we are getting into a hex value as it needs a space after it
+ if((!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) &&
+ io.buf[0]=='0' && (io.buf[1]=='x' || io.buf[1]=='X') && (is_letter(io.buf[2]) || is_number(io.buf[2])))
+ flag_history[0].in_hex=true;
+
+ io.out();
+ }
+
+ if(flag_history[0].in_hex && (!is_letter(io.buf[0]) && !is_number(io.buf[0])))
+ flag_history[0].in_hex=false;
+ } else {
+ io.commentOut();
+ }
+ break;
+ }
+
+ // Remember the flags of previous iterations so we may reference them
+ for(x=(FLAG_HISTORY_MAX-1); x; x--)
+ flag_history[x]=flag_history[(x-1)];
+ }
+
+ return true;
+}
+
+void dformat::version()
+{
+ cerr << NAME << " v" << VERSION
+#ifdef BETA
+ << "b"
+#endif
+ << " by Jon Newman, and adapted by Daniel M. German, based on Mangle" << endl;
+};