diff options
-rw-r--r-- | comments/CHANGES | 7 | ||||
-rw-r--r-- | comments/Makefile | 48 | ||||
-rw-r--r-- | comments/README | 13 | ||||
-rw-r--r-- | comments/comments.1 | 148 | ||||
-rw-r--r-- | comments/comments.tar.gz | bin | 38905 -> 0 bytes | |||
-rw-r--r-- | comments/dformat.cpp | 593 | ||||
-rw-r--r-- | comments/dformat.h | 64 | ||||
-rw-r--r-- | comments/func.cpp | 103 | ||||
-rw-r--r-- | comments/func.h | 27 | ||||
-rw-r--r-- | comments/io.cpp | 303 | ||||
-rw-r--r-- | comments/io.h | 66 | ||||
-rw-r--r-- | comments/main.cpp | 62 | ||||
-rw-r--r-- | comments/main.h | 42 |
13 files changed, 1473 insertions, 3 deletions
diff --git a/comments/CHANGES b/comments/CHANGES new file mode 100644 index 0000000..c87b288 --- /dev/null +++ b/comments/CHANGES @@ -0,0 +1,7 @@ +Version 1.1. + +* Limit number of comments output + +Version 1.0 + +* First release diff --git a/comments/Makefile b/comments/Makefile new file mode 100644 index 0000000..3c024a3 --- /dev/null +++ b/comments/Makefile @@ -0,0 +1,48 @@ +VERSION = 4 +NAME = comments +BETA = no + +DIR = $(NAME)-v$(VERSION) + +DEFINES = -DVERSION=\"$(VERSION)\" -DNAME=\"$(NAME)\" #-DBETA +DISTROFILES = Makefile README CHANGES main.cpp main.h io.cpp io.h dformat.cpp dformat.h func.cpp func.h $(NAME).1 +OPTIONS = -O2 -Wall -Wno-deprecated + +#DEBUG = -g -DDEBUG -DIODEBUG + + +all: + g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c io.cpp + g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c dformat.cpp + g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c func.cpp + g++ $(DEBUG) $(OPTIONS) $(DEFINES) -c main.cpp + g++ -o $(NAME) main.o io.o dformat.o func.o + +install: + @echo -n "Installing binary: " + @cp $(NAME) /usr/local/bin + @chmod 755 /usr/local/bin/$(NAME) + @echo "Done." + @echo -n "Installing man page: " + @cp $(NAME).1 /usr/local/man/man1 + @echo -e "Done.\n" + @echo -n `$(NAME)` + @echo -e "\nPlease read the README if you have not already done so." + +distro: + rm -rf $(DIR)*.tar.gz $(NAME) $(DIR) + @if [ "$(BETA)" = "yes" ]; then \ + mkdir $(DIR)b; \ + cp $(DISTROFILES) $(DIR)b; \ + tar -cvzf $(DIR)b.tar.gz $(DIR)b; \ + rm -rf $(DIR)b; \ + else \ + mkdir $(DIR); \ + cp $(DISTROFILES) $(DIR); \ + tar -cvzf $(DIR).tar.gz $(DIR); \ + rm -rf $(DIR); \ + fi + +clean: + rm -f *.o comments + diff --git a/comments/README b/comments/README index 7a0b689..ef5ff68 100644 --- a/comments/README +++ b/comments/README @@ -1,9 +1,16 @@ -In order to use Ninka you must install the comments extractor found in this directory. +In order to use Ninka you must install the comments extractor found +in this directory. 
If Ninka does not find it in the executable path, it will use the -first 400 lines from source code, and is likely to reduce the precision of Ninka. - +first 400 lines from source code, and is likely to reduce the precision +of Ninka. +Code and Comments separator version 1.1 written by Daniel M. German +(dmg@uvic.ca) based on Mangle v3.0.7 by Jon Newman <mangle@biz0r.biz> +http://biz0r.biz. Unfortunately the original URL does not exist any +more. +This program is distributed under the GNU General Public License +Version 2 or (at your option) any later version. diff --git a/comments/comments.1 b/comments/comments.1 new file mode 100644 index 0000000..3b78686 --- /dev/null +++ b/comments/comments.1 @@ -0,0 +1,148 @@ +.TH comments 1 +.SH NAME +comments - comments is an open source, C/C++ de-commentor. It will +remove all comments and output them to a different file. + +.SH SYNOPSIS +comments <options> [file1] <options> <file2> <etc> + -r Don't remove CR/LF + -c Only remove comments + -o Output to stdout code and stderr comments + -n Append newline to source + -x Null out options + -d Keep spaces in preprocessor entries + -l Do no mangling + -m Place markers where code or comments are + -t Print summary in tab delimited form + -C Print summary in comma delimited form + -v Print version +.SH DESCRIPTION +.B Comments +is a programming utility used to strip all comments and/or formatting from C/C++ code. It has a variety of command line +options that make it a powerful utility. +.SH NOTES +.B Options + +Enabling options in comments is simple and straightforward. Here is an +example of how to just remove the comments from a source: + +.B ~# comments -c foo.cpp + +More advanced features include the ability to change the options for each +file. Here is an example of how to remove the comments from one source, +output to stdout, but remove everything from the second source. + +.B ~# comments -co foo.cpp -x bar.cpp + +The '-x' option makes comments remove all options.
Here is an example similar +to the one above, but it instead leaves the CR/LF's in the second source. + +.B ~# comments -co foo.cpp -xr bar.cpp + +As you can see, the '-x' option was used first to remove all current +options, and then the '-r' option was appended to make comments keep the +CR/LF's. + +If you don't specify any options for a file, it will use the previous file's +options. + +.B ~# comments -co foo.cpp bar.cpp + +That command will use the '-co' options for both foo.cpp and bar.cpp. + +.SH OPTIONS + +.TP +.B -r Don't remove CR/LF + +With this option enabled, and no others, it will remove everything but line feeds. + +.TP +.B -c Only remove comments + +With this option enabled, and no others, it will remove only comments from the source. + +.TP +.B -o Output to stdout + +This option specifies the output to be printed to STDOUT. This is used primarily if you wish to redirect the data. + +.TP +.B -n Append newline to source + +This option will make comments append a newline to the end of the source file. This is used by some people using C source code because some C compilers complain about not having a newline at the end of the source. + +.TP +.B -x Null out options + +This option NULLs out all previous options so that others may be specified. + +.TP +.B -d Keep spaces in preprocessor entries + +This option makes comments not do any reformatting of preprocessor entries. + +.TP +.B -l Do no mangling + +This option tells comments not to modify the input at all. This option was only included for completeness. + +.TP +.B -m Place markers in output + +This option tells comments to place CODE and /****/ where code or +comments are supposed to be in their corresponding outputs. + +.TP +.B -t Print summary in tab delimited form +.TP +.B -C Print summary in comma delimited form + +This option makes comments print the summary information in tabular form for easier input into other programs.
+ +Example output: + +root@biz:~/comments# comments -t dformat.h +.RS +1 dformat.h dformat.h.commentsd 1147 565 50.7411 + +Explanation of each column: +.RS +.RE +1: The number of the file in order (1, 2, 3, etc). +.RS +.RE +2: Name of the input file. +.RS +.RE +3: Name of the output file. +.RS +.RE +4: Original size of the input (bytes). +.RS +.RE +5: Output size (bytes). +.RS +.RE +6: Percentage of reduction calculated by (100 - 100*(output_bytes/input_bytes)) +.RS +.RE + +.I NOTE: +If the '-o' option (output to stdout) is given with '-t', the third column will be NULL. +.RS +.RE +root@biz:~/comments# comments -to main.cpp >output_file +.RS +.RE +1 main.cpp 844 368 56.3981 +.RE + +.TP +.B -v Print version + +This option causes comments to print the version info and exit immediately. + +.SH BUGS +None known. Email bug reports to +.B biz@biz0r.biz diff --git a/comments/comments.tar.gz b/comments/comments.tar.gz Binary files differ deleted file mode 100644 index 0921713..0000000 --- a/comments/comments.tar.gz +++ /dev/null diff --git a/comments/dformat.cpp b/comments/dformat.cpp new file mode 100644 index 0000000..3dbef34 --- /dev/null +++ b/comments/dformat.cpp @@ -0,0 +1,593 @@ +/* +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+* + * mangle.cpp - Removes ALL comments and/or formatting from C/C++ code while + * keeping what is needed so that the program still operates + * the same exact way as before the conversion. + * + */ + +#include "main.h" +#define CLASS_ERROR_PRE "dformat" + +dformat::dformat(int a, char** av) +{ + ready=true; // We're ok unless otherwise changed + current_arg=1; // Start at argument 1 + current_file=0; + argc=a; argv=av; + io.regionsCount = 0; + io.doneOutput = 0; + + if(!load_arguments("-x")) cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl; + test_args=true; // Lets make sure the arg syntax is good first + while(next()) ; + +#ifdef DEBUG + cerr << CLASS_ERROR_PRE << "::dformat() argument syntax ok." << endl; +#endif + + current_arg=1; + io.done(test_args); + test_args=false; + // Clear out the settings + if(!load_arguments("-x")) cerr << "Programmer goofed, you should not see this. Error clearing out arguments." << endl; +}; + +dformat::~dformat() {}; + +bool dformat::next() +{ + char temp[FILE_NAME_LENGTH]={"\0"}; + char temp2[FILE_NAME_LENGTH]={"\0"}; + + if(!ready) // Can't work if I'm not ready. + return false; + +#ifdef DEBUG + if(test_args) + cerr << CLASS_ERROR_PRE << "::next() testing argument [" + << argv[current_arg] << "]" << endl; +#endif + + if(current_arg<argc) + { + if(argv[current_arg][0]=='-') // we have args waiting + { + if((current_arg+1)<argc || argv[current_arg][1]=='v') + { + // load args and move to next + if(!load_arguments(argv[current_arg++])) + { + usage(); + return false; + } + } + else + { +#ifdef DEBUG + cerr << CLASS_ERROR_PRE << "::next() Argument included without a file." << endl; +#endif + ready=false; + usage(); + return false; + } + } + + io.done(test_args); // Finish it off if needed. 
+ + strcpy(temp,argv[current_arg]); + strcat(temp,DEFAULT_MANGLED_POSTFIX); + strcpy(temp2,argv[current_arg]); + strcat(temp2,DEFAULT_COMMENTS_POSTFIX); + io.init(argv[current_arg],temp, temp2); + + if(!io.ok()) + { +#ifdef DEBUG + cerr << CLASS_ERROR_PRE << "::next() io object not ok." << endl; +#endif + ready=false; + return false; + } + current_arg++; // Done messing with this one, move to next + } + else if(argc==1) // tisk tisk...you need atleast 2 arguments + { + usage(); + ready=false; + return false; + } + else + return false; + + if(!test_args) + current_file++; + + return true; // all is good +}; + +void dformat::done() +{ + if(append_newline) + io.out('\n'); + + if(!tabular_delimited_result && !comma_delimited_result) + { + if (!io.output_to_stdout) { + cerr << "[" << current_file << "] \"" << io.i_name << "\" (" << io.input_bytes() << "b) "; + + cerr << ">> \"" << io.o_name << "\" (" << io.output_bytes() << "b) (" << (100.0-(100.0*(io.output_bytes()/io.input_bytes()))) << "% reduced)"; + cerr << endl; + } + } + else if(tabular_delimited_result) // print in tabular form + cerr << current_file + << "\t" << io.i_name + << "\t" << io.o_name + << "\t" << io.input_bytes() + << "\t" << io.output_bytes() + << "\t" << (100.0-(100.0*(io.output_bytes()/io.input_bytes()))) + << endl; + else if(comma_delimited_result) + cerr << current_file + << "," << io.i_name + << "," << io.o_name + << "," << io.input_bytes() + << "," << io.output_bytes() + << "," << (100.0-(100.0*(io.output_bytes()/io.input_bytes()))) + << endl; + + io.done(); +}; + +void dformat::usage() +{ + cerr << "Usage: " << NAME << " <options> [file1] <options> <file2> <etc>" + << endl << " -r Leave CR/LF's" + << endl << " -o output to STDOUT" + << endl << " -n Append newline" + << endl << " -x Use default options (nulls all previous ones)" + << endl << " -d Leave in preprocessor whitespace" + // << endl << " -w Write over original" +// << endl << " -i input from STDIN" + << endl << " -l Do no mangling" + 
<< endl << " -m Do not add markers to output" + << endl << " -t Print summary in tab delimited form" + << endl << " -C Print summary in comma delimited form" + << endl << " -v Print version" + << endl << " -c<Number> Number of comment regions" + << endl; +} + +bool dformat::ok() +{ + return (ready ? true : false); +}; + +bool dformat::load_arguments(char* str) +{ + if(strlen(str)==0) + return false; + + for(int x=1; x<(signed)strlen(str); x++) + { + switch(str[x]) + { + case 'x': + io.input_from_stdin=false; + io.output_to_stdout=false; + io.addMarkers=true; + io.regionsCount = 0; + append_newline=false; + comments_only=true; + keep_preprocessor_whitespace=false; + tabular_delimited_result=false; + comma_delimited_result=false; + leave_newline=false; + no_modify=false; + break; + +#ifdef asdfasd + case 'w': + io.write_over_original=true; + break; + +#endif + case 'm': + io.addMarkers=false; + break; + + case 'n': + append_newline=true; + break; + case 'c': + // next token should be an integer.. 
+ { + int i=1; + char temp[256]; + while ((x+i < strlen(str)) && + str[x+i] >= '0' && + str[x+i] <= '9' && + x+i < 256 + ) { + temp[i-1] = str[x+i]; + i++; + } + temp[i-1] =0; + if (i == 1 || i > 255) { + cerr << "Illegal number of comment regions for -c option" << i + << endl; + exit(1); + } + io.regionsCount = atoi(temp); + // cerr << "Number of regions [" << io.regionsCount << "]" << endl; + x+=i-1; + } + break; + + case 'r': + leave_newline=true; + break; + + case 'd': + keep_preprocessor_whitespace=true; + break; + + case 't': + tabular_delimited_result=true; + break; + + case 'C': + comma_delimited_result=true; + break; + + case 'v': + version(); + exit(0); + break; + + case 'l': + no_modify=true; + break; + + case 'o': + io.output_to_stdout=true; + break; + +// case 'i': +// io.input_from_stdin=true; +// break; + + default: // Unknown option + usage(); + return false; + break; + } + } + return true; +}; + +// And now...the meat and potatos +bool dformat::format() +{ + int x=0; + char c='\0'; + bool tbool=false; + +#ifdef DEBUG + cerr << CLASS_ERROR_PRE << "::format() Now formatting [" << io.i_name << "]" + << endl; +#endif + + // Reset the variables + for(x=0; x<FLAG_HISTORY_MAX; x++) + { + flag_history[x].in_single_quote=false; + flag_history[x].in_double_quote=false; + flag_history[x].in_line_comment=false; + flag_history[x].in_star_comment=false; + flag_history[x].in_preprocessor=false; + flag_history[x].in_hex=false; + flag_history[x].num_backslashes=0; + } + + // keep grabbing data as long as its there + while(io.in() && io.data_waiting()) + { + if (io.doneOutput) { + break; + } + if(no_modify) + { + io.out(); + continue; + } + + switch(io.buf[0]) + { + case '\'': + case '\"': + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) + { + if(io.buf[0]=='\'') + flag_history[0].in_single_quote=true; + else if(io.buf[0]=='\"') + 
flag_history[0].in_double_quote=true; + } + else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote) + { + if((flag_history[0].num_backslashes%2)) ; // just an escaped quote, reset number of backslashes + else + { + if(io.buf[0]=='\'' && flag_history[0].in_single_quote) + flag_history[0].in_single_quote=false; + if(io.buf[0]=='\"' && flag_history[0].in_double_quote) + flag_history[0].in_double_quote=false; + } + } + io.out(); + + flag_history[0].num_backslashes=0; // null out number of backslashes + flag_history[0].in_hex=false; // we're not in a hex value anymore + } else { + io.commentOut(); //dmg + } + + break; + + case '/': + if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) + { + if(io.buf[1]=='/' && !flag_history[0].in_line_comment && !flag_history[0].in_star_comment) { + flag_history[0].in_line_comment=true; + io.commentOut(); //dmg + } + else if(io.buf[1]=='*' && !flag_history[0].in_star_comment) { + flag_history[0].in_star_comment=true; + io.commentOut(); //dmg + } + else if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + io.out(); + else + io.commentOut(); //dmg + } + else + io.out(); + + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + flag_history[0].in_hex=false; // we're not in a hex value anymore + flag_history[0].num_backslashes=0; // Null out number of backslashes + } + break; + + case '*': + if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) + { + if(io.buf[1]=='/' && flag_history[0].in_star_comment && + flag_history[2].in_star_comment) + { + io.commentOut(); + /* We need to write a \n after the comment... 
otherwise it gets very, very messy */ + + flag_history[0].in_star_comment=false; + io.in(); // Jump ahead one, we dont want the '/' used + io.commentOut(); + io.commentOut('\n'); + continue; + } else if(!flag_history[0].in_star_comment && !flag_history[0].in_line_comment) { + io.out(); + } else { + io.commentOut(); + } + } + else + io.out(); + + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + flag_history[0].in_hex=false; // we're not in a hex value anymore + flag_history[0].num_backslashes=0; // Null out number of backslashes + } + break; + + case '#': + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + if(!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) + { + tbool=contain_preprocessor((io.buf+1)); + // Make sure we have the required carriage return before the preprocessor (if it really is a preprocessor) + if(io.last_written[(LAST_WRITTEN_LENGTH-1)] && + (c=last_non_whitespace(io.last_written,(LAST_WRITTEN_LENGTH-1)))!='\r' && c!='\n' && + tbool) + io.out('\n'); + + if(tbool) + flag_history[0].in_preprocessor=true; + } + io.out(); + flag_history[0].num_backslashes=0; // null out number of backslashes + flag_history[0].in_hex=false; // we're not in a hex value anymore + } else { + io.commentOut(); + } + break; + + case '\n': + case '\r': + if(!flag_history[0].in_star_comment) + { + if((((is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)]) || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)]) || + io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_') && + (is_letter(io.buf[1]) || is_number(io.buf[1]) || io.buf[1]=='_')) + || flag_history[0].in_preprocessor || io.buf[1]=='#') && !flag_history[0].in_single_quote && + !flag_history[0].in_double_quote && !comments_only) + { + if(flag_history[0].in_preprocessor) + { + if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\\') // make sure its not multi-line + { + flag_history[0].in_preprocessor=false; + io.out(); + } + else 
if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!=' ' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\t' && + !leave_newline) + io.out(' '); // need atleast one space inbetween preprocessor items + else + io.out(); + } + else if(io.buf[1]=='#') + { + if(io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\0' && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' && + io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n')) + io.out(); + } + else + io.out(' '); + } + else if(comments_only && !flag_history[0].in_line_comment) + io.out(); + else if(leave_newline && !flag_history[0].in_line_comment) + io.out(); + else if(flag_history[0].in_hex) // hex values need a space after them, so put a space in place of the crlf + { + io.out(' '); + flag_history[0].in_hex=false; // not in the hex value anymore + } + else if(flag_history[0].in_single_quote || flag_history[0].in_double_quote) + io.out(); + + if(flag_history[0].in_line_comment && io.last_read[0]!='\\') + { + if(comments_only && (io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r' || + io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n')) { + io.out(); + io.commentOut(); //dmg print end of line also + } + flag_history[0].in_line_comment=false; + } + + if(!flag_history[0].in_line_comment) + { + flag_history[0].in_hex=false; // we're not in a hex value anymore + flag_history[0].num_backslashes=0; // Null out number of backslashes + } + } else { // we are in a start ca + io.commentOut(); +#ifdef DUMPSPACES + io.out(); +#endif + } + break; + + case ' ': + case '\t': + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + if(flag_history[0].in_single_quote || + flag_history[0].in_double_quote || + comments_only) { // the only cases where we always output all of them + io.out(); + } + else if( (flag_history[0].in_preprocessor || flag_history[0].in_hex) && + ((io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\n' && io.last_written[(LAST_WRITTEN_LENGTH-1)]!='\r') || + (keep_preprocessor_whitespace && flag_history[0].in_preprocessor))) + { + 
/* preprocessors require atleast a single whitespace char preserved both in front and behind non-whitespace + characters. Hex values require a space afterwards. */ + if((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && !is_whitespace(io.buf[1]) && + !(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='#' && contain_preprocessor(io.buf))) + || keep_preprocessor_whitespace) { + io.out(); + } else { + io.commentOut(); + } + + if(flag_history[0].in_hex) // get out of hex if we are in one + flag_history[0].in_hex=false; + } + else if((!is_whitespace(io.last_written[(LAST_WRITTEN_LENGTH-1)]) && (is_letter(io.last_written[(LAST_WRITTEN_LENGTH-1)]) + || io.last_written[(LAST_WRITTEN_LENGTH-1)]=='_') || is_number(io.last_written[(LAST_WRITTEN_LENGTH-1)])) + && + (!is_whitespace(io.buf[1]) && (is_letter(io.buf[1]) + || io.buf[1]=='_') || is_number(io.buf[1])) ) + io.out(); + else if(!strncmp((io.buf+1),"...",3) || !strncmp((io.last_written+(LAST_WRITTEN_LENGTH-3)),"...",3)) + io.out(); // need space before (and after) these if already there + else if(io.last_written[(LAST_WRITTEN_LENGTH-1)]=='/' && io.buf[1]=='*') + io.out(); // preserve whitespace so that if the file is mangled again it isn't construed as the start of a comment + else { + io.commentOut(); + } + } else { + io.commentOut(); + } + break; + + default: + if(!flag_history[0].in_line_comment && !flag_history[0].in_star_comment) + { + // increase num_backslashes if this is a backslash, else set to 0 + flag_history[0].num_backslashes=(io.buf[0]=='\\' ? 
(flag_history[0].num_backslashes+1) : 0); + + // colons pre-separated by whitespace or a cr/lf still need separation (c++ specific) + if(io.last_written[(LAST_WRITTEN_LENGTH-1)]==':' && + (is_whitespace(io.last_read[0]) || io.last_read[0]=='\r' || io.last_read[0]=='\n') + && io.buf[0]==':') + io.out(' '); + + if(io.buf[0]=='\\' && (io.buf[1]=='\n' || io.buf[1]=='\r') && !comments_only && !leave_newline) + { + flag_history[x].num_backslashes=0; + io.in(); // skip over newline + } + else + { + // check to see if we are getting into a hex value as it needs a space after it + if((!flag_history[0].in_single_quote && !flag_history[0].in_double_quote) && + io.buf[0]=='0' && (io.buf[1]=='x' || io.buf[1]=='X') && (is_letter(io.buf[2]) || is_number(io.buf[2]))) + flag_history[0].in_hex=true; + + io.out(); + } + + if(flag_history[0].in_hex && (!is_letter(io.buf[0]) && !is_number(io.buf[0]))) + flag_history[0].in_hex=false; + } else { + io.commentOut(); + } + break; + } + + // Remember the flags of previous iterations so we may reference them + for(x=(FLAG_HISTORY_MAX-1); x; x--) + flag_history[x]=flag_history[(x-1)]; + } + + return true; +} + +void dformat::version() +{ + cerr << NAME << " v" << VERSION +#ifdef BETA + << "b" +#endif + << " by Jon Newman, and adapted by Daniel M. German, based on Mangle" << endl; +}; diff --git a/comments/dformat.h b/comments/dformat.h new file mode 100644 index 0000000..a1ae714 --- /dev/null +++ b/comments/dformat.h @@ -0,0 +1,64 @@ +/* +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. 
+** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + + +/* + * mangle.cpp - Removes ALL comments and/or formatting from C/C++ code while + * keeping what is needed so that the program still operates + * the same exact way as before the conversion. + * + * This program has been vigourously tested, if you find any logic errors + * where something should have been taken out that wasn't, please email me + * - mangle@biz0r.biz + * + */ + +#ifndef DFORMAT_H +#define DFORMAT_H +#include "main.h" + +class dformat +{ + private: + ::io io; + int current_arg, current_file, argc; + char** argv; + bool ready, test_args, tabular_delimited_result, comma_delimited_result; + // boolean variables used in the deformatting process + bool append_newline, leave_newline, comments_only, + keep_preprocessor_whitespace, no_modify; + struct fhist { + bool in_line_comment, in_star_comment, in_single_quote, + in_double_quote, in_preprocessor, in_hex; + int num_backslashes; + } flag_history[FLAG_HISTORY_MAX]; + + // Private functions + bool load_arguments(char* str); + void usage(); + void version(); + + public: + + dformat(int argc, char** argv); + ~dformat(); + bool next(); + void done(); + bool ok(); + bool format(); +}; + +#endif diff --git a/comments/func.cpp b/comments/func.cpp new file mode 100644 index 0000000..ab05c78 --- /dev/null +++ b/comments/func.cpp @@ -0,0 +1,103 @@ +/* +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "main.h" + +#define NUM_PREPROCESSOR_STR 14 +char *preprocessors[]={"define", + "include", + "undef", + "pragma", + "if", + "error", + "warning", + "else", + "elseif", + "elif", + "endif", + "ifdef", + "ifndef", + "ifdefine"}; + +bool is_letter(char ch) +{ + if((ch>='a' && ch<='z') || (ch>='A' && ch<='Z')) + return true; + else + return false; +} + +bool is_number(char ch, char next) +{ + if((ch>='0' && ch<='9') || (ch=='-' && is_number(next))) + return true; + else + return false; +} + +bool move(char* from, char* to) +{ + if(rename(from,to)) + return false; + else + return true; +} + +bool is_whitespace(char c) +{ + return ((c==' ' || c=='\t') ? true : false); +} + +bool contain_preprocessor(char* str) +{ + int x, y; + bool ret=false; + + for(x=0; x<((signed)strlen(str)-DEFINE_SEARCH_PRECISION); x++) + { + for(y=0; y<NUM_PREPROCESSOR_STR; y++) + { + if(!strncasecmp((str+x),preprocessors[y],((strlen(str)-x)>strlen(preprocessors[y]) ? 
strlen(preprocessors[y]) : (strlen(str)-x)))) + return true; // identified as a preprocessor, return true + } + if(!ret && !is_whitespace(*(str+x))) // preprocessor not found, and current char is not whitespace so this isnt a preprocessor + return false; + } + + return ret; +} + +char last_non_whitespace(char* string, int start) +{ + int x; + if(start>0) // start from inside array and work backwards + { + for(x=start; x; x--) + { + if(!is_whitespace(string[x])) + return string[x]; + } + } + else // start from beginning and work forwards + { + for(x=0; x<(signed)strlen(string); x++) + { + if(!is_whitespace(string[x])) + return string[x]; + } + } + return 'x'; +} diff --git a/comments/func.h b/comments/func.h new file mode 100644 index 0000000..cb7e8d5 --- /dev/null +++ b/comments/func.h @@ -0,0 +1,27 @@ +/* +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ + + +#ifndef FUNC_H + +bool is_letter(char); +bool is_number(char, char='a'); +bool move(char*, char*); +bool is_whitespace(char); +bool contain_preprocessor(char*); +char last_non_whitespace(char*,int=-1); + +#endif diff --git a/comments/io.cpp b/comments/io.cpp new file mode 100644 index 0000000..6f89340 --- /dev/null +++ b/comments/io.cpp @@ -0,0 +1,303 @@ +/* +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/
+
+
+#include "main.h"
+#define CLASS_ERROR_PRE "io"
+
+// Copy a file name into one of the FILE_NAME_LENGTH-sized name buffers.
+// Over-long names are truncated instead of being written past the end of
+// the buffer, and the result is always NUL-terminated.
+static void io_copy_name(char* dest, const char* src)
+{
+    if(!src)
+        src="";
+    strncpy(dest,src,FILE_NAME_LENGTH-1);
+    dest[FILE_NAME_LENGTH-1]='\0';
+}
+
+// Construct an io object and immediately open the given files via init().
+// init() reads output_to_stdout, so both source/destination flags are given
+// a defined value (false) before it runs.
+io::io(char* in, char* out, char *comments, bool testing)
+{
+    file_open=false;
+    first_init=true;
+    input_from_stdin=false;
+    output_to_stdout=false;
+    init(in,out,comments, testing);
+}
+
+// Construct an io object without opening anything. The caller may set the
+// public flags and must call init() before using the object; until then
+// ok() reports false.
+io::io()
+{
+    file_open=false;
+    first_init=true;
+    input_from_stdin=false;  // defined defaults; the caller may override
+    output_to_stdout=false;  // them before calling init()
+    ready=false; // We didn't init a file, so we're not ready
+}
+
+// Close all three streams.
+io::~io()
+{
+    i.close();
+    o.close();
+    co.close();
+}
+
+// Initialize variables and set everything up for a new file.
+//
+//   in       - path of the input file; always opened
+//   out      - path of the code output file (not used when output_to_stdout)
+//   comments - path of the comments output file (not used when
+//              output_to_stdout)
+//   testing  - only forwarded to done() when a file is still open
+//
+// Every open failure is reported on cerr and makes ok() return false.
+// addMarkers, regionsCount and the global byte counters are reset on the
+// first call only; all other state is reset on every call.
+void io::init(char* in, char* out, char * comments, bool testing)
+{
+    if(file_open) // previous file was never finished with done()
+        done(testing);
+
+    ready=true; // we are ready unless otherwise changed
+
+    // (re)init variables
+    memset(buf,'\0',BUF_LENGTH);
+    memset(last_written,'\0',LAST_WRITTEN_LENGTH);
+    memset(last_read,'\0',LAST_READ_LENGTH);
+    memset(o_name,'\0',FILE_NAME_LENGTH);
+    memset(i_name,'\0',FILE_NAME_LENGTH);
+    memset(c_name,'\0',FILE_NAME_LENGTH);
+    icounter=0.0; ocounter=0.0;
+    buf_count=0; // reset # of bytes in buffer
+    if(first_init) // only on first init set global_counter to 0.0
+    {
+        first_init=false;
+        iglobal_counter=0.0;
+        oglobal_counter=0.0;
+        addMarkers = 0;
+        regionsCount = 0;
+    }
+    outputRegions = 0;
+    input_line=0; input_column=0;
+    inComment = -1;
+    doneOutput = 0;
+    inCode = -1;
+    output_line=0; output_column=0;
+
+    // input_from_stdin is not consulted: the original check was removed
+    // because the flag always appeared to be set, so input is always read
+    // from the named file.
+    // if(!input_from_stdin)
+    // {
+    i.clear(); // drop eof/fail bits left over from a previous file
+    i.open(in/*, ios::nocreate*/);
+    if(!i)
+    {
+        ready=false;
+        cerr << "Could not open (input) [" << in << "]" << endl;
+    }
+#ifdef DEBUG
+    else
+        cerr << CLASS_ERROR_PRE << "::init() Opened (input) ["
+             << in << "]" << endl;
+#endif
+    // }
+
+    if(!output_to_stdout)
+    {
+        o.clear();
+        o.open(out);
+        if(!o)
+        {
+            ready=false;
+            cerr << CLASS_ERROR_PRE << "::init() Could not open (output) \""
+                 << out << "\"" << endl;
+        }
+        co.clear();
+        co.open(comments);
+        if(!co)
+        {
+            ready=false;
+            cerr << CLASS_ERROR_PRE << "::init() Could not open (comments) \""
+                 << comments << "\"" << endl;
+        }
+    }
+
+    // Remember the file names; the output names stay empty in stdout mode.
+    io_copy_name(i_name,in);
+    if(output_to_stdout)
+    {
+        o_name[0]='\0';
+        c_name[0]='\0';
+    }
+    else
+    {
+        io_copy_name(c_name,comments);
+        io_copy_name(o_name,out);
+    }
+
+    file_open=true;
+}
+
+// Close the files opened by init(). Does nothing when no file is open.
+// The testing argument is not used here.
+void io::done(bool testing)
+{
+    if(!file_open) // called without an open file
+        return;
+
+    // close the files
+    // (the input stream is closed unconditionally; see the note about
+    // input_from_stdin in init())
+    // if(!input_from_stdin)
+    i.close();
+    if(!output_to_stdout)
+    {
+        o.close();
+        co.close();
+    }
+    file_open=false;
+}
+
+// Current input line, as counted by in().
+int io::get_input_line()
+{
+    return input_line;
+}
+
+// Current input column, as counted by in().
+int io::get_input_column()
+{
+    return input_column;
+}
+
+// Current output line, as counted by out().
+int io::get_output_line()
+{
+    return output_line;
+}
+
+// Current output column, as counted by out().
+int io::get_output_column()
+{
+    return output_column;
+}
+
+// Get data: advance the read window by one character.
+//
+// buf is shifted left by one position and the next input character is read
+// into its last slot; the character that was at buf[0] before the shift is
+// pushed onto the front of last_read. Always returns 1.
+int io::in()
+{
+    memmove((last_read+1),last_read,(LAST_READ_LENGTH-1));
+    last_read[0]=buf[0];
+    memmove(buf,(buf+1),(BUF_LENGTH-1));
+
+    i.get(buf[(BUF_LENGTH-1)]); // get the next char
+    if(i.eof()) // EOF found, cancel that last read
+    {
+        buf[(BUF_LENGTH-1)]='\0';
+        if(buf[0] || buf_count==1) // if there is data at the front, then we erased some
+            buf_count--;
+    }
+    else
+        buf_count+=(buf_count<BUF_LENGTH ? (buf_count ? 1 : 2) : 0);
+
+#ifdef IODEBUG
+    if(!i.eof())
+        cout << i_name << " >> \"" << buf[(BUF_LENGTH-1)] << "\"" << endl;
+    else
+        cout << CLASS_ERROR_PRE << "::in() " << i_name << " [EOF] " << buf[(BUF_LENGTH-1)] << endl;
+#endif
+
+    // Line/column tracking follows the character now at the front of buf.
+    if(buf[0]=='\n' || buf[0]=='\r')
+    {
+        input_column=0;
+        input_line++;
+    }
+    else
+        input_column++;
+
+    // Count the byte only when a character was actually stored.
+    if(buf[(BUF_LENGTH-1)]!='\0')
+    {
+        icounter++;
+        iglobal_counter++;
+    }
+
+    return 1;
+}
+
+// see if we still have data in the buffer
+bool io::data_waiting()
+{
+    return buf_count!=0;
+}
+
+
+// Output data: write one character of code.
+//
+// A '\0' argument means "write buf[0]". When the previous output was a
+// comment (inCode == 0) and addMarkers is set, the marker "/****/" is
+// written first. Goes to cout in stdout mode, otherwise to the code file.
+void io::out(char c)
+{
+    if(!c) // replace '\0' with the value of buf[0]
+        c=buf[0];
+    if(c) // Make sure we have something to spit
+    {
+        if (inCode == 0 && addMarkers)
+        {
+            if(output_to_stdout)
+                cout << "/****/";
+            else
+                o << "/****/";
+        }
+        inCode = 1;
+        // only reset code marker if not a space...
+        if (c != ' ' && c!= '\t' &&c!= '\n' && c!= '\r')
+            inComment = 0;
+        if(output_to_stdout)
+            cout << c;
+        else
+            o << c;
+
+        // keep track of sizes
+        ocounter++; // this file
+        oglobal_counter++; // all the files
+    }
+
+#ifdef IODEBUG
+    cout << o_name << " << \"" << c << "\"" << endl;
+#endif
+
+    if(c=='\n' || c=='\r') // new line, return column to 0
+    {
+        output_line++;
+        output_column=0;
+    }
+    else
+        output_column++;
+
+    // last_written keeps the most recent characters, newest at the end.
+    memmove(last_written,(last_written+1),(LAST_WRITTEN_LENGTH-1));
+    last_written[(LAST_WRITTEN_LENGTH-1)]=c;
+}
+
+// Output data: write one character of comment text.
+//
+// A '\0' argument means "write buf[0]". When code was written since the last
+// comment character (inComment == 0) a new comment region starts: if
+// regionsCount is positive and this region would reach it, doneOutput is set
+// and nothing is written; otherwise, with addMarkers set, a "CODE" separator
+// is emitted. Goes to cerr in stdout mode, otherwise to the comments file.
+void io::commentOut(char c)
+{
+    if (inComment == 0)
+    {
+        if (regionsCount > 0 && outputRegions+1 == regionsCount)
+        {
+            doneOutput = 1;
+            return;
+        }
+        if (addMarkers)
+        {
+            if(output_to_stdout)
+                cerr << "\nCODE\n ";
+            else
+                co << "\nCODE\n";
+        }
+        outputRegions++;
+    }
+
+    inComment = 1;
+    inCode = 0;
+    if(!c) // replace '\0' with the value of buf[0]
+        c=buf[0];
+    if(output_to_stdout)
+        cerr << c;
+    else
+        co << c;
+}
+
+
+
+// True when the last init() opened every file it needed.
+bool io::ok()
+{
+    return ready;
+}
+
+// Bytes read from the current input file.
+double io::input_bytes()
+{
+    return icounter;
+}
+
+// Bytes written by out() for the current file.
+double io::output_bytes()
+{
+    return ocounter;
+}
+
+// Bytes read across all files handled by this object.
+double io::global_input_bytes()
+{
+    return iglobal_counter;
+}
+
+// Bytes written by out() across all files handled by this object.
+double io::global_output_bytes()
+{
+    return oglobal_counter;
+}
diff --git a/comments/io.h b/comments/io.h
new file mode 100644
index 0000000..d7e4be0
--- /dev/null
+++ b/comments/io.h
@@ -0,0 +1,66 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef IO_H
+#define IO_H
+
+#include "main.h"
+
+// Character-at-a-time reader/writer used by the comment extractor.
+// Reads one input file through a sliding buffer and writes code and comment
+// text either to two output files or to cout/cerr.
+class io
+{
+    private:
+        // Position bookkeeping maintained by in() and out().
+        int input_line;
+        int input_column;
+        int output_line;
+        int output_column;
+        // Buffer fill bookkeeping; data_waiting() is true while non-zero.
+        int buf_count;
+        // -1 after init(), 1 after commentOut(), 0 once out() has written
+        // a non-whitespace character.
+        int inComment;
+        // -1 after init(), 1 after out() has written, 0 after commentOut().
+        int inCode;
+        // Number of comment regions started since init().
+        int outputRegions;
+        // Byte counters: current file and cumulative, input and output.
+        double icounter;
+        double iglobal_counter;
+        double ocounter;
+        double oglobal_counter;
+        // Result of the last init(); reported by ok().
+        bool ready;
+        // Input, code-output and comments-output streams.
+        ifstream i;
+        ofstream o;
+        ofstream co;
+
+    public:
+        // Names of the input, code-output and comments-output files.
+        char i_name[FILE_NAME_LENGTH];
+        char o_name[FILE_NAME_LENGTH];
+        char c_name[FILE_NAME_LENGTH];
+        // Sliding read buffer and short histories of written/read characters.
+        char buf[BUF_LENGTH];
+        char last_written[LAST_WRITTEN_LENGTH];
+        char last_read[LAST_READ_LENGTH];
+        // io source/destination modifications
+        bool input_from_stdin;
+        bool output_to_stdout;
+        bool first_init;
+        bool file_open;
+        // Non-zero: emit markers when switching between code and comments.
+        int addMarkers;
+        // When positive, limits how many comment regions are output.
+        int regionsCount;
+        // Set to 1 by commentOut() once the region limit has been reached.
+        int doneOutput;
+
+        io(char* in, char* out, char *comments, bool testing=false);
+        io();
+        ~io();
+
+        // Open/close the files for one input.
+        void init(char* in, char* out, char *comments, bool testing=false);
+        void done(bool testing=false);
+
+        // Position accessors.
+        int get_input_line();
+        int get_input_column();
+        int get_output_line();
+        int get_output_column();
+
+        // Advance the read buffer by one character.
+        int in();
+        bool data_waiting();
+
+        // Byte counter accessors.
+        double input_bytes();
+        double output_bytes();
+        double global_input_bytes();
+        double global_output_bytes();
+
+        // Write one character of code / comment; '\0' means buf[0].
+        void out(char c='\0');
+        void commentOut(char c='\0');
+
+        bool ok();
+};
+
+#endif
diff --git a/comments/main.cpp b/comments/main.cpp
new file mode 100644
index 0000000..50035c4
--- /dev/null
+++ b/comments/main.cpp
@@ -0,0 +1,62 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+#include "main.h"
+#include "dformat.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define MSG_PRE "main()"
+
+// Program entry point.
+//
+// Builds the formatter from the command line, caps the CPU time of the
+// process, then formats every requested file in turn.
+//
+// Returns 1 when the formatter could not be set up. Exits with status 1
+// when the CPU limit cannot be applied. Otherwise returns 0.
+// NOTE(review): 0 is also returned when errors were reported after
+// processing; left as-is in case existing callers check the exit status.
+int main(int argc, char** argv)
+{
+    // CPU time limit, in seconds, applied as both soft and hard limit.
+    const rlim_t CPU_LIMIT_SECONDS = 10;
+
+    dformat formatter(argc,argv);
+    struct rlimit cpu_limit;
+
+    cpu_limit.rlim_cur = CPU_LIMIT_SECONDS;
+    cpu_limit.rlim_max = CPU_LIMIT_SECONDS;
+    if (setrlimit(RLIMIT_CPU, &cpu_limit) == -1) {
+        perror("setrlimit"); // name the call that failed
+        exit(1);
+    }
+    /*
+    getrlimit(RLIMIT_CPU, &cpu_limit);
+    cerr << cpu_limit.rlim_cur << "\n";
+    cerr << cpu_limit.rlim_max << "\n";
+    */
+    if(!formatter.ok())
+    {
+#ifdef DEBUG
+        cerr << "main() - dformat not ok." << endl;
+#endif
+        return 1;
+    }
+
+    // Process each requested file.
+    while(formatter.next())
+    {
+        formatter.format();
+        formatter.done();
+    }
+
+    if(!formatter.ok())
+    {
+        cerr << MSG_PRE << " Errors occured while trying to complete requests."
+             << endl;
+    }
+
+    return 0;
+}
diff --git a/comments/main.h b/comments/main.h
new file mode 100644
index 0000000..a88e465
--- /dev/null
+++ b/comments/main.h
@@ -0,0 +1,42 @@
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+// Common definitions shared by every source file of the comments extractor:
+// default output suffixes, buffer sizes, and the standard and project headers.
+#ifndef MAIN_H
+#define MAIN_H
+
+// Suffixes for the code and comments output file names.
+#define DEFAULT_MANGLED_POSTFIX ".code"
+#define DEFAULT_COMMENTS_POSTFIX ".comments"
+
+/* DO NOT EDIT BELOW THIS LINE */
+#define BUF_LENGTH 512
+#define FILE_NAME_LENGTH 200
+#define LAST_READ_LENGTH 3
+#define LAST_WRITTEN_LENGTH 3
+#define BYTES_PER_K 1024.0
+#define FLAG_HISTORY_MAX 5
+#define DEFINE_SEARCH_PRECISION 2
+
+#include <fstream>
+#include <iostream>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// The using-directive must come after the standard headers, which declare
+// namespace std, and before the project headers, which use unqualified
+// names such as ifstream.
+using namespace std;
+
+#include "func.h"
+#include "io.h"
+#include "dformat.h"
+#endif |