diff options
author | David A. Wheeler <dwheeler@dwheeler.com> | 2013-08-25 08:16:13 -0400 |
---|---|---|
committer | David A. Wheeler <dwheeler@dwheeler.com> | 2013-08-25 08:16:13 -0400 |
commit | ba8f663f16c3e8c4abd2ef7ec5966a7dc338b962 (patch) | |
tree | c86c5d0b270f81c1fb72b5bb4a3db277ad2bd1e1 | |
download | sloccount-git-ba8f663f16c3e8c4abd2ef7ec5966a7dc338b962.tar.gz |
Initial commit of version 2.26.
-rw-r--r-- | COPYING | 340 | ||||
-rw-r--r-- | ChangeLog | 1018 | ||||
-rw-r--r-- | PROGRAM_LICENSE | 1 | ||||
-rw-r--r-- | README | 51 | ||||
-rw-r--r-- | SOURCES | 29 | ||||
-rw-r--r-- | TODO | 161 | ||||
-rw-r--r-- | TODO.orig | 153 | ||||
-rwxr-xr-x | ada_count | 27 | ||||
-rwxr-xr-x | append_license | 62 | ||||
-rwxr-xr-x | append_specname | 57 | ||||
-rwxr-xr-x | asm_count | 166 | ||||
-rwxr-xr-x | awk_count | 27 | ||||
-rwxr-xr-x | break_filelist | 1308 | ||||
-rwxr-xr-x | break_filelist.orig | 1084 | ||||
-rw-r--r-- | break_filelist.rej | 20 | ||||
-rw-r--r-- | c_count.c | 225 | ||||
-rw-r--r-- | c_lines_environment.dat | 98 | ||||
-rw-r--r-- | c_outfile.dat | 1 | ||||
-rwxr-xr-x | cobol_count | 82 | ||||
-rwxr-xr-x | compute_all | 87 | ||||
-rwxr-xr-x | compute_c_usc | 77 | ||||
-rwxr-xr-x | compute_java_usc | 59 | ||||
-rwxr-xr-x | compute_sloc_lang | 66 | ||||
-rwxr-xr-x | count_extensions | 56 | ||||
-rwxr-xr-x | count_unknown_ext | 32 | ||||
-rwxr-xr-x | csh_count | 27 | ||||
-rwxr-xr-x | dirmatch | 37 | ||||
-rw-r--r-- | driver.c | 110 | ||||
-rw-r--r-- | driver.h | 50 | ||||
-rwxr-xr-x | exp_count | 27 | ||||
-rwxr-xr-x | extract-count | 83 | ||||
-rwxr-xr-x | extract_license | 178 | ||||
-rwxr-xr-x | f90_count | 81 | ||||
-rwxr-xr-x | fortran_count | 83 | ||||
-rwxr-xr-x | generic_count | 77 | ||||
-rwxr-xr-x | get_sloc | 544 | ||||
-rwxr-xr-x | get_sloc_details | 103 | ||||
-rwxr-xr-x | haskell_count | 122 | ||||
-rw-r--r-- | java_lines_environment.dat | 98 | ||||
-rw-r--r-- | jsp_count.c | 1787 | ||||
-rw-r--r-- | jsp_count.l | 90 | ||||
-rwxr-xr-x | lex_count | 70 | ||||
-rw-r--r-- | lexcount1.c | 58 | ||||
-rwxr-xr-x | linux_unique | 64 | ||||
-rwxr-xr-x | lisp_count | 27 | ||||
-rwxr-xr-x | make_filelists | 193 | ||||
-rw-r--r-- | makefile | 246 | ||||
-rw-r--r-- | makefile.orig | 222 | ||||
-rwxr-xr-x | makefile_count | 27 | ||||
-rw-r--r-- | ml_count.c | 209 | ||||
-rw-r--r-- | modula3_count | 65 | ||||
-rwxr-xr-x | objc_count | 89 | ||||
-rw-r--r-- | pascal_count.c | 1714 | ||||
-rw-r--r-- | pascal_count.l | 81 | ||||
-rwxr-xr-x | perl_count | 147 | ||||
-rw-r--r-- | php_count.c | 335 | ||||
-rwxr-xr-x | print_sum | 40 | ||||
-rwxr-xr-x | print_sum_subset | 41 | ||||
-rwxr-xr-x | python_count | 120 | ||||
-rwxr-xr-x | redo_licenses | 42 | ||||
-rwxr-xr-x | rpm_unpacker | 71 | ||||
-rwxr-xr-x | ruby_count | 27 | ||||
-rwxr-xr-x | sed_count | 27 | ||||
-rwxr-xr-x | sh_count | 27 | ||||
-rwxr-xr-x | show_filecount | 58 | ||||
-rwxr-xr-x | simplecount | 84 | ||||
-rwxr-xr-x | sloccount | 258 | ||||
-rw-r--r-- | sloccount.1 | 235 | ||||
-rw-r--r-- | sloccount.1.gz | bin | 0 -> 3377 bytes | |||
-rw-r--r-- | sloccount.html | 2464 | ||||
-rw-r--r-- | sloccount.html.orig | 2440 | ||||
-rw-r--r-- | sloccount.spec | 56 | ||||
-rwxr-xr-x | sql_count | 76 | ||||
-rw-r--r-- | stripccomments.c | 50 | ||||
-rw-r--r-- | stub | 22 | ||||
-rw-r--r-- | stubsh | 23 | ||||
-rw-r--r-- | table.html | 569 | ||||
-rwxr-xr-x | tcl_count | 27 | ||||
-rw-r--r-- | testcode/conditions.CBL | 31 | ||||
-rw-r--r-- | testcode/hello.f | 10 | ||||
-rw-r--r-- | testcode/hello.f90 | 7 | ||||
-rw-r--r-- | testcode/hello.pas | 9 | ||||
-rw-r--r-- | testcode/hello1.pas | 12 | ||||
-rw-r--r-- | testcode/messages.rb | 152 | ||||
-rw-r--r-- | testcode/temp.c | 5 | ||||
-rw-r--r-- | testcode/test.hs | 19 | ||||
-rw-r--r-- | testcode/test1.inc | 23 | ||||
-rw-r--r-- | testcode/test1.lhs | 15 | ||||
-rw-r--r-- | testcode/test1.php | 27 | ||||
-rw-r--r-- | testcode/test2.lhs | 44 | ||||
-rw-r--r-- | testcode/wokka.cbl | 4 | ||||
-rw-r--r-- | testcode/wokka.cs | 8 | ||||
-rw-r--r-- | usc_subset.tar | 1 |
93 files changed, 19455 insertions, 0 deletions
@@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..6db1fdf --- /dev/null +++ b/ChangeLog @@ -0,0 +1,1018 @@ +2004-08-01 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.26. + * Modified driver.h to clearly state the GPL license. + This doesn't change anything, but it makes the + Savannah people happy. + +2004-07-31 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.25. Changes are: + * Per request from Savannah, added the more detailed licensing + text to every source file. + * Modified the assembly language counting code, based on useful + feedback and a test case from Purnendu Ghosh, so that + the heuristics work better at guessing the right comment character + and they perform well. + In particular, the comment character '*' is far better supported. + * Added support for Delphi project files (.dpr files, which are + essentially in Pascal syntax), thanks to Christian Iversen. + * Some versions of Perl are apparently causing trouble, but + I have not yet found the solution for them (other than using + a different version of Perl). The troublesome line of code + in break_filelist, which currently says: + open(FH, "-|", "md5sum", $filename) or return undef; + This could be changed to: + open(FH, "-|", "md5sum $filename") or return undef; + But I dare not fix it that way, because that would create + a security problem. 
Imagine downloading someone + else's source code (who you don't know), using sloccount, and + that other person has created in their source tree a file + named like this: "; rm -fr /*" or its variations. + I'd rather have the program fail in specific circumstances + (users will know when it won't work!) than to insert a known + dangerous security vulnerability. I can't reproduce this problem; + it's my hope that those who CAN will help me find a good + solution. For the moment, I'm documenting the problem here and + in the TODO list, so that people will realize WHY it hasn't + just been "fixed" with the "obvious solution". + The answer: I care about security. + +2004-05-10 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.24 - a few minor bugfixes and improvements. + Automatically tries to use several different MD5 programs, until + it finds one that works - this is more flexible, and as a result, + it now works out-of-the-box on Apple Mac OS X. + SLOCCount now accepts "." as the directory to analyze, + it correctly identifies wrapper scripts left by libtool as + automatically generated code, and correctly identifies debian/rules + files as makefiles. Also, installation documentation has improved. + My thanks to Jesus M. Gonzalez-Barahona for telling me about the + Debian bug reports and testing of candidate versions. + My thanks to Koryn Grant, who told me what needed to be done + to get SLOCCount running on Mac OS X (and for testing my change). + This version resolves Debian Bug reports #173699, + #159609, and #200348. + +2004-04-27 David A. Wheeler <dwheeler, at, dwheeler.com> + * Automatically try several different MD5 programs, looking for + a working one. Originally this program REQUIRED md5sum. + This new version tried md5sum, then md5, then openssl. + The good news - the program should now 'just work' on + Apple Mac OS X. 
The bad news - if md5sum doesn't exist, + sloccount still has a good chance of working, but will display + odd error messages while it searches for a working MD5 program. + There doesn't seem to be an easy way in perl to suppress such + messages while still permitting "trouble reading data file" + messages. However, doing the test at run-time is much more + robust, and this way it at least has a chance of working on + systems it didn't work on at all before. + * Removed the "debian" subdirectory. There was no need for it; + it's best for the Debian package maintainers to control that + information on their own. + +2004-04-25 David A. Wheeler <dwheeler, at, dwheeler.com> + * Allow "." and ".." as specifications for directories even + when they have no subdirectories. + This resolves Debian bug report log #200348 + ("Sloccount . fails"). + * Correctly identify wrapper scripts left by libtool as + automatically generated code. + When linking against a libtool library, libtool leaves a wrapper + script in the source tree (so that the binary can be executed + in-place, without installing it), which includes this: + (line) # foo - temporary wrapper script for .libs/foo + (line) # Generated by ltmain.sh - GNU libtool 1.4.3 + (1.922.2.111 2002/10/23 02:54:36) + I fixed this by saying that any comment beginning with + "Generated by" in the first few lines must be auto-generated + code. This should correctly catch other auto-generated code too. + There is a risk that code NOT automatically generated will be + incorrectly labelled, but that's unlikely. + This resolves Debian Bug report logs #173699, + "sloccount should ignore libtool-generated wrapper scripts". + * Now identifies "debian/rules" files as a makefile. + This resolves Debian Bug report logs - #159609, + "sloccount Does not consider debian/rules to be a makefile". + * Minor fix to sloccount makefile, so that man page installs + correctly in some situations that didn't before. + My thanks to Jesus M. 
Gonzalez-Barahona. + +2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.23 - a few minor bugfixes and improvements. + +2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com> + * Fixed incorrect UTF-8 warnings. Perl 5.8.0 creates warnings + when the LANG value includes ".UTF-8" but the text files read + aren't UTF-8. This causes problems on Red Hat Linux 9 and others, + which set LANG to include ".UTF-8" by default. + This version quietly removes ".UTF-8" from the LANG value for + purposes of sloccount, to eliminate the problem. + +2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com> + * Fixed bad link to "options" in sloccount.html; my thanks to + Barak Zalstein (<Barak.Zalstein, at, ParthusCeva.com) for + telling me. + * Added "--version" option that prints the version number. + Thanks to Auke Jilderda (auke.jilderda, at, philips.com) + for suggesting this. + +2003-11-01 Sam Tregar <sam, at, tregar.com> + * Fixed a bug in perl_count that prevents it from + properly skipping POD. + +2003-10-30 Julian Squires <julian, at, greyfirst.ca> + * Added simple literate Haskell support. + * Added test cases for literate Haskell support. + * Updated Common LISP and Modula 3 extensions. + +2003-03-08 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.22 - improved OCAML support, thanks to Michal Moskal. + Other minor improvements. + +2003-02-15 Jay A. St. Pierre + * Fixed uninstalling documents to always remove DOC_DIR. + +2003-02-15 Michal Moskal + * Significantly improved OCAML support - complete rewrite of + ML handling. + +2003-01-28 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.21 - improved Fortran support (inc. Fortran 90); + my thanks to Erik Schnetter for implementing this! + +2002-12-17 Erik Schnetter <schnetter, at, uni-tuebingen.de> + * Added support for Fortran 90. Extensions are ".f90" and ".F90". 
+ * Changed handling of Fortran 77 to include HPF and Open MP + statements, and to accept uppercase ".F77" as extension. + +2002-12-04 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.20 - minor portability and documentation improvements. + * Documentation improvements - more discussion on Intermediate COCOMO. + +2002-12-04 Linh Luong <Linh.Luong, at, trw.com> + * Modified SLOCCount so that it would run on Solaris 2.7 + (once Perl is installed and the PATH is set correctly to include + the directory where SLOCCount is installed). + This required modifying file sloccount to eliminate the + test ("[") option "-e", replacing it with the "-r" option + ("test -e" is apparently not supported by Solaris 2.7). + Since "-r" should be available on any implementation of "test", + this is a nice portable change. + +2002-11-16 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.19, documentation improvement. + * Documented the "Improved COCOMO" model from Boehm, + so that users who want more accurate estimates can do at + least a little bit straight from the documentation. + For more, as always, see Boehm's book. + If anyone wants to implement logical SLOC counting, please be + my guest! Then, COCOMO II could be implemented too. + * Modified this ChangeLog to document more fully the SGI MIPS problem. + +2002-11-16 David A. Wheeler <dwheeler, at, dwheeler.com> + * Version 2.18, minor bugfix release. + * Updated the "wc -l" check; it would cause problems for users + who had never used sloccount before (because datadir had not + been created yet). Also, the "wc -l" check itself would not + reliably identify SGI systems that had horribly buggy "wc" + programs; it's believed this is a better check. + Thanks to Randal P. Andress for helping with this. + * Fixed this ChangeLog. It was Randal P. Andress who identified + the "wc -l" bug, not Bob Brown. Sorry for the misattribution, + and thanks for the bugfixing help! 
+ * Changed rpm building command to work with rpm version 4 + (as shipped with Red Hat Linux 8.0). As of Red Hat Linux 8, + the "rpm" command only loads files, while there is now a + separate "rpmbuild" command for creating rpm files. + Those rebuilding with Red Hat Linux 7.X or less (rpm < version 4) + will need to edit the makefile slightly, as documented + in the makefile, to modify the variable RPMBUILD. + * "make rpm" now automatically uninstalls sloccount first if it can, + to eliminate unnecessary errors when building new versions of + sloccount RPMs. This only affects people modifying and + redistributing the code of sloccount (mainly, me). + +2002-11-16 Randal P. Andress + * Fixed get_sloc so that it + also accepts --filecounts as well as --filecount. + +2002-11-05 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.17, which adds support for Java Server Pages + (.jsp), eliminates some warnings in newer Perl implementations, + and has a few minor fixes/improvements. + +2002-11-18 Randal P. Andress + * Randal provided the following additional information about this + really nasty problem on SGI MIPS machines. It causes gcc + to not work properly, and thus "wc" won't work properly either. + SLOCCount now detects that there's a problem and will refuse to + run if things are screwed up this badly. For those unfortunate + few who have to deal with this case, here's additional information + from Randal Andress: + + When gcc is installed on SGI MIPS from source, sgi-mips-sgi-irix6.x, + an option specification in the 'specs' file is set + incorrectly for n32. The offending line is: + %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL} + Which (unless option '-mno-long64' is specified), means that + LONG_MAX is 64 bits. The trouble is two fold: + 1. This should not be the default, since for n32, + normally, long is only 32 bits. and + 2. The option did not carry into the + compiler past the pre-processor - so it did not work. 
+ The simplest fix for gcc (it seems that it can be done locally by + editing the specs file) is to have the following line to + replace the offending line in the specs file: + %{long64:-D__LONG_MAX__=9223372036854775807LL} + This makes the default 32 and only sets it to 64 if you specify + '-long64' which *does* work all the way through the compiler. + + I had the binary for gcc 3 on the sgi freeware site installed here and + looked at it's specs file and found no problem (they have the '-long64' + option). So it seems that when they build gcc for their freeware + distribution, they fix it. + + The problem comes when someone downloads and builds gcc for themselves + on sgi. Then the installation is faulty and any n32 code that they + build is subject to this flaw if the source makes use of LONG_MAX + or any of the values derived from it. + + The real problem turned out to be quite general for sgi n32 gcc. The + 'specs' file and mips.h are not consistent resulting in 'LONG_MAX' + being given an incorrect value. + + The following 'c' program shows inconsistent values for macros for + mips-irix n32: + __LONG_MAX__ (LONG_MAX) and + _MIPS_SZLONG + This seems to stem from an improper default option in the specs file + forcing -D__LONG_MAX__=0x7fffffffffffffff + to be passed to each compile. 
+ + Here is the test case, compile command, and output: + + # include <limits.h> + #define LONG_MAX_32_BITS 2147483647 + #include <sys/types.h> + int main () { + #if LONG_MAX <= LONG_MAX_32_BITS + printf ("LONG_MAX <= LONG_MAX_32_BITS = 0x%lx\n",LONG_MAX); + #else + printf ("LONG_MAX > LONG_MAX_32_BITS = 0x%llx\n",LONG_MAX); + #endif + + printf ("_MIPS_SZLONG = 0x%x\n",_MIPS_SZLONG); + printf ("__LONG_MAX__ = 0x%llx (size:%d)\n",__LONG_MAX__, + sizeof + (__LONG_MAX__)); + + #if LONG_MAX <= LONG_MAX_32_BITS + printf ("LONG_MAX = 0x%lx (size:%d) + \n",LONG_MAX,sizeof(LONG_MAX)); + #else + printf ("LONG_MAX = 0x%llx (size:%d) + \n",LONG_MAX,sizeof(LONG_MAX)); + #endif + + printf ("LONG_MAX_32_BITS = 0x%x (size:%d) + \n",LONG_MAX_32_BITS,sizeof(LONG_MAX_32_BITS)); + return 0; + } + ============ end test case source. + + >gcc -n32 -v -o test_limits -O0 -v -g test_limits.c + + defines include:....-D__LONG_MAX__=9223372036854775807LL.... + + =========== test output: + >test_limits + LONG_MAX > LONG_MAX_32_BITS = 0x7fffffffffffffff + _MIPS_SZLONG = 0x20 + __LONG_MAX__ = 0x7fffffffffffffff (size:8) + LONG_MAX = 0x7fffffffffffffff (size:8) + LONG_MAX_32_BITS = 0x7fffffff (size:4) + + + ======== end test case output + + By changing the specs entry: + %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL} + to + %{long64:-D__LONG_MAX__=9223372036854775807LL} + as is discussed in one of the internet reports I sent earlier, the + output, + after recompiling and running is: + + LONG_MAX <= LONG_MAX_32_BITS = 0x7fffffff + _MIPS_SZLONG = 0x20 + __LONG_MAX__ = 0x7fffffff (size:4) + LONG_MAX = 0x7fffffff (size:4) + LONG_MAX_32_BITS = 0x7fffffff (size:4) + + Although I have not studied it well enough to know exactly why, the + problem has to do with the size of (long int) and the attempt of the + 'memchr' code to determine whether or not it can use 64 bit words + rather than 32 bit words in chunking through the string looking + for the specified character, "\n"(0x0a) in the case of 'wc'. 
+ +2002-11-03 David A. Wheeler <dwheeler, at, dwheeler.com> + * Fixed makefile install/uninstall scripts to properly handle + documentation. + * Added simple check at beginning of sloccount's execution + to make sure "wc -l" actually works. + Randal P. Andress has found that on certain SGI machines, "wc -l" + produces the wrong answers. He reports, + "You may already know this, but just in case you do not, there is an + apparent bug in textutils-1.19 function 'wc' (at least as built on + SGI-n32) which is caused by an apparent bug in memchr (*s, c, n). + The bug is only evident when counting 'lines only' or + 'lines and characters' (i.e., when NOT counting words). + The result is that the filecount is short... + I replaced the memchr with very simple code and it corrected the + problem. I then installed textutils-2.1 which does not seem have + the problem." + I thought about adding this information just to the documentation, + but no one would notice it. By adding a check to the code, + most people will neither know nor care about the problem, and + the few people it DOES affect will know about the problem + right away (instead of reporting wrong answers). + Yes, a failing "wc -l" is a pretty horrific bug, but rather + than ignore the problem, it's better to detect and address it. + * Modified documentation everywhere so that it consistently + documents "--filecount" as the correct option for filecounts, + not "--filecounts". That way, the documentation is consistent. + * However, in an effort to "do the right thing", the program sloccount + will accept "--filecounts" as an alternative way to specify + --filecount. + +2002-11-02 Bob Brown <rlb, at, bluemartini.com> + * Contributed code changes to count Java Server Page (.jsp) files. + The code does not pull comments out of embedded + javascript. We don't consider that a serious limitation at all, + since no one should be sending embedded javascript comments + to client browsers anyhow. They're extremely rare. 
+ David A. Wheeler notes that you could + argue that if you _DO_ include such comments, they're + not really functioning as comments (since they DO have an + effect on the result - they're more like print statements in an + older language instead of a traditional language's comments). + +2002-11-02 David A. Wheeler <dwheeler, at, dwheeler.com> + * Eliminated more Perl warnings by adding more + defined() wrappers to while() loops in Perl code + (based on Randal's suggestion). The problem is that Perl + handles the last line of a file oddly if it doesn't end with + a newline indicator, and it consists solely of "0". + +2002-11-02 Randal P Andress <Randal_P_Andress, at, raytheon.com> + * Eliminated some Perl warnings by adding + defined() wrappers to while() loops in Perl code. + +2002-8-24 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.16, fixed limitations of old Pascal counter. + +2002-8-24 David A. Wheeler <dwheeler, at, dwheeler.com> + * Re-implemented Pascal counter (in flex). This fixes some problems + the old counter had - it handles nested comments with different + formats, and strings as well. + * Removed the BUGS information that described the Pascal counter + weaknesses.. since now they're gone! + * Added an additional detector of automatically generated files - + it's an auto-generated file if it starts with + "A lexical scanner generated by flex", since flex adds this. + Generally, this isn't a problem, since we already detect + the filename and matching .c files, but it seems worth doing. + +2002-8-22 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.15, a bugfix + small feature improvement. + My sincere thanks to Jesus M. Gonzalez-Barahona, who provided + patches with lots of useful improvements. + +2002-8-22 Jesus M. Gonzalez-Barahona + * Added support for Standard ML (as language "ml"). + * A patch suggested to the Debian BTS; .hh is also a C++ extension. 
+ * Some ".inc" files are actually Pascal, not PHP; + now ".inc" files are examined binned to either Pascal or PHP + depending on their content. + * Improved detection of Pascal files (particularly for Debian + package fpc-1.0.4). + * php_count was not closing open files before opening a new one, + and therefore sloccount could fail to count PHP code given + a VERY LONG list of PHP files in one package. + * break_filelist had problems with files including <CR> and other + weird characters at the end of the filename. Now fixed. + +2002-7-24 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.14. Improved Pascal detection, improved + Pascal counting, added a reference to CCCC. + +2002-7-24 David A. Wheeler <dwheeler, at, dwheeler.com> + * Modified Pascal counting; the older (*..*) commenting structure + is now supported. Note that the Pascal counter is still imperfect; + it doesn't handle the prioritization between these two commenting + systems, and can be fooled by strings that include a + comment start indicator. Rewrites welcome, however, for most + people the current code is sufficient. This really needs to be + rewritten in flex; languages with strings and multiline comment + structures aren't handled correctly with naive Perl code. + * Documented the weaknesses in the Pascal counter as BUGS. + +2002-7-24 Ian West IWest, at, aethersystems, dot com + * Improved heuristic for detecting Pascal programs in break_filelist. + Sloccount will now categorize files as Pascal if they have + the file type ".pas" as well as ".p", though it still checks + the contents to make sure it's really pascal. + The heuristic was modified so that it's also considered Pascal + if it contains "module" and "end.", + or "program", "begin", and "end." in addition to the existing cases. 
+ + (Ian West used sloccount to analyze a system containing + about 1.2 million lines of code in almost 10,000 files; + ninety percent of it is Ada, and the bulk of the remainder + is split between Pascal and SQL. The following is Ian's + more detailed explanation for the change): + + VAX Pascal uses "module" instead of "program" for files that + have no program block and therefore no "begin". + There is also no requirement for a Pascal file to have + procedures or functions, which is the case for files that are + equivalents of C headers. So I modified the function to + allow files to be accepted that only contain either: + "module" and "end."; or "program", "begin", and "end.". + I considered adding checks for "const", "type", and "var" but + decided they were not necessary. I have added the extra cases + without changing the existing logic so as not to upset + any cases for "unit". It is possible to optimize the logic + somewhat, but I felt clarity was better than efficiency. + + I found that some of my Pascal files were getting through + only because the word "unit" appeared in certain comments. + So I moved the line for filtering out comments above the lines + that look for the keywords. + + Pascal in general allows comments in the form (*...*) as well + as {...}, so I added a line to remove these. + + After making these changes, all my files were correctly + categorized. I also verified that the sample Pascal files + from p2c still had the same counts. + + Thank you for developing SLOCCount. It is a very useful tool. + +2002-7-15 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added a reference to CCCC; http://cccc.sourceforge.net/ + +2002-5-31 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.13. + * Code cleanups. Turned on gcc warnings ("-Wall" option) and + cleaned up all code that set off a warning. + This should make the code more portable as well as cleaner. + Made a minor speed optimization on an error branch. 
+ +2002-3-30 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.12. + * Added a "testcode" directory with some sample source code + files for testing. It's small now, but growth is expected. + Contributions for this test directory (especially for + edge/oddball cases) are welcome. + +2002-3-25 David A. Wheeler <dwheeler, at, dwheeler.com> + * Changed first-line recognizers so that the first line (#!) will + be matched ignoring case. For most Unix/Linux systems uppercase + script statements won't work, but Windows users may use them. + * Now recognize SpeedyCGI, a persistent CGI interface for Perl. + SpeedyCGI has most of the speed advantages of FastCGI, but + has the security advantages of CGI and has the CGI interface + (from the application writer's point of view). + SpeedyCGI perl scripts have #!/usr/bin/speedy lines instead of + #!/usr/bin/perl. More information about SpeedyCGI + can be found at http://daemoninc.com/speedycgi/ + Thanks to Priyadi Iman Nurcahyo for noticing this. + +2002-3-15 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added filter to remove calls to sudo, so + "#!/usr/bin/sudo /usr/bin/python" etc as the first line + are correctly identified. + +2002-3-7 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added cross-references to LOCC and CodeCount. They don't + do what I want.. which is why I wrote my own! .. but others + may find them useful. + +2002-2-28 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.11. + * Added support for C#. Any ".cs" file is presumed + to be a C# file. The C SLOC counter is used to count SLOC. + Note that C# doesn't have a "header" type (Java doesn't either), + so disambiguating headers isn't needed. + * Added support for regular Haskell source files (.hs). + Their syntax is sufficiently similar that just the regular + C SLOC counter works. + Note that literate Haskell files (.lhs) are _not_ supported, + so be sure to process .lhs files into .hs files before counting. 
+ There are two different .lhs conventions; for more info, see: + http://www.haskell.org/onlinereport/literate.html + * Tweaked COBOL counter slightly. Added support in fixed (default) + format for "*" and "/" as comment markers in column 1. + * Modified list of file extensions known not to be source code, + based on suffixes(7). This speeds things very slightly, but the + main goal is to make the "unknown" list smaller. + That way, it's much easier to see if many source code files + were incorrectly ignored. In particular, compressed formats + (e.g., ".tgz") and multimedia formats (".wav") were added. + * Modified documentation to make things clear: If you want source + in a compressed file to be counted (e.g. .zip, .tar, .tgz), + you need to uncompress the file first!! + * Modified documentation to clarify that literate programming + files must be expanded first. + * Now recognize ".ph" as Perl (it's "Perl header" code). + Please let me know if this creates many false positives + (i.e., if there are programs using ".ph" in other ways). + * File count_unknown_ext modified slightly so that it now examines + ~/.slocdata. Modified documentation so that its use is + recommended and explained. It's been there for a while, but + with poor documentation I bet few understand its value. + * Modified output to clearly say that it's Open Source Software / + Free Software, licensed under the GPL. It was already stated + that way in the documentation and code, but clearly stating this + on every run makes it even harder to miss. + +2002-2-27 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.10. + * COBOL support added! Now ".cbl" and ".cob" are recognized + as COBOL extensions, as well as their uppercase ".CBL" and ".COB". + The COBOL counter works as follows: + it detects if a "freeform" command has been given. 
Unless a + freeform command's given, a comment has "*" or "/" in column 7, + and a SLOC is a non-comment line with + at least one non-whitespace in column 8 or later (including + columns 72 or greater; it's arguable if a line that's empty + before column 72 is really a line or a comment, but I've decided + to count such odd things as lines). + If we've gone free-format, a comment is a line that has optional + whitespace and then "*".. otherwise, a line with nonwhitespace + is a SLOC. + Is this good enough? I think so, but I'm not a major COBOL user. + Feedback from real COBOL users would be welcome. + A source for COBOL test programs is: + http://www.csis.ul.ie/cobol/examples/default.htm + Information on COBOL syntax gathered from various locations, inc.: + http://cs.hofstra.edu/~vmaffea1/cobol.html + http://support.merant.com/websupport/docs/microfocus/books/ + nx31books/lrintr.htm + * Modified handling of uppercase filename extensions so they'll + be recognized as well as the more typical lowercase extensions. + If a file has one or more uppercase letters - and NO + lowercase letters - it's assumed that it may be a refugee from + an old OS that supported only uppercase filenames. + In that circumstance, if the filename extension doesn't match the + set of known extensions, it's made into lowercase and recompared + against the set of extensions for source code files. + This heuristic should improve recognition of source + file types for "old" programs using upper-case-only characters. + I do have concern that this may be "too greedy" an algorithm, i.e., + it might claim that some files that aren't really source code + are now source code. I don't think it will be a problem, though; + not many people create filename + extensions that only differ by case in most circumstances; the + ".c" vs. ".C" thing is an exception, and since Windows folds + case it's not a very portable practice. 
This is a pretty + conservative heuristic; I found Cobol programs with lowercase + filenames and uppercase extensions ("x.CBL"), which wouldn't + be matched by this heuristic. For Cobol and Fortran I put in + special ".F", ".CBL", and ".COB" patterns to catch them. + With those two actions, the program should manage to + correctly identify more source files without incorrectly + matching non-source files. + * ".f77" is now also accepted as a Fortran77 extension. + Thanks to http://www.webopedia.com/quick_ref/fileextensionsfull.html + which has lots of extension information. + * Fixed a bug in handling top-level directories where there were NO + source files at all; in certain cases this would create + spurious error messages. (Fix in compute_all). + +2002-1-7 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.09. + +2002-1-9 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added support for the Ruby programming language, thanks to + patches from Josef Spillner. + * Documentation change: added more discussion about COCOMO, + in particular why its cost estimates appeared so large. + Some programmers think of just the coding part, and only what + they'd get paid directly.. but that's less than 10% of the + costs. + +2002-1-7 David A. Wheeler <dwheeler, at, dwheeler.com> + * Minor documentation fix - the example for --effort in + sloccount.html wasn't quite right (the base documentation + for --effort was right, it was just the example that was wrong). + My thanks to Kevin the Blue for pointing this out. + +2002-1-3 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.08. + +2002-1-3 David A. Wheeler <dwheeler, at, dwheeler.com> + * Based on suggestions by Greg Sjaardema <gdsjaar@sandia.gov>: + * Modified c_count.c, function count_file to close the stream + after the file is analyzed. Otherwise, this can cause problems + with too many open files on some systems, particularly on + operating systems with small limits (e.g., Solaris). 
+ * Added '.F' as a Fortran extension. + +2002-1-2 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 2.07. + +2002-1-2 Vaclav Slavik <vaclav.slavik@matfyz.cz> + * Modified the RPM .spec file in the following ways: + * By default the RPM package now installs into /usr (so binaries + go into /usr/bin). Note that those who use the makefile directly + ("make install"), including tarball users, + will still default to /usr/local instead. + You can still make the RPM install to /usr/local by using + the prefix option, e.g.: + rpm -Uvh --prefix=/usr/local sloccount*.rpm + * Made it use %{_prefix} variable, i.e. changing it to install + in /usr/local or /usr is a matter of changing one line + * Use wildcards in %files section, so that you don't have to modify + the specfile when you add new executable + * Mods to make it possible to build the RPM as non-root (i.e. + BuildRoot support, %defattr in %files, PREFIX passed to make install) + +2002-1-2 Jesus M. Gonzalez Barahona <jgb@debian.org> + * Added support for Modula-3 (.m3, .i3). + * ".sc" files are counted as Lisp. + * Modified sloccount to handle EVEN LARGER systems (i.e., + so sloccount will scale even more). + In a few cases, parameters were passed on the command line + and large systems could be so large that the command line was + too long. E.G., Debian GNU/Linux. This caused a large number + of changes to different files to remove these scaleability + limitations. + * All *_count programs now accept "-f filename" and "-f -" options, + where 'filename' is a file with a list of filenames to count. + Internally the "-f" option with a filename is always used, so + that an arbitrarily long list of files can be measured and so + that "ps" will show more status information. + * compute_sloc_lang modified accordingly. + * get_sloc now has a "--stdin" option. + * Some small fixes here and there. + * This closes Debian bug #126503. + +2001-12-28 David A. 
Wheeler <dwheeler, at, dwheeler.com> + * Released sloccount 2.06. + +2001-12-27 David A. Wheeler <dwheeler, at, dwheeler.com> + * Fixed a minor bug in break_filelist, which caused + (in extremely unusual circumstances) a problem when + disambiguating C from C++ files in complicated situations + where this difference was hard to tell. The symptom: When + analyzing some packages (for instance, afterstep-1.6.10 as + packaged in Debian 2.2) you would get the following error: + Use of uninitialized value in pattern match (m//) at + /usr/bin/break_filelist line 962. + This could only happen after many other disambiguating rules + failed to determine if a file was C or C++ code, so the problem + was quite rare. + My thanks to Jesus M. Gonzalez-Barahona (in + Mostoles, Spain) for the patch that fixes this problem. + * Modified man page, explaining the problems of filenames with + newlines, and also noting the problems with directories + beginning with "-" (they might be confused as options). + * Minor improvements to Changelog text, so that the + changes over time were documented more clearly. + * Note that CEPIS "Upgrade" includes a paper that depends + on sloccount. This is "Counting Potatoes: the Size of Debian 2.2" + which counts the size of Debian 2.2 (instead of Red Hat Linux, + which is what I counted). The original release is at: + <http://www.upgrade-cepis.org/issues/2001/6/upgrade-vII-6.html>. + I understand that they'll make some tweaks and + release a revision of the paper on the Debian website. + It's interesting; Debian 2.2 (released in 2000, and + which did NOT have KDE), has 56 million physical SLOC and + would have cost $1.8 billion USD to develop traditionally. + That's more than Red Hat; see <http://www.dwheeler.com/sloc>. + Top languages: C (71.12%), C++ (9.79%), LISP, Shell, Perl, + Fortran, Tcl, Objective-C, Assembler, Ada, and Python in that + order. My thanks to the authors! 
Wheeler <dwheeler, at, dwheeler.com> + * Released sloccount 2.05. + * Added support for detecting and counting PHP code. + This was slightly tricky, because PHP's syntax has a few "gotchas" + like "here document" strings, closing working even in C++ or sh + style comments, and so on. + Note - HTML files (.html, .htm, etc) are not examined for PHP code. + You really shouldn't put a lot of PHP code in HTML documents, because + it's a maintenance problem later anyway. + The tool assigns every file a single type.. which is a problem, + because HTML files could have multiple simultaneous embedded types + (PHP, javascript, and HTML text). If the tool was modified to + assign multiple languages to a single file, I'm not sure how + to handle the file counts (counts of files for each language). + For the moment, I just assign HTML to "html". + * Modified output so that it adds a header before the language list. + +2001-10-23 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released sloccount 2.01 - a minor modification to support + Cygwin users. + * Modified compute_all to make it more portable (== became =); + in particular this should help users using Cygwin. + * Modified documentation to note that, if you install Cygwin, + you HAVE to use Unix newlines (not DOS newlines) for the Cygwin + install. Thanks to Mark Ericson for the bug report & for helping + me track that down. + * Minor cleanups to the ChangeLog. + +2001-08-26 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released sloccount 2.0 - it's getting a new version number because + its internal data format changed. You'll have to re-analyze + your system for the new sloccount to work. + * Improved the heuristics to identify files (esp. .h files) + as C, C++, or objective-C. The code now recognizes + ".H" (as well as ".h") as header files. + The code realizes that ".cpp" files that begin with .\" + or ,\" aren't really C++ files - XFree86 stores many + man pages with these extensions (ugh). 
+ * Added the ability to "--append" analyses. + This means that you can analyze some projects, and then + repeatedly add new projects. sloccount even stores and + recovers md5 checksums, so it even detects duplicates + across the projects (the "first" project gets the duplicate). + * Added the ability to mark a data directory so that it's not + erased (just create a file named "sloc_noerase" in the + data directory). From then on, sloccount won't erase it until + you remove the file. + * Many changes made aren't user-visible. + Completely re-organized break_filelist, which was getting + incredibly baroque. I've improved the sloccount code + so that adding new languages is much simpler; before, it + required a number of changes in different places, which was bad. + * SLOCCount now creates far fewer files, which is important for + analyzing big systems (I was starting to run out of inodes when + analyzing entire GNU/Linux distributions). + Previous versions created stub files in every child directory + for every possible language, even those that weren't used; + since most projects only use a few languages, this was costly in + terms of inodes. Also, the totals for each language for a given + child directory are now in a single file (all-physical.sloc) + instead of being in separate files; this not only reduces inode + counts, but it also greatly simplifies later processing & eliminated + a bug (now, to process all physical SLOC counts in a given child + directory, just process that one file). + +2001-06-22 David A. Wheeler <dwheeler, at, dwheeler.com> + * Per Prabhu Ramachandran's suggestion, recognize ".H" files as + ".h"/".hpp" files (note the upper case). + +2001-06-20 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 1.9. 
This eliminates installation errors + with "sql_count" and "makefile_count", + detects PostgreSQL embedded C (in addition to Oracle and Informix), + improves detection of Pascal code, and includes support for + analyzing licenses (if a directory has the file PROGRAM_LICENSE, + the file's contents are assumed to have the license name for that + top-level program). It eliminates a portability problem, so + hopefully it'll be easier to run it on Unix-like systems. + It _still_ requires the "md5sum" program to run. + +2001-06-14 David A. Wheeler <dwheeler, at, dwheeler.com> + * Changed the logic in make_filelists. + This version doesn't require a "-L" option to test which GNU + programs supported but which others (e.g., Solaris) didn't. + It still doesn't normally follow symlinks. + Not following subordinate symlinks is important for + handling oddities such as pine's build directory + /usr/src/redhat/BUILD/pine4.33/ldap in Red Hat 7.1, which + includes symlinks to directories not actually inside the + package at all (/usr/include and /usr/lib). + * Added display of licenses in the summary form, if license + information is available. + * Added undocumented programs rpm_unpacker and extract_license. + These are not installed at this time, they're just provided as + a useful starting point if someone wants them. + +2001-06-12 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added support for license counting. If the top directory + of a program has a file named "PROGRAM_LICENSE", it's copied to + the .slocdata entry, and it's reported as part of a licensing total. + Note that the file LICENSE is ignored, that's often more complex. + +2001-06-08 David A. Wheeler <dwheeler, at, dwheeler.com> + * Fixed RPM spec file - it accidentally didn't install + makefile_count and sql_count. This would produce spurious + errors and inhibited the option of counting makefiles and SQL. + Also fixed the makefile to include sql_count in the executable list. + +2001-05-16 David A. 
Wheeler <dwheeler, at, dwheeler.com> + * Added support for auto-detecting ".pgc" files, which are + embedded PostgreSQL - they are assumed to be C files (they COULD + be C++ instead; while this will affect categorization it + won't affect final SLOC counts). Also, if there's a ".c" with + a corresponding ".pgc" file, the ".c" file is assumed to be + auto-generated. + * Thus, SLOCCount now supports embedded database commands for + Oracle, Informix, and PostgreSQL. MySQL doesn't use an + "embedded" approach, but uses a library approach that SLOCCount + could already handle. + * Fixed documentation: HTML reserved characters misused, + sql_count undocumented. + + +2001-05-14 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added modifications from Gordon Hart to improve detection + of Pascal source code files. + Pascal files which only have a "unit" in them (not a full program), + or have "interface" or "implementation", + are now detected as Pascal programs. + The original Pascal specification didn't support units, but + there are Pascal programs which use them. This should result in + more accurate counts of Pascal software that uses units. + He also reminded me that Pascal is case-insensitive, spurring a + modification in the detection routines (for those who insist on + uppercase keywords.. a truly UGLY format, but we need to + support it to correctly identify such source code as Pascal). + * Modified the documentation to note that I prefer unified diffs. + I also added a reference to the TODO file, and from here on + I'll post the TODO file separately on my web site. + +2001-05-02 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 1.8. Added several features to support + measuring programs with embedded database commands. + This includes supporting many Oracle & Informix embedded file types + (.pc, .pcc, .pad, .ec, .ecp). 
It also optionally counts + SQL files (.sql) and makefiles (makefile, Makefile, etc.), + though by default they are NOT included in lines-of-code counts. + See the (new) TODO file for limitations on makefile identification. + +2001-04-30 David A. Wheeler <dwheeler, at, dwheeler.com> + * Per suggestion from Gary Myer, added optional "--addlang" option + to add languages not NORMALLY counted. Currently it only + supports "makefile" and "sql". The scheme for detecting + automatically generated makefiles could use improvement. + Normally, makefiles and sql won't be counted in the final reports, + but the front-end will make the calculations and if requested their + values will be provided. + * Added an "SQL" counter and a "makefile" counter. + * Per suggestions from Gary Myer, added detection for files where + database commands (Oracle and Informix) are embedded in the code: + .pc -> Oracle Preprocessed C code + .pcc -> Oracle preprocessed C++ Code + .pad -> Oracle preprocessed Ada Code + .ec -> Informix preprocessed C code + .ecp -> Informix preprocessed C code which calls the C preprocessor + before calling the Informix preprocessor. + Handling ".pc" has heuristics, since many use ".pc" to mean + "stuff about PCs". Certain filenames not counted as C files (e.g., + "makefile.pc" and "README.pc") if they end in ".pc". + Note that if you stick C++ code into .pc files, it's counted as C. + + These embedded files are normal source files of the respective + language, with database commands stuck into them, e.g., + EXEC SQL select FIELD into :variable from TABLE; + which performs a select statement and puts the result into the + variable. The database preprocessor simply reads this file, + and converts all "EXEC SQL" statements into the appropriate calls + and outputs a normal program. + + Currently the "automatically generated" detectors don't detect + this case. For the moment, just make sure the generated files + aren't around while running SLOCCount. 
+ + Currently the following are not handled (future release?): + .pco -> Oracle preprocessed Cobol Code + .pfo -> Oracle preprocessed Fortran Code + I don't have a Cobol counter. The Fortran counter only works + for f77, and I doubt .pfo is limited to that. + + + +2001-04-27 David A. Wheeler <dwheeler, at, dwheeler.com> + * Per suggestions from Gary Myer, + added ".a" and ".so" to the "not" list, since these are + libraries not source, and added the filename "Root" to the + "not" file list ("Root" has special meaning to CVS). + * Added a note about needing "md5sum" (Gary Myer) + * Added a TODO file. If something's on the TODO list that you'd + like, please write the code and send it in. + * Noted that running on Cygwin is MUCH slower than when running + on Linux. Truth in advertising is only fair. + +2001-04-26 David A. Wheeler <dwheeler, at, dwheeler.com> + * Release version 1.6: the big change is support for running on + Windows. Windows users must install Cygwin first. + * Modified makefile so that SLOCCount can run on Windows systems + if "Cygwin" is installed. The basic modifications to do this + were developed by John Clezy -- Thanks!!! I spent time merging + his makefile and mine so that a single makefile could be used on + both Windows and Unix. + * Documented how to install and run SLOCCount on Windows using cygwin. + * Changed default prefix to /usr/local; you can set PREFIX to + change this, e.g., "make PREFIX=/usr". + * When counting a single project, sloccount now also reports + "Estimated average number of developers", which is simply + the person-months divided by months. As with all estimates, take + it with an ocean of salt. This isn't reported for multiproject + queries; properly doing this would require "packing" to compensate + for the fact that small projects complete before large ones if + started simultaneously. + * Improved man page (fixed a typo, etc.). + +2001-01-10 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 1.4. 
This is an "ease of use" release, + greatly simplifying the installation and use of SLOCCount. + The new front-end tool "sloccount" does all the work in one step - + now just type "sloccount DIRECTORY" and it's all counted. + An RPM makes installation trivial for RPM-based systems. + A man page is now available. There are now rules for + "make install" and "make uninstall" too. + Other improvements include a schedule estimator and options + to control the effort and schedule estimators. + +2001-01-07 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added an estimator of schedule as well as effort. + * Added various options to control the effort and + cost estimation: "--effort", "--personcost", "--overhead", + and "--schedule". + Now people can (through options) control the assumptions made + in the effort and cost estimations from the command line. + The output now shows the effort estimation model used. + * Changed the output slightly to pretty it up and note that + it's development EFFORT not TIME that is shown. + * Added a note at bottom asking for credit. I don't ask for any + money, but I'd like some credit if you refer to the data the + tool generates; a gentle reminder in the output seemed like the + easiest way to ask for this credit. + * Created an RPM package; now RPM-based systems can EASILY + install it. It's a relocatable package, so hopefully + "alien" can easily translate it to other formats + (such as Debian's .deb format). + * Created a "man" page for sloccount. + +2001-01-06 David A. Wheeler <dwheeler, at, dwheeler.com> + * Added front-end tool "sloccount", GREATLY improving ease-of-use. + The tool "sloccount" invokes all the other SLOCCount tools + in the right order, performing a count of a typical project + or set of projects. From now on, this is expected to be the + "usual" interface, though the pieces will still be documented + to help those with more unusual needs. 
+ From now on, "SLOCCount" is the entire package, and + "sloccount" is this front-end tool. + * Added "--datadir" option to make_filelists (to support + "sloccount"). + * get_sloc: No longer displays languages with 0 counts. + * Documentation: documented "sloccount"; this caused major changes, + since "sloccount" is now the recommended interface for all but + those with complicated requirements. + * compute_filecount: minor optimization/simplification + +2001-01-05 David A. Wheeler <dwheeler, at, dwheeler.com> + * Released version 1.2. + * Changed the name of many programs, as part of a general clean-up. + I changed "compute_all" to "compute_sloc", and eliminated + most of the other "compute_*" files (replacing it with + "compute_sloc_lang"). I also changed "get_data" to "get_sloc". + This is part of a general clean-up, so that + if someone wants to package this program for installation they + don't have a thousand tiny programs polluting the namespace. + Adding "sloc" to the names makes namespace collisions less likely. + I also worked to make the program simpler. + * Made a number of documentation fixes - my thanks to Clyde Roby + for giving me feedback. + * Changed all "*_count" programs to consistently print at the end + "Total:" on a line by itself, followed on the next line by + the total lines of code all by itself. This makes the new program + get_sloc_detail simpler to implement, and also enables + get_sloc_detail to perform some error detection. + * Changed name of compressed file to ".tar.gz" and modified docs + appropriately. The problem is a bug in Netscape 4.7 clients + running on Windows; it appears that ".tgz" files don't get fully + downloaded from my hosting webserver because no type information + is provided. 
Originally, I tried to change the website to fix this + by creating ".htaccess" files, but that didn't work with either: + AddEncoding x-gzip gz tgz + AddType application/x-tar .tgz + or: + AddEncoding application/octet-stream tgz + So, we'll switch to .tar.gz, which works. + My thanks to Christopher Lott for this feedback. + * Removed a few garbage files. + * Added information to documentation on how to handle HUGE sets + of data directory children, i.e., where you can't even use "*" + to list the data directory children. I don't have a directory + of that kind of scale, so I can't test it directly, + but I can at least discuss how to do it; it SHOULD work. + * Changed makefile so that "ChangeLog" is now visible on the web. + + +2001-01-04 David A. Wheeler <dwheeler, at, dwheeler.com> + * Minor fixes to documentation. + * Added "--crossdups" option to break_filelist. + * Documented count_unknown_ext. + * Created new tool, "get_sloc_detail", and documented it. + Now you can get a complete report of all the SLOC data in one big + file (e.g., for exporting to another tool for analysis). + +2001-01-03 David A. Wheeler <dwheeler, at, dwheeler.com> + * First public release, version "1.0", of "SLOCCount". + Main website: http://www.dwheeler.com/sloccount + diff --git a/PROGRAM_LICENSE b/PROGRAM_LICENSE new file mode 100644 index 0000000..505faa1 --- /dev/null +++ b/PROGRAM_LICENSE @@ -0,0 +1 @@ +GPL @@ -0,0 +1,51 @@ +SLOCCount README +================= + +This directory contains "SLOCCount", a set of the programs for counting +source lines of code (SLOC) in large software systems. +It was developed by David A. Wheeler (dwheeler@dwheeler.com), +originally to count SLOC in a Linux (GNU/Linux) system, but it can be +used for counting other software systems. + +Copyright (C) 2001-2004 David A. Wheeler. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + +For more information, including installation instructions and license +information, see the HTML file "sloccount.html". +The file COPYING includes the license, the standard GNU GPL version 2 license. +Even better, see the website http://www.dwheeler.com/sloccount. + +Some of the files contained in this directory aren't normally used - +instead, they're scrap special-purpose files I used when I created these +programs. Feel free to use them, but if you're packaging this program, +be selective - don't install in /usr/bin every executable here! +Just install the ones documented, plus the executables they depend on. + +Packagers: I've developed an RPM spec file and RPM, so RPM systems can +just load-and-go. If you use a different package format, you may still +find the spec file helpful for identifying what to load, and the "alien" +program can apparently translate the RPM file to Debian '.deb' format +without any problem. + +Debian already includes a SLOCCount package. +SLOCCount 2.08 is available in Debian 3.0. 
+For more info on the status of SLOCCount in Debian, see +http://packages.debian.org/cgi-bin/search_packages.pl?keywords=sloccount&searchon=names&subword=1&version=all&release=all + + @@ -0,0 +1,29 @@ + +"CodeCount" toolset for counting SLOC. + http://sunset.usc.edu/research/CODECOUNT/ +Covers C/C++/Ada/Java and a few others, but +NOT Python, TCL, Perl, or LISP. +I used this to count C, C++, and Java code. +These tools are under a GPL-like license, but it's NOT the GPL. +See their website for more info. + +A huge list of tools is at (mostly big $$): + http://www.concentricmc.com/toolsreport/5-3-2tools1.html +which extracts from this 1995 paper from STSC: + http://www.stsc.hill.af.mil/CrossTalk/1995/apr/Metrics.asp + +Another list: + http://www.qucis.queensu.ca/Software-Engineering/toolcat.html#label181 + +USC tools (inc. CodeCount) and info on COCOMO II is at: + http://sunset.usc.edu/available_tools/availabletools_main.html + + +Software Metrics: An Analysis of the Evolution of COCOMO and Function Points +Roger E. Masse +University of Maryland +July 8, 1997 +http://www.python.org/~rmasse/papers/software-metrics/ +(good overview paper) + + @@ -0,0 +1,161 @@ +TODO List: + + +As with all open source projects... if you want something strongly +enough, then please (1) code it and submit it, or (2) pay me to add it. +You have the source, you have the power - use it. Or has been said for years: + + Use the Source, Luke. + +I _do_ listen to user requests, but I cannot do everything myself. +I've released this program under the GPL _specifically_ so that others +will help debug and extend it. + + + +Obviously, a general "TODO" is adding support for other computer languages; +here are languages I'd like to add support for specifically: ++ Eiffel. ++ Sather (much like Eiffel). ++ CORBA IDL. ++ Forth. Comments can start with "\" (backslash) and continue to end-of-line, + or be surrounded by parens. In both cases, they must be on word + bounds-- .( is not a comment! 
Variable names often begin with "\"! + For example: + : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers. + \ Pronounced: two-dupe. + over over ; + Strings begin with " (doublequote) or p" (p doublequote, for + packed strings), and these must be separate words + (e.g., followed by a whitespace). They end with a matching ". + Also, the ." word begins a string that ends in " (this word immediately + prints it the given string). + Note that "copy is a perfectly legitimate Forth word, and does NOT + start a string. + Forth sources can be stored as blocks, or as more conventional text. + Any way to detect them? + See http://www.forth.org/dpans/dpans.html for syntax definition. + See also http://www.taygeta.com/forth_style.html + and http://www.forth.org/fig.html ++ Create a "javascript" category. ".js" extention, "js" type. + (see below for a discussion of the issues with embedded scripts) ++ .pco -> Oracle preprocessed Cobol Code ++ .pfo -> Oracle preprocessed Fortran Code ++ PL/1. ++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc. ++ Improve Ocamlyacc, comments in yacc part are C-like, but I'm not sure + about comment nesting. + + For more language examples, see the ACM "Hello World" project, which tries + to collect "Hello World" in every computer language. It's at: + http://www2.latech.edu/~acm/HelloWorld.shtml + + + +Here are other TODOs: + + +* A big one is to add support for logical SLOC, at least for C/C++. + Then add support for COCOMO II. Even partial support would be great + (e.g., not all languages)... other languages could be displayed as + "UNK" (unknown) and be considered 0. + Add options to allow display of only one, + or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book. + +* In general, modify the program so that it ports more easily. Currently, + it assumes a Unix-like system (esp. in the shell programs), and it requires + md5sum as a separate executable. 
+ There are probably some other nonportable constructs, in particular + for non-Unix systems (e.g., symlink handling and file/dirnames). + +* Rewrite Bourne shell code to either Perl or Python (prob. Python), and + make the call to md5sum optional. That way, the program + could run on Windows without Cygwin. + +* Improve the heuristics for detecting language type. + They're actually pretty good already. + +* Clean up the program. This was originally written as a one-off program + that wouldn't be used again (or distributed!), and it shows. + + The heuristics used to detect language type should + be made more modular, so it could be reused in other programs, and + so you don't HAVE to write out a list of filenames first if you + don't want to. + +* Consider rewriting everything not in C into Python. Perl is + a write-only language, and it's absurdly hard to read Perl code later. + I find Python code much cleaner. And shell isn't as portable. + + One reason I didn't rewrite it in Python is that I had concerns about + Python's licensing issues; Python versions 1.6 and up have questionable + compatibility with the GPL. Thankfully, the Free Software Foundation (FSF) + and the Python developers have worked together, and the Python + developers have fixed the license for version 2.0.1 and up. + Joy!! I'm VERY happy about this! + +* Improve the speed, primarily to support analysis of massive amounts + of data. There's a generic routine in Perl; switching that + to C would probably help. Perhaps rewriting many of the counters + using flex would speed things up, simplify maintenance, and make + supporting logical SLOC easier. + +* Handle scripts embedded in data. + Perhaps create a category, "only the code embedded in HTML" + (e.g., Javascript scripts, PHP statements, etc.). + This is currently complicated - the whole program assumes that a file + can be assigned a specific type, and HTML (etc.) might have multiple + languages embedded in it. 
+ +* Are any CGI files (.cgi) unhandled? Are files unidentified? + +* Improve makefile identification and counting. + Currently the program does not identify as makefiles "Imakefile" + (generated by xmkmf and processed by imake, used by MIT X server) + nor automake/autoconf files (Makefile.am/Makefile.in). + Need to handle ".rules" too. + + I didn't just add these files to the "makefile" list, because + I have concerns about processing them correctly using the + makefile counter. Since most people won't count makefiles anyway, + this isn't an issue for most. I welcome patches to change this, + _IF_ you ensure that the resulting counts are correct. + + The current version is sufficient for handling programs who have + ordinary makefiles that are to be included in the SLOC count when + they enable the option to count makefiles. + + Currently the makefiles count "all non-blank lines"; conceivably + someone might want to count only the actual directives, not the + conditions under which they fire. + +* Improve the flexibility in symlink handling; see "make_filelists". + It should be rewritten. Some systems don't allow + "test"ing for symlinks, which was a portability problem - that problem + at least has been removed. + +* I've added a few utilities that I use for counting whole Linux systems + to the tar file, but they're not installed by the RPM and they're not + documented. + +* More testing! COBOL in particular is undertested. + +* Modify the code, esp. sloccount, to handle systems so large that + the data directory list can't be expanded using "*". + This would involve using "xargs" in sloccount, maybe getting rid + of the separate filelist creation, and having break_filelist + call compute_all directly (break_filelist needs to run all the time, + or its reloading of hashes during initialization would become the + bottleneck). Some of this work has already been done. + +* Perl variation support. 
+ The code says: + open(FH, "-|", "md5sum", $filename) or return undef; + but this doesn't work on some Perls. + This could be changed to: + open(FH, "-|", "md5sum $filename") or return undef; + But I dare not fix it that way; + imagine a file named "; rm -fr /*" and variations. + + + diff --git a/TODO.orig b/TODO.orig new file mode 100644 index 0000000..86c0c20 --- /dev/null +++ b/TODO.orig @@ -0,0 +1,153 @@ +TODO List: + + +As with all open source projects... if you want something strongly +enough, then please (1) code it and submit it, or (2) pay me to add it. +You have the source, you have the power - use it. Or has been said for years: + + Use the Source, Luke. + +I _do_ listen to user requests, but I cannot do everything myself. +I've released this program under the GPL _specifically_ so that others +will help debug and extend it. + + + +Obviously, a general "TODO" is adding support for other computer languages; +here are languages I'd like to add support for specifically: ++ Eiffel. ++ Sather (much like Eiffel). ++ CORBA IDL. ++ Forth. Comments can start with "\" (backslash) and continue to end-of-line, + or be surrounded by parens. In both cases, they must be on word + bounds-- .( is not a comment! Variable names often begin with "\"! + For example: + : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers. + \ Pronounced: two-dupe. + over over ; + Strings begin with " (doublequote) or p" (p doublequote, for + packed strings), and these must be separate words + (e.g., followed by a whitespace). They end with a matching ". + Also, the ." word begins a string that ends in " (this word immediately + prints it the given string). + Note that "copy is a perfectly legitimate Forth word, and does NOT + start a string. + Forth sources can be stored as blocks, or as more conventional text. + Any way to detect them? + See http://www.forth.org/dpans/dpans.html for syntax definition. 
+ See also http://www.taygeta.com/forth_style.html + and http://www.forth.org/fig.html ++ Create a "javascript" category. ".js" extention, "js" type. + (see below for a discussion of the issues with embedded scripts) ++ .pco -> Oracle preprocessed Cobol Code ++ .pfo -> Oracle preprocessed Fortran Code ++ Fortran beyond Fortran 77 (.f90). ++ PL/1. ++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc. ++ Improve ML/CAML. It uses Pascal-style comments (*..*), + double-quoted C-like strings "\n...", and .ml or .mli file extensions + (.mli is an interface file for CAML). + + For more language examples, see the ACM "Hello World" project, which tries + to collect "Hello World" in every computer language. It's at: + http://www2.latech.edu/~acm/HelloWorld.shtml + + + +Here are other TODOs: + + +* A big one is to add support for logical SLOC, at least for C/C++. + Then add support for COCOMO II. Even partial support would be great + (e.g., not all languages)... other languages could be displayed as + "UNK" (unknown) and be considered 0. + Add options to allow display of only one, + or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book. + +* In general, modify the program so that it ports more easily. Currently, + it assumes a Unix-like system (esp. in the shell programs), and it requires + md5sum as a separate executable. + There are probably some other nonportable constructs, in particular + for non-Unix systems (e.g., symlink handling and file/dirnames). + +* Rewrite Bourne shell code to either Perl or Python (prob. Python), and + make the call to md5sum optional. That way, the program + could run on Windows without Cygwin. + +* Improve the heuristics for detecting language type. + They're actually pretty good already. + +* Clean up the program. This was originally written as a one-off program + that wouldn't be used again (or distributed!), and it shows. 
+ + The heuristics used to detect language type should + be made more modular, so it could be reused in other programs, and + so you don't HAVE to write out a list of filenames first if you + don't want to. + +* Consider rewriting everything not in C into Python. Perl is + a write-only language, and it's absurdly hard to read Perl code later. + I find Python code much cleaner. And shell isn't as portable. + + One reason I didn't rewrite it in Python is that I had concerns about + Python's licensing issues; Python versions 1.6 and up have questionable + compatibility with the GPL. Thankfully, the Free Software Foundation (FSF) + and the Python developers have worked together, and the Python + developers have fixed the license for version 2.0.1 and up. + Joy!! I'm VERY happy about this! + +* Improve the speed, primarily to support analysis of massive amounts + of data. There's a generic routine in Perl; switching that + to C would probably help. Perhaps rewriting many of the counters + using flex would speed things up, simplify maintenance, and make + supporting logical SLOC easier. + +* Handle scripts embedded in data. + Perhaps create a category, "only the code embedded in HTML" + (e.g., Javascript scripts, PHP statements, etc.). + This is currently complicated - the whole program assumes that a file + can be assigned a specific type, and HTML (etc.) might have multiple + languages embedded in it. + +* Are any CGI files (.cgi) unhandled? Are files unidentified? + +* Improve makefile identification and counting. + Currently the program does not identify as makefiles "Imakefile" + (generated by xmkmf and processed by imake, used by MIT X server) + nor automake/autoconf files (Makefile.am/Makefile.in). + Need to handle ".rules" too. + + I didn't just add these files to the "makefile" list, because + I have concerns about processing them correctly using the + makefile counter. Since most people won't count makefiles anyway, + this isn't an issue for most. 
I welcome patches to change this, + _IF_ you ensure that the resulting counts are correct. + + The current version is sufficient for handling programs who have + ordinary makefiles that are to be included in the SLOC count when + they enable the option to count makefiles. + + Currently the makefiles count "all non-blank lines"; conceivably + someone might want to count only the actual directives, not the + conditions under which they fire. + +* Improve the flexibility in symlink handling; see "make_filelists". + It should be rewritten. Some systems don't allow + "test"ing for symlinks, which was a portability problem - that problem + at least has been removed. + +* I've added a few utilities that I use for counting whole Linux systems + to the tar file, but they're not installed by the RPM and they're not + documented. + +* More testing! COBOL in particular is undertested. + +* Modify the code, esp. sloccount, to handle systems so large that + the data directory list can't be expanded using "*". + This would involve using "xargs" in sloccount, maybe getting rid + of the separate filelist creation, and having break_filelist + call compute_all directly (break_filelist needs to run all the time, + or its reloading of hashes during initialization would become the + bottleneck). Some of this work has already been done. + + diff --git a/ada_count b/ada_count new file mode 100755 index 0000000..3204f56 --- /dev/null +++ b/ada_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '--' "$@" + diff --git a/append_license b/append_license new file mode 100755 index 0000000..4cea6d5 --- /dev/null +++ b/append_license @@ -0,0 +1,62 @@ +#!/usr/bin/perl -w + +# Given a 3-column list "sloc build-directory-name spec-filename", +# output a 4-column list which appends the license. +# You'll need to fix this up afterwards. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# +$specdir = "/usr/src/redhat/SPECS"; +$garbage = ""; + +while (<>) { + ($sloc, $buildname, $specname, $garbage) = split; + chomp($specname); + print "$sloc $buildname $specname "; + + if (! (-f "$specdir/$specname")) { + die "ERROR. Cound not find spec file $specname\n"; + } + + + # Get "Copyright:" or "License:" + $license = ""; + $summary = ""; + open(SPECFILE, "<$specdir/$specname") || die "Can't open $specname\n"; + while (<SPECFILE>) { + # print; + if (m/^Summary\:(.*)/i) { $summary = $1; } + if (m/^License\:(.*)/i) { $license = $1; } + if ((! $license) && (m/^Copyright\:(.*)/i)) { $license = $1; } + } + close(SPECFILE); + + if ($license) {print "$license";} + else {print "?";} + + # print "\t"; + # print $summary; + + print "\n"; + +} diff --git a/append_specname b/append_specname new file mode 100755 index 0000000..9b8e97c --- /dev/null +++ b/append_specname @@ -0,0 +1,57 @@ +#!/usr/bin/perl -w + +# Given a 2-column list "sloc build-directory-name", +# output a 3-column list which appends the name of the spec file. +# You'll need to fix this up afterwards. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. 
Wheeler, see his website at: +# http://www.dwheeler.com. +# +# +$specdir = "/usr/src/redhat/SPECS"; +$garbage = ""; + +while (<>) { + ($sloc, $buildname, $garbage) = split; + chomp($buildname); + print "$sloc $buildname "; + + $unversioned = $buildname; + $unversioned =~ s/-[^\-]*$//; + + $reallyshort = $buildname; + $reallyshort =~ s/[0-9\.\-_]*$//; + + + if (-f "$specdir/${buildname}.spec") {print "$buildname";} + elsif (-f "$specdir/${buildname}.spec.alpha") {print "${buildname}.alpha";} + elsif (-f "$specdir/${buildname}.spec.sparc") {print "${buildname}.sparc";} + elsif (-f "$specdir/${unversioned}.spec") {print "$unversioned";} + elsif (-f "$specdir/${unversioned}.spec.alpha") {print "${unversioned}.alpha";} + elsif (-f "$specdir/${unversioned}.spec.sparc") {print "${unversioned}.sparc";} + elsif (-f "$specdir/${reallyshort}.spec") {print "$reallyshort";} + elsif (-f "$specdir/${reallyshort}.spec.alpha") {print "${reallyshort}.alpha";} + elsif (-f "$specdir/${reallyshort}.spec.sparc") {print "${reallyshort}.sparc";} + else {print "?";} + + print "\n"; + +} diff --git a/asm_count b/asm_count new file mode 100755 index 0000000..d7ad0b1 --- /dev/null +++ b/asm_count @@ -0,0 +1,166 @@ +#!/usr/bin/perl -w +# asm_count - count physical lines of code in Assembly programs. +# Usage: asm_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both +# This is a trivial/naive program. + +# For each file, it looks at the contents to heuristically determine +# if C comments are permitted and what the "comment" character is. +# If /* and */ are in the file, then C comments are permitted. 
+# The punctuation mark that starts the most lines must be the comment +# character (but ignoring "/" if C comments are allowed, and +# ignoring '#' if cpp commands appear to be used) + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + # First, use heuristics to determine the comment char and if it uses C comments + $found_c_start = 0; + $found_c_end = 0; + $cpp_suspicious = 0; + $cpp_likely = 0; + $cpp_used = 0; + %count = (); + if ($file eq "") { + *CURRENTFILE = *STDIN + } else { + open(CURRENTFILE, "<$file"); + } + while (<CURRENTFILE>) { + if (m!\/\*!) { $found_c_start++;} + if (m!\*\/!) { $found_c_end++;} + if ( (m!^#\s*define\s!) || (m!^#\s*else!)) {$cpp_suspicious++;} + if ( (m!^#\s*ifdef\s!) || (m!^#\s*endif!) || (m!#\s*include!)) {$cpp_likely++;} + if (m/^\s*([;!\/#\@\|\*])/) { $count{$1}++; } # Found a likely comment char. + } + # Done examing file, let's figure out the parameters. + if ($found_c_start && $found_c_end) { + $ccomments = 1; + $count{'/'} = 0; + # $count{'*'} = 0; # Do this to ignore '*' if C comments are used. + } else { + $ccomments = 0; + } + if (($cpp_suspicious > 2) || ($cpp_likely >= 1)) { + $cpp_used = 1; + $count{'#'} = 0; + } else { + $cpp_used = 0; + } + $likeliest = ';'; + $likeliest_count = 0; + foreach $i (keys(%count)) { + # print "DEBUG: key=$i count=$count{$i}\n"; + if ($count{$i} > $likeliest_count) { + $likeliest = $i; + $likeliest_count = $count{$i}; + } + } + # print "DEBUG: likeliest = $likeliest\n"; + $commentchar=$likeliest; + close(CURRENTFILE); + + # Now count SLOC. 
+ $sloc = 0; + $isincomment = 0; + open(CURRENTFILE, "<$file"); + while (<CURRENTFILE>) { + # We handle C comments first, so that if an EOL-comment + # occurs inside a C comment, it's ignored. + if ($ccomments) { + # Handle C /* */ comments; this will get fooled if they're in strings, + # but that would be rare in assembly. + while ( (m!\/\*!) || (m!\*\/!)) { # While unprocessed C comment. + if ($isincomment) { + s!.*?\*\/.*!!; + $isincomment = 0; + } else { # Not in C comment, but have end comment marker. + if (! m/\/\*/) { # Whups, there's no starting marker! + print STDERR "Warning: file $file line $. has unmatched comment end\n"; + # Get us back to a plausible state: + s/.*//; # Destroy everything + $isincomment = 0; + } else { + if (! s!\/\*.*?\*\/!!) { # Try to delete whole comment. + # We couldn't delete whole comment. Delete what's there. + s!\/\*.*!!; + $isincomment = 1; + } + } + } + } + } # End of handling C comments. + # This requires $[ be unchanged. + $locate_comment = index($_, $commentchar); + if ($locate_comment >= 0) { # We found a comment character, delete comment + $_ = substr($_, 0, $locate_comment); + # print "DEBUG New text: @",$_,"@\n"; + } + # old: s/${commentchar}.*//; # Delete leading comments. + + # FOR DEBUG: print "Finally isincomment=$isincomment line=$_\n"; + if ((! $isincomment) && (m/\S/)) {$sloc++;} + } + + # End-of-file processing + print "$sloc (commentchar=$commentchar C-comments=$ccomments) $file\n"; + $total_sloc += $sloc; + $sloc = 0; + if ($isincomment) { + print STDERR "Missing comment close in file $file\n"; + } +} diff --git a/awk_count b/awk_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/awk_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/break_filelist b/break_filelist new file mode 100755 index 0000000..7df41ab --- /dev/null +++ b/break_filelist @@ -0,0 +1,1308 @@ +#!/usr/bin/perl -w + +# break_filelist +# Take a list of dirs which contain a "filelist"; +# creates files in each directory identifying which are C, C++, Perl, etc. +# For example, "ansic.dat" lists all ANSI C files contained in filelist. +# Note: ".h" files are ambiguous (they could be C or C++); the program +# uses heuristics to determine this. +# The list of .h files is also contained in h_list.dat. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + +# If adding a new language: add the logic to open the file, +# close the file, and detect & write to the file listing that language. + +# Debatable decisions: +# Doesn't count .dsl files (stylesheets, which are partially LISP). +# Doesn't count .sql files (SQL queries & commands) + +# Note - I don't try to distinguish between TCL and [incr TCL] (itcl), +# an OO extended version of TCL. For our purposes, it's all TCL. + + +use FileHandle; + + +# Set default configuration: + +$duplicates_okay = 0; # Set to 1 if you want to count file duplicates. +$crossdups_okay = 0; # Set to 1 if duplicates okay in different filelists. +$autogen_okay = 0; # Set to 1 if you want to count autogen'ed files. +$noisy = 0; # Set to 1 if you want noisy reports. +%lang_list_files = (); + +# The following extensions are NOT code: +%not_code_extensions = ( + "html" => 1, + "in" => 1, # Debatable. + "xpm" => 1, + "po" => 1, + "am" => 1, # Debatable. + "1" => 1, # Man pages (documentation): + "2" => 1, + "3" => 1, + "4" => 1, + "5" => 1, + "6" => 1, + "7" => 1, + "8" => 1, + "9" => 1, + "n" => 1, + "gif" => 1, + "tfm" => 1, + "png" => 1, + "m4" => 1, # Debatable. + "bdf" => 1, + "sgml" => 1, + "mf" => 1, + "txt" => 1, "text" => 1, + "man" => 1, + "xbm" => 1, + "Tag" => 1, + "sgm" => 1, + "vf" => 1, + "tex" => 1, + "elc" => 1, + "gz" => 1, + "dic" => 1, + "pfb" => 1, + "fig" => 1, + "afm" => 1, # font metrics + "jpg" => 1, + "bmp" => 1, + "htm" => 1, + "kdelnk" => 1, + "desktop" => 1, + "pbm" => 1, + "pdf" => 1, + "ps" => 1, # Postscript is _USUALLY_ generated automatically. 
+  "eps" => 1,
+  "doc" => 1,
+  "man" => 1,
+  "o" => 1,  # Object code is generated from source code.
+  "a" => 1,  # Static object code.
+  "so" => 1, # Dynamically-loaded object code.
+  "Y" => 1,  # file compressed with "Yabba"
+  "Z" => 1,  # file compressed with "compress"
+  "ad" => 1,  # X application default resource file.
+  "arc" => 1, # arc(1) archive
+  "arj" => 1, # arj(1) archive
+  "au" => 1,   # Audio sound file.
+  "wav" => 1,  # WAV audio file.
+  "bak" => 1,  # Backup files - we only want to count the "real" files.
+  "bz2" => 1, # bzip2(1) compressed file
+  "mp3" => 1, # MP3 compressed audio
+  "tgz" => 1, # tarball
+  "zip" => 1, # zip archive
+);
+
+# The following filenames are NOT code:
+%not_code_filenames = (
+ "README" => 1,
+ "Readme" => 1,
+ "readme" => 1,
+ "README.tk" => 1, # used in kdemultimedia, it's confusing.
+ "Changelog" => 1,
+ "ChangeLog" => 1,
+ "Repository" => 1,
+ "CHANGES" => 1,
+ "Changes" => 1,
+ ".cvsignore" => 1,
+ "Root" => 1, # CVS.
+ "BUGS" => 1,
+ "TODO" => 1,
+ "COPYING" => 1,
+ "MAINTAINERS" => 1,
+ "Entries" => 1,
+ # Skip "iconfig.h" files; they're used in Imakefiles
+ # (used in xlockmore):
+ "iconfig.h" => 1,
+);
+
+
+# A filename ending in the following extensions usually maps to the
+# given language:
+
+# TODO: See suffixes(7)
+# .al  Perl autoload file
+# .am  automake input
+
+%file_extensions = (
+"c" => "ansic",
+  "ec" => "ansic",  # Informix C.
+  "ecp" => "ansic", # Informix C.
+  "pgc" => "ansic", # Postgres embedded C/C++ (guess C)
+  "C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp",
+  "pcc" => "cpp",   # Input to Oracle C++ preproc.
+  "m" => "objc",
+  # C# (C-sharp) is named 'cs', not 'c#', because
+  # the '#' is a comment character and I'm trying to
+  # avoid bug-prone conventions.
+  # C# doesn't support header files.
+  "cs" => "cs",
+  # Header files are allocated to the "h" language, and then
+  # copied to the correct location later so that C/C++/Objective-C
+  # can be separated.
+ "h" => "h", "H" => "h", "hpp" => "h", "hh" => "h", + "ada" => "ada", "adb" => "ada", "ads" => "ada", + "pad" => "ada", # Oracle Ada preprocessor. + "f" => "fortran", "F" => "fortran", # This catches "wokka.F" as Fortran. + # Warning: "Freeze" format also uses .f. Haven't heard of problems, + # freeze is extremely rare and even more rare in source code directories. + "f77" => "fortran", "F77" => "fortran", + "f90" => "f90", "F90" => "f90", + "cob" => "cobol", "cbl" => "cobol", + "COB" => "cobol", "CBL" => "cobol", # Yes, people do create wokka.CBL files + "p" => "pascal", "pas" => "pascal", "pp" => "pascal", "dpr" => "pascal", + "py" => "python", + "s" => "asm", "S" => "asm", "asm" => "asm", + "sh" => "sh", "bash" => "sh", + "csh" => "csh", "tcsh" => "csh", + "java" => "java", + "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "sc" => "lisp", + "lsp" => "lisp", "cl" => "lisp", + "jl" => "lisp", + "tcl" => "tcl", "tk" => "tcl", "itk" => "tcl", + "exp" => "exp", + "pl" => "perl", "pm" => "perl", "perl" => "perl", "ph" => "perl", + "awk" => "awk", + "sed" => "sed", + "y" => "yacc", + "l" => "lex", + "makefile" => "makefile", + "sql" => "sql", + "php" => "php", "php3" => "php", "php4" => "php", "php5" => "php", + "php6" => "php", + "inc" => "inc", # inc MAY be PHP - we'll handle it specially. + "m3" => "modula3", "i3" => "modula3", + "mg" => "modula3", "ig" => "modula3", + "ml" => "ml", "mli" => "ml", + "mly" => "ml", # ocamlyacc. In fact this is half-yacc half-ML, especially + # comments in yacc part are C-like, not ML like. + "mll" => "ml", # ocamllex, no such problems as in ocamlyacc + "rb" => "ruby", + "hs" => "haskell", "lhs" => "haskell", + # ???: .pco is Oracle Cobol + "jsp" => "jsp", # Java server pages +); + + +# GLOBAL VARIABLES + +$dup_count = 0; + +$warning_from_first_line = ""; + +%examined_directories = (); # Keys = Names of directories examined this run. + +$duplistfile = ""; + +########### + + +# Handle re-opening individual CODE_FILEs. 
+# CODE_FILE is public + +# Private value: +$opened_file_name = ""; + +sub reopen { + # Open file if it isn't already, else rewind. + # If filename is "", close any open file. + my $filename = shift; + chomp($filename); + # print("DEBUG: reopen($filename)\n"); + if ($filename eq "") { + if ($opened_file_name) {close(CODE_FILE);} + $opened_file_name = ""; + return; + } + if ($filename eq $opened_file_name) { + seek CODE_FILE, 0, 0; # Rewind. + } else { # We're opening a new file. + if ($opened_file_name) {close(CODE_FILE)} + open(CODE_FILE, "<$filename\0") || die "Can't open $filename"; + $opened_file_name = $filename; + } +} + +########### + +sub looks_like_cpp { + # returns a confidence level - does the file looks like it's C++? + my $filename = shift; + my $confidence = 0; + chomp($filename); + open( SUSPECT, "<$filename"); + while (defined($_ = <SUSPECT>)) { + if (m/^\s*class\b.*\{/) { # "}" + close(SUSPECT); + return 2; + } + if (m/^\s*class\b/) { + $confidence = 1; + } + } + close(SUSPECT); + return $confidence; +} + + +# Cache which files are objective-C or not. +# Key is the full file pathname; value is 1 if objective-C (else 0). +%objective_c_files = (); + +sub really_is_objc { +# Given filename, returns TRUE if its contents really are objective-C. + my $filename = shift; + chomp($filename); + + my $is_objc = 0; # Value to determine. + my $brace_lines = 0; # Lines that begin/end with curly braces. + my $plus_minus = 0; # Lines that begin with + or -. + my $word_main = 0; # Did we find "main("? + my $special = 0; # Did we find a special Objective-C pattern? + + # Return cached result, if available: + if ($objective_c_files{$filename}) { return $objective_c_files{$filename};} + + open(OBJC_FILE, "<$filename") || + die "Can't open $filename to determine if it's objective C.\n"; + while(<OBJC_FILE>) { + + if (m/^\s*[{}]/ || m/[{}];?\s*$/) { $brace_lines++;} + if (m/^\s*[+-]/) {$plus_minus++;} + if (m/\bmain\s*\(/) {$word_main++;} # "main" followed by "("? 
+ # Handle /usr/src/redhat/BUILD/egcs-1.1.2/gcc/objc/linking.m: + if (m/^\s*\[object name\];\s*$/i) {$special=1;} + } + close(OBJC_FILE); + + if (($brace_lines > 1) && (($plus_minus > 1) || $word_main || $special)) + {$is_objc = 1;} + + $objective_c_files{$filename} = $is_objc; # Store result in cache. + + return $is_objc; +} + + +# Cache which files are lex or not. +# Key is the full file pathname; value is 1 if lex (else 0). +%lex_files = (); + +sub really_is_lex { +# Given filename, returns TRUE if its contents really is lex. +# lex file must have "%%", "%{", and "%}". +# In theory, a lex file doesn't need "%{" and "%}", but in practice +# they all have them, and requiring them avoid mislabeling a +# non-lexfile as a lex file. + + my $filename = shift; + chomp($filename); + + my $is_lex = 0; # Value to determine. + my $percent_percent = 0; + my $percent_opencurly = 0; + my $percent_closecurly = 0; + + # Return cached result, if available: + if ($lex_files{$filename}) { return $lex_files{$filename};} + + open(LEX_FILE, "<$filename") || + die "Can't open $filename to determine if it's lex.\n"; + while(<LEX_FILE>) { + $percent_percent++ if (m/^\s*\%\%/); + $percent_opencurly++ if (m/^\s*\%\{/); + $percent_closecurly++ if (m/^\s*\%\}/); + } + close(LEX_FILE); + + if ($percent_percent && $percent_opencurly && $percent_closecurly) + {$is_lex = 1;} + + $lex_files{$filename} = $is_lex; # Store result in cache. + + return $is_lex; +} + + +# Cache which files are expect or not. +# Key is the full file pathname; value is 1 if it is (else 0). +%expect_files = (); + +sub really_is_expect { +# Given filename, returns TRUE if its contents really are Expect. +# Many "exp" files (such as in Apache and Mesa) are just "export" data, +# summarizing something else # (e.g., its interface). +# Sometimes (like in RPM) it's just misc. data. +# Thus, we need to look at the file to determine +# if it's really an "expect" file. 
+
+  my $filename = shift;
+  chomp($filename);
+
+# The heuristic is as follows: it's Expect _IF_ it:
+# 1. has "load_lib" command and either "#" comments or {}.
+# 2. {, }, and one of: proc, if, [...], expect
+
+  my $is_expect = 0;      # Value to determine.
+
+  my $begin_brace = 0;  # Lines that begin/end with an opening curly brace.
+  my $end_brace = 0;    # Lines that begin/end with a closing curly brace.
+  my $load_lib = 0;     # Lines with the load_lib command.
+  my $found_proc = 0;
+  my $found_if = 0;
+  my $found_brackets = 0;
+  my $found_expect = 0;
+  my $found_pound = 0;
+
+  # Return cached result, if available.
+  # BUGFIX: this used to say "return expect_files{$filename};" (missing the
+  # leading "$"), which isn't a hash lookup at all -- on a cache hit it
+  # tried to call a nonexistent subroutine.  Return the cached value.
+  if ($expect_files{$filename}) { return $expect_files{$filename};}
+
+  open(EXPECT_FILE, "<$filename") ||
+       die "Can't open $filename to determine if it's expect.\n";
+  while(<EXPECT_FILE>) {
+
+    # Strip "#" comments before looking for the other patterns.
+    if (m/#/) {$found_pound++; s/#.*//;}
+    if (m/^\s*\{/) { $begin_brace++;}
+    if (m/\{\s*$/) { $begin_brace++;}
+    if (m/^\s*\}/) { $end_brace++;}
+    if (m/\};?\s*$/) { $end_brace++;}
+    if (m/^\s*load_lib\s+\S/) { $load_lib++;}
+    if (m/^\s*proc\s/) { $found_proc++;}
+    if (m/^\s*if\s/) { $found_if++;}
+    if (m/\[.*\]/) { $found_brackets++;}
+    if (m/^\s*expect\s/) { $found_expect++;}
+  }
+  close(EXPECT_FILE);
+
+  if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
+     {$is_expect = 1;}
+  if ( $begin_brace && $end_brace &&
+       ($found_proc || $found_if || $found_brackets || $found_expect))
+     {$is_expect = 1;}
+
+  $expect_files{$filename} = $is_expect;  # Store result in cache.
+
+  return $is_expect;
+}
+
+
+# Cached values.
+%pascal_files = ();
+
+sub really_is_pascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+
+# This isn't as obvious as it seems.
+# Many ".p" files are Perl files
+# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
+# others are C extractions
+# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
+# and some files in linuxconf).
+# However, test files in "p2c" really are Pascal, for example.
+ +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. 
+
+  my $has_program = 0;
+  my $has_unit = 0;
+  my $has_module = 0;
+  my $has_procedure_or_function = 0;
+  # BUGFIX: this was declared "my $found_begin = 0;", but the loop below
+  # sets and tests $has_begin -- an undeclared global -- so a "begin" seen
+  # in an earlier file leaked into later calls.  Declare the variable that
+  # is actually used.
+  my $has_begin = 0;
+  my $found_terminating_end = 0;
+
+  # Return cached result, if available.
+  # BUGFIX: this used to say "return pascal_files{$filename};" (missing the
+  # leading "$"); return the cached hash value instead.
+  if ($pascal_files{$filename}) { return $pascal_files{$filename};}
+
+  open(PASCAL_FILE, "<$filename") ||
+       die "Can't open $filename to determine if it's pascal.\n";
+  while(<PASCAL_FILE>) {
+    s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
+    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
+    if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
+    if (m/\bunit\s+[A-Za-z]/i)  {$has_unit=1;}
+    if (m/\bmodule\s+[A-Za-z]/i)  {$has_module=1;}
+    if (m/\bprocedure\b/i)  { $has_procedure_or_function = 1; }
+    if (m/\bfunction\b/i)  { $has_procedure_or_function = 1; }
+    if (m/^\s*interface\s+/i)  { $has_procedure_or_function = 1; }
+    if (m/^\s*implementation\s+/i)  { $has_procedure_or_function = 1; }
+    if (m/\bbegin\b/i) { $has_begin = 1; }
+    # Originally I said:
+    # "This heuristic fails if there are multi-line comments after
+    # "end."; I haven't seen that in real Pascal programs:"
+    # But jgb found there are a good quantity of them in Debian, specially in
+    # fpc (at the end of a lot of files there is a multiline comment
+    # with the changelog for the file).
+    # Therefore, assume Pascal if "end." appears anywhere in the file.
+    if (m/end\.\s*$/i) {$found_terminating_end = 1;}
+#    elsif (m/\S/) {$found_terminating_end = 0;}
+  }
+  close(PASCAL_FILE);
+
+  # Okay, we've examined the entire file looking for clues;
+  # let's use those clues to determine if it's really Pascal:
+
+  if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
+         $has_begin && $found_terminating_end ) ||
+       ( $has_module && $found_terminating_end ) ||
+       ( $has_program && $has_begin && $found_terminating_end ) )
+    {$is_pascal = 1;}
+
+  $pascal_files{$filename} = $is_pascal;  # Store result in cache.
+
+  return $is_pascal;
+}
+
+sub really_is_incpascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+# For .inc files (mainly seen in fpc)
+
+  my $filename = shift;
+  chomp($filename);
+
+# The heuristic is as follows: it is Pascal if any of the following:
+# 1. really_is_pascal returns true
+# 2. Any usual reserved word is found (program, unit, const, begin...)
+
+  # If the general routine for Pascal files works, we have it
+  if (&really_is_pascal ($filename)) {
+    $pascal_files{$filename} = 1;
+    return 1;
+  }
+
+  my $is_pascal = 0;      # Value to determine.
+  my $found_begin = 0;
+
+  open(PASCAL_FILE, "<$filename") ||
+       die "Can't open $filename to determine if it's pascal.\n";
+  while(<PASCAL_FILE>) {
+    s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
+    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
+    if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
+    if (m/\bunit\s+[A-Za-z]/i)  {$is_pascal=1;}
+    if (m/\bmodule\s+[A-Za-z]/i)  {$is_pascal=1;}
+    if (m/\bprocedure\b/i)  {$is_pascal = 1; }
+    if (m/\bfunction\b/i)  {$is_pascal = 1; }
+    if (m/^\s*interface\s+/i)  {$is_pascal = 1; }
+    if (m/^\s*implementation\s+/i)  {$is_pascal = 1; }
+    if (m/\bconstant\s+/i)  {$is_pascal=1;}
+    if (m/\bbegin\b/i) { $found_begin = 1; }
+    # BUGFIX: this test used "($found_begin = 1)" -- an assignment, which is
+    # always true -- instead of an equality comparison, so any file ending
+    # in "end." was accepted even without a "begin".  Compare, don't assign.
+    if ((m/end\.\s*$/i) && ($found_begin == 1)) {$is_pascal = 1;}
+    if ($is_pascal) {
+      last;
+    }
+  }
+
+  close(PASCAL_FILE);
+  $pascal_files{$filename} = $is_pascal; # Store result in cache.
+  return $is_pascal;
+}
+
+# Cache which files are php or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%php_files = ();
+
+sub really_is_php {
+# Given filename, returns TRUE if its contents really is php.
+
+  my $filename = shift;
+  chomp($filename);
+
+  my $is_php = 0;      # Value to determine.
+ # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?> + my $script_surround = 0; # <script..>; bit 0 = <script language="php"> + my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %> + + # Return cached result, if available: + if ($php_files{$filename}) { return $php_files{$filename};} + + open(PHP_FILE, "<$filename") || + die "Can't open $filename to determine if it's php.\n"; + while(<PHP_FILE>) { + if (m/\<\?/) { $normal_surround |= 1; } + if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; } + if (m/\<script.*language="?php"?/i) { $script_surround |= 1; } + if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; } + if (m/\<\%/) { $asp_surround |= 1; } + if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; } + } + close(PHP_FILE); + + if ( ($normal_surround == 3) || ($script_surround == 3) || + ($asp_surround == 3)) { + $is_php = 1; + } + + $php_files{$filename} = $is_php; # Store result in cache. + + return $is_php; +} + + + +sub examine_dir { + # Given a file, determine if there are only C++, OBJC, C, or a mixture + # in the same directory. Returns "ansic", "cpp", "objc" or "mix" + my $filename = shift; + chomp($filename); + my $dirname = $filename; + $dirname =~ s/\/[^\/]*$//; + my $saw_ansic_in_dir = 0; + my $saw_pc_in_dir = 0; # ".pc" may mean Oracle C. + my $saw_pcc_in_dir = 0; # ".pc" may mean Oracle C++. + my $saw_cpp_in_dir = 0; + my $saw_objc_in_dir = 0; + opendir(DIR, $dirname) || die "can't opendir $dirname"; + while (defined($_ = readdir(DIR))) { + chomp; + next if (!$_); + if (m/\.(cpp|C|cxx|cc)$/ && -f "$dirname/$_") {$saw_cpp_in_dir = 1;} + if (m/\.c$/ && -f "$dirname/$_") {$saw_ansic_in_dir = 1;} + if (m/\.pc$/ && -f "$dirname/$_") {$saw_pc_in_dir = 1;} + if (m/\.pcc$/ && -f "$dirname/$_") {$saw_pcc_in_dir = 1;} + if (m/\.m$/ && -f "$dirname/$_" && &really_is_objc($dirname . "/" . 
$_)) + {$saw_objc_in_dir = 1;} + if (($saw_ansic_in_dir + $saw_cpp_in_dir + $saw_objc_in_dir) > 1) { + closedir(DIR); + return "mix"; + } + } + # Done searching; we saw at most one type. + if ($saw_ansic_in_dir) {return "c";} + elsif ($saw_cpp_in_dir) {return "cpp";} + elsif ($saw_objc_in_dir) {return "objc";} + elsif ($saw_pc_in_dir && (!$saw_pcc_in_dir)) {return "c";} # Guess "C". + elsif ($saw_pcc_in_dir && (!$saw_pc_in_dir)) {return "cpp";} # Guess "C". + else {return "mix";} # We didn't see anything... so let's say "mix". +} + +sub was_generated_automatically() { + # Determine if the file was generated automatically. + # Use a simple heuristic: check if first few lines have phrases like + # "generated automatically", "automatically generated", "Generated by", + # or "do not edit" as the first + # words in the line (after possible comment markers and spaces). + my $filename = shift; + + if ($autogen_okay) {return 0;}; + + chomp($filename); + reopen($filename); + $i = 15; # Look at first 15 lines. + while (defined($_ = <CODE_FILE>)) { + if (m/^[\s#\/\*;\-\%]*generated automatically/i || + m/^[\s#\/\*;\-\%]*automatically generated/i || + m/^[\s#\/\*;\-\%]*generated by /i || # libtool uses this. + m/^[\s#\/\*;\-\%]*a lexical scanner generated by flex/i || + m/^[\s#\/\*;\-\%]*this is a generated file/i || # TeTex uses this. + m/^[\s#\/\*;\-\%]*generated with the.*utility/i || # TeTex uses this. + m/^[\s#\/\*;\-\%]*do not edit/i) { + return 1; + } + $i--; + last if $i <= 0; + } + return 0; +} + + +# Previous files added, indexed by digest: + +%previous_files = (); + +$cached_digest = ""; +$cached_digest_filename = ""; + +$digest_method = undef; + +sub compute_digest_given_method { + my $filename = shift; + my $method = shift; + my $result; + + if ($method eq "md5sum") { + open(FH, "-|", "md5sum", $filename) or return undef; + $result = <FH>; + close FH; + return undef if ! defined($result); + chomp($result); + $result =~ s/^\s*//; # Not needed for GNU Textutils. 
+ $result =~ s/[^a-fA-F0-9].*//; # Strip away end. + } elsif ($method eq "md5") { + open(FH, "-|", "md5", $filename) or return undef; + $result = <FH>; + close FH; + return undef if ! defined($result); + chomp($result); + $result =~ s/^.* //; # Strip away beginning. + } elsif ($method eq "openssl") { + open(FH, "-|", "openssl", "dgst", "-md5", $filename) or return undef; + $result = <FH>; + close FH; + return undef if ! defined($result); + chomp($result); + $result =~ s/^.* //; # Strip away beginning. + } else { + # "Can't happen" + die "Unknown method"; + } + return $result; +} + +sub compute_digest { + my $filename = shift; + my $result; + if (defined($digest_method)) { + $result = compute_digest_given_method($filename, $digest_method); + } else { + # Try each method in turn until one works. + # There doesn't seem to be a way in perl to disable an error message + # display if the command is missing, which is annoying. However, the + # program is more robust if we check for the command each time we run. + print "Finding a working MD5 command....\n"; + foreach $m ("md5sum", "md5", "openssl") { + $result = compute_digest_given_method($filename, $m); + if (defined($result)) { + $digest_method = $m; + last; + } + } + if (!defined($digest_method)) { + die "Failure - could not find a working md5 program using $filename."; + } + print "Found a working MD5 command.\n"; + } + return $result; +} + +sub get_digest { + my $filename = shift; + my $result; + # First, check the cache -- did we just compute this? + if ($filename eq $cached_digest_filename) { + return $cached_digest; # We did, so here's what it was. + } + + $result = compute_digest($filename); + # Store in most-recently-used cache. + $cached_digest = $result; + $cached_digest_filename = $filename; + return $result; +} + +sub already_added { + # returns the first file's name with the same contents, + # else returns the empty string. 
+ + my $filename = shift; + my $digest = &get_digest($filename); + + if ($previous_files{$digest}) { + return $previous_files{$digest}; + } else { + return ""; + } +} + +sub close_lang_lists { + my $lang; + my $file; + while (($lang, $file) = each(%lang_list_files)) { + $file->close(); # Ignore any errors on close, there's little we can do. + } + %lang_list_files = (); +} + +sub force_record_file_type { + my ($filename, $type) = @_; + + if (!$type) {die "ERROR! File $filename, type $file_type\n";} + if ($type eq "c") {$type = "ansic";}; + if (!defined($lang_list_files{$type})) { + $lang_list_files{$type} = new FileHandle("${dir}/${type}_list.dat", "w") || + die "Could not open ${dir}/${type}_list.dat"; + } + $lang_list_files{$type}->printf("%s\n", $filename); +} + + +sub record_file_type { + my ($filename, $type) = @_; + # First check if the file should be auto, dup, or zero - and add there + # if so. Otherwise, add to record of 'type'. + + my $first_filename; + + if (-z $filename) { + force_record_file_type($filename, "zero"); + return; + } + + if (&was_generated_automatically($filename)) { + force_record_file_type($filename, "auto"); + return; + } + + unless (($duplicates_okay) || ($type eq "not") || ($type eq "unknown")) { + $first_filename = &already_added($filename); + if ($first_filename) { + print "Note: $filename dups $first_filename\n" if $noisy; + force_record_file_type("$filename dups $first_filename", "dup"); + $dup_count++; + return; + } else { # This isn't a duplicate - record that info, as needed. + my $digest = &get_digest($filename); + $previous_files{$digest} = $filename; + if ($duplistfile) { + print DUPLIST "$digest $filename\n"; + } + } + } + + force_record_file_type($filename, $type); +} + + + +sub file_type_from_contents() { + # Determine if file type is a scripting language, and if so, return it. + # Returns its type as a string, or the empty string if it's undetermined. 
+  my $filename = shift;
+  my $command;
+  chomp($filename);
+  reopen($filename);
+  # Don't do $firstline = <CODE_FILE> here because the file may be binary;
+  # instead, read in a fixed number of bytes:
+  read CODE_FILE, $firstline, 200;
+  # BUGFIX: the old code tested "$_" here (twice), but nothing above sets
+  # $_ -- it held whatever the caller's last loop left in it, so an
+  # unrelated false value could make every file look typeless.  Test the
+  # data we actually just read.
+  return "" if (!defined($firstline));
+  chomp($firstline);
+  if (!$firstline) {return "";}
+
+  # Handle weirdness: If there's a ".cpp" file beginning with .\"
+  # then it clearly isn't C/C++... it's a man page.  People who create
+  # and distribute man pages with such filename extensions should have
+  # a fingernail removed, slowly :-).
+  if (($firstline =~ m@^[,.]\\"@) &&
+      $filename =~ m@\.(c|cpp|C|cxx|cc)$@) {return "not";}
+
+
+  if (!($firstline =~ m@^#!@)) {return "";}  # No script indicator here.
+
+  # studying $firstline doesn't speed things up, unfortunately.
+
+  # I once used a pattern that only acknowledged very specific directories,
+  # but I found that many test cases use unusual script locations
+  # (to ensure that they're invoking the correct program they're testing).
+  # Thus, we depend on the program being named with postfixed whitespace,
+  # and either begin named by itself or with a series of lowercase
+  # directories ending in "/".
+
+  # I developed these patterns by starting with patterns that appeared
+  # correct, and then examined the output (esp. warning messages) to see
+  # what I'd missed.
+
+  $command = "";
+
+  # Strip out any calls to sudo
+  if ($firstline =~ m@^#!\s*/(usr/)?bin/sudo\s+(/.*)@) {
+    $firstline = "#!" . $2;
+  }
+
+  if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@i) {
+    $command = $2;
+  } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) {
+    $command = $2;
+  }
+
+  if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/i) ||
+       ($firstline =~
+          m~^#!\s*\@_?(SCRIPT_)?(PATH_)?(BA|K)?SH(ELL)?(\d+)?\@?(\s|\Z)~)) {
+    # Note: wish(1) uses a funny trick; see wish(1) for more info.
+    # The following code detects this unusual wish convention.
+ if ($firstline =~ m@exec wish(\s|\Z)@i) { + return "tcl"; # return the type for wish. + } + # Otherwise, it's shell. + return "sh"; + } + if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/i) || + ($firstline =~ m@^#!\s*xCSH_PATHx(\s|\Z)@)) { + return "csh"; + } + if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/i) || + ($command =~ m/^speedycgi[0-9\.]*(\.exe)?$/i) || + ($firstline =~ m~^#!\s*\@_?(PATH_)?PERL\d*(PROG)?\@(\s|\Z)~) || + ($firstline =~ m~^#!\s*xPERL_PATHx(\s|\Z)~)) { + return "perl"; + } + if ($command =~ m/^python[0-9\.]*(\.exe)?$/i) { + return "python"; + } + if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/i) { + return "tcl"; + } + if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/i) { return "exp"; } + if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/i) { return "awk"; } + if ($command =~ m/^sed$/i) { return "sed"; } + if ($command =~ m/^guile[0-9\.]*$/i) { return "lisp"; } + if ($firstline =~ m@^#!.*make\b@i) { # We'll claim that #! make is a makefile. + return "makefile"; + } + if ($firstline =~ m@^#!\s*\.(\s|\Z)@) { # Lonely period. + return ""; # Ignore the first line, it's not helping. + } + if ($firstline =~ m@^#!\s*\Z@) { # Empty line. + return ""; # Ignore the first line, it's not helping. + } + if ($firstline =~ m@^#!\s*/dev/null@) { # /dev/null is the script?!? + return ""; # Ignore nonsense ("/dev/null"). + } + if ($firstline =~ m@^#!\s*/unix(\s|Z)@) { + return ""; # Ignore nonsense ("/unix"). + } + if (($filename =~ m@\.pl$@) || ($filename =~ m@\.pm$@)) { + return ""; # Don't warn about files that will be ID'd as perl files. + } + if (($filename =~ m@\.sh$@)) { + return ""; # Don't warn about files that will be ID'd as sh files. + } + if ($firstline =~ m@^#!\s*\S@) { + $firstline =~ s/\n.*//s; # Delete everything after first line. + $warning_from_first_line = "WARNING! 
File $filename has unknown start: $firstline"; + return ""; + } + return ""; +} + + +sub get_file_type { + my $file_to_examine = shift; + # Return the given file's type. + # Consider the file's contents, filename, and file extension. + + $warning_from_first_line = ""; + + # Skip file names known to not be program files. + $basename = $file_to_examine; + $basename =~ s!^.*/!!; + if ($not_code_filenames{$basename}) { + print "Note: Skipping non-program filename: $file_to_examine\n" + if $noisy; + return "not"; + } + + # Skip "configure" files if there's a corresponding "configure.in" + # file; such a situation suggests that "configure" is automatically + # generated by "autoconf" from "configure.in". + if (($file_to_examine =~ m!/configure$!) && + (-s "${file_to_examine}.in")) { + print "Note: Auto-generated configure file $file_to_examine\n" + if $noisy; + return "auto"; + } + + if (($basename eq "lex.yy.c") || # Flex/Lex output! + ($basename eq "lex.yy.cc") || # Flex/Lex output - C++ scanner. + ($basename eq "y.code.c") || # yacc/bison output. + ($basename eq "y.tab.c") || # yacc output. + ($basename eq "y.tab.h")) { # yacc output. + print "Note: Auto-generated lex/yacc file $file_to_examine\n" + if $noisy; + return "auto"; + } + + # Bison is more flexible than yacc -- it can create arbitrary + # .c/.h files. If we have a .tab.[ch] file, with a corresponding + # .y file, then it's been automatically generated. + # Bison can actually save to any filename, and of course a Makefile + # can rename any file, but we can't help that. + if ($basename =~ m/\.tab\.[ch]$/) { + $possible_bison = $file_to_examine; + $possible_bison =~ s/\.tab\.[ch]$/\.y/; + if (-s "$possible_bison") { + print "Note: found bison-generated file $file_to_examine\n" + if $noisy; + return "auto"; + } + } + + # If there's a corresponding ".MASTER" file, treat this file + # as automatically-generated derivative. This handles "exmh". 
+ if (-s "${file_to_examine}.MASTER") { + print "Note: Auto-generated non-.MASTER file $file_to_examine\n" + if $noisy; + return "auto"; + } + + # Peek at first line to determine type. Note that the file contents + # take precedence over the filename extension, because there are files + # (such as /usr/src/redhat/BUILD/teTeX-1.0/texmf/doc/mkhtml.nawk) + # which have one extension (say, ".nawk") but actually contain + # something else (at least in part): + $type = &file_type_from_contents($file_to_examine); + if ($type) { + return $type; + } + + # Use filename to determine if it's a makefile: + if (($file_to_examine =~ m/\bmakefile$/i) || + ($file_to_examine =~ m/\bmakefile\.txt$/i) || + ($file_to_examine =~ m/\bmakefile\.pc$/i) || + ($file_to_examine =~ m/\bdebian\/rules$/i)) { # "debian/rules" too. + return "makefile"; + } + + # Try to use filename extension to determine type: + if ($file_to_examine =~ m/\.([^.\/]+)$/) { + $type = $1; + + # More ugly problems: some source filenames only use + # UPPERCASE, and they can be mixed with regular files. + # Since normally filenames are lowercase or mixed case, + # presume that an all-uppercase filename means we have to assume + # that the extension must be lowercased. This particularly affects + # .C, which usually means C++ but in this case would mean plain C. + my $uppercase_filename = 0; + if (($file_to_examine =~ m/[A-Z]/) && + (! ($file_to_examine =~ m/[a-z]/))) { + $uppercase_filename = 1; + $type = lc($type); # Use lowercase version of type. + } + + # Is this type known to NOT be a program? + if ($not_code_extensions{$type}) { + return "not"; + } + + # Handle weirdness: ".hpp" is a C/C++ header file, UNLESS it's + # makefile.hpp (a makefile); see /usr/src/redhat/BUILD, + # pine4.21/pine/makefile.hpp and pine4.21/pico/makefile.hpp + # Note that pine also includes pine4.21/pine/osdep/diskquot.hpp. + # Kaffe uses .hpp for C++ header files. 
+ if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/i)) + {return "makefile";} + + # If it's a C file but there's a ".pc" or ".pgc" file, then presume that + # it was automatically generated: + if ($type eq "c") { + $pc_name = $file_to_examine; + if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PC/; } + else { $pc_name =~ s/\.c$/\.pc/; } + if (-s "$pc_name" ) { + print "Note: Auto-generated C file (from .pc file) $file_to_examine\n" + if $noisy; + return "auto"; + } + $pc_name = $file_to_examine; + if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PGC/; } + else { $pc_name =~ s/\.c$/\.pgc/; } + if (-s "$pc_name" ) { + print "Note: Auto-generated C file (from .pgc file) $file_to_examine\n" + if $noisy; + return "auto"; + } + } + + # ".pc" is the official extension for Oracle C programs with + # Embedded C commands, but many programs use ".pc" to indicate + # the "PC" (MS-DOS/Windows) version of a file. + # We'll use heuristics to detect when it's not really C, + # otherwise claim it's C and move on. + if ($type eq "pc") { # If it has one of these filenames, it's not C. + if ($file_to_examine =~ m/\bmakefile\.pc$/i) { return "makefile"; } + if (($file_to_examine =~ m/\bREADME\.pc$/i) || + ($file_to_examine =~ m/\binstall\.pc$/i) || + ($file_to_examine =~ m/\bchanges\.pc$/i)) {return "not";} + else { return "c";} + } + + if (defined($file_extensions{$type})) { + $type = $file_extensions{$type}; + if ( (($type eq "exp") && (!&really_is_expect($file_to_examine))) || + (($type eq "tk") && (!&really_is_expect($file_to_examine))) || + (($type eq "objc") && (!&really_is_objc($file_to_examine))) || + (($type eq "lex") && (!&really_is_lex($file_to_examine))) || + (($type eq "pascal") && (!&really_is_pascal($file_to_examine)))) { + $type = "unknown"; + } elsif ($type eq "inc") { + if (&really_is_php($file_to_examine)) { + $type = "php"; # Hey, the .inc is PHP! 
+ } elsif (&really_is_incpascal($file_to_examine)) { + $type = "pascal"; + } else { + $type = "unknown"; + } + }; + return $type; + } + + } + # If we were expecting a script, warn about that. + if ($warning_from_first_line) {print "$warning_from_first_line\n";} + # Don't know what it is, so report "unknown". + return "unknown"; +} + + + + +sub convert_h_files { + # Determine if the ".h" files we saw are C, OBJC, C++, or a mixture (!) + # Usually ".hpp" files are C++, but if we didn't see any C++ files then + # it probably isn't. This handles situations like pine; its has a file + # /usr/src/redhat/BUILD/pine4.21/pine/osdep/diskquot.hpp + # where the ".hpp" is for HP, not C++. (Of course, we completely miss + # the other files in that pine directory because they have truly bizarre + # extensions, but there's no easy way to handle such nonstandard things). + + if (!defined($lang_list_files{"h"})) { return; } + + my $saw_ansic = defined($lang_list_files{"ansic"}); + my $saw_cpp = defined($lang_list_files{"cpp"}); + my $saw_objc = defined($lang_list_files{"objc"}); + my $confidence; + + $lang_list_files{"h"}->close(); + + open(H_LIST, "<${dir}/h_list.dat") || die "Can't reopen h_list\n"; + + if ($saw_ansic && (!$saw_cpp) && (!$saw_objc)) { + # Only C, let's assume .h files are too + while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "c"); }; + } elsif ($saw_cpp && (!$saw_ansic) && (!$saw_objc)) { # Only C++ + while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "cpp"); }; + } elsif ($saw_objc && (!$saw_ansic) && (!$saw_cpp)) { # Only Obj-C + while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "objc"); }; + } else { + # Ugh, we have a mixture. Let's try to determine what we have, using + # various heuristics (looking for a matching name in the directory, + # reading the file contents, the contents in the directory, etc.) + # When all else fails, assume C. 
+ while (defined($_=<H_LIST>)) { + chomp; + next if (!$_); + # print "DEBUG: H file $_\n"; + + $h_file = $_; + $cpp2_equivalent = + $cpp3_equivalent = $cpp4_equivalent = $objc_equivalent = $_; + $ansic_equivalent = $cpp_equivalent = $_; + $ansic_equivalent =~ s/h$/c/; + $cpp_equivalent =~ s/h$/C/; + $cpp2_equivalent =~ s/h$/cpp/; + $cpp3_equivalent =~ s/h$/cxx/; + $cpp4_equivalent =~ s/h$/cc/; + $objc_equivalent =~ s/h$/m/; + if (m!\.hpp$!) { force_record_file_type($h_file, "cpp"); } + elsif ( (-s $cpp2_equivalent) || + (-s $cpp3_equivalent) || (-s $cpp4_equivalent)) + { force_record_file_type($h_file, "cpp"); } + # Note: linuxconf has many ".m" files that match .h files, + # but the ".m" files are straight C and _NOT_ objective-C. + # The following test handles cases like this: + elsif ($saw_objc && (-s $objc_equivalent) && + &really_is_objc($objc_equivalent)) + { &force_record_file_type($h_file, "objc"); } + elsif (( -s $ansic_equivalent) && (! -s $cpp_equivalent)) + { force_record_file_type($h_file, "c"); } + elsif ((-s $cpp_equivalent) && (! -s $ansic_equivalent)) + { force_record_file_type($h_file, "cpp"); } + else { + $confidence = &looks_like_cpp($h_file); + if ($confidence == 2) + { &force_record_file_type($h_file, "cpp"); } + else { + $files_in_dir = &examine_dir($h_file); + if ($files_in_dir eq "cpp") + { &force_record_file_type($h_file, "cpp"); } + elsif ($files_in_dir eq "objc") + { &force_record_file_type($h_file, "objc"); } + elsif ($confidence == 1) + { &force_record_file_type($h_file, "cpp"); } + elsif ($h_file =~ m![a-z][0-9]*\.H$!) + # Mixed-case filename, .H extension. + { &force_record_file_type($h_file, "cpp"); } + else # We're clueless. Let's guess C. + { &force_record_file_type($h_file, "c"); }; + } + } + } + } # Done handling ".h" files. + close(H_LIST); +} + + +# MAIN PROGRAM STARTS HERE. + +# Handle options. +while (($#ARGV >= 0) && ($ARGV[0] =~ m/^--/)) { + $duplicates_okay = 1 if ($ARGV[0] =~ m/^--duplicates$/); # Count duplicates. 
+ $crossdups_okay = 1 if ($ARGV[0] =~ m/^--crossdups$/); # Count crossdups. + $autogen_okay = 1 if ($ARGV[0] =~ m/^--autogen$/); # Count autogen. + $noisy = 1 if ($ARGV[0] =~ m/^--verbose$/); # Verbose output. + if ($ARGV[0] =~ m/^--duplistfile$/) { # File to get/record dups. + shift; + $duplistfile = $ARGV[0]; + } + last if ($ARGV[0] =~ m/^--$/); + shift; +} + +if ($#ARGV < 0) { + print "Error: No directory names given.\n"; + exit(1); +} + +if ($duplistfile) { + if (-e $duplistfile) { + open(DUPLIST, "<$duplistfile") || die "Can't open $duplistfile"; + while (defined($_ = <DUPLIST>)) { + chomp; + ($digest, $filename) = split(/ /, $_, 2); + if (defined($digest) && defined($filename)) { + $previous_files{$digest} = $filename; + } + } + close(DUPLIST); + } + open(DUPLIST, ">>$duplistfile") || die "Can't open for writing $duplistfile"; +} + + +while ( $dir = shift ) { + + if (! -d "$dir") { + print "Skipping non-directory $dir\n"; + next; + } + + if ($examined_directories{$dir}) { + print "Skipping already-examined directory $dir\n"; + next; + } + $examined_directories{$dir} = 1; + + if (! open(FILELIST, "<${dir}/filelist")) { + print "Skipping directory $dir; it doesn't contain a file 'filelist'\n"; + next; + } + + if (-r "${dir}/all-physical.sloc") { + # Skip already-analyzed directories; if it's been analyzed, we've already + # broken them down. + next; + } + + if ($crossdups_okay) { # Cross-dups okay; forget the hash of previous files. + %previous_files = (); + } + + # insert blank lines, in case we need to recover from a midway crash + if ($duplistfile) { + print DUPLIST "\n"; + } + + + $dup_count = 0; + + while (defined($_ = <FILELIST>)) { + chomp; + $file = $_; + next if (!defined($file) || ($file eq "")); + if ($file =~ m/\n/) { + print STDERR "WARNING! 
File name contains embedded newline; it'll be IGNORED.\n"; + print STDERR "Filename is: $file\n"; + next; + } + $file_type = &get_file_type($file); + if ($file_type) { + &record_file_type($file, $file_type); + } else { + print STDERR "WARNING! No file type selected for $file\n"; + } + } + + # Done with straightline processing. Now we need to determine if + # the ".h" files we saw are C, OBJC, C++, or a mixture (!) + &convert_h_files(); + + + # Done processing the directory. Close up shop so we're + # ready for the next directory. + + close(FILELIST); + close_lang_lists(); + reopen(""); # Close code file. + + if ($dup_count > 50) { + print "Warning: in $dir, number of duplicates=$dup_count\n"; + } + +} + + diff --git a/break_filelist.orig b/break_filelist.orig new file mode 100755 index 0000000..b34c702 --- /dev/null +++ b/break_filelist.orig @@ -0,0 +1,1084 @@ +#!/usr/bin/perl -w + +# break_filelist +# Take a list of dirs which contain a "filelist"; +# creates files in each directory identifying which are C, C++, Perl, etc. +# For example, "ansic.dat" lists all ANSI C files contained in filelist. +# Note: ".h" files are ambiguous (they could be C or C++); the program +# uses heuristics to determine this. +# The list of .h files is also contained in h_list.dat. + +# (C) Copyright 2000-2001 David A. Wheeler +# Part of "SLOCCount", and released under the GPL version 2; +# see the documentation for details. + +# If adding a new language: add the logic to open the file, +# close the file, and detect & write to the file listing that language. + +# Debatable decisions: +# Doesn't count .dsl files (stylesheets, which are partially LISP). +# Doesn't count .sql files (SQL queries & commands) + +# Note - I don't try to distinguish between TCL and [incr TCL] (itcl), +# an OO extended version of TCL. For our purposes, it's all TCL. + + +use FileHandle; + + +# Set default configuration: + +$duplicates_okay = 0; # Set to 1 if you want to count file duplicates. 
+$crossdups_okay = 0; # Set to 1 if duplicates okay in different filelists. +$autogen_okay = 0; # Set to 1 if you want to count autogen'ed files. +$noisy = 0; # Set to 1 if you want noisy reports. +%lang_list_files = (); + +# The following extensions are NOT code: +%not_code_extensions = ( + "html" => 1, + "in" => 1, # Debatable. + "xpm" => 1, + "po" => 1, + "am" => 1, # Debatable. + "1" => 1, # Man pages (documentation): + "2" => 1, + "3" => 1, + "4" => 1, + "5" => 1, + "6" => 1, + "7" => 1, + "8" => 1, + "9" => 1, + "n" => 1, + "gif" => 1, + "tfm" => 1, + "png" => 1, + "m4" => 1, # Debatable. + "bdf" => 1, + "sgml" => 1, + "mf" => 1, + "txt" => 1, + "man" => 1, + "xbm" => 1, + "Tag" => 1, + "sgm" => 1, + "vf" => 1, + "tex" => 1, + "elc" => 1, + "gz" => 1, + "dic" => 1, + "pfb" => 1, + "fig" => 1, + "afm" => 1, + "jpg" => 1, + "bmp" => 1, + "htm" => 1, + "kdelnk" => 1, + "desktop" => 1, + "pbm" => 1, + "pdf" => 1, + "ps" => 1, # Postscript is _USUALLY_ generated automatically. + "eps" => 1, + "doc" => 1, + "man" => 1, + "o" => 1, # Object code is generated from source code. + "a" => 1, # Static object code. + "so" => 1, # Dynamically-loaded object code. +); + +# The following filenames are NOT code: +%not_code_filenames = ( + "README" => 1, + "Readme" => 1, + "readme" => 1, + "README.tk" => 1, # used in kdemultimedia, it's confusing. + "Changelog" => 1, + "ChangeLog" => 1, + "Repository" => 1, + "CHANGES" => 1, + "Changes" => 1, + ".cvsignore" => 1, + "Root" => 1, # CVS. + "BUGS" => 1, + "TODO" => 1, + "COPYING" => 1, + "MAINTAINERS" => 1, + "Entries" => 1, + # Skip "iconfig.h" files; they're used in Imakefiles + # (used in xlockmore): + "iconfig.h" => 1, +); + + +# A filename ending in the following extensions usually maps to the +# given language: + +%file_extensions = ( + "c" => "ansic", + "ec" => "ansic", # Informix C. + "ecp" => "ansic", # Informix C. 
+ "pgc" => "ansic", # Postgres embedded C/C++ (guess C) + "C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp", + "pcc" => "cpp", # Input to Oracle C++ preproc. + "m" => "objc", + "h" => "h", "H" => "h", "hpp" => "h", + "ada" => "ada", "adb" => "ada", "ads" => "ada", + "pad" => "ada", # Oracle Ada preprocessor. + "f" => "fortran", + "p" => "pascal", + "py" => "python", + "s" => "asm", "S" => "asm", "asm" => "asm", + "sh" => "sh", "bash" => "sh", + "csh" => "csh", "tcsh" => "csh", + "java" => "java", + "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "lsp" => "lisp", + "jl" => "lisp", + "tcl" => "tcl", "tk" => "tcl", "itk" => "tcl", + "exp" => "exp", + "pl" => "perl", "pm" => "perl", "perl" => "perl", + "awk" => "awk", + "sed" => "sed", + "y" => "yacc", + "l" => "lex", + "makefile" => "makefile", + "sql" => "sql", + "php" => "php", "php3" => "php", "php4" => "php", "php5" => "php", + "php6" => "php", + "inc" => "inc", # inc MAY be PHP - we'll handle it specially. + # ???: .pco is Oracle Cobol, need to add with a Cobol counter. +); + + +# GLOBAL VARIABLES + +$dup_count = 0; + +$warning_from_first_line = ""; + +%examined_directories = (); # Keys = Names of directories examined this run. + +$duplistfile = ""; + +########### + + +# Handle re-opening individual CODE_FILEs. +# CODE_FILE is public + +# Private value: +$opened_file_name = ""; + +sub reopen { + # Open file if it isn't already, else rewind. + # If filename is "", close any open file. + my $filename = shift; + chomp($filename); + # print("DEBUG: reopen($filename)\n"); + if ($filename eq "") { + if ($opened_file_name) {close(CODE_FILE);} + $opened_file_name = ""; + return; + } + if ($filename eq $opened_file_name) { + seek CODE_FILE, 0, 0; # Rewind. + } else { # We're opening a new file. 
+ if ($opened_file_name) {close(CODE_FILE)} + open(CODE_FILE, "<$filename") || die "Can't open $filename"; + $opened_file_name = $filename; + } +} + +########### + +sub looks_like_cpp { + # returns a confidence level - does the file looks like it's C++? + my $filename = shift; + my $confidence = 0; + chomp($filename); + open( SUSPECT, "<$filename"); + while (<SUSPECT>) { + if (m/^\s*class\b.*\{/) { # "}" + close(SUSPECT); + return 2; + } + if (m/^\s*class\b/) { + $confidence = 1; + } + } + close(SUSPECT); + return $confidence; +} + + +# Cache which files are objective-C or not. +# Key is the full file pathname; value is 1 if objective-C (else 0). +%objective_c_files = (); + +sub really_is_objc { +# Given filename, returns TRUE if its contents really are objective-C. + my $filename = shift; + chomp($filename); + + my $is_objc = 0; # Value to determine. + my $brace_lines = 0; # Lines that begin/end with curly braces. + my $plus_minus = 0; # Lines that begin with + or -. + my $word_main = 0; # Did we find "main("? + my $special = 0; # Did we find a special Objective-C pattern? + + # Return cached result, if available: + if ($objective_c_files{$filename}) { return $objective_c_files{$filename};} + + open(OBJC_FILE, "<$filename") || + die "Can't open $filename to determine if it's objective C.\n"; + while(<OBJC_FILE>) { + + if (m/^\s*[{}]/ || m/[{}];?\s*$/) { $brace_lines++;} + if (m/^\s*[+-]/) {$plus_minus++;} + if (m/\bmain\s*\(/) {$word_main++;} # "main" followed by "("? + # Handle /usr/src/redhat/BUILD/egcs-1.1.2/gcc/objc/linking.m: + if (m/^\s*\[object name\];\s*$/i) {$special=1;} + } + close(OBJC_FILE); + + if (($brace_lines > 1) && (($plus_minus > 1) || $word_main || $special)) + {$is_objc = 1;} + + $objective_c_files{$filename} = $is_objc; # Store result in cache. + + return $is_objc; +} + + +# Cache which files are lex or not. +# Key is the full file pathname; value is 1 if lex (else 0). 
+%lex_files = (); + +sub really_is_lex { +# Given filename, returns TRUE if its contents really is lex. +# lex file must have "%%", "%{", and "%}". +# In theory, a lex file doesn't need "%{" and "%}", but in practice +# they all have them, and requiring them avoid mislabeling a +# non-lexfile as a lex file. + + my $filename = shift; + chomp($filename); + + my $is_lex = 0; # Value to determine. + my $percent_percent = 0; + my $percent_opencurly = 0; + my $percent_closecurly = 0; + + # Return cached result, if available: + if ($lex_files{$filename}) { return $lex_files{$filename};} + + open(LEX_FILE, "<$filename") || + die "Can't open $filename to determine if it's lex.\n"; + while(<LEX_FILE>) { + $percent_percent++ if (m/^\s*\%\%/); + $percent_opencurly++ if (m/^\s*\%\{/); + $percent_closecurly++ if (m/^\s*\%\}/); + } + close(LEX_FILE); + + if ($percent_percent && $percent_opencurly && $percent_closecurly) + {$is_lex = 1;} + + $lex_files{$filename} = $is_lex; # Store result in cache. + + return $is_lex; +} + + +# Cache which files are expect or not. +# Key is the full file pathname; value is 1 if it is (else 0). +%expect_files = (); + +sub really_is_expect { +# Given filename, returns TRUE if its contents really are Expect. +# Many "exp" files (such as in Apache and Mesa) are just "export" data, +# summarizing something else # (e.g., its interface). +# Sometimes (like in RPM) it's just misc. data. +# Thus, we need to look at the file to determine +# if it's really an "expect" file. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Expect _IF_ it: +# 1. has "load_lib" command and either "#" comments or {}. +# 2. {, }, and one of: proc, if, [...], expect + + my $is_expect = 0; # Value to determine. + + my $begin_brace = 0; # Lines that begin with curly braces. + my $end_brace = 0; # Lines that begin with curly braces. + my $load_lib = 0; # Lines with the Load_lib command. 
+ my $found_proc = 0; + my $found_if = 0; + my $found_brackets = 0; + my $found_expect = 0; + my $found_pound = 0; + + # Return cached result, if available: + if ($expect_files{$filename}) { return expect_files{$filename};} + + open(EXPECT_FILE, "<$filename") || + die "Can't open $filename to determine if it's expect.\n"; + while(<EXPECT_FILE>) { + + if (m/#/) {$found_pound++; s/#.*//;} + if (m/^\s*\{/) { $begin_brace++;} + if (m/\{\s*$/) { $begin_brace++;} + if (m/^\s*\}/) { $end_brace++;} + if (m/\};?\s*$/) { $end_brace++;} + if (m/^\s*load_lib\s+\S/) { $load_lib++;} + if (m/^\s*proc\s/) { $found_proc++;} + if (m/^\s*if\s/) { $found_if++;} + if (m/\[.*\]/) { $found_brackets++;} + if (m/^\s*expect\s/) { $found_expect++;} + } + close(EXPECT_FILE); + + if ($load_lib && ($found_pound || ($begin_brace && $end_brace))) + {$is_expect = 1;} + if ( $begin_brace && $end_brace && + ($found_proc || $found_if || $found_brackets || $found_expect)) + {$is_expect = 1;} + + $expect_files{$filename} = $is_expect; # Store result in cache. + + return $is_expect; +} + + +# Cached values. +%pascal_files = (); + +sub really_is_pascal { +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. 
Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following: +# 1. "^..program NAME(...);" or "..unit NAME". +# 2. "procedure", "function", "^..interface", or "^..implementation" +# 3. a "begin", and +# 4. it ends with "end." (ignoring {...} comments). +# The last requirement in particular filters out non-Pascal. + + + my $is_pascal = 0; # Value to determine. + + my $has_program = 0; + my $has_unit = 0; + my $has_procedure_or_function = 0; + my $found_begin = 0; + my $found_terminating_end = 0; + + # Return cached result, if available: + if ($pascal_files{$filename}) { return pascal_files{$filename};} + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while(<PASCAL_FILE>) { + if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;} + if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;} + if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; } + if (m/\bfunction\b/i) { $has_procedure_or_function = 1; } + if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; } + if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; } + if (m/\bbegin\b/i) { $has_begin = 1; } + s/\{.*?\}//g; # Ignore comments on this line; imperfect, but effective. 
+ # This heuristic fails if there are multi-line comments after + # "end."; I haven't seen that in real Pascal programs: + if (m/end\.\s*$/i) {$found_terminating_end = 1;} + elsif (m/\S/) {$found_terminating_end = 0;} + } + close(PASCAL_FILE); + + # Okay, we've examined the entire file looking for clues; + # let's use those clues to determine if it's really Pascal: + + if ( ($has_unit || $has_program) && $has_procedure_or_function && + $has_begin && $found_terminating_end) + {$is_pascal = 1;} + + $pascal_files{$filename} = $is_pascal; # Store result in cache. + + return $is_pascal; +} + +# Cache which files are php or not. +# Key is the full file pathname; value is 1 if it is (else 0). +%php_files = (); + +sub really_is_php { +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?> + my $script_surround = 0; # <script..>; bit 0 = <script language="php"> + my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %> + + # Return cached result, if available: + if ($php_files{$filename}) { return $php_files{$filename};} + + open(PHP_FILE, "<$filename") || + die "Can't open $filename to determine if it's php.\n"; + while(<PHP_FILE>) { + if (m/\<\?/) { $normal_surround |= 1; } + if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; } + if (m/\<script.*language="?php"?/i) { $script_surround |= 1; } + if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; } + if (m/\<\%/) { $asp_surround |= 1; } + if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; } + } + close(PHP_FILE); + + if ( ($normal_surround == 3) || ($script_surround == 3) || + ($asp_surround == 3)) { + $is_php = 1; + } + + $php_files{$filename} = $is_php; # Store result in cache. 
+ + return $is_php; +} + + + +sub examine_dir { + # Given a file, determine if there are only C++, OBJC, C, or a mixture + # in the same directory. Returns "ansic", "cpp", "objc" or "mix" + my $filename = shift; + chomp($filename); + my $dirname = $filename; + $dirname =~ s/\/[^\/]*$//; + my $saw_ansic_in_dir = 0; + my $saw_pc_in_dir = 0; # ".pc" may mean Oracle C. + my $saw_pcc_in_dir = 0; # ".pc" may mean Oracle C++. + my $saw_cpp_in_dir = 0; + my $saw_objc_in_dir = 0; + opendir(DIR, $dirname) || die "can't opendir $dirname"; + while ($_ = readdir(DIR)) { + chomp; + next if (!$_); + if (m/\.(cpp|C|cxx|cc)$/ && -f "$dirname/$_") {$saw_cpp_in_dir = 1;} + if (m/\.c$/ && -f "$dirname/$_") {$saw_ansic_in_dir = 1;} + if (m/\.pc$/ && -f "$dirname/$_") {$saw_pc_in_dir = 1;} + if (m/\.pcc$/ && -f "$dirname/$_") {$saw_pcc_in_dir = 1;} + if (m/\.m$/ && -f "$dirname/$_" && &really_is_objc($dirname . "/" . $_)) + {$saw_objc_in_dir = 1;} + if (($saw_ansic_in_dir + $saw_cpp_in_dir + $saw_objc_in_dir) > 1) { + closedir(DIR); + return "mix"; + } + } + # Done searching; we saw at most one type. + if ($saw_ansic_in_dir) {return "c";} + elsif ($saw_cpp_in_dir) {return "cpp";} + elsif ($saw_objc_in_dir) {return "objc";} + elsif ($saw_pc_in_dir && (!$saw_pcc_in_dir)) {return "c";} # Guess "C". + elsif ($saw_pcc_in_dir && (!$saw_pc_in_dir)) {return "cpp";} # Guess "C". + else {return "mix";} # We didn't see anything... so let's say "mix". +} + +sub was_generated_automatically() { + # Determine if the file was generated automatically. + # Use a simple heuristic: check if first few lines have the + # phrase "generated automatically", or "automatically generated", + # or "do not edit" as the first + # words in the line (after possible comment markers and spaces). + my $filename = shift; + + if ($autogen_okay) {return 0;}; + + chomp($filename); + reopen($filename); + $i = 15; # Look at first 15 lines. 
+ while (<CODE_FILE>) { + if (m/^[\s#\/\*;\-\%]*generated automatically/i || + m/^[\s#\/\*;\-\%]*automatically generated/i || + m/^[\s#\/\*;\-\%]*this is a generated file/i || # TeTex uses this. + m/^[\s#\/\*;\-\%]*generated with the.*utility/i || # TeTex uses this. + m/^[\s#\/\*;\-\%]*do not edit/i) { + return 1; + } + $i--; + last if $i <= 0; + } + return 0; +} + + +# Previous files added, indexed by digest: + +%previous_files = (); + +$cached_digest = ""; +$cached_digest_filename = ""; + +sub get_digest { + my $filename = shift; + # First, check the cache -- did we just compute this? + if ($filename eq $cached_digest_filename) { + return $cached_digest; # We did, so here's what it was. + } + + my $results = `md5sum "$filename"`; + chomp($results); + $results =~ s/^\s*//; # Not needed for GNU Textutils. + $results =~ s/[^a-fA-F0-9].*//; # Strip away end. + $cached_digest = $results; # Store in cache. + $cached_digest_filename = $filename; + return $results; +} + + +sub already_added { + # returns the first file's name with the same contents, + # else returns the empty string. + + my $filename = shift; + my $digest = &get_digest($filename); + + if ($previous_files{$digest}) { + return $previous_files{$digest}; + } else { + return ""; + } +} + +sub close_lang_lists { + my $lang; + my $file; + while (($lang, $file) = each(%lang_list_files)) { + $file->close(); # Ignore any errors on close, there's little we can do. + } + %lang_list_files = (); +} + +sub force_record_file_type { + my ($filename, $type) = @_; + + if (!$type) {die "ERROR! 
File $filename, type $file_type\n";} + if ($type eq "c") {$type = "ansic";}; + if (!defined($lang_list_files{$type})) { + $lang_list_files{$type} = new FileHandle("${dir}/${type}_list.dat", "w") || + die "Could not open ${dir}/${type}_list.dat"; + } + $lang_list_files{$type}->printf("%s\n", $filename); +} + + +sub record_file_type { + my ($filename, $type) = @_; + # First check if the file should be auto, dup, or zero - and add there + # if so. Otherwise, add to record of 'type'. + + my $first_filename; + + if (-z $filename) { + force_record_file_type($filename, "zero"); + return; + } + + if (&was_generated_automatically($filename)) { + force_record_file_type($filename, "auto"); + return; + } + + unless (($duplicates_okay) || ($type eq "not") || ($type eq "unknown")) { + $first_filename = &already_added($filename); + if ($first_filename) { + print "Note: $filename dups $first_filename\n" if $noisy; + force_record_file_type("$filename dups $first_filename", "dup"); + $dup_count++; + return; + } else { # This isn't a duplicate - record that info, as needed. + my $digest = &get_digest($filename); + $previous_files{$digest} = $filename; + if ($duplistfile) { + print DUPLIST "$digest $filename\n"; + } + } + } + + force_record_file_type($filename, $type); +} + + + +sub file_type_from_contents() { + # Determine if file type is a scripting language, and if so, return it. + # Returns its type as a string, or the empty string if it's undetermined. + my $filename = shift; + my $command; + chomp($filename); + reopen($filename); + # Don't do $firstline = <CODE_FILE> here because the file may be binary; + # instead, read in a fixed number of bytes: + read CODE_FILE, $firstline, 200; + return "" if (!$_); + chomp($firstline); + if (!$_) {return "";} + if (!$firstline) {return "";} + + # Handle weirdness: If there's a ".cpp" file beginning with .\" + # then it clearly isn't C/C++... it's a man page. 
People who create + # and distribute man pages with such filename extensions should have + # a fingernail removed, slowly :-). + if (($firstline =~ m@^[,.]\\"@) && + $filename =~ m@\.(c|cpp|C|cxx|cc)$@) {return "not";} + + + if (!($firstline =~ m@^#!@)) {return "";} # No script indicator here. + + # studying $firstline doesn't speed things up, unfortunately. + + # I once used a pattern that only acknowledged very specific directories, + # but I found that many test cases use unusual script locations + # (to ensure that they're invoking the correct program they're testing). + # Thus, we depend on the program being named with postfixed whitespace, + # and either begin named by itself or with a series of lowercase + # directories ending in "/". + + # I developed these patterns by starting with patterns that appeared + # correct, and then examined the output (esp. warning messages) to see + # what I'd missed. + + $command = ""; + if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@) { + $command = $2; + } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) { + $command = $2; + } + + if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/) || + ($firstline =~ + m~^#!\s*\@_?(SCRIPT_)?(PATH_)?(BA|K)?SH(ELL)?(\d+)?\@?(\s|\Z)~)) { + # Note: wish(1) uses a funny trick; see wish(1) for more info. + # The following code detects this unusual wish convention. + if ($firstline =~ m@exec wish(\s|\Z)@) { + return "tcl"; # return the type for wish. + } + # Otherwise, it's shell. 
+ return "sh"; + } + if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/) || + ($firstline =~ m@^#!\s*xCSH_PATHx(\s|\Z)@)) { + return "csh"; + } + if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/) || + ($firstline =~ m~^#!\s*\@_?(PATH_)?PERL\d*(PROG)?\@(\s|\Z)~) || + ($firstline =~ m~^#!\s*xPERL_PATHx(\s|\Z)~)) { + return "perl"; + } + if ($command =~ m/^python[0-9\.]*(\.exe)?$/) { + return "python"; + } + if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/) { + return "tcl"; + } + if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/) { return "exp"; } + if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/) { return "awk"; } + if ($command =~ m/^sed$/) { return "sed"; } + if ($command =~ m/^guile[0-9\.]*$/) { return "lisp"; } + if ($firstline =~ m@^#!.*make\b@) { # We'll claim that #! make is a makefile. + return "makefile"; + } + if ($firstline =~ m@^#!\s*\.(\s|\Z)@) { # Lonely period. + return ""; # Ignore the first line, it's not helping. + } + if ($firstline =~ m@^#!\s*\Z@) { # Empty line. + return ""; # Ignore the first line, it's not helping. + } + if ($firstline =~ m@^#!\s*/dev/null@) { # /dev/null is the script?!? + return ""; # Ignore nonsense ("/dev/null"). + } + if ($firstline =~ m@^#!\s*/unix(\s|Z)@) { + return ""; # Ignore nonsense ("/unix"). + } + if (($filename =~ m@\.pl$@) || ($filename =~ m@\.pm$@)) { + return ""; # Don't warn about files that will be ID'd as perl files. + } + if (($filename =~ m@\.sh$@)) { + return ""; # Don't warn about files that will be ID'd as sh files. + } + if ($firstline =~ m@^#!\s*\S@) { + $firstline =~ s/\n.*//s; # Delete everything after first line. + $warning_from_first_line = "WARNING! File $filename has unknown start: $firstline"; + return ""; + } + return ""; +} + + +sub get_file_type { + my $file_to_examine = shift; + # Return the given file's type. + # It looks at the contents, then the filename, then file extension. + + $warning_from_first_line = ""; + + # Skip file names known to not be program files. 
+ $basename = $file_to_examine; + $basename =~ s!^.*/!!; + if ($not_code_filenames{$basename}) { + print "Note: Skipping non-program filename: $file_to_examine\n" + if $noisy; + return "not"; + } + + # Skip "configure" files if there's a corresponding "configure.in" + # file; such a situation suggests that "configure" is automatically + # generated by "autoconf" from "configure.in". + if (($file_to_examine =~ m!/configure$!) && + (-s "${file_to_examine}.in")) { + print "Note: Auto-generated configure file $file_to_examine\n" + if $noisy; + return "auto"; + } + + if (($basename eq "lex.yy.c") || # Flex/Lex output! + ($basename eq "lex.yy.cc") || # Flex/Lex output - C++ scanner. + ($basename eq "y.code.c") || # yacc/bison output. + ($basename eq "y.tab.c") || # yacc output. + ($basename eq "y.tab.h")) { # yacc output. + print "Note: Auto-generated lex/yacc file $file_to_examine\n" + if $noisy; + return "auto"; + } + + # Bison is more flexible than yacc -- it can create arbitrary + # .c/.h files. If we have a .tab.[ch] file, with a corresponding + # .y file, then it's been automatically generated. + # Bison can actually save to any filename, and of course a Makefile + # can rename any file, but we can't help that. + if ($basename =~ m/\.tab\.[ch]$/) { + $possible_bison = $file_to_examine; + $possible_bison =~ s/\.tab\.[ch]$/\.y/; + if (-s "$possible_bison") { + print "Note: found bison-generated file $file_to_examine\n" + if $noisy; + return "auto"; + } + } + + # If there's a corresponding ".MASTER" file, treat this file + # as automatically-generated derivative. This handles "exmh". + if (-s "${file_to_examine}.MASTER") { + print "Note: Auto-generated non-.MASTER file $file_to_examine\n" + if $noisy; + return "auto"; + } + + # Peek at first line to determine type. 
Note that the file contents + # take precedence over the filename extension, because there are files + # (such as /usr/src/redhat/BUILD/teTeX-1.0/texmf/doc/mkhtml.nawk) + # which have one extension (say, ".nawk") but actually contain + # something else (at least in part): + $type = &file_type_from_contents($file_to_examine); + if ($type) { + return $type; + } + + # Use filename to determine if it's a makefile: + if (($file_to_examine =~ m/\bmakefile$/i) || + ($file_to_examine =~ m/\bmakefile\.txt$/i) || + ($file_to_examine =~ m/\bmakefile\.pc$/i)) { + return "makefile"; + } + + # Try to use filename extension to determine type: + if ($file_to_examine =~ m/\.([^.\/]+)$/) { + $type = $1; + + # Is this type known to NOT be a program? + if ($not_code_extensions{$type}) { + return "not"; + } + + # Handle weirdness: ".hpp" is a C/C++ header file, UNLESS it's + # makefile.hpp (a makefile); see /usr/src/redhat/BUILD, + # pine4.21/pine/makefile.hpp and pine4.21/pico/makefile.hpp + # Note that pine also includes pine4.21/pine/osdep/diskquot.hpp. + # Kaffe uses .hpp for C++ header files. + if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/)) + {return "makefile";} + + # If it's a C file but there's a ".pc" or ".pgc" file, then presume that + # it was automatically generated: + if ($type eq "c") { + $pc_name = $file_to_examine; + $pc_name =~ s/\.c$/\.pc/; + if (-s "$pc_name" ) { + print "Note: Auto-generated C file (from .pc file) $file_to_examine\n" + if $noisy; + return "auto"; + } + $pc_name = $file_to_examine; + $pc_name =~ s/\.c$/\.pgc/; + if (-s "$pc_name" ) { + print "Note: Auto-generated C file (from .pgc file) $file_to_examine\n" + if $noisy; + return "auto"; + } + } + + # ".pc" is the official extension for Oracle C programs with + # Embedded C commands, but many programs use ".pc" to indicate + # the "PC" (MS-DOS/Windows) version of a file. + # We'll use heuristics to detect when it's not really C, + # otherwise claim it's C and move on. 
+ if ($type eq "pc") { # If it has one of these filenames, it's not C. + if ($file_to_examine =~ m/\bmakefile\.pc$/i) { return "makefile"; } + if (($file_to_examine =~ m/\bREADME\.pc$/i) || + ($file_to_examine =~ m/\binstall\.pc$/i) || + ($file_to_examine =~ m/\bchanges\.pc$/i)) {return "not";} + else { return "c";} + } + + if (defined($file_extensions{$type})) { + $type = $file_extensions{$type}; + if ( (($type eq "exp") && (!&really_is_expect($file_to_examine))) || + (($type eq "tk") && (!&really_is_expect($file_to_examine))) || + (($type eq "objc") && (!&really_is_objc($file_to_examine))) || + (($type eq "lex") && (!&really_is_lex($file_to_examine))) || + (($type eq "pascal") && (!&really_is_pascal($file_to_examine))) || + (($type eq "inc") && (!&really_is_php($file_to_examine)))) + {$type = "unknown";} + if ($type eq "inc") { $type = "php"; }; # Hey, the .inc is PHP! + return $type; + } + + } + # If we were expecting a script, warn about that. + if ($warning_from_first_line) {print "$warning_from_first_line\n";} + # Don't know what it is, so report "unknown". + return "unknown"; +} + + + + +sub convert_h_files { + # Determine if the ".h" files we saw are C, OBJC, C++, or a mixture (!) + # Usually ".hpp" files are C++, but if we didn't see any C++ files then + # it probably isn't. This handles situations like pine; its has a file + # /usr/src/redhat/BUILD/pine4.21/pine/osdep/diskquot.hpp + # where the ".hpp" is for HP, not C++. (Of course, we completely miss + # the other files in that pine directory because they have truly bizarre + # extensions, but there's no easy way to handle such nonstandard things). 
+ + if (!defined($lang_list_files{"h"})) { return; } + + my $saw_ansic = defined($lang_list_files{"ansic"}); + my $saw_cpp = defined($lang_list_files{"cpp"}); + my $saw_objc = defined($lang_list_files{"objc"}); + my $confidence; + + $lang_list_files{"h"}->close(); + + open(H_LIST, "<${dir}/h_list.dat") || die "Can't reopen h_list\n"; + + if ($saw_ansic && (!$saw_cpp) && (!$saw_objc)) { + # Only C, let's assume .h files are too + while (<H_LIST>) { chomp; force_record_file_type($_, "c"); }; + } elsif ($saw_cpp && (!$saw_ansic) && (!$saw_objc)) { # Only C++ + while (<H_LIST>) { chomp; force_record_file_type($_, "cpp"); }; + } elsif ($saw_objc && (!$saw_ansic) && (!$saw_cpp)) { # Only Obj-C + while (<H_LIST>) { chomp; force_record_file_type($_, "objc"); }; + } else { + # Ugh, we have a mixture. Let's try to determine what we have, using + # various heuristics (looking for a matching name in the directory, + # reading the file contents, the contents in the directory, etc.) + # When all else fails, assume C. + while (<H_LIST>) { + chomp; + next if (!$_); + # print "DEBUG: H file $_\n"; + + $h_file = $_; + $cpp2_equivalent = + $cpp3_equivalent = $cpp4_equivalent = $objc_equivalent = $_; + $ansic_equivalent = $cpp_equivalent = $_; + $ansic_equivalent =~ s/h$/c/; + $cpp_equivalent =~ s/h$/C/; + $cpp2_equivalent =~ s/h$/cpp/; + $cpp3_equivalent =~ s/h$/cxx/; + $cpp4_equivalent =~ s/h$/cc/; + $objc_equivalent =~ s/h$/m/; + if (m!\.hpp$!) { force_record_file_type($h_file, "cpp"); } + elsif ( (-s $cpp2_equivalent) || + (-s $cpp3_equivalent) || (-s $cpp4_equivalent)) + { force_record_file_type($h_file, "cpp"); } + # Note: linuxconf has many ".m" files that match .h files, + # but the ".m" files are straight C and _NOT_ objective-C. + # The following test handles cases like this: + elsif ($saw_objc && (-s $objc_equivalent) && + &really_is_objc($objc_equivalent)) + { &force_record_file_type($h_file, "objc"); } + elsif (( -s $ansic_equivalent) && (! 
-s $cpp_equivalent)) + { force_record_file_type($h_file, "c"); } + elsif ((-s $cpp_equivalent) && (! -s $ansic_equivalent)) + { force_record_file_type($h_file, "cpp"); } + else { + $confidence = &looks_like_cpp($h_file); + if ($confidence == 2) + { &force_record_file_type($h_file, "cpp"); } + else { + $files_in_dir = &examine_dir($h_file); + if ($files_in_dir eq "cpp") + { &force_record_file_type($h_file, "cpp"); } + elsif ($files_in_dir eq "objc") + { &force_record_file_type($h_file, "objc"); } + elsif ($confidence == 1) + { &force_record_file_type($h_file, "cpp"); } + elsif ($h_file =~ m![a-z][0-9]*\.H$!) + # Mixed-case filename, .H extension. + { &force_record_file_type($h_file, "cpp"); } + else # We're clueless. Let's guess C. + { &force_record_file_type($h_file, "c"); }; + } + } + } + } # Done handling ".h" files. + close(H_LIST); +} + + +# MAIN PROGRAM STARTS HERE. + +# Handle options. +while (($#ARGV >= 0) && ($ARGV[0] =~ m/^--/)) { + $duplicates_okay = 1 if ($ARGV[0] =~ m/^--duplicates$/); # Count duplicates. + $crossdups_okay = 1 if ($ARGV[0] =~ m/^--crossdups$/); # Count crossdups. + $autogen_okay = 1 if ($ARGV[0] =~ m/^--autogen$/); # Count autogen. + $noisy = 1 if ($ARGV[0] =~ m/^--verbose$/); # Verbose output. + if ($ARGV[0] =~ m/^--duplistfile$/) { # File to get/record dups. + shift; + $duplistfile = $ARGV[0]; + } + last if ($ARGV[0] =~ m/^--$/); + shift; +} + +if ($#ARGV < 0) { + print "Error: No directory names given.\n"; + exit(1); +} + +if ($duplistfile) { + if (-e $duplistfile) { + open(DUPLIST, "<$duplistfile") || die "Can't open $duplistfile"; + while (<DUPLIST>) { + chomp; + ($digest, $filename) = split(/ /, $_, 2); + if (defined($digest) && defined($filename)) { + $previous_files{$digest} = $filename; + } + } + close(DUPLIST); + } + open(DUPLIST, ">>$duplistfile") || die "Can't open for writing $duplistfile"; +} + + +while ( $dir = shift ) { + + if (! 
-d "$dir") { + print "Skipping non-directory $dir\n"; + next; + } + + if ($examined_directories{$dir}) { + print "Skipping already-examined directory $dir\n"; + next; + } + $examined_directories{$dir} = 1; + + if (! open(FILELIST, "<${dir}/filelist")) { + print "Skipping directory $dir; it doesn't contain a file 'filelist'\n"; + next; + } + + if (-r "${dir}/all-physical.sloc") { + # Skip already-analyzed directories; if it's been analyzed, we've already + # broken them down. + next; + } + + if ($crossdups_okay) { # Cross-dups okay; forget the hash of previous files. + %previous_files = (); + } + + # insert blank lines, in case we need to recover from a midway crash + if ($duplistfile) { + print DUPLIST "\n"; + } + + + $dup_count = 0; + + while (<FILELIST>) { + chomp; + $file = $_; + next if (!defined($file) || ($file eq "")); + $file_type = &get_file_type($file); + if ($file_type) { + &record_file_type($file, $file_type); + } else { + print STDERR "WARNING! No file type selected for $file\n"; + } + } + + # Done with straightline processing. Now we need to determine if + # the ".h" files we saw are C, OBJC, C++, or a mixture (!) + &convert_h_files(); + + + # Done processing the directory. Close up shop so we're + # ready for the next directory. + + close(FILELIST); + close_lang_lists(); + reopen(""); # Close code file. + + if ($dup_count > 50) { + print "Warning: in $dir, number of duplicates=$dup_count\n"; + } + +} + + diff --git a/break_filelist.rej b/break_filelist.rej new file mode 100644 index 0000000..f053df2 --- /dev/null +++ b/break_filelist.rej @@ -0,0 +1,20 @@ +*************** +*** 965,972 **** + { &force_record_file_type($h_file, "objc"); } + elsif ($confidence == 1) + { &force_record_file_type($h_file, "cpp"); } +- elsif (m![a-z][0-9]*\.H$!) # Mixed-case filename, .H extension. +- { force_record_file_type($h_file, "cpp"); } + else # We're clueless. Let's guess C. 
+ { &force_record_file_type($h_file, "c"); }; + } +--- 967,975 ---- + { &force_record_file_type($h_file, "objc"); } + elsif ($confidence == 1) + { &force_record_file_type($h_file, "cpp"); } ++ elsif ($h_file =~ m![a-z][0-9]*\.H$!) # Mixed-case filename, ++ #.H extension. ++ { &force_record_file_type($h_file, "cpp"); } + else # We're clueless. Let's guess C. + { &force_record_file_type($h_file, "c"); }; + } diff --git a/c_count.c b/c_count.c new file mode 100644 index 0000000..8581e55 --- /dev/null +++ b/c_count.c @@ -0,0 +1,225 @@ +/* c_count: given a list of C/C++/Java files on the command line, + count the SLOC in each one. SLOC = physical, non-comment lines. + This program knows about C++ and C comments (and how they interact), + and correctly ignores comment markers inside strings. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. 
+ + Usage: Use in one of the following ways: + c_count # As filter + c_count [-f file] [list_of_files] + file: file with a list of files to count (if "-", read list from stdin) + list_of_files: list of files to count +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdlib.h> + +/* Modes */ +#define NORMAL 0 +#define INSTRING 1 +#define INCOMMENT 2 + +/* Types of comments: */ +#define ANSIC_STYLE 0 +#define CPP_STYLE 1 + +/* Not all C compilers support a boolean type, so for portability's sake, + we'll fake it. */ +#define BOOLEAN int +#define TRUE 1 +#define FALSE 0 + + +/* Globals */ +long total_sloc; + +static BOOLEAN warn_embedded_newlines = FALSE; + +int peek(FILE *stream) { + int c = getc(stream); + ungetc(c, stream); + return c; +} + +int ispeek(int c, FILE *stream) { + if (c == peek(stream)) {return 1;} + return 0; +} + +long line_number; + +int getachar(FILE *stream) { +/* Like getchar(), but keep track of line number. */ + static BOOLEAN last_char_was_newline = 0; + int c; + + c = getc(stream); + if (last_char_was_newline) line_number++; + if (c == '\n') last_char_was_newline=1; + else last_char_was_newline=0; + return c; +} + + +long sloc_count(char *filename, FILE *stream) { + /* Count the sloc in the program in stdin. */ + + long sloc = 0; + + int sawchar = 0; /* Did you see a character on this line? */ + int c; + int mode = NORMAL; /* NORMAL, INSTRING, or INCOMMENT */ + int comment_type = ANSIC_STYLE; /* ANSIC_STYLE or CPP_STYLE */ + + + /* The following implements a state machine with transitions; the + main state is "mode" and "comment_type", the transitions are + triggered by characters input. 
*/ + + while ( (c = getachar(stream)) != EOF) { + if (mode == NORMAL) { + if (c == '"') {sawchar=1; mode = INSTRING;} + else if (c == '\'') { /* Consume single-character 'xxxx' values */ + sawchar=1; + c = getachar(stream); + if (c == '\\') c = getachar(stream); + do { + c = getachar(stream); + } while ((c != '\'') && (c != '\n') & (c != EOF)); + } else if ((c == '/') && ispeek('*', stream)) { + c = getachar(stream); + mode = INCOMMENT; + comment_type = ANSIC_STYLE; + } else if ((c == '/') && ispeek('/', stream)) { + c = getachar(stream); + mode = INCOMMENT; + comment_type = CPP_STYLE; + } else if (!isspace(c)) {sawchar = 1;} + } else if (mode == INSTRING) { + /* We only count string lines with non-whitespace -- this is to + gracefully handle syntactically invalid programs. + You could argue that multiline strings with whitespace are + still executable and should be counted. */ + if (!isspace(c)) sawchar = 1; + if (c == '"') {mode = NORMAL;} + else if ((c == '\\') && (ispeek('\"', stream) || ispeek('\\', stream))) {c = getachar(stream);} + else if ((c == '\\') && ispeek('\n', stream)) {c = getachar(stream);} + else if ((c == '\n') && warn_embedded_newlines) { + /* We found a bare newline in a string without preceding backslash. */ + fprintf(stderr, "c_count WARNING - newline in string, line %ld, file %s\n", line_number, filename); + /* We COULD warn & reset mode to "Normal", but lots of code does this, + so we'll just depend on the warning for ending the program + in a string to catch syntactically erroneous programs. */ + } + } else { /* INCOMMENT mode */ + if ((c == '\n') && (comment_type == CPP_STYLE)) { mode = NORMAL;} + if ((comment_type == ANSIC_STYLE) && (c == '*') && + ispeek('/', stream)) { c= getachar(stream); mode = NORMAL;} + } + if (c == '\n') { + if (sawchar) sloc++; + sawchar = 0; + } + } + /* We're done with the file. Handle EOF-without-EOL. 
*/ + if (sawchar) sloc++; + sawchar = 0; + if ((mode == INCOMMENT) && (comment_type == CPP_STYLE)) { mode = NORMAL;} + + if (mode == INCOMMENT) { + fprintf(stderr, "c_count ERROR - terminated in comment in %s\n", filename); + } else if (mode == INSTRING) { + fprintf(stderr, "c_count ERROR - terminated in string in %s\n", filename); + } + + return sloc; +} + + +void count_file(char *filename) { + long sloc; + FILE *stream; + + stream = fopen(filename, "r"); + line_number = 1; + sloc = sloc_count(filename, stream); + total_sloc += sloc; + printf("%ld %s\n", sloc, filename); + fclose(stream); +} + +char *read_a_line(FILE *file) { + /* Read a line in, and return a malloc'ed buffer with the line contents. + Any newline at the end is stripped. + If there's nothing left to read, returns NULL. */ + + /* We'll create a monstrously long buffer to make life easy for us: */ + char buffer[10000]; + char *returnval; + char *newlinepos; + + returnval = fgets(buffer, sizeof(buffer), file); + if (returnval) { + newlinepos = buffer + strlen(buffer) - 1; + if (*newlinepos == '\n') {*newlinepos = '\0';}; + return strdup(buffer); + } else { + return NULL; + } +} + + +int main(int argc, char *argv[]) { + long sloc; + int i; + FILE *file_list; + char *s; + + total_sloc = 0; + line_number = 1; + + if (argc <= 1) { + sloc = sloc_count("-", stdin); + printf("%ld %s\n", sloc, "-"); + total_sloc += sloc; + } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { + if (!strcmp (argv[2], "-")) { + file_list = stdin; + } else { + file_list = fopen(argv[2], "r"); + } + if (file_list) { + while ((s = read_a_line(file_list))) { + count_file(s); + free(s); + } + } + } else { + for (i=1; i < argc; i++) { count_file(argv[i]); } + } + printf("Total:\n"); + printf("%ld\n", total_sloc); + return 0; /* Report success */ +} + diff --git a/c_lines_environment.dat b/c_lines_environment.dat new file mode 100644 index 0000000..e4a99a0 --- /dev/null +++ b/c_lines_environment.dat @@ -0,0 +1,98 @@ + Temporary 
Project Name (* Project_Name,in 45 spaces *)
+0 (* QA_Switch *)
+1 (* Compare_Spec *)
+999 (* Line_Length *)
+1000 (* Exec_Lines *)
+500 (* Data_Lines *)
+60.0 (* Min_Percent *)
+0.0 (* Inc_Percent *)
+0 (* Display_File *)
+0 (* Intro_Msg *)
+P (* SLOC_Def *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* Refer to the source code file, 'c_lines.c', for further information *)
+(* pertaining to the INSTALLATION PROCEDURES and EXECUTION PROCEDURES of *)
+(* this code counting tool. *)
+(* *)
+(* Note: *)
+(* 1. The above user-defined parameters must be spaced one entry per line *)
+(* of this file. Numeric entries, with the exception of 'Inc_Percent', *)
+(* are of type Integer. *)
+(* *)
+(* 2. The 'c_lines_environment.dat' file must be co-located in the *)
+(* directory/path where the code counting tool is to be invoked. *)
+(* Failure to do so will result in the insertion of predefined default *)
+(* values for the entries contained herein. *)
+(* *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* USER DEFINABLE PARAMETERS *)
+(* *)
+(* Project_Name -- Allows the user to insert the name of the Program or *)
+(* Project to which the source code to be counted pertains. *)
+(* The Project_Name will appear within the headings *)
+(* of the 'c_outfile.dat' file produced upon execution *)
+(* of the 'c_lines' code counting tool. *)
+(* *)
+(* QA_Switch -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of programming language reserved word usage *)
+(* as found in the summary page of 'c_outfile.dat'. *)
+(* *)
+(* Compare_Spec -- Allows the user to control the case sensitivity of the *)
+(* code counting tool. A setting of '1' indicates that *)
+(* full case sensitive comparisons must be made. A setting*)
+(* of '0' allows valid comparisons to occur between like *)
+(* letters of upper and lower case. *)
+(* *)
+(* Line_Length -- Allows user to force the code counting tool to ignore *)
+(* information beyond 'Line_Length' characters per physical*)
+(* line of input. It is recommended that the length of *)
+(* the longest physical line to be read be used, i.e. 132. *)
+(* *)
+(* Exec_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with executable lines in exceedance *)
+(* of 'Exec_Lines' will be reported on the summary page of *)
+(* 'c_outfile.dat'. *)
+(* *)
+(* Data_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with data declaration lines in *)
+(* exceedance of 'Data_Lines' will be reported on the *)
+(* summary page of 'c_outfile.dat'. *)
+(* *)
+(* Min_Percent -- Allows the user to set a threshold whereby the number *)
+(* of files processed with a ratio of comments (whole & *)
+(* embedded) to SLOC (physical or logical) is less than *)
+(* 'Min_Percent'. *)
+(* *)
+(* Inc_Percent -- Allows the user to set a progress increment whereby a *)
+(* progress message will appear on the terminal screen *)
+(* during execution of the 'c_lines' tool. The progress *)
+(* message indicates that approximately 'Inc_Percent' of *)
+(* source code files to be processed have completed since *)
+(* the previous progress message appeared. The progress *)
+(* reporting is based solely on the number of files *)
+(* contained in 'c_list.dat'. Actual run-time progress *)
+(* is dependent on the relative size of each source code *)
+(* file and the user loading of the host platform machine. *)
+(* A setting of 0.0 will disable the reporting of the *)
+(* progress message. *)
+(* *)
+(* Display_File -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of last file to be processed within the *)
+(* c_list.dat file. *)
+(* *)
+(* Intro_Msg -- Allows the user to turn on '1' or to turn off '0' the *)
+(* output of the introduction message as the first page *)
+(* of the 'c_outfile.dat' file. *)
+(* *)
+(* SLOC_Def -- Allows the user to select the definition of a Source *)
+(* Line of Code (SLOC) to be used during the operation of *)
+(* the CodeCount tool. A setting of 'P' invokes the SLOC *)
+(* definition of Physical lines, a.k.a., non-comment, *)
+(* non-blank, physical lines of code or Deliverable Source *)
+(* Instructions (DSIs). A setting of 'L' invokes the SLOC *)
+(* definition of Logical lines, a.k.a., non-comment, *)
+(* non-blank, logical lines of code. *)
+(* *)
+(*---------------------------------------------------------------------------*)
diff --git a/c_outfile.dat b/c_outfile.dat new file mode 100644 index 0000000..7d60dbc --- /dev/null +++ b/c_outfile.dat @@ -0,0 +1 @@ +ERROR, unable to read c_list.dat file diff --git a/cobol_count b/cobol_count new file mode 100755 index 0000000..adda598 --- /dev/null +++ b/cobol_count @@ -0,0 +1,82 @@ +#!/usr/bin/perl +# cobol_count - count physical lines of code. +# Usage: cobol_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both +# This is a trivial/naive program for scripts, etc. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $filewithlist\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + my $free_format = 0; # Support "free format" source code. + + open (FILE, $file); + while (<FILE>) { + if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;} + if ($free_format) { + if (m/^\s*[\*\/]/) { } # Saw a comment. + elsif (m/\S/) {$sloc++;} # Saw a non-whitespace, it's SLOC. + } else { + if (m/^......[\*\/]/) {} # Saw a comment - marked in indicator area. + elsif (m/^[\*\/]/) {} # Saw a comment. + elsif (m/^........*\S/) {$sloc++;} # Saw a non-whitespace, it's SLOC. + } + } + print "$sloc $file\n"; + $total_sloc += $sloc; + close (FILE); +} diff --git a/compute_all b/compute_all new file mode 100755 index 0000000..20d5dcd --- /dev/null +++ b/compute_all @@ -0,0 +1,87 @@ +#!/bin/sh +# Computes filecounts and SLOC counts in the listed data directories +# if the don't already exist. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +if [ "$#" -eq 0 ] +then + echo "Error: You must provide a list of directories to examine." + exit 1 +fi + +starting_dir=`pwd` + +for dir +do + if [ -d "$dir" -a -r "${dir}/filelist" ] + then + cd "$dir" + + if [ ! -r all.filecount ] + then + # Create all.filecount and all-physical.sloc; create them in + # separate files, so that we can safely restart if it stops in the middle: + > all.filecount.new + > all-physical.sloc.new + for listfile in *_list.dat + do + language=`echo $listfile | sed -e 's/_list\.dat$//'` + + # Skip language "*" - this happens if there are NO source + # files in the given directory. + if [ "$language" = "*" ]; then + continue + fi + + # Skip language "h" - it's counted in the ansic, cpp, and objc lists. + if [ "$language" = "h" ]; then + continue + fi + + numfiles=`wc -l < $listfile | tr -d " "` + echo "$language $numfiles" >> all.filecount.new + + # Ignore certain "languages" when counting SLOC: + case "$language" + in + not) true ;; + unknown) true ;; + zero) true ;; + dup) true ;; + auto) true ;; + *) + numsloc=`compute_sloc_lang $language "." 
| tr -d " "` + echo "$language $numsloc" >> all-physical.sloc.new + ;; + esac + done + mv all.filecount.new all.filecount + mv all-physical.sloc.new all-physical.sloc + fi + + cd "$starting_dir" + fi +done + diff --git a/compute_c_usc b/compute_c_usc new file mode 100755 index 0000000..96ec59c --- /dev/null +++ b/compute_c_usc @@ -0,0 +1,77 @@ +#!/bin/sh +# Computes C and C++ code sizes for the list of directories given; +# each directory must contain ansic_list.dat and cpp_list.dat. +# +# Change the following if it's the wrong place: + +C_DATA_ENV_FILE="/home/dwheeler/sloc/bin/c_lines_environment.dat" + +# Unfortunately, USC's code fails when c_list.dat is 0-length, +# so we work around it. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + + +starting_dir=`pwd` + +for dir in $@ +do + if [ -d "$dir" -a -r "${dir}/filelist" ] + then + # ??? DEBUG: print out the name of each directory. + echo " $dir" + cd $dir + cp $C_DATA_ENV_FILE . 
+ + if [ -s ansic_list.dat ] + then + rm -f c_list.dat + ln -s ansic_list.dat c_list.dat + c_lines + mv c_outfile.dat ansic_outfile.dat.usc + extract-count < ansic_outfile.dat.usc + mv logical.sloc ansic-logical.sloc + mv physical.sloc ansic-physical.sloc.usc + else + echo 0 > ansic-logical.sloc + echo 0 > ansic-physical.sloc.usc + fi + + if [ -s cpp_list.dat ] + then + rm -f c_list.dat + ln -s cpp_list.dat c_list.dat + c_lines + mv c_outfile.dat cpp_outfile.dat.usc + extract-count < cpp_outfile.dat.usc + mv logical.sloc cpp-logical.sloc + mv physical.sloc cpp-physical.sloc.usc + else + echo 0 > cpp-logical.sloc + echo 0 > cpp-physical.sloc.usc + fi + + cd $starting_dir + fi +done diff --git a/compute_java_usc b/compute_java_usc new file mode 100755 index 0000000..a0ffb25 --- /dev/null +++ b/compute_java_usc @@ -0,0 +1,59 @@ +#!/bin/sh +# Computes Java sloc in the listed directories. +# each directory must contain ansic_list.dat and cpp_list.dat. + +# Change the following if it's the wrong place: + +JAVA_DATA_ENV_FILE="/home/dwheeler/sloc/bin/java_lines_environment.dat" + +# Unfortunately, USC's code fails when c_list.dat is 0-length, +# so we work around it. + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + + +starting_dir=`pwd` + +for dir in $@ +do + if [ -d "$dir" -a -r "${dir}/filelist" ] + then + cd $dir + cp $JAVA_DATA_ENV_FILE . + + if [ -s java_list.dat ] + then + java_lines + extract-count < java_outfile.dat + mv logical.sloc java-logical.sloc + mv physical.sloc java-physical.sloc + else + echo 0 > java-logical.sloc + echo 0 > java-physical.sloc + fi + + cd $starting_dir + fi +done diff --git a/compute_sloc_lang b/compute_sloc_lang new file mode 100755 index 0000000..df635f7 --- /dev/null +++ b/compute_sloc_lang @@ -0,0 +1,66 @@ +#!/bin/sh +# Computes sloc in the listed directories. +# first parameter = language. +# Creates the "outfile", and prints the total. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +language="$1" +shift + +starting_dir=`pwd` + +for dir in "$@" +do + if [ -d "$dir" -a -r "${dir}/filelist" ] + then + cd "$dir" + + if [ -s ${language}_list.dat ] + then + case "$language" in + ansic) use_c_count="y" ;; + cpp) use_c_count="y" ;; + java) use_c_count="y" ;; + yacc) use_c_count="y" ;; + cs) use_c_count="y" ;; + *) use_c_count="n" ;; + esac + + case "$use_c_count" in + y) c_count -f ${language}_list.dat > ${language}_outfile.dat ;; + *) ${language}_count -f ${language}_list.dat > ${language}_outfile.dat + ;; + esac + tail -1 < ${language}_outfile.dat + + else + rm -f ${language}_outfile.dat + echo 0 + fi + + cd "$starting_dir" + fi +done + diff --git a/count_extensions b/count_extensions new file mode 100755 index 0000000..1547d3f --- /dev/null +++ b/count_extensions @@ -0,0 +1,56 @@ +#!/usr/bin/perl -w + +# Read from standard input a list of filenames, and +# report a sorted list of extensions and filenames +# (most common ones first). + +# The format is "name count", where "count" is the number of appearances. +# "name" usually begins with a "." followed by the name of the extension. +# In the case where the filename has no extension, the name begins with "/" +# followed by the entire basename. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +%extensions = (); + +while (<>) { + if (m/\.([^.\/]+)$/) { + $type = $1; + chomp($type); + $type = ".$type"; + if (defined($extensions{$type})) { $extensions{$type}++; } + else { $extensions{$type} = 1; } + } elsif (m!/([^/]+)$!) { + $filename = $1; + chomp($filename); + $filename = "/$filename"; + if (defined($extensions{$filename})) { $extensions{$filename}++; } + else { $extensions{$filename} = 1; } + } +} + +foreach $entry (sort {$extensions{$b} <=> $extensions{$a}} keys %extensions) { + print "${entry} $extensions{$entry}\n"; +} + diff --git a/count_unknown_ext b/count_unknown_ext new file mode 100755 index 0000000..cf18647 --- /dev/null +++ b/count_unknown_ext @@ -0,0 +1,32 @@ +#!/bin/sh + +# This reports a sorted list of the "unknown" file extensions +# analyzed by a previous run of SLOCCount, most common first. +# Use this to make sure that there isn't a common language type +# that you are NOT counting. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + +find ${HOME}/.slocdata -name 'unknown_list.dat' -exec cat {} \; | \ + count_extensions | less + diff --git a/csh_count b/csh_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/csh_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/dirmatch b/dirmatch new file mode 100755 index 0000000..abe8d49 --- /dev/null +++ b/dirmatch @@ -0,0 +1,37 @@ +#!/bin/sh + +# Dirmatch - take in standard input a list of directory name patterns, +# then print the matches of the directory names from each item in the list. + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + + +while read line +do + echo + echo "=== $line ===" + ls -1 | grep -i "$line" +done + diff --git a/driver.c b/driver.c new file mode 100644 index 0000000..1e5159e --- /dev/null +++ b/driver.c @@ -0,0 +1,110 @@ +/* driver: given a list of files on the command line, + count the SLOC in each one. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. 
+ +*/ + +/* This is only included so that I can do some kinds of analysis + * separately on this file; normally this file is itself included: */ +#include "driver.h" + + + +void sloc_count(char *current_filename, FILE *stream) { + /* Count the sloc in the one file named "current_filename" in "stream", + * and add it to the total_sloc. */ + + filename = current_filename; + sloc = 0; + line_number = 1; + yyin = stream; + + yylex(); + + total_sloc += sloc; +} + + +void count_file(char *current_filename) { + FILE *stream; + + stream = fopen(current_filename, "r"); + if (!stream) { + sloc = 0; + fprintf(stderr, "Error: Cannot open %s\n", current_filename); + return; + } + sloc_count(current_filename, stream); + printf("%ld %s\n", sloc, current_filename); + fclose(stream); +} + +char *read_a_line(FILE *file) { + /* Read a line in, and return a malloc'ed buffer with the line contents. + Any newline at the end is stripped. + If there's nothing left to read, returns NULL. */ + + /* We'll create a monstrously long buffer to make life easy for us: */ + char buffer[10000]; + char *returnval; + char *newlinepos; + + returnval = fgets(buffer, sizeof(buffer), file); + if (returnval) { + newlinepos = buffer + strlen(buffer) - 1; + if (*newlinepos == '\n') {*newlinepos = '\0';}; + return strdup(buffer); + } else { + return NULL; + } +} + + +int main(int argc, char *argv[]) { + int i; + char *s; + FILE *file_list = NULL; + + total_sloc = 0; + + if (argc <= 1) { + sloc_count("-", stdin); + printf("%ld %s\n", sloc, "-"); + } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { + if (!strcmp (argv[2], "-")) { + file_list = stdin; + } else { + file_list = fopen(argv[2], "r"); + } + if (file_list) { + while ((s = read_a_line(file_list))) { + count_file(s); + free(s); + } + } + } else { + for (i=1; i < argc; i++) { count_file(argv[i]); } + } + printf("Total:\n"); + printf("%ld\n", total_sloc); + return 0; /* Report success */ +} diff --git a/driver.h b/driver.h new file mode 100644 index 
0000000..ddeb331 --- /dev/null +++ b/driver.h @@ -0,0 +1,50 @@ +/* driver: given a list of files on the command line, + count the SLOC in each one. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + +*/ + +#ifndef DRIVER_H +#define DRIVER_H + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdlib.h> + +/* Not all C compilers support a boolean type, so for portability's sake, + we'll fake it. */ +#define BOOLEAN int +#define TRUE 1 +#define FALSE 0 + + +/* Globals */ +unsigned long sloc; /* For current file */ +unsigned long line_number; /* Of current file */ +char *filename; /* Name of current file */ + +unsigned long total_sloc; /* For all files seen */ + + + +#endif diff --git a/exp_count b/exp_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/exp_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/extract-count b/extract-count new file mode 100755 index 0000000..548b261 --- /dev/null +++ b/extract-count @@ -0,0 +1,83 @@ +#!/usr/bin/perl + +# Given USC output as standard input, find the # of physical and logical SLOC, and save in +# "physical.sloc" and "logical.sloc". + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. 
Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$found =0; + +while (<>) { + if (m/^The Totals/) { + $found = 1; + last; + } +} + +if (!$found) { + die "FAILED to find the totals section in code output.\n"; +} + +while (<>) { + # DEBUG: print "Read line: $_\n"; + if (m/Physical/ || m/Logical/) { + s/^ *//; + ($total, $blank, $whole, $embedded, $compiler, $datadecl, $execinstruction, + $number_of_files, $sloc, $file_type, $sloc_definition ) = split(/[ \|]+/); + # DEBUG: print "Found match; file_type='${file_type}', sloc_definition='${sloc_definition}'\n"; + if ($file_type =~ m/code/i) { + if ($sloc_definition =~ m/Physical/i) { + `echo $sloc > physical.sloc` + } + if ($sloc_definition =~ m/Logical/i) { + `echo $sloc > logical.sloc` + } + } elsif ($file_type =~ m/DATA/i) { + if ($number_of_files > 0) { + print STDERR "WARNING! NONZERO NUMBER OF DATA FILES!\n"; + $pwd = `pwd`; + chomp($pwd); + print STDERR "Extract-count in directory ${pwd}.\n"; + # The mere existence of this file is reason to check it out: + `echo $number_of_files > data.count` + } + } + } +} + + +__END__ + +Here's a sample output (the beginning chopped off): + + Temporary Project Name +The Totals + Total Blank | Comments | Compiler Data Exec. | Number | File SLOC + Lines Lines | Whole Embedded | Direct. Decl. Instr. | of Files | SLOC Type Definition +------------------------------------------------------------------------------------------------------------------------------------ + 1938455 359776 | 146182 164828 | 0 12359 1420138 | 3172 | 1432497 CODE Physical + 1938455 359776 | 146182 164828 | 0 6507 613235 | 3172 | 619742 CODE Logical + 0 0 | 0 0 | 0 0 0 | 0 | 0 DATA Physical + diff --git a/extract_license b/extract_license new file mode 100755 index 0000000..bde556e --- /dev/null +++ b/extract_license @@ -0,0 +1,178 @@ +#!/usr/bin/perl +# extract_license +# Determine the license of a program, given 2 parameters: +# (1) the directory containing the program's source code. 
+# (2) the RPM spec file (which may be /dev/null) + +# This "regularizes" license names. For example, +# BSD-style, BSDish, and BSD-like all become "BSD-like". +# License names "sentence capitalization", e.g., "Freely distributable". +# It also fixes a lot of errors in Red Hat spec files. + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$program_dir = shift; +$rpm_spec = shift; + +$license = $copyright = ""; + +%all_licenses = (); + + +sub read_license_file() { + my $filename = shift(@_); + my $license = ""; + if ((-s $filename) && open(LICENSE_FILE, "<$filename")) { + # TODO: detect even more licenses automatically. + # It'd hard to detect BSD/MIT licenses, + # because these licenses make changes in the MIDDLE of their text. + # Thus, it's hard to avoid falsely detecting "almost" licenses. + # For example, ipf has license text that looks like a BSD/MIT license, + # but it's not even open source. + # However, we CAN detect many other kinds, so let's at least do that. 
+ for ($i=1; $i < 9; $i++) { + $line = <LICENSE_FILE>; + if ($line =~ m/GNU GENERAL PUBLIC LICENSE/i) {$license = "GPL";} + elsif ($line =~ m/GNU LIBRARY GENERAL PUBLIC LICENSE/i) {$license = "LGPL";} + elsif ($line =~ m/GNU LESSER GENERAL PUBLIC LICENSE/i) {$license = "LGPL";} + elsif ($line =~ m/Mozilla PUBLIC LICENSE/i) {$license = "MPL";} + elsif ($line =~ m/Netscape PUBLIC LICENSE/i) {$license = "NPL";} + elsif ($line =~ m/IBM PUBLIC LICENSE/i) {$license = "IBM Public License";} + elsif ($line =~ m/\bApache Software License\b/i) {$license = "Apache";} + elsif ($line =~ m/\bThe "Artistic License"/i) {$license = "Artistic";} + } + close(LICENSE_FILE); + } + return $license; +} + +sub add_license() { + # Add to license list "all_licenses" the license in the given file, if one. + my $filename = shift(@_); + my $license = &read_license_file($filename); + if ($license) { $all_licenses{$license} = 1; } +} + +open(RPM_SPEC, "<$rpm_spec"); + +while (<RPM_SPEC>) { + if (/^Copyright:(.*)/i) {$copyright=$1;} + if (/^License:(.*)/i) {$license=$1;} +} +close(RPM_SPEC); + +if (! $license) { + $license = $copyright; +} + +# print "GOT: $license\n"; + +if ( $license ) { + $_ = $license; + + # Remove extraneous material in the middile of the license text. + s/ \(see: [^)]*\)//; # Delete parenthetical see: references. + s/, ?no warranties//; # "No warranties" not important for our purposes. + s/See COPYRIGHT file//i; + s/\b,?URW holds copyright\b//i; + + # Clean up front and back. + s/^\s*//; + s/[ \t\.]*$//; # Delete trailing periods and blanks. + + $_ = ucfirst($_); # Uppercase first character. Remove this line if need to. 
+ + if (/^GPL2?$/i || /^GNU$/ || /^GNU ?GPL *(Version 2)?$/i) {$_ = "GPL"}; + if (/^Apache ?Group License$/i) {$_ = "Apache"}; + if (/^Apacheish$/i || /^Apache-style$/i) {$_ = "Apache-like"}; + if (/^Artistic$/i) {$_ = "Artistic"}; + if (/^BSD$/i) {$_ = "BSD"}; + if (/^BSDish$/i || /^BSD-style$/i || /^BSD-like$/) {$_ = "BSD-like"}; + if (/^Distributable$/i) {$_ = "Distributable"}; + if (/^Distributable ?\(BSD-like\)$/i) {$_ = "BSD-like"}; + if (/^Freely ?Distributable$/i) {$_ = "Freely distributable"}; + if (/^Free,no warranties.?$/i) {$_ = "Free"}; + if (/^freeware. See COPYRIGHT file.?$/i) {$_ = "Free"}; + if (/^freeware.?$/i) {$_ = "Free"}; + if (/^GPLand Artistic$/i) {$_ = "GPL and Artistic"}; + if (/^GPL ?or BSD$/i) {$_ = "GPL or BSD"}; + if (/^GPL\/XFree86$/i) {$_ = "GPL/MIT"}; + if (/^distributable- most of it GPL$/i) {$_ = "Distributable - mostly GPL"}; + if (/^IBM ?Public License Version 1.0 -/i) {$_ = "IBM Public License"}; + if (/^IBM ?Public License$/i) {$_ = "IBM Public License"}; + if (/^MIT, ?freely distributable/i) {$_ = "MIT"}; + if (/^MIT\/X ?Consortium$/i) {$_ = "MIT"}; + if (/^Non[- ]commercial[- ]use[- ]only$/i) {$_ = "Non-commercial use only"}; + if (/^Proprietary$/i) {$_ = "Proprietary"}; + if (/^Public ?domain$/i) {$_ = "Public domain"}; + if (/^Universityof Washington's Free-Fork License$/i) + {$_ = "U of Washington's Free-Fork License"}; + if (/^W3CCopyright \(BSD like\)$/i) {$_ = "BSD-like"}; + if (/^X ?Consortium[ -]?like$/i) {$_ = "MIT-like"}; + if (/^XFree86$/i) {$_ = "MIT"}; + if (/^W3C Copyright \(BSD[- ]like\)$/i) {$_ = "BSD-like"}; + + # Eliminate license if it isn't really a license. + if (/^2000Red Hat, Inc.?$/i) {$_ = ""}; + if (/^OMRON ?Corporation, OMRON Software Co., Ltd.?$/i) {$_ = ""}; + if (/^Copyright\s?.?\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license. + if (/^\(C\)\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license. + if (/^[1-9][0-9][0-9][0-9]\s/i) {$_ = ""}; # A date, not a license. 
+ + $license = $_; +} + +if ($license) { + print $license; +} else { + # The spec file didn't tell us anything. Let's look for files that tell us. + &add_license("${program_dir}/LICENSE"); + &add_license("${program_dir}/COPYING"); + &add_license("${program_dir}/COPYING.LIB"); + &add_license("${program_dir}/Artistic"); + &add_license("${program_dir}/COPYING-2.0"); + &add_license("${program_dir}/COPYING.WTFPL"); + &add_license("${program_dir}/COPYING.GPL"); + &add_license("${program_dir}/COPYING.NEWLIB"); + &add_license("${program_dir}/COPYING.kdb"); + if (-s "${program_dir}/COPYING.BSD") { # Assume there's a BSD license. + $all_licenses{"BSD"} = 1; + } + if (-s "${program_dir}/COPYING.MIT") { # Assume there's an MIT license. + $all_licenses{"MIT"} = 1; + } + + if (%all_licenses) { + $license = ""; + foreach $license_fragment (sort(keys(%all_licenses))) { + $license .= "${license_fragment}, " + } + $license =~ s/, $//; + print $license; + } + +} +print "\n"; + diff --git a/f90_count b/f90_count new file mode 100755 index 0000000..e618493 --- /dev/null +++ b/f90_count @@ -0,0 +1,81 @@ +#!/usr/bin/perl +# f90_count - count physical lines of code in Fortran 90. +# Usage: f90_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# Ignores comment-only lines (where first nonblank character = !). +# Lines beginning with !hpf$ or !omp$ are not comments lines. + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + open (FILE, $file); + while (<FILE>) { + # a comment is m/^\s*!/ + # an empty line is m/^\s*$/ + # a HPF statement is m/^\s*!hpf\$/i + # an Open MP statement is m/^\s*!omp\$/i + if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {$sloc++;} + } + print "$sloc $file\n"; + $total_sloc += $sloc; + $sloc = 0; + close (FILE); +} diff --git a/fortran_count b/fortran_count new file mode 100755 index 0000000..4df1f32 --- /dev/null +++ b/fortran_count @@ -0,0 +1,83 @@ +#!/usr/bin/perl +# fortran_count - count physical lines of code in Fortran 77. 
+# Usage: fortran_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# Ignores comment-only lines +# (where column 1 character = C, c, *, or !, +# or where a ! is preceded only by white space) +# Lines beginning with !hpf$ or !omp$ are not comments lines either. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + open (FILE, $file); + while (<FILE>) { + # a normal comment is m/^[c*!]/i + # a fancier comment is m/^\s+!/i + # an empty line is m/^\s*$/i + # a HPF statement is m/^[c*!]hpf\$/i + # an Open MP statement is m/^[c*!]omp\$/i + if (! m/^([c*!]|\s+!|\s*$)/i || m/^[c*!](hpf|omp)\$/i) {$sloc++;} + } + print "$sloc $file\n"; + $total_sloc += $sloc; + $sloc = 0; + close (FILE); +} diff --git a/generic_count b/generic_count new file mode 100755 index 0000000..e4178eb --- /dev/null +++ b/generic_count @@ -0,0 +1,77 @@ +#!/usr/bin/perl +# generic_count - count physical lines of code, given a comment marker. +# Usage: generic_count commentstart [-f file] [list_of_files] +# commentstart: string that begins a comment (continuing til end-of-line) +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both +# This is a trivial/naive program for scripts, etc. + + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$commentstart = shift @ARGV; +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $filewithlist\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + open (FILE, $file); + while (<FILE>) { + s/${commentstart}.*//; + if (m/\S/) {$sloc++;} + } + print "$sloc $file\n"; + $total_sloc += $sloc; + close (FILE); +} diff --git a/get_sloc b/get_sloc new file mode 100755 index 0000000..f590a8e --- /dev/null +++ b/get_sloc @@ -0,0 +1,544 @@ +#!/usr/bin/perl -w + +# get_sloc +# Take a list of dirs, and get the SLOC or filecount data from them. +# NOTE: The intended input data ignores zero-length files & ignores dups, +# so if that's true for the input data, it'll be true for the output data! + +# This code works but is NOT cleaned up-- it basically grew like +# topsy. 
Many of the variable names are misleading, as my needs for +# output changed. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + + +# Default values for the effort estimation model; the model is +# effort = ($effort_factor * KiloSLOC) ** $effort_exponent. +# The following numbers are for basic COCOMO: + +$effort_factor = 2.40; +$effort_exponent = 1.05; +$effort_estimation_message = "Basic COCOMO model,"; + +$schedule_factor = 2.5; +$schedule_exponent = 0.38; +$schedule_estimation_message = "Basic COCOMO model,"; + +# Average Salary / year. +# Source: ComputerWorld, Sep. 4, 2000 Salary Survey, +# average (U.S.) programmer/analyst salary. + +$person_cost = 56286.; + +# Overhead; the person cost is multiplied by this value to determine +# true annual costs. + +$overhead = 2.4; + +@license_list = ( "GPL", "LGPL", "MIT", "BSD", "distributable", + "public domain", "MPL"); + +%license_of = (); # input is name of program, output is license. 
+ +$no_license_total = 0; + +%non_language_list = ( + "dup" => 1, + "not" => 1, + "unknown" => 1, + "auto" => 1, + "zero" => 1, +); + +%ignore_language_list = ( + "makefile" => 1, + "sql" => 1, + "html" => 1, +); + +# Default input values +$dirs_in_stdin = 0; # 0: dirs to analyze as arguments, 1: in stdin + +# Default Output Values: + +$computing_sloc = 1; # 0= showing filecounts, 1= showing SLOC. +$narrow = 1; +$sort_by = "total"; # If empty, sort by name; else "total" or lang name. +$show_effort = 0; # Show effort for each component? +$break_line = 1; # Break up long lines into multiple lines? +$show_non_lang = 0; # Show non-language counts? +$one_program = 0; # Are all files part of a single program? +$show_header = 1; # Show header? +$show_footer = 1; # Show footer? + + +# Global variables: + +@dirs = (); # Directories to examine + +%examined_directories = (); # Keys = Names of directories examined this run. + +# Subroutines. + +sub commify { +# TODO: Needs to be internationalized. + my $text = reverse $_[0]; + $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; + return scalar reverse $text; +} + +sub numformat { +# Format number nicely with commas. + my $num = shift; + my $digits = shift; + return commify(sprintf("%0.${digits}f", $num)); +} + +sub effort_person_months { + # Given the SLOC, reply an estimate of the number of person-months + # needed to develop it traditionally. + my $total_sloc = shift; + return ( ($effort_factor*(($total_sloc/1000.0)**$effort_exponent))); +} + +sub estimate_schedule { + # Given the person-months, reply an estimate of the number of months + # needed to develop it traditionally. + my $person_months = shift; + return ($schedule_factor*($person_months**$schedule_exponent)); +} + +sub get_lang_total { + my $lang = shift; + if (defined($lang_total{$lang})) {return $lang_total{$lang}} + else {return 0;} +} + +# MAIN PROGRAM + + +# Process options (if any): + +if ($#ARGV < 0) { + print STDERR "Error! 
You must list at least one directory to process, or --stdin.\n"; + exit(1); +} + +while ((scalar (@ARGV) > 0) && ($ARGV[0] =~ m/^-/)) { + $arg = shift; + if ($arg eq "--") {last;} + elsif ($arg eq "--filecount") {$computing_sloc = 0;} + elsif ($arg eq "--filecounts") {$computing_sloc = 0;} + elsif ($arg eq "--sloc") {$computing_sloc = 1;} + elsif ($arg eq "--narrow") {$narrow = 1;} + elsif ($arg eq "--wide") {$narrow = 0;} + elsif ($arg eq "--break") {$break_line = 1;} + elsif ($arg eq "--nobreak") {$break_line = 0;} + elsif ($arg eq "--sort") {$sort_by = shift;} # Must be "total" or a lang. + elsif ($arg eq "--nosort") {$sort_by = "";} + elsif ($arg eq "--showother") {$show_non_lang = 1;} + elsif ($arg eq "--noshowother") {$show_non_lang = 0;} + elsif ($arg eq "--oneprogram") {$one_program = 1;} + elsif ($arg eq "--noheader") {$show_header = 0;} + elsif ($arg eq "--nofooter") {$show_footer = 0;} + elsif ($arg eq "--addlang") { $lang = shift; + if (!defined($ignore_language_list{$lang})) { + die "Sorry, but $lang isn't ignored"; }; + delete $ignore_language_list{$lang}; } + elsif ($arg eq "--addlangall") { %ignore_language_list = (); } + elsif ($arg eq "--effort") {$effort_factor = (shift)*1.0; + $effort_exponent = (shift)*1.0; + $effort_estimation_message = "effort model"} + elsif ($arg eq "--schedule") {$schedule_factor = (shift)*1.0; + $schedule_exponent = (shift)*1.0; + $schedule_estimation_message = "schedule model"} + elsif ($arg eq "--personcost") {$person_cost = (shift)*1.0;} + elsif ($arg eq "--overhead") {$overhead = (shift)*1.0;} + elsif ($arg eq "--stdin") {$dirs_in_stdin = 1;} + else {die "Unknown option: $arg\n";} +} + + +# Determine the languages to show: + + +if ($computing_sloc) { $show_non_lang = 0; } + +if (!$show_non_lang) { + # Add the non_language_list to the ignored languages. 
+ foreach $langname (keys(%non_language_list)) + {$ignore_language_list{$langname} = 1;} +} + + +%lang_total = (); +%license_total = (); + +@data_lines = (); + +$sloc = 0; +$total_sloc = 0; +$total_lang_sloc = 0; +$grand_total_sloc = 0; +$grand_total_lang_sloc = 0; +$effort = 0.0; +$grand_total_effort = 0.0; +$grand_schedule = 0.0; + +if (!$narrow) { + # Ouch! To accurately determine the column positions and names, + # without "pre-knowing" them, we need to look through the data. + # So, we'll do it twice. This isn't efficient - if needed, + # speed it up by rewriting this to do it in-memory. + while (defined($_ = <DATAFILE>)) { + ($lang, $sloc) = split; + next if ( (!defined($lang)) || (!defined($sloc)) ); + next if ($ignore_language_list{$lang}); + $lang_total{$lang} = 0; + } +} + + +# Print the header. +if ($show_header) { +if ($narrow) { + if ($computing_sloc) { print "SLOC\t"; } + else { print "#Files\t"; } + if ($show_effort) {print "P.Y.\t";} + print "Directory\t"; + if ($computing_sloc) { print "SLOC-by-Language (Sorted)"; } + else { print "#Files-by-Language (Sorted)"; } + print "\n"; +} else { + if ($computing_sloc) { print "SLOC\t"; } + else { print "#Files\t"; } + if ($show_effort) {print "P.M.\t";} + printf "%-22s\t", "Dir"; + foreach $lang (keys(%lang_total)) { + print "$lang\t"; + $lang_total{$lang} = 0; + }; + print "\n"; +} +} + +if ($dirs_in_stdin == 1) { + while (defined($dir = <STDIN>)) { + chomp ($dir); + push (@dirs, $dir); + } +} + +while ($dir = shift) { + push (@dirs, $dir); +} + + +foreach $dir (@dirs) { + if (! -d "$dir") { + # print "Skipping non-directory $dir\n"; + next; + } + + # Skip previously-examined directories. + if ($examined_directories{$dir}) { + # print "Skipping already-examined directory $dir\n"; + next; + } + $examined_directories{$dir} = 1; + + if (! 
-r "${dir}/filelist") { + # print "Skipping directory $dir; it doesn't contain a file 'filelist'\n"; + next; + } + + + $simplename = $dir; + $simplename =~ s!^.*\/!!; + $total_sloc = 0; + $total_lang_sloc = 0; + $preceding_entry = 0; + + $line = ""; + %lang_data = (); + + if ($computing_sloc) { + $filename = "${dir}/all-physical.sloc"; + } else { + $filename = "${dir}/all.filecount"; + } + if (open(DATAFILE, "<$filename")) { + while (defined($_ = <DATAFILE>)) { + ($lang, $sloc) = split; + next if ( (!defined($lang)) || (!defined($sloc)) ); + next if ($ignore_language_list{$lang}); + if ($narrow) { if ($sloc) {$lang_data{$lang} = $sloc;}} + else { $line .= "${sloc}\t"; } + if ($lang eq $sort_by) {$interesting_lang_sloc = $sloc;} + $total_sloc += $sloc; + $total_lang_sloc += $sloc unless ($non_language_list{$lang}); + $lang_total{$lang} += $sloc; + } + close(DATAFILE); + } else { + print STDERR "Error openinig $filename\n"; + } + if ($narrow) { + # For narrow view, sort the language entries. + foreach $entry (sort {$lang_data{$b} <=> $lang_data{$a}} keys %lang_data){ + if ($preceding_entry) {$line .= ",";} + $preceding_entry = 1; + $line .= "${entry}=${lang_data{$entry}}"; + } + if (!$preceding_entry) {$line .= "(none)";} + } + + $grand_total_sloc += $total_sloc; + $grand_total_lang_sloc += $total_lang_sloc; + + $effort = effort_person_months($total_sloc); + $grand_total_effort += $effort; + + $schedule = estimate_schedule($effort); + if ($schedule > $grand_schedule) { + $grand_schedule = $schedule; # The longest leg wins. + } + + $displayed_effort = ""; + if ($show_effort) { $displayed_effort = sprintf "%.2f\t", $effort; } + if ($narrow) { + $displayed_name = "$simplename"; + } else { + $displayed_name = sprintf "%-22s\t", $simplename; + } + + # Add to the corresponding license, if the license is known. 
+ $license = ""; + if (open(LICENSE_FILE, "<${dir}/PROGRAM_LICENSE")) { + $license = <LICENSE_FILE>; + chomp($license); + close(LICENSE_FILE); + if ($license) { + $license_of{$simplename} = $license; # Hash currently unused. + if (! defined($license_total{$license})) { + $license_total{$license} = 0; + } + $license_total{$license} = $license_total{$license} + $total_sloc; + } + } else { + $no_license_total += $total_sloc; + } + + if ($narrow) { + $line = sprintf "%-7d %s%-15s %-s\n", $total_sloc, $displayed_effort, + $simplename, $line; + if ($break_line && (length($line) > 77)) { # Break up long line. + $line =~ s/(.{71})([^,]*),(.*)/$1$2,\n $3/; + } + if ($license) { + $line .= " [$license]\n"; + } + } else { + $line = "${total_sloc}\t${displayed_effort}${displayed_name}${line}\n"; + } + if ($sort_by) { + if ($sort_by eq "total") {$line = "$total_sloc\t$line";} + else {$line = "$interesting_lang_sloc\t$line";} + $data_lines[$#data_lines+1] = $line; # Add to data lines. + } else { + print $line; # No sort - print immediately for speed. + } + +} + +if ($sort_by) { + # Print sorted version. This is a little inefficient, but for + # only a few hundred or thousand values it doesn't matter. + @sorted_data_lines = sort { ($b =~ /^(\d+)/)[0] <=> ($a =~ /^(\d+)/)[0] } + @data_lines; + foreach $line (@sorted_data_lines) { + $short_line = $line; + $short_line =~ s/^[^\t]*\t//; # Remove sort field. + print $short_line; + } +} + + +if (! $show_footer) {exit(0);} +if ($grand_total_sloc == 0) { + print "SLOC total is zero, no further analysis performed.\n"; + exit(1); +} + +# Print the footer. 
+if ($narrow) { + print "\n"; + print "\n"; + print "Totals grouped by language (dominant language first):\n"; + # If you don't want the list sorted by size of language, just do: + # foreach $lang (@language_list) { + foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total) ) { + $percent = get_lang_total($lang) * 100.0 / $grand_total_sloc; + if ($percent > 0.0) { + printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent; + } + }; + + if ($show_non_lang) { + # The previous list showed "non-languages", so now we'll show only the + # data for data associated with a normal language: + print "\n"; + print "\n"; + foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total)) { + next if (defined($non_language_list{$lang})); + $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc; + if ($percent > 0.0) { + printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent; + } + }; + } + +} else { # Not narrow. + + print "$grand_total_sloc\t"; + if ($show_effort) {printf "%.2f\t", $grand_total_effort;} + + printf "%-22s", "Totals"; + foreach $lang (keys(%lang_total)) { + print "\t$lang_total{$lang}"; + }; + + print "\t"; + if ($show_effort) {printf "\t";} + printf "%-22s\t", "Percentages"; + foreach $lang (keys(%lang_total)) { + $percent = $lang_total{$lang} * 100.0 / $grand_total_sloc; + printf "\t%0.2f", $percent; + }; + print "\n"; + + print "\t"; + if ($show_effort) {printf "\t";} + printf "%-22s\t", "Code Percentages"; + foreach $lang (keys(%lang_total)) { + next if (defined($non_language_list{$lang})); + $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc; + printf "\t%0.2f", $percent; + }; + print "\n"; +} + +print "\n"; +print "\n"; + + +if (%license_total) { + # We have license info on something, so if there's anything + # unallocated, add that to the list. 
+ if ($no_license_total) { + $license_total{"Not listed"} = $no_license_total; + } + print "Licenses:\n"; + foreach $license (sort {$license_total{$b} <=> $license_total{$a} } keys(%license_total)) { + $percent = $license_total{$license} * 100.0 / $grand_total_sloc; + if ($percent > 0.0) { + printf "%9d (%.2f%%) %s\n", $license_total{$license}, $percent, $license; + } + }; + print "\n"; + print "\n"; + + print "Percentage of Licenses containing selected key phrases:\n"; + %license_phrase = (); + foreach $license (keys(%license_total)) { + foreach $phrase (@license_list) { + if ($license =~ m/\b$phrase\b/i) { + if (!defined($license_phrase{$phrase})) {$license_phrase{$phrase} = 0;} + $license_phrase{$phrase} = $license_phrase{$phrase} + + $license_total{$license}; + } + } + } + + foreach $phrase (sort {$license_phrase{$b} <=> $license_phrase{$a} } keys(%license_phrase)) { + $percent = $license_phrase{$phrase} * 100.0 / $grand_total_sloc; + if ($percent > 0.0) { + printf "%9d (%.2f%%) %s\n", $license_phrase{$phrase}, $percent, $phrase; + } + }; + +} + + +print "\n"; +print "\n"; + +if ($computing_sloc) { + if ($one_program) { + # If it's one program, override the grand total of effort + # and the schedule calculations by using the total SLOC. + + $grand_total_effort = effort_person_months($grand_total_sloc); + $grand_schedule = estimate_schedule($grand_total_effort); + } + printf "Total Physical Source Lines of Code (SLOC) = %s\n", + commify($grand_total_sloc); + + printf "Development Effort Estimate, Person-Years (Person-Months) = %s (%s)\n", + numformat($grand_total_effort/12.0, 2), + numformat($grand_total_effort, 2); + print " ($effort_estimation_message " . + "Person-Months = $effort_factor * (KSLOC**$effort_exponent))\n"; + + printf "Schedule Estimate, Years (Months) = %s (%s)\n", + numformat($grand_schedule/12.0, 2), + numformat($grand_schedule, 2); + print " ($schedule_estimation_message " . 
+ "Months = $schedule_factor * (person-months**$schedule_exponent))\n"; + + # Don't show this if there are multiple programs, because the computation + # is essentially meaningless: after the "smaller" projects have completed, + # the longest one would keep going: + if ($one_program && ($grand_schedule > 0.0)) { + printf "Estimated Average Number of Developers (Effort/Schedule) = %s\n", + numformat($grand_total_effort / $grand_schedule, 2); + } + + + $value = ($grand_total_effort / 12.0) * $person_cost * $overhead; + printf "Total Estimated Cost to Develop = \$ %s\n", + numformat($value, 0); + printf " (average salary = \$%s/year, overhead = %0.2f).\n", + commify($person_cost), $overhead; + +} else { +print "Total Number of Files = $grand_total_sloc\n"; +print "Total Number of Source Code Files = $grand_total_lang_sloc\n"; +} +print "SLOCCount, Copyright (C) 2001-2004 David A. Wheeler\n"; +print "SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.\n"; +print "SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to\n"; +print "redistribute it under certain conditions as specified by the GNU GPL license;\n"; +print "see the documentation for details.\n"; +print "Please credit this data as \"generated using David A. Wheeler's 'SLOCCount'.\"\n"; + diff --git a/get_sloc_details b/get_sloc_details new file mode 100755 index 0000000..56ef45a --- /dev/null +++ b/get_sloc_details @@ -0,0 +1,103 @@ +#!/usr/bin/perl -w + +# get_sloc_details +# Take a list of dirs, and get the detailed SLOC entries for every file. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + +sub print_data +{ + my $dir = shift; + my $langfile = shift; + my $saw_total = 0; + my $filename = "${dir}/${langfile}"; + my $lang = $langfile; + $lang =~ s/_outfile\.dat$//; + + open(RAWDATA, "<$filename") || + return; + # die "Can't open file in $dir for language $lang.\n"; + + if ($lang eq "asm") { + while (<RAWDATA>) { + if (m/^Total:/) { + $saw_total = 1; + last; + } + chomp; + if (m/^([0-9]+)\s+\([^\)]+\)\s+(.*)/) { + print "$1\t$lang\t$dir\t$2\n"; + } else { + print STDERR "Warning: file $filename has unexpected text: $_\n"; + } + } + } else { + while (<RAWDATA>) { + if (m/^Total:/) { + $saw_total = 1; + last; + } + chomp; + if (m/^([0-9]+)\s+(.*)/) { + print "$1\t$lang\t$dir\t$2\n"; + } else { + print STDERR "Warning: file $filename has unexpected text: $_\n"; + } + } + } + close(RAWDATA); + if (! $saw_total) { + print STDERR "Warning! No 'Total' line in $filename.\n"; + } +} + +# MAIN PROGRAM + + +if ($#ARGV < 0) { + print STDERR "Error! You must list at least one directory to process.\n"; + exit(1); +} + + +while ( $dir = shift ) { + + if (! -d "$dir") { + # print "Skipping non-directory $dir\n"; + next; + } + + if (! 
-r "${dir}/filelist") { + # print "Skipping directory $dir; it doesn't contain a file 'filelist'\n"; + next; + } + + opendir(DATADIR, $dir) || die "can't opendir $dir: $!"; + @outfiles = grep { /outfile\.dat$/ } readdir(DATADIR); + closedir DATADIR; + foreach $langfile (@outfiles) { + print_data($dir, $langfile); + } + +} + diff --git a/haskell_count b/haskell_count new file mode 100755 index 0000000..21299aa --- /dev/null +++ b/haskell_count @@ -0,0 +1,122 @@ +#!/usr/bin/perl -w +# haskell_count - count physical lines of code +# Strips out {- .. -} and -- comments and counts the rest. +# Pragmas, {-#...}, are counted as SLOC. +# BUG: Doesn't handle strings with embedded block comment markers gracefully. +# In practice, that shouldn't be a problem. +# Usage: haskell_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# + + + + + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub determine_lit_type { + my ($file) = @_; + + open (FILE, $file); + while (<FILE>) { + if (m/^\\begin{code}/) { close FILE; return 2; } + if (m/^>\s/) { close FILE; return 1; } + } + + return 0; +} + +sub count_file { + my ($file) = @_; + my $sloc = 0; + my $incomment = 0; + my ($literate, $inlitblock) = (0,0); + + $literate = 1 if $file =~ /\.lhs$/; + if($literate) { $literate = determine_lit_type($file) } + + open (FILE, $file); + while (<FILE>) { + if ($literate == 1) { + if (!s/^>//) { s/.*//; } + } elsif ($literate == 2) { + if ($inlitblock) { + if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; } + } elsif (!$inlitblock) { + if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; } + else { s/.*//; } + } + } + + if ($incomment) { + if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;} + else { s/.*//; } + } + if (!$incomment) { + s/--.*//; + s!{-[^#].*?-}!!g; + if (m/{-/ && (!m/{-#/)) { + s/{-.*//; + $incomment = 1; + } + } + if (m/\S/) {$sloc++;} + } + print "$sloc $file\n"; + if ($incomment) {print "ERROR: ended in comment in $ARGV\n";} + $total_sloc += $sloc; + $sloc = 0; + $incomment = 0; + close (FILE); +} diff --git a/java_lines_environment.dat b/java_lines_environment.dat new file mode 100644 index 0000000..56897e9 --- /dev/null +++ b/java_lines_environment.dat @@ -0,0 +1,98 @@ + Temporary Project Name (* Project_Name,in 45 spaces *)
+0 (* QA_Switch *)
+1 (* Compare_Spec *)
+999 (* Line_Length *)
+1000 (* Exec_Lines *)
+500 (* Data_Lines *)
+60.0 (* Min_Percent *)
+0.0 (* Inc_Percent *)
+0 (* Display_File *)
+0 (* Intro_Msg *)
+P (* SLOC_Def *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* Refer to the source code file, 'java_lines.c', for further information *)
+(* pertaining to the INSTALLATION PROCEDURES and EXECUTION PROCEDURES of *)
+(* this code counting tool. *)
+(* *)
+(* Note: *)
+(* 1. The above user-defined parameters must be spaced one entry per line *)
+(* of this file. Numeric entries, with the exception of 'Inc_Percent', *)
+(* are of type Integer. *)
+(* *)
+(* 2. The 'java_lines_environment.dat' file must be co-located in the *)
+(* directory/path where the code counting tool is to be invoked. *)&#13;
+(* Failure to do so will result in the insertion of predefined default *)
+(* values for the entries contained herein. *)
+(* *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* USER DEFINABLE PARAMETERS *)&#13;
+(* *)
+(* Project_Name -- Allows the user to insert the name of the Program or *)
+(* Project that the source code to be counted pertains. *)
+(* The Project_Name will appear within the headings *)&#13;
+(* of the 'java_outfile.dat' file produced upon execution *)
+(* of the 'java_lines' code counting tool. *)
+(* *)
+(* QA_Switch -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of programming language reserved word usage *)&#13;
+(* as found in the summary page of 'java_outfile.dat'. *)
+(* *)
+(* Compare_Spec -- Allows the user to control the case sensitivity of the *)
+(* code counting tool. A setting of '1' indicates that *)
+(* full case sensitive comparisons must be made. A setting*)
+(* of '0' allows valid comparisons to occur between like *)
+(* letters of upper and lower case. *)
+(* *)
+(* Line_Length -- Allows user to force the code counting tool to ignore *)
+(* information beyond 'Line_Length' characters per physical*)
+(* line of input. It is recommended that the length of *)
+(* the longest physical line to be read be used, i.e. 132. *)
+(* *)
+(* Exec_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with executable lines in exceedance *)
+(* of 'Exec_Lines' will be reported on the summary page of *)
+(* 'java_outfile.dat'. *)
+(* *)
+(* Data_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with data declaration lines in *)
+(* exceedance of 'Data_Lines' will be reported on the *)
+(* summary page of 'java_outfile.dat'. *)
+(* *)
+(* Min_Percent -- Allows the user to set a threshold whereby the number *)
+(* of files processed with a ratio of comments (whole & *)
+(* embedded) to SLOC (physical or logical) is less than *)
+(* 'Min_Percent'. *)
+(* *)
+(* Inc_Percent -- Allows the user to set a progress increment whereby a *)
+(* progress message will appear on the terminal screen *)
+(* during execution of the 'java_lines' tool. The progress*)
+(* message indicates that approximately 'Inc_Percent' of *)
+(* source code files to be processed have completed since *)
+(* the previous progress message appeared. The progress *)
+(* reporting is based solely on the number of files *)
+(* contained in 'java_list.dat'. Actual run-time progress *)
+(* is dependent on the relative size of each source code *)
+(* file and the user loading of the host platform machine. *)
+(* A setting of 0.0 will disable the reporting of the *)
+(* progress message. *)
+(* *)
+(* Display_File -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of last file to be processed within the *)
+(* java_list.dat file. *)
+(* *)
+(* Intro_Msg -- Allows the user to turn on '1' or to turn off '0' the *)
+(* output of the introduction message as the first page *)
+(* of the 'java_outfile.dat' file. *)
+(* *)
+(* SLOC_Def -- Allows the user to select the definition of a Source *)
+(* Line of Code (SLOC) to be used during the operation of *)
+(* the CodeCount tool. A setting of 'P' invokes the SLOC *)&#13;
+(* definition of Physical lines, a.k.a., non-comment, *)
+(* non-blank, physical lines of code or Deliverable Source *)
+(* Instructions (DSIs). A setting of 'L' invokes the SLOC *)&#13;
+(* definition of Logical lines, a.k.a., non-comment, *)
+(* non-blank, logical lines of code. *)
+(* *)
+(*---------------------------------------------------------------------------*)
diff --git a/jsp_count.c b/jsp_count.c new file mode 100644 index 0000000..42cb2af --- /dev/null +++ b/jsp_count.c @@ -0,0 +1,1787 @@ +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include <stdio.h> +#include <unistd.h> + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include <stdlib.h> + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include <io.h> +#include <stdlib.h> +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. 
+ */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. 
+ */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. 
*/ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + + +#define yywrap() 1 +#define YY_SKIP_YYWRAP +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern char *yytext; +#define yytext_ptr yytext +static yyconst short yy_nxt[][11] = + { + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0 + }, + + { + 9, 10, 11, 12, 10, 13, 10, 14, 10, 15, + 10 + }, + + { + 9, 10, 11, 12, 10, 13, 10, 14, 10, 15, + 10 + }, + + { + 9, 16, 16, 17, 16, 16, 16, 16, 18, 16, + 16 + }, + + { + 9, 16, 16, 17, 16, 16, 16, 16, 18, 16, + 16 + + }, + + { + 9, 19, 19, 20, 19, 19, 19, 19, 21, 19, + 19 + }, + + { + 9, 19, 19, 20, 19, 19, 19, 19, 21, 19, + 19 + }, + + { + 9, 22, 22, 23, 22, 24, 22, 22, 22, 22, + 22 + }, + + { + 9, 22, 22, 23, 22, 24, 22, 22, 22, 22, + 22 + }, + + { + -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, + -9 + + }, + + { + 9, 25, 25, -10, 25, -10, 25, 25, 25, -10, + 25 + }, + + { + 9, -11, -11, -11, -11, -11, -11, -11, -11, -11, + -11 + }, + + { + 9, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12 + }, + + { + 9, -13, -13, -13, -13, -13, -13, -13, -13, -13, + -13 + }, + + { + 9, -14, -14, -14, -14, -14, -14, -14, -14, -14, + -14 + + }, + + { + 9, -15, -15, -15, 26, -15, 27, -15, -15, -15, + -15 + }, + + { + 9, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -16 + }, + + { + 9, -17, -17, -17, -17, -17, -17, -17, -17, -17, + -17 + }, + + { + 9, -18, -18, -18, -18, -18, -18, -18, 28, -18, + -18 + }, + + { + 9, -19, -19, -19, -19, -19, -19, -19, -19, -19, + -19 + + }, + + { + 9, -20, -20, -20, -20, -20, -20, -20, -20, -20, + -20 + }, + + { + 9, -21, -21, -21, -21, -21, -21, -21, 29, -21, + -21 + }, + + { + 9, 30, 30, -22, 30, -22, 30, 30, 30, 30, + 30 + }, + + { + 9, -23, -23, -23, -23, -23, -23, -23, -23, -23, + -23 + }, + + { + 9, -24, -24, -24, -24, -24, -24, -24, -24, -24, + -24 + + }, + + { + 9, 
25, 25, -25, 25, -25, 25, 25, 25, -25, + 25 + }, + + { + 9, -26, -26, -26, -26, -26, -26, -26, 31, -26, + -26 + }, + + { + 9, -27, -27, -27, -27, -27, -27, -27, 32, -27, + -27 + }, + + { + 9, -28, -28, -28, -28, -28, -28, -28, -28, -28, + 33 + }, + + { + 9, -29, -29, -29, -29, -29, 34, -29, -29, -29, + -29 + + }, + + { + 9, 30, 30, -30, 30, -30, 30, 30, 30, 30, + 30 + }, + + { + 9, -31, -31, -31, -31, -31, -31, -31, 35, -31, + -31 + }, + + { + 9, -32, -32, -32, -32, -32, -32, -32, 36, -32, + -32 + }, + + { + 9, -33, -33, -33, -33, -33, -33, -33, -33, -33, + -33 + }, + + { + 9, -34, -34, -34, -34, -34, -34, -34, -34, -34, + 37 + + }, + + { + 9, -35, -35, -35, -35, -35, -35, -35, -35, -35, + -35 + }, + + { + 9, -36, -36, -36, -36, -36, -36, -36, -36, -36, + -36 + }, + + { + 9, -37, -37, -37, -37, -37, -37, -37, -37, -37, + -37 + }, + + } ; + + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. 
+ */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 17 +#define YY_END_OF_BUFFER 18 +static yyconst short int yy_accept[38] = + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 18, 6, + 1, 4, 5, 7, 7, 10, 9, 10, 13, 12, + 13, 14, 15, 16, 6, 0, 0, 0, 0, 14, + 0, 0, 8, 0, 2, 3, 11 + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 4, 5, 1, 1, 6, 1, 1, 7, + 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, + 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "jsp_count.l" +#define INITIAL 0 +#line 2 "jsp_count.l" + +/* +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler and Bob Brown. +This is a tweaked version by Bob Brown, derived from +David A. Wheeler's pascal_count.l. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. +Bob Brown's website is: http://www.openeye.com/rlb +*/ + +#include "driver.h" + +#define YY_NO_UNPUT + +/* 1 if we saw a non-comment, non-whitespace char on this line */ +int saw_char = 0; +static void count(void); + +#define chtml 1 + +#define cjsp 2 + +#define string 3 + + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include <stdlib.h> +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ + && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp = NULL, *yy_bp = NULL; + register int yy_act; + +#line 46 "jsp_count.l" + + line_number = 1; + saw_char = 0; + BEGIN(INITIAL); + + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! 
yyout ) + yyout = stdout; + + if ( ! yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yy_start; +yy_match: + while ( (yy_current_state = yy_nxt[yy_current_state][yy_ec[YY_SC_TO_UI(*yy_cp)]]) > 0 ) + { + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + + ++yy_cp; + } + + yy_current_state = -yy_current_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + + +do_action: /* This label is used only to access EOF actions. */ + + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + yy_cp = yy_last_accepting_cpos + 1; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 51 "jsp_count.l" +/* Do nothing */ + YY_BREAK +case 2: +YY_RULE_SETUP +#line 52 "jsp_count.l" +{ BEGIN(chtml); } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 53 "jsp_count.l" +{ BEGIN(cjsp); } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 54 "jsp_count.l" +{ count(); } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 56 "jsp_count.l" +{saw_char = 1; BEGIN(string);} + YY_BREAK +case 6: +YY_RULE_SETUP +#line 58 "jsp_count.l" +{saw_char = 1;} + YY_BREAK +case 7: +YY_RULE_SETUP +#line 59 "jsp_count.l" +{saw_char = 1;} + YY_BREAK +case 8: +YY_RULE_SETUP +#line 62 "jsp_count.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 63 "jsp_count.l" +{ count(); } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 64 "jsp_count.l" +/* no-op */ + YY_BREAK +case 11: +YY_RULE_SETUP +#line 66 "jsp_count.l" +{ BEGIN(INITIAL); } + YY_BREAK 
+case 12: +YY_RULE_SETUP +#line 67 "jsp_count.l" +{ count(); } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 68 "jsp_count.l" +/* no-op */ + YY_BREAK +case 14: +YY_RULE_SETUP +#line 70 "jsp_count.l" +{saw_char = 1;} + YY_BREAK +case 15: +YY_RULE_SETUP +#line 71 "jsp_count.l" +{ + fprintf(stderr, "Warning: newline in string - file %s, line %ld\n", + filename, line_number); + count(); + BEGIN(INITIAL); /* Switch back; this at least limits damage */ + } + YY_BREAK +case 16: +YY_RULE_SETUP +#line 77 "jsp_count.l" +{ BEGIN(INITIAL);} + YY_BREAK +case 17: +YY_RULE_SETUP +#line 79 "jsp_count.l" +ECHO; + YY_BREAK +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(chtml): +case YY_STATE_EOF(cjsp): +case YY_STATE_EOF(string): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). 
+ */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)]; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + register char *yy_cp = yy_c_buf_p; + + yy_current_state = yy_nxt[yy_current_state][1]; + yy_is_jam = (yy_current_state <= 0); + + if ( ! yy_is_jam ) + { + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + } + + return yy_is_jam ? 
0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register char *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. */ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. 
But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + + return c; + } +#endif /* YY_NO_INPUT */ + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? 
(isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! 
yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. 
*/ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +char *s1; +yyconst char *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst char *s ) +#else +static int yy_flex_strlen( s ) +yyconst char *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif +#line 79 "jsp_count.l" + + +#include "driver.c" + +static void count(void) +{ + if ( saw_char ) { + sloc++; + saw_char = 0; + } + line_number++; +} diff --git a/jsp_count.l b/jsp_count.l new file mode 100644 index 0000000..a9ad5d6 --- /dev/null +++ b/jsp_count.l @@ -0,0 +1,90 @@ +%{ + +/* +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler and Bob Brown. +This is a tweaked version by Bob Brown, derived from +David A. 
Wheeler's pascal_count.l. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. +Bob Brown's website is: http://www.openeye.com/rlb +*/ + +#include "driver.h" + +#define YY_NO_UNPUT + +/* 1 if we saw a non-comment, non-whitespace char on this line */ +int saw_char = 0; +static void count(void); + +%} + +%option noyywrap + +SPACE [ \t\n\r\f] + +%x chtml +%x cjsp +%x string + +%% + line_number = 1; + saw_char = 0; + BEGIN(INITIAL); + +[ \t\r\f] /* Do nothing */ +"<!--" { BEGIN(chtml); } +"<%--" { BEGIN(cjsp); } +\n { count(); } + +\" {saw_char = 1; BEGIN(string);} + +[^ \t\r\f(\n<"][^<\n"]* {saw_char = 1;} +. {saw_char = 1;} + + +<chtml>"-->" { BEGIN(INITIAL); } +<chtml>\n { count(); } +<chtml>. /* no-op */ + +<cjsp>"--%>" { BEGIN(INITIAL); } +<cjsp>\n { count(); } +<cjsp>. 
/* no-op */ + +<string>[^\"\n]+ {saw_char = 1;} +<string>\n { + fprintf(stderr, "Warning: newline in string - file %s, line %ld\n", + filename, line_number); + count(); + BEGIN(INITIAL); /* Switch back; this at least limits damage */ + } +<string>\" { BEGIN(INITIAL);} + +%% + +#include "driver.c" + +static void count(void) +{ + if ( saw_char ) { + sloc++; + saw_char = 0; + } + line_number++; +} diff --git a/lex_count b/lex_count new file mode 100755 index 0000000..f0adfaf --- /dev/null +++ b/lex_count @@ -0,0 +1,70 @@ +#!/usr/bin/perl +# lex_count +# Usage: lex_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + $sloc = `lexcount1 < "$file"`; + chomp($sloc); + print "$sloc $file\n"; + $total_sloc += $sloc; +} + diff --git a/lexcount1.c b/lexcount1.c new file mode 100644 index 0000000..2056b14 --- /dev/null +++ b/lexcount1.c @@ -0,0 +1,58 @@ + +/* lexcount1 - ignore C comments, count all lines with non-whitespace. */ +/* Read from stdin */ +/* Basically, this is enough machinery to count the physical SLOC for + a single file using C comments, e.g., lex. */ +/* +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. 
+ +*/ + +#include <stdio.h> +#include <ctype.h> + +int peek() { + int c = getchar(); + ungetc(c, stdin); + return c; +} + +int main() { + int c; + int incomment = 0; + long sloc = 0; + int nonspace = 0; + + while ( (c = getchar()) != EOF) { + if (!incomment) { + if ((c == '/') && (peek() == '*')) {incomment=1;} + else if (!isspace(c)) {nonspace = 1;} + } else { + if ((c == '*') && (peek() == '/')) { + c= getchar(); c=getchar(); incomment=0; + } + } + if ((c == '\n') && nonspace) {sloc++;} + } + printf("%ld\n", sloc); + return 0; /* Report success. */ +} + diff --git a/linux_unique b/linux_unique new file mode 100755 index 0000000..160b9bc --- /dev/null +++ b/linux_unique @@ -0,0 +1,64 @@ +#!/bin/sh + +# Show commands as they're executed. + + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +echo "Per subdir" + +print_sum_subset BUILD/linux/Documentation/ +print_sum_subset BUILD/linux/arch/ +print_sum_subset BUILD/linux/configs/ +print_sum_subset BUILD/linux/drivers/ +print_sum_subset BUILD/linux/fs/ +print_sum_subset BUILD/linux/ibcs/ +print_sum_subset BUILD/linux/include/ +print_sum_subset BUILD/linux/init/ +print_sum_subset BUILD/linux/ipc/ +print_sum_subset BUILD/linux/kernel/ +print_sum_subset BUILD/linux/ksymoops-0.7c/ +print_sum_subset BUILD/linux/lib/ +print_sum_subset BUILD/linux/mm/ +print_sum_subset BUILD/linux/net/ +print_sum_subset BUILD/linux/pcmcia-cs-3.1.8/ +print_sum_subset BUILD/linux/scripts/ + +echo "i386 vs. non-86" + +print_sum_subset BUILD/linux/arch/ +print_sum_subset BUILD/linux/arch/i386 +print_sum_subset linux/drivers/sbus/ +print_sum_subset linux/drivers/macintosh/ +print_sum_subset linux/drivers/sgi/ +print_sum_subset linux/drivers/fc4/ +print_sum_subset linux/drivers/nubus/ +print_sum_subset linux/drivers/acorn/ +print_sum_subset linux/drivers/s390/ +print_sum_subset linux/Documentation/ +print_sum_subset linux/arch + + + + diff --git a/lisp_count b/lisp_count new file mode 100755 index 0000000..ee8d8a5 --- /dev/null +++ b/lisp_count @@ -0,0 +1,27 @@ +#!/bin/sh + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count ';' $@ + diff --git a/make_filelists b/make_filelists new file mode 100755 index 0000000..5440d50 --- /dev/null +++ b/make_filelists @@ -0,0 +1,193 @@ +#!/bin/sh + +# On the command line, list the source code directories, e.g.: +# /usr/src/redhat/BUILD/* +# This command creates a set of directories paralleling the source code +# directories, with a file named "filelist" listing all the files. + +# This script goes through some trouble to turn all relative references +# into absolute pathnames, to make sure that the intended files +# are always referenced. Conceivably the current directory isn't the +# data directory and the parameters given use relative addressing, +# and we need to fix all that here. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + + +if [ "$#" -eq 0 ] +then + echo "Error: You must provide a list of directories." + exit 1 +fi + + +follow="" +skip="" +prefix="" +startingdir=`pwd` +datadir=`pwd` + +while [ "$#" -gt 0 ] +do + case "$1" + in + --follow) follow="-follow" + shift;; + --datadir) shift + if [ ! -d "$1" ] + then + echo "Error: $1 is not a directory" + exit 1 + fi + cd "$1" + datadir=`pwd` + cd "$startingdir" + shift;; + --skip) shift + skip="$1" + shift;; + --prefix) shift + prefix="$1" + shift;; + --) shift; break;; + --*) echo "Error: unrecognized option $1" + exit 1 + shift ;; + *) break;; + esac +done + +# Non-directories will be placed into the "top_dir" data directory: +toplevel_name="${prefix}top_dir" + +for possible_dir +do + # Reset to starting directory each time, so that relative directory + # requests will be processed correctly. + cd "$startingdir" + + # Translate "." into the name of current directory. + # We have to handle "." and ".." specially, because we can't place + # files with these names into the data directory. + if [ "$possible_dir" = "." ] + then + possible_dir=`pwd` + fi + if [ "$possible_dir" = ".." ] + then + cd .. + possible_dir=`pwd` + # Reset current directory. + cd "$startingdir" + fi + + base=`basename "$possible_dir"` + if [ "$base" = "$skip" ] + then + continue + fi + + if [ -d "$possible_dir" ] + then + # Set "dir" to real name (if possible_dir is a symlink to another + # directory, then "dir" and "possible_dir" may have very different values) + # depending on how "cd" is implemented on your shell. + cd "$possible_dir" + dir=`pwd` + + # The child directory's name is derived from possible_dir, not dir -- + # that way, directories we create will have names based on the supplied + # name (potentially a link), not the linked-to directory's name. + # Thus, symlinks can be used to disambiguate names where necessary. + childname="${prefix}${base}" + + cd "$datadir" + if [ -d "$childname" ] + then + echo "WARNING! 
Directory $childname pre-existed when adding $possible_dir" + else + mkdir "$childname" + fi + + echo "Creating filelist for $childname" + find "$dir" $follow -type f -print > "${childname}/filelist" + + # If it exists, copy the PROGRAM_LICENSE. + if [ -s "${dir}/PROGRAM_LICENSE" ] + then + cp "${dir}/PROGRAM_LICENSE" "${childname}/PROGRAM_LICENSE" + fi + # If it exists, copy the ORIGINAL_SPEC_FILE + if [ -s "${dir}/ORIGINAL_SPEC_FILE" ] + then + cp "${dir}/ORIGINAL_SPEC_FILE" "${childname}/ORIGINAL_SPEC_FILE" + fi + + # Do some error-checking. + if [ ! -s "${childname}/filelist" ] + then + # This is inefficient, but it doesn't matter - it's only used + # when we have an empty filelist (which is often an error condition) + saw_a_file=n + for x in ls "$dir" + do + saw_a_file=y + break + done + case $saw_a_file + in + n) + echo "Warning: directory ${childname} got no files." + echo "You may need to use the --follow option.";; + esac + fi + + elif [ -f "$possible_dir" ] + then + # We have a non-directory (regular file, symlink to a file, etc.). + # We'll just add an absolute path to it into the toplevel_name directory. + + # First, convert possible_dir into an absolute pathname if necessary: + pathname="$possible_dir" + case "$pathname" + in + /*) ;; # Already absolute pathname - do nothing. + *) pathname="${startingdir}/${possible_dir}" ;; + esac + + # Add it to the toplevel_name directory (creating the directory if needed) + cd "$datadir" + if [ ! -d "$toplevel_name" ] + then + echo "Have a non-directory at the top, so creating directory $toplevel_name" + mkdir "$toplevel_name" + fi + echo "Adding $pathname to $toplevel_name" + echo "$pathname" >> "${toplevel_name}/filelist" + else + echo "WARNING!!! Not a file nor a directory (so ignored): $possible_dir" + fi +done +exit 0 + diff --git a/makefile b/makefile new file mode 100644 index 0000000..0c029f1 --- /dev/null +++ b/makefile @@ -0,0 +1,246 @@ +# Makefile for SLOCCount. 
+# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + +# My thanks to John Clezy, who provided the modifications to this makefile +# to make sloccount work on Windows using Cygwin. + +# You may need to change the following options to install on your machine: + +# Set this for where to store the man pages and executables. +# If you want to store this as part of an official distribution, +# change this to "/usr": +PREFIX=/usr/local + +# Set "EXE_SUFFIX" to ".exe" if you're running on Windows, like this: +# EXE_SUFFIX=.exe +EXE_SUFFIX= + +# Set this to your C compiler, if it's not "gcc"; a likely alternative is "cc". +# The "-Wall" option turns on warnings in gcc. gcc users might also want +# to consider using "-Werror", which turns warnings into errors. +CC=gcc -Wall + +# Set this to the name of your "install" program. On some systems, +# "install -C" would be useful (so unchanged files won't be modified), +# but not all systems support this install option. "Install" should work +# for any Unix-like system as well as for Cygwin. +# "INSTALL_A_DIR" is the command to create a directory in the first place. 
+INSTALL=install +INSTALL_A_DIR=$(INSTALL) -d + +# Set this to the name of the program to create RPMs. +# This works for Red Hat Linux 8.0: +RPMBUILD=rpmbuild -ba +# This works for Red Hat Linux 7.X and below: +# RPMBUILD=rpm -ba + + +# From here on, nothing should need changing unless you're +# changing the code itself. + +# To change the version #, change VERSION here, sloccount.spec, +# sloccount, and sloccount.html. +# Note to self: to redistribute, do this: +# make distribute; su; make rpm; (test as desired); +# rpm -e sloccount; ^D; make my_install; send to web site. + + +NAME=sloccount +VERSION=2.26 +RPM_VERSION=1 +ARCH=i386 +VERSIONEDNAME=$(NAME)-$(VERSION) +INSTALL_DIR=$(PREFIX)/bin +MAN_DIR=$(PREFIX)/share/man +MAN_DIR_MAN1=$(MAN_DIR)/man1 +DOC_DIR=$(PREFIX)/share/doc/$(VERSIONEDNAME)-$(RPM_VERSION) +POSTED_DIR=/home/dwheeler/dwheeler.com/sloccount + +COMPILED_EXECUTABLES= \ + c_count$(EXE_SUFFIX) \ + java_count$(EXE_SUFFIX) \ + lexcount1$(EXE_SUFFIX) \ + pascal_count$(EXE_SUFFIX) \ + php_count$(EXE_SUFFIX) \ + jsp_count$(EXE_SUFFIX) \ + ml_count$(EXE_SUFFIX) + +EXECUTABLES= \ + ada_count \ + asm_count \ + awk_count \ + break_filelist \ + cobol_count \ + compute_all \ + compute_sloc_lang \ + count_extensions \ + count_unknown_ext \ + csh_count \ + exp_count \ + fortran_count \ + f90_count \ + generic_count \ + get_sloc \ + get_sloc_details \ + haskell_count \ + lex_count \ + lisp_count \ + make_filelists \ + makefile_count \ + modula3_count \ + objc_count \ + perl_count \ + print_sum \ + python_count \ + ruby_count \ + sed_count \ + sh_count \ + show_filecount \ + sloccount \ + sql_count \ + tcl_count \ + $(COMPILED_EXECUTABLES) + +MANPAGES=sloccount.1.gz + +MYDOCS=sloccount.html README TODO ChangeLog + + +all: $(COMPILED_EXECUTABLES) + +lexcount1$(EXE_SUFFIX): lexcount1.c + $(CC) lexcount1.c -o lexcount1$(EXE_SUFFIX) + +c_count$(EXE_SUFFIX): c_count.c + $(CC) c_count.c -o c_count$(EXE_SUFFIX) + +php_count$(EXE_SUFFIX): php_count.c + $(CC) php_count.c -o 
php_count$(EXE_SUFFIX) + +pascal_count.c: pascal_count.l driver.c driver.h + flex -Cfe -t pascal_count.l > pascal_count.c + +pascal_count$(EXE_SUFFIX): pascal_count.c + $(CC) pascal_count.c -o pascal_count$(EXE_SUFFIX) + +jsp_count.c: jsp_count.l driver.c driver.h + flex -Cfe -t jsp_count.l > jsp_count.c + +jsp_count$(EXE_SUFFIX): jsp_count.c + $(CC) jsp_count.c -o jsp_count$(EXE_SUFFIX) + +ml_count$(EXE_SUFFIX): ml_count.c + $(CC) ml_count.c -o ml_count$(EXE_SUFFIX) + +sloccount.1.gz: sloccount.1 + gzip -c sloccount.1 > sloccount.1.gz + +# Currently "java_count" is the same as "c_count": +java_count$(EXE_SUFFIX): c_count$(EXE_SUFFIX) + cp -p c_count$(EXE_SUFFIX) java_count$(EXE_SUFFIX) + +# This is USC's code counter, not built by default: +c_lines: C_LINES.C + $(CC) C_LINES.C -o c_lines$(EXE_SUFFIX) + + +install_programs: all + $(INSTALL) $(EXECUTABLES) $(INSTALL_DIR) + +uninstall_programs: + cd $(INSTALL_DIR) && rm -f $(EXECUTABLES) + +install_man: $(MANPAGES) + $(INSTALL_A_DIR) $(MAN_DIR_MAN1) + $(INSTALL) $(MANPAGES) $(MAN_DIR_MAN1) + +uninstall_man: + cd $(MAN_DIR_MAN1) && rm -f $(MANPAGES) + +install_docs: install_man + $(INSTALL_A_DIR) $(DOC_DIR) + $(INSTALL) $(MYDOCS) $(DOC_DIR) + +uninstall_docs: + rm -fr $(DOC_DIR) + + +install: install_programs install_man install_docs + +uninstall: uninstall_programs uninstall_docs uninstall_man + + +clean: + -rm -f $(COMPILED_EXECUTABLES) core sloccount.1.gz + +phptest: php_count + ./php_count *.php + ./php_count /usr/share/php/*.php + ./php_count /usr/share/php/*/*.php + +# "make distribute" creates the tarball. 
+ + +distribute: clean $(MANPAGES) + rm -f sloccount-$(VERSION).tgz + rm -f sloccount-$(VERSION).tar.gz + mkdir 9temp + cp -pr [A-Za-z]* 9temp + mv 9temp $(VERSIONEDNAME) + rm -f $(VERSIONEDNAME)/*.tgz + rm -f $(VERSIONEDNAME)/*.tar.gz + rm -f $(VERSIONEDNAME)/*.rpm +# rm -f $(VERSIONEDNAME)/*.1.gz + rm -f $(VERSIONEDNAME)/C_LINES.C + rm -f $(VERSIONEDNAME)/java_lines.c + rm -f $(VERSIONEDNAME)/c_lines + tar -cvf - $(VERSIONEDNAME)/* | \ + gzip --best > $(VERSIONEDNAME).tar.gz + chown --reference=README $(VERSIONEDNAME).tar.gz + chmod a+rX * + rm -fr $(VERSIONEDNAME) + +my_install: distribute + chmod a+rX * + cp -p sloccount-$(VERSION).tar.gz $(POSTED_DIR) + cp -p sloccount.html $(POSTED_DIR) + cp -p ChangeLog $(POSTED_DIR) + cp -p TODO $(POSTED_DIR) + cp -p /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm $(POSTED_DIR) + cp -p /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm $(POSTED_DIR) + +rpm: distribute + cp $(VERSIONEDNAME).tar.gz /usr/src/redhat/SOURCES + cp sloccount.spec /usr/src/redhat/SPECS + cd /usr/src/redhat/SPECS + # Uninstall current sloccount if any; ignore errors if not installed. + -rpm -e sloccount + $(RPMBUILD) sloccount.spec + chmod a+r /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm + chmod a+r /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm + rpm -ivh /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm + echo "Use rpm -e $(NAME) to remove the package" + +test: all + PATH=.:${PATH}; sloccount testcode + + diff --git a/makefile.orig b/makefile.orig new file mode 100644 index 0000000..e2753d8 --- /dev/null +++ b/makefile.orig @@ -0,0 +1,222 @@ +# Makefile for SLOCCount. +# (C) Copyright David A. Wheeler 2000-2002. +# Licensed under the GPL; see sloccount.html for license information. +# My thanks to John Clezy, who provided the modifications to this makefile +# to make sloccount work on Windows using Cygwin. 
+ + +# You may need to change the following options to install on your machine: + +# Set this for where to store the man pages and executables. +# If you want to store this as part of an official distribution, +# change this to "/usr": +PREFIX=/usr/local + +# Set "EXE_SUFFIX" to ".exe" if you're running on Windows, like this: +# EXE_SUFFIX=.exe +EXE_SUFFIX= + +# Set this to your C compiler, if it's not "gcc"; a likely alternative is "cc". +# The "-Wall" option turns on warnings in gcc. gcc users might also want +# to consider using "-Werror", which turns warnings into errors. +CC=gcc -Wall + +# Set this to the name of your "install" program. On some systems, +# "install -C" would be useful (so unchanged files won't be modified), +# but not all systems support this install option. "Install" should work +# for any Unix-like system as well as for Cygwin. +# "INSTALL_A_DIR" is the command to create a directory in the first place. +INSTALL=install +INSTALL_A_DIR=$(INSTALL) -d + +# Set this to the name of the program to create RPMs. +# This works for Red Hat Linux 8.0: +RPMBUILD=rpmbuild -ba +# This works for Red Hat Linux 7.X and below: +# RPMBUILD=rpm -ba + + +# From here on, nothing should need changing unless you're +# changing the code itself. + +# To change the version #, change VERSION here, sloccount.spec, and +# sloccount.html. +# Note to self: to redistribute, do this: +# make distribute; su; make rpm; (test as desired); +# rpm -e sloccount; ^D; make my_install; send to web site. 
+ + +NAME=sloccount +VERSION=2.20 +RPM_VERSION=1 +ARCH=i386 +VERSIONEDNAME=$(NAME)-$(VERSION) +INSTALL_DIR=$(PREFIX)/bin +MAN_DIR=$(PREFIX)/share/man +MAN_DIR_MAN1=$(MAN_DIR)/man1 +DOC_DIR=$(PREFIX)/share/doc/$(VERSIONEDNAME)-$(RPM_VERSION) +POSTED_DIR=/home/dwheeler/dwheeler.com/sloccount + +COMPILED_EXECUTABLES= \ + c_count$(EXE_SUFFIX) \ + java_count$(EXE_SUFFIX) \ + lexcount1$(EXE_SUFFIX) \ + pascal_count$(EXE_SUFFIX) \ + php_count$(EXE_SUFFIX) \ + jsp_count$(EXE_SUFFIX) + +EXECUTABLES= \ + ada_count \ + asm_count \ + awk_count \ + break_filelist \ + cobol_count \ + compute_all \ + compute_sloc_lang \ + count_extensions \ + count_unknown_ext \ + csh_count \ + exp_count \ + fortran_count \ + generic_count \ + get_sloc \ + get_sloc_details \ + haskell_count \ + lex_count \ + lisp_count \ + make_filelists \ + makefile_count \ + ml_count \ + modula3_count \ + objc_count \ + perl_count \ + print_sum \ + python_count \ + ruby_count \ + sed_count \ + sh_count \ + show_filecount \ + sloccount \ + sql_count \ + tcl_count \ + $(COMPILED_EXECUTABLES) + +MANPAGES=sloccount.1.gz + +MYDOCS=sloccount.html README TODO ChangeLog + + +all: $(COMPILED_EXECUTABLES) + +lexcount1$(EXE_SUFFIX): lexcount1.c + $(CC) lexcount1.c -o lexcount1$(EXE_SUFFIX) + +c_count$(EXE_SUFFIX): c_count.c + $(CC) c_count.c -o c_count$(EXE_SUFFIX) + +php_count$(EXE_SUFFIX): php_count.c + $(CC) php_count.c -o php_count$(EXE_SUFFIX) + +pascal_count.c: pascal_count.l driver.c driver.h + flex -Cfe -t pascal_count.l > pascal_count.c + +pascal_count$(EXE_SUFFIX): pascal_count.c + $(CC) pascal_count.c -o pascal_count$(EXE_SUFFIX) + +jsp_count.c: jsp_count.l driver.c driver.h + flex -Cfe -t jsp_count.l > jsp_count.c + +jsp_count$(EXE_SUFFIX): jsp_count.c + $(CC) jsp_count.c -o jsp_count$(EXE_SUFFIX) + +sloccount.1.gz: sloccount.1 + gzip -c sloccount.1 > sloccount.1.gz + +# Currently "java_count" is the same as "c_count": +java_count$(EXE_SUFFIX): c_count$(EXE_SUFFIX) + cp -p c_count$(EXE_SUFFIX) 
java_count$(EXE_SUFFIX) + +# This is USC's code counter, not built by default: +c_lines: C_LINES.C + $(CC) C_LINES.C -o c_lines$(EXE_SUFFIX) + + +install_programs: all + $(INSTALL) $(EXECUTABLES) $(INSTALL_DIR) + +uninstall_programs: + cd $(INSTALL_DIR) && rm -f $(EXECUTABLES) + +install_man: + $(INSTALL_A_DIR) $(MAN_DIR_MAN1) + $(INSTALL) $(MANPAGES) $(MAN_DIR_MAN1) + +uninstall_man: + cd $(MAN_DIR_MAN1) && rm -f $(MANPAGES) + +install_docs: install_man + $(INSTALL_A_DIR) $(DOC_DIR) + $(INSTALL) $(MYDOCS) $(DOC_DIR) + +uninstall_docs: + cd $(DOC_DIR) && rm -f $(MYDOCS) && rmdir $(DOC_DIR) + + +install: install_programs install_man install_docs + +uninstall: uninstall_programs uninstall_docs uninstall_man + + +clean: + -rm -f $(COMPILED_EXECUTABLES) core sloccount.1.gz + +phptest: php_count + ./php_count *.php + ./php_count /usr/share/php/*.php + ./php_count /usr/share/php/*/*.php + +# "make distribute" creates the tarball. + + +distribute: clean $(MANPAGES) + rm -f sloccount-$(VERSION).tgz + rm -f sloccount-$(VERSION).tar.gz + mkdir 9temp + cp -pr [A-Za-z]* 9temp + mv 9temp $(VERSIONEDNAME) + rm -f $(VERSIONEDNAME)/*.tgz + rm -f $(VERSIONEDNAME)/*.tar.gz + rm -f $(VERSIONEDNAME)/*.rpm +# rm -f $(VERSIONEDNAME)/*.1.gz + rm -f $(VERSIONEDNAME)/C_LINES.C + rm -f $(VERSIONEDNAME)/java_lines.c + rm -f $(VERSIONEDNAME)/c_lines + tar -cvf - $(VERSIONEDNAME)/* | \ + gzip --best > $(VERSIONEDNAME).tar.gz + chown --reference=README $(VERSIONEDNAME).tar.gz + chmod a+rX * + rm -fr $(VERSIONEDNAME) + +my_install: distribute + chmod a+rX * + cp -p sloccount-$(VERSION).tar.gz $(POSTED_DIR) + cp -p sloccount.html $(POSTED_DIR) + cp -p ChangeLog $(POSTED_DIR) + cp -p TODO $(POSTED_DIR) + cp -p /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm $(POSTED_DIR) + cp -p /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm $(POSTED_DIR) + + +rpm: distribute + cp $(VERSIONEDNAME).tar.gz /usr/src/redhat/SOURCES + cp sloccount.spec /usr/src/redhat/SPECS + cd 
/usr/src/redhat/SPECS + # Uninstall current sloccount if any; ignore errors if not installed. + -rpm -e sloccount + $(RPMBUILD) sloccount.spec + chmod a+r /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm + chmod a+r /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm + rpm -ivh /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm + echo "Use rpm -e $(NAME) to remove the package" + + diff --git a/makefile_count b/makefile_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/makefile_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/ml_count.c b/ml_count.c new file mode 100644 index 0000000..dc18f35 --- /dev/null +++ b/ml_count.c @@ -0,0 +1,209 @@ +/* ml_count: given a list of ML files on the command line, + count the SLOC in each one. SLOC = physical, non-comment lines. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. 
Wheeler and Michal Moskal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. +Michal Moskal may be contacted at malekith at pld-linux.org. + + Based on c_count.c by: + (C) Copyright 2000 David A. Wheeler + Michal Moskal rewrote sloc_count() function, to support ML. + + Usage: Use in one of the following ways: + ml_count # As filter + ml_count [-f file] [list_of_files] + file: file with a list of files to count (if "-", read list from stdin) + list_of_files: list of files to count + + Michal Moskal states "It was easier to get string escaping and comment + nesting right in C then in Perl. It would be even easier in OCaml... ;-)" +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdlib.h> + +/* Globals */ +long total_sloc; + +int peek(FILE *stream) { + int c = getc(stream); + ungetc(c, stream); + return c; +} + +int ispeek(int c, FILE *stream) { + if (c == peek(stream)) {return 1;} + return 0; +} + +long line_number; + +int getachar(FILE *stream) { +/* Like getchar(), but keep track of line number. 
*/ + static int last_char_was_newline = 0; + int c; + + c = getc(stream); + if (last_char_was_newline) line_number++; + if (c == '\n') last_char_was_newline=1; + else last_char_was_newline=0; + return c; +} + + +long sloc_count(char *filename, FILE *stream) { + /* Count the sloc in the program in stdin. */ + long sloc = 0; + + int sawchar = 0; /* Did you see a character on this line? */ + int c; + + int comment_lev = 0; /* Level of comment nesting. */ + int in_string = 0; /* 0 or 1 */ + + + while ((c = getachar(stream)) != EOF) { + switch (c) { + case '"': + in_string = !in_string; + break; + + case '(': + if (!in_string && ispeek('*', stream)) { + comment_lev++; + getachar(stream); /* skip '*' */ + } + break; + + case '*': + if (comment_lev && !in_string && ispeek(')', stream)) { + comment_lev--; + getachar(stream); /* skip ')' */ + continue /* while */; + } + break; + + case '\\': + /* Ignore next character if in string. But don't ignore newlines. */ + if (in_string && !ispeek('\n', stream)) + getachar(stream); + break; + + case ' ': + case '\t': + /* just ignore blanks */ + continue /* while */; + + case '\n': + if (sawchar) { + sloc++; + sawchar = 0; + } + continue /* while */; + + default: + break; + } + + if (comment_lev == 0) + sawchar = 1; + } + + /* We're done with the file. Handle EOF-without-EOL. */ + if (sawchar) sloc++; + + if (comment_lev) { + fprintf(stderr, "ml_count ERROR - terminated in comment in %s\n", filename); + } else if (in_string) { + fprintf(stderr, "ml_count ERROR - terminated in string in %s\n", filename); + } + + return sloc; +} + + +void count_file(char *filename) { + long sloc; + FILE *stream; + + stream = fopen(filename, "r"); + line_number = 1; + sloc = sloc_count(filename, stream); + total_sloc += sloc; + printf("%ld %s\n", sloc, filename); + fclose(stream); +} + +char *read_a_line(FILE *file) { + /* Read a line in, and return a malloc'ed buffer with the line contents. + Any newline at the end is stripped. 
+ If there's nothing left to read, returns NULL. */ + + /* We'll create a monstrously long buffer to make life easy for us: */ + char buffer[10000]; + char *returnval; + char *newlinepos; + + returnval = fgets(buffer, sizeof(buffer), file); + if (returnval) { + newlinepos = buffer + strlen(buffer) - 1; + if (*newlinepos == '\n') {*newlinepos = '\0';}; + return strdup(buffer); + } else { + return NULL; + } +} + + +int main(int argc, char *argv[]) { + long sloc; + int i; + FILE *file_list; + char *s; + + total_sloc = 0; + line_number = 1; + + if (argc <= 1) { + sloc = sloc_count("-", stdin); + printf("%ld %s\n", sloc, "-"); + total_sloc += sloc; + } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { + if (!strcmp (argv[2], "-")) { + file_list = stdin; + } else { + file_list = fopen(argv[2], "r"); + } + if (file_list) { + while ((s = read_a_line(file_list))) { + count_file(s); + free(s); + } + } + } else { + for (i=1; i < argc; i++) { count_file(argv[i]); } + } + printf("Total:\n"); + printf("%ld\n", total_sloc); + return 0; /* Report success */ +} + diff --git a/modula3_count b/modula3_count new file mode 100644 index 0000000..fa2921d --- /dev/null +++ b/modula3_count @@ -0,0 +1,65 @@ +#!/usr/bin/perl -w +# modula3_count - count physical lines of code +# Strips out (* .. *) and counts the rest. +# Usage: modula3_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
  # Yes, we have -f
  if ($ARGV[1] eq "-") {
    # The list of files is in STDIN
    while (<STDIN>) {
      chomp ($_);
      &count_file ($_);
    }
  } else {
    # The list of files is in the file $ARGV[1]
    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
    while (<FILEWITHLIST>) {
      chomp ($_);
      &count_file ($_);
    }
    close FILEWITHLIST;
  }
  shift @ARGV; shift @ARGV;
}
# Process all (remaining) arguments as file names
while ($file = shift @ARGV) {
  &count_file ($file);
}

print "Total:\n";
print "$total_sloc\n";

# count_file($filename): count the SLOC in one Modula-3 source file, print
# "<sloc> <filename>" on stdout, and add the count to global $total_sloc.
# A line counts as SLOC if anything other than whitespace remains after
# removing (* ... *) comments.
# NOTE(review): Modula-3 block comments nest per the language definition;
# this stripper tracks only one comment level, so nested comments may be
# miscounted — TODO confirm whether that matters for the target code base.
# NOTE(review): a "(*" inside a string literal is treated as a comment
# opener; strings are not parsed here.
sub count_file {
  my ($file) = @_;
  my $sloc = 0;
  my $incomment = 0;

  # BUGFIX: the original unchecked 2-arg open() made an unreadable file
  # silently count as "0 <file>"; report it to stderr and skip instead.
  if (!open (FILE, "<", $file)) {
    print STDERR "modula3_count ERROR: Could not open $file\n";
    return;
  }
  while (<FILE>) {
    if ($incomment) {
      # Inside a comment: either it closes on this line, or drop the line.
      if (m/\*\)/) { s/^.*?\*\)//; $incomment = 0;}
      else { s/.*//; }
    }
    if (!$incomment) {
      s!\(\*.*?\*\)!!g;        # remove comments wholly contained on the line
      if (m/\(\*/) {           # a comment opens and runs past end-of-line
        s/\(\*.*//;
        $incomment = 1;
      }
    }
    if (m/\S/) {$sloc++;}      # anything non-blank left => one source line
  }
  print "$sloc $file\n";
  if ($incomment) {print "ERROR: ended in comment in $file\n";}
  $total_sloc += $sloc;
  $sloc = 0;
  $incomment = 0;
  close (FILE);
}
diff --git a/objc_count b/objc_count
new file mode 100755
index 0000000..a74bd5b
--- /dev/null
+++ b/objc_count
@@ -0,0 +1,89 @@
#!/usr/bin/perl -w
# objc_count - count physical lines of code
# Strips out /* .. */ and counts the rest.
# Usage: objc_count [-f file] [list_of_files]
#  file: file with a list of files to count (if "-", read list from stdin)
#  list_of_files: list of files to count
#  -f file or list_of_files can be used, or both

# This is part of SLOCCount, a toolsuite that counts
# source lines of code (SLOC).
# Copyright (C) 2001-2004 David A. Wheeler.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# To contact David A. Wheeler, see his website at:
#   http://www.dwheeler.com.
#
#

$total_sloc = 0;

# Do we have "-f" (read list of files from second argument)?
if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
  # Yes, we have -f
  if ($ARGV[1] eq "-") {
    # The list of files is in STDIN
    while (<STDIN>) {
      chomp ($_);
      &count_file ($_);
    }
  } else {
    # The list of files is in the file $ARGV[1]
    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
    while (<FILEWITHLIST>) {
      chomp ($_);
      &count_file ($_);
    }
    close FILEWITHLIST;
  }
  shift @ARGV; shift @ARGV;
}
# Process all (remaining) arguments as file names
while ($file = shift @ARGV) {
  &count_file ($file);
}

print "Total:\n";
print "$total_sloc\n";

# count_file($filename): count the SLOC in one Objective-C source file,
# print "<sloc> <filename>" on stdout, and add the count to $total_sloc.
# A line counts as SLOC if anything other than whitespace remains after
# removing /* ... */ comments.
# NOTE(review): "//" line comments and "/*" inside string literals are not
# handled here — TODO confirm this matches the other C-family counters.
sub count_file {
  my ($file) = @_;
  my $sloc = 0;
  my $incomment = 0;

  # BUGFIX: the original unchecked 2-arg open() made an unreadable file
  # silently count as "0 <file>"; report it to stderr and skip instead.
  if (!open (FILE, "<", $file)) {
    print STDERR "objc_count ERROR: Could not open $file\n";
    return;
  }
  while (<FILE>) {
    if ($incomment) {
      # Inside a comment: either it closes on this line, or drop the line.
      if (m/\*\//) { s/^.*?\*\///; $incomment = 0;}
      else { s/.*//; }
    }
    if (!$incomment) {
      # s/\/\*.*?\*\//g;
      s!\/\*.*?\*\/!!g;        # remove comments wholly contained on the line
      if (m/\/\*/) {           # a comment opens and runs past end-of-line
        s/\/\*.*//;
        $incomment = 1;
      }
    }
    if (m/\S/) {$sloc++;}      # anything non-blank left => one source line
  }
  print "$sloc $file\n";
  if ($incomment) {print "ERROR: ended in comment in $file\n";}
  $total_sloc += $sloc;
  $sloc = 0;
  $incomment = 0;
  close (FILE);
}
diff --git a/pascal_count.c b/pascal_count.c
new file mode 100644
index 0000000..e7f870f
--- /dev/null
+++ b/pascal_count.c
@@ -0,0 +1,1714 @@
/* A lexical scanner generated by flex */

/* Scanner skeleton version:
 * $Header:
/home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include <stdio.h> +#include <unistd.h> + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include <stdlib.h> + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include <io.h> +#include <stdlib.h> +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". 
*/ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. 
+ */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + + +#define yywrap() 1 +#define YY_SKIP_YYWRAP +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern char *yytext; +#define yytext_ptr yytext +static yyconst short yy_nxt[][10] = + { + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + + { + 9, 10, 11, 12, 13, 14, 10, 10, 15, 10 + }, + + { + 9, 10, 11, 12, 13, 14, 10, 10, 15, 10 + }, + + { + 9, 16, 16, 17, 16, 16, 16, 18, 16, 16 + }, + + { + 9, 16, 16, 17, 16, 16, 16, 18, 16, 16 + }, + + { + 9, 19, 19, 20, 19, 19, 19, 19, 19, 21 + }, + + { + 9, 19, 19, 20, 19, 19, 19, 19, 19, 21 + }, + + { + 9, 22, 22, 23, 24, 22, 22, 22, 22, 22 + }, + + { + 9, 22, 22, 23, 24, 22, 22, 22, 22, 22 + }, + + { + -9, -9, -9, -9, -9, -9, -9, -9, -9, -9 + + }, + + { + 9, 25, 25, -10, -10, -10, 25, 25, -10, 25 + }, + + { + 9, -11, -11, -11, -11, -11, -11, -11, -11, -11 + }, + + { + 9, -12, -12, -12, -12, -12, -12, -12, -12, -12 + }, + + { + 9, -13, -13, -13, -13, -13, -13, -13, -13, -13 + }, + + { + 9, -14, -14, -14, -14, -14, -14, 26, -14, -14 + }, + + { + 9, -15, -15, -15, -15, -15, -15, -15, -15, -15 + }, + + { + 9, 27, 27, 28, 27, 27, 27, -16, 27, 27 + }, + + { + 9, -17, -17, -17, -17, -17, -17, -17, -17, -17 + }, + + { + 9, 29, 29, 30, 29, 29, 31, 32, 29, 29 + }, + + { + 9, 33, 33, 34, 33, 33, 33, 33, 33, -19 + + }, + + { + 9, -20, -20, -20, -20, -20, -20, -20, -20, -20 + }, + + { + 9, -21, -21, -21, -21, -21, -21, -21, -21, -21 + }, + + { + 9, 35, 35, -22, -22, 35, 35, 35, 35, 35 + }, + + { + 9, -23, -23, -23, -23, -23, -23, -23, -23, -23 + }, + + { + 9, -24, -24, -24, 36, -24, -24, -24, -24, -24 + }, + + { + 9, 25, 25, -25, -25, -25, 25, 25, -25, 25 + }, + + { + 9, -26, -26, -26, -26, -26, -26, -26, -26, -26 + }, + + { + 9, 27, 27, 28, 27, 27, 27, -27, 27, 27 + }, + + { + 9, -28, -28, -28, -28, -28, 
-28, -28, -28, -28 + }, + + { + 9, 29, 29, 30, 29, 29, -29, -29, 29, 29 + + }, + + { + 9, -30, -30, -30, -30, -30, -30, -30, -30, -30 + }, + + { + 9, -31, -31, -31, -31, -31, -31, -31, -31, -31 + }, + + { + 9, 29, 29, 30, 29, 29, 31, 32, 29, 29 + }, + + { + 9, 33, 33, 34, 33, 33, 33, 33, 33, -33 + }, + + { + 9, -34, -34, -34, -34, -34, -34, -34, -34, -34 + }, + + { + 9, 35, 35, -35, -35, 35, 35, 35, 35, 35 + }, + + { + 9, -36, -36, -36, -36, -36, -36, -36, -36, -36 + }, + + } ; + + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 20 +#define YY_END_OF_BUFFER 21 +static yyconst short int yy_accept[37] = + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 21, 6, + 1, 3, 5, 7, 4, 8, 9, 10, 13, 14, + 15, 16, 17, 19, 6, 2, 8, 9, 10, 11, + 12, 10, 13, 14, 16, 18 + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 1, 1, 1, 1, 4, 5, + 6, 7, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 8, 1, 9, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "pascal_count.l" +#define INITIAL 0 +#line 2 "pascal_count.l" + +/* +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + +*/ + +#include "driver.h" + +#define YY_NO_UNPUT + +/* 1 if we saw a non-comment, non-whitespace char on this line */ +int saw_char = 0; + +#define comment 1 + +#define bcomment 2 + +#define string 3 + + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include <stdlib.h> +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ + && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp = NULL, *yy_bp = NULL; + register int yy_act; + +#line 43 "pascal_count.l" + + line_number = 1; + saw_char = 0; + BEGIN(INITIAL); + + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! 
yyout ) + yyout = stdout; + + if ( ! yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yy_start; +yy_match: + while ( (yy_current_state = yy_nxt[yy_current_state][yy_ec[YY_SC_TO_UI(*yy_cp)]]) > 0 ) + ++yy_cp; + + yy_current_state = -yy_current_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + + +do_action: /* This label is used only to access EOF actions. */ + + + switch ( yy_act ) + { /* beginning of action switch */ +case 1: +YY_RULE_SETUP +#line 48 "pascal_count.l" +/* Do nothing */ + YY_BREAK +case 2: +YY_RULE_SETUP +#line 49 "pascal_count.l" +{BEGIN(comment);} + YY_BREAK +case 3: +YY_RULE_SETUP +#line 50 "pascal_count.l" +{if (saw_char) {sloc++; saw_char=0;}; line_number++;} + YY_BREAK +case 4: +YY_RULE_SETUP +#line 51 "pascal_count.l" +{BEGIN(bcomment);} + YY_BREAK +case 5: +YY_RULE_SETUP +#line 52 "pascal_count.l" +{saw_char = 1; BEGIN(string);} + YY_BREAK +case 6: +YY_RULE_SETUP +#line 53 "pascal_count.l" +{saw_char = 1;} + YY_BREAK +case 7: +YY_RULE_SETUP +#line 54 "pascal_count.l" +{saw_char = 1;} + YY_BREAK +case 8: +YY_RULE_SETUP +#line 56 "pascal_count.l" +/* Do nothing */ + YY_BREAK +case 9: +YY_RULE_SETUP +#line 57 "pascal_count.l" +{if (saw_char) {sloc++; saw_char=0;}; line_number++;} + YY_BREAK +case 10: +YY_RULE_SETUP +#line 58 "pascal_count.l" +/* Do nothing */ + YY_BREAK +case 11: +YY_RULE_SETUP +#line 59 "pascal_count.l" +{if (saw_char) {sloc++; saw_char=0;}; line_number++;} + YY_BREAK +case 12: +YY_RULE_SETUP +#line 60 "pascal_count.l" +{BEGIN(INITIAL);} + YY_BREAK +case 13: +YY_RULE_SETUP +#line 62 "pascal_count.l" +/* Do nothing */ + YY_BREAK +case 14: +YY_RULE_SETUP 
+#line 63 "pascal_count.l" +{if (saw_char) {sloc++; saw_char=0;}; line_number++;} + YY_BREAK +case 15: +YY_RULE_SETUP +#line 64 "pascal_count.l" +{BEGIN(INITIAL);} + YY_BREAK +case 16: +YY_RULE_SETUP +#line 66 "pascal_count.l" +{saw_char = 1;} + YY_BREAK +case 17: +YY_RULE_SETUP +#line 67 "pascal_count.l" +{ + fprintf(stderr, "Warning: newline in string - file %s, line %ld\n", + filename, line_number); + if (saw_char) {sloc++; saw_char=0;}; + BEGIN(INITIAL); /* Switch back; this at least limits damage */ + line_number++; + } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 74 "pascal_count.l" +{saw_char = 1;} + YY_BREAK +case 19: +YY_RULE_SETUP +#line 75 "pascal_count.l" +{saw_char = 1; BEGIN(INITIAL);} + YY_BREAK +case 20: +YY_RULE_SETUP +#line 77 "pascal_count.l" +ECHO; + YY_BREAK + case YY_STATE_EOF(INITIAL): + case YY_STATE_EOF(comment): + case YY_STATE_EOF(bcomment): + case YY_STATE_EOF(string): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)]; + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + + yy_current_state = yy_nxt[yy_current_state][1]; + yy_is_jam = (yy_current_state <= 0); + + return yy_is_jam ? 
0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register char *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. */ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. 
But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + + return c; + } +#endif /* YY_NO_INPUT */ + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? 
(isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! 
yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. 
*/ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +char *s1; +yyconst char *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst char *s ) +#else +static int yy_flex_strlen( s ) +yyconst char *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif +#line 77 "pascal_count.l" + + +#include "driver.c" + + diff --git a/pascal_count.l b/pascal_count.l new file mode 100644 index 0000000..cedfbb4 --- /dev/null +++ b/pascal_count.l @@ -0,0 +1,81 @@ +%{ + +/* +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + +*/ + +#include "driver.h" + +#define YY_NO_UNPUT + +/* 1 if we saw a non-comment, non-whitespace char on this line */ +int saw_char = 0; + +%} + +%option noyywrap + +SPACE [ \t\n\r\f] + +%x comment +%x bcomment +%x string + +%% + line_number = 1; + saw_char = 0; + BEGIN(INITIAL); + +[ \t\r\f] /* Do nothing */ +"(*" {BEGIN(comment);} +\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;} +"{" {BEGIN(bcomment);} +"'" {saw_char = 1; BEGIN(string);} +[^ \t\r\f(\n{'][^(\n{']* {saw_char = 1;} +. 
{saw_char = 1;} + +<comment>[^*\n]+ /* Do nothing */ +<comment>[^*\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;} +<comment>"*"+[^*)\n]* /* Do nothing */ +<comment>"*"+[^*)\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;} +<comment>"*"+")" {BEGIN(INITIAL);} + +<bcomment>[^}\n]+ /* Do nothing */ +<bcomment>[^}\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;} +<bcomment>"}" {BEGIN(INITIAL);} + +<string>[^'\n]+ {saw_char = 1;} +<string>\n { + fprintf(stderr, "Warning: newline in string - file %s, line %ld\n", + filename, line_number); + if (saw_char) {sloc++; saw_char=0;}; + BEGIN(INITIAL); /* Switch back; this at least limits damage */ + line_number++; + } +<string>'' {saw_char = 1;} +<string>' {saw_char = 1; BEGIN(INITIAL);} + +%% + +#include "driver.c" + + diff --git a/perl_count b/perl_count new file mode 100755 index 0000000..472ec33 --- /dev/null +++ b/perl_count @@ -0,0 +1,147 @@ +#!/usr/bin/perl +# perl_count - count physical lines of code in Perl programs. +# Usage: perl_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# Physical lines of Perl are MUCH HARDER to count than you'd think. +# Comments begin with "#". +# Also, anything in a "perlpod" is a comment. +# See perlpod(1) for more info; a perlpod starts with +# \s*=command, can have more commands, and ends with \s*=cut. +# Note that = followed by space is NOT a perlpod. +# Although we ignore everything after __END__ in a file, +# we will count everything after __DATA__; there's arguments for counting +# and for not counting __DATA__. + +# What's worse, "here" documents must be COUNTED AS CODE, even if +# they're FORMATTED AS A PERLPOD. Surely no one would do this, right? +# Sigh... it can happen. See perl5.005_03/pod/splitpod. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). 
+# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + my $isinpod = 0; + my $heredocument = ""; # If nonempty, identifies the ending marker. + + open (FILE, $file); + while (<FILE>) { + s/#.*//; # Delete leading comments. + if ($heredocument and m/^\s*$heredocument/) { + $heredocument = ""; # finished here doc. + } elsif (m/<<\s*["']?([A-Za-z0-9_-]+)["']?[;,]\s*$/) { + # Beginning of a here document. 
+ $heredocument = $1; + } elsif (!$heredocument && m/^\s*=cut/) { # Ending a POD? + if (! $isinpod) { + print stderr "cut without pod start in file $file line $.\n"; + } + s/.*//; # Don't count the cut command. + $isinpod = 0; + } elsif (!$heredocument && m/^\s*=[a-zA-Z]/) { # Starting continuing a POD? + # Perlpods can have multiple contents, so it's okay if $isinpod == 1. + # Note that =(space) isn't a POD; library file perl5db.pl does this! + $isinpod = 1; + } elsif (m/^__END__/) { # Stop processing this file on __END__. + last; + } + if ((! $isinpod) && (m/\S/)) { $sloc++;} + } + # Show per-file & total; reset $isinpod per file. + print "$sloc $file\n"; + $total_sloc += $sloc; + $sloc = 0; + if ($isinpod) { + print stderr "pod without closing cut in file $file\n"; + } + # Reset state: + $isinpod = 0; + $heredocument = ""; + close (FILE); +} + +# The following is POD documentation; it should not be counted: +=head1 Test +=head2 testing +=cut + +__END__ +# The following should not be counted in a line-counting program: +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; +print "Hello!\n"; + + diff --git a/php_count.c b/php_count.c new file mode 100644 index 0000000..ee7ce10 --- /dev/null +++ b/php_count.c @@ -0,0 +1,335 @@ +/* php_count: given a list of C/C++/Java files on the command line, + count the SLOC in each one. SLOC = physical, non-comment lines. + This program knows about C++ and C comments (and how they interact), + and correctly ignores comment markers inside strings. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + + + Usage: Use in one of the following ways: + php_count # As filter + php_count list_of_files # Counts for each file. + php_count -f fl # Counts the files listed in "fl". + +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdlib.h> + + +/* If ALLOW_SHORT_TAGS is true, then <? all by itself begins PHP code. */ +#define ALLOW_SHORT_TAGS 1 + +/* If ALLOW_ASP_TAGS is true, then <% begins PHP code. */ +#define ALLOW_ASP_TAGS 1 + + + +/* Modes: PHP starts in "NONE", and <?php etc change mode to "NORMAL". */ +enum mode_t { NONE, NORMAL, INSTRING, INCOMMENT, INSINGLESTRING, HEREDOC }; + +enum comment_t {ANSIC_STYLE, CPP_STYLE, SH_STYLE}; /* Types of comments */ +enum end_t {NORMAL_END, SCRIPT_END, ASP_END}; /* Type of ending to expect. */ + + +/* Globals */ +long total_sloc; + +long line_number; + +/* Handle input */ + +/* Number of characters in one line, maximum. */ +/* The code uses fgets() so that longer lines are truncated & not a + buffer overflow hazard. */ +#define LONGEST_LINE 20000 + +static char current_line[LONGEST_LINE]; +static char *clocation; /* points into current_line */ +static long sloc = 0; +static int sawchar = 0; /* Did you see a character on this line? 
*/ +static int beginning_of_line = 0; +static int is_input_eof; + +void read_input_line(FILE *stream) { + /* Read in a new line - increment sloc if sawchar, & reset sawchar. */ + if (feof(stream)) { + is_input_eof = 1; + return; + } + line_number++; + fgets(current_line, sizeof(current_line)-2, stream); + clocation = &(current_line[0]); + beginning_of_line = 1; + if (current_line[0] == '\0') is_input_eof = 1; + if (sawchar) { + /* printf("DEBUG: INCREMENTING SLOC\n"); */ + sawchar = 0; + sloc++; + } +} + +void init_input(FILE *stream) { + current_line[0] = '\0'; + is_input_eof = 0; + sawchar = 0; + read_input_line(stream); +} + +void consume_char(FILE *stream) { + /* returns TRUE if there are more characters in the input. */ + beginning_of_line = 0; + if (!*clocation) read_input_line(stream); + else clocation++; +} + +int match_consume(const char *m, FILE *stream) { + /* returns TRUE & most forward if matches, and consumes */ + if (!*clocation) read_input_line(stream); + if (strncasecmp(m, clocation, strlen(m)) == 0) { + /* printf("MATCH: %s, %s\n", m, clocation); */ + clocation += strlen(m); + beginning_of_line = 0; + return 1; + } else { + return 0; + } +} + +int current_char(FILE *stream) { + if (!*clocation) read_input_line(stream); + return *clocation; +} + +char *rest_of_line(FILE *stream) { + /* returns rest of the line in a malloc'ed entry (caller must free()), + consuming it. */ + char *result; + + result = strdup(clocation); + read_input_line(stream); + return result; +} + + +void strstrip(char *s) { + /* Strip whitespace off the end of s. */ + char *p; + + /* Remove whitespace from the end by walking backwards. */ + for (p= s + strlen(s) - 1; p >= s && isspace(*p); p--) { + *p = '\0'; + } + return; +} + + +long sloc_count(char *filename, FILE *stream) { + /* Count the sloc in the program in stdin. 
*/ + + enum mode_t mode = NONE; /* State machine state - NORMAL == PHP code */ + enum comment_t comment_type; /* ANSIC_STYLE, CPP_STYLE, SH_STYLE */ + enum end_t expected_end; /* The kind of ending expected, e.g. ?> */ + + char *heredoc_end; + + sloc = 0; + + + /* The following implements a state machine with transitions; the + main state is "mode"; the transitions are triggered by character input. */ + + while (!is_input_eof) { + /* printf("mode=%d, current_char=%c\n", mode, current_char()); */ + if (mode == NONE) { + /* Note: PHP will raise errors if something starts with + <?php and isn't followed by whitespace, e.g., <?phphello + is illegal. We won't look for this case, under the assumption + that someone won't bother to count malformed code. It's just + as well, anyway - it's few would think of doing it! + Note that simple <? followed by arbitrary characters is okay, + and is handled by the <? processing, so <?echo("hello")?> works. */ + if (match_consume("<?php", stream)) { + expected_end = NORMAL_END; + mode = NORMAL; + } else if (ALLOW_SHORT_TAGS && match_consume("<?", stream)) { + expected_end = NORMAL_END; + mode = NORMAL; + /* FIXME: <script...> should be more flexible, allowing for + other attributes etc. I haven't seen this as a real problem. 
*/ + } else if (match_consume("<script language=\"php\">", stream)) { + expected_end = SCRIPT_END; + mode = NORMAL; + } else if (ALLOW_ASP_TAGS && match_consume("<%", stream)) { + expected_end = ASP_END; + mode = NORMAL; + } else consume_char(stream); + } else if (mode == NORMAL) { + if ((expected_end==NORMAL_END) && match_consume("?>", stream)) { + mode = NONE; + } else if ((expected_end==ASP_END) && match_consume("%>", stream)) { + mode = NONE; + } else if ((expected_end==SCRIPT_END) && match_consume("</script>", stream)) { + mode = NONE; + } else if (match_consume("\"", stream)) { + sawchar = 1; + mode = INSTRING; + } else if (match_consume("\'", stream)) { + sawchar = 1; + mode = INSINGLESTRING; + } else if (match_consume("/*", stream)) { + mode = INCOMMENT; + comment_type = ANSIC_STYLE; + } else if (match_consume("//", stream)) { + mode = INCOMMENT; + comment_type = CPP_STYLE; + } else if (match_consume("#", stream)) { + mode = INCOMMENT; + comment_type = SH_STYLE; + } else if (match_consume("<<<", stream)) { + mode = HEREDOC; + while (isspace(current_char(stream)) && !is_input_eof) {consume_char(stream);} + heredoc_end = rest_of_line(stream); + strstrip(heredoc_end); + } else { + if (!isspace(current_char(stream))) sawchar = 1; + consume_char(stream); + } + } else if (mode == INSTRING) { + /* We only count string lines with non-whitespace -- this is to + gracefully handle syntactically invalid programs. + You could argue that multiline strings with whitespace are + still executable and should be counted. */ + if (!isspace(current_char(stream))) sawchar = 1; + if (match_consume("\"", stream)) {mode = NORMAL;} + else if (match_consume("\\\"", stream) || match_consume("\\\\", stream) || + match_consume("\\\'", stream)) {} + else consume_char(stream); + } else if (mode == INSINGLESTRING) { + /* We only count string lines with non-whitespace; see above. 
*/ + if (!isspace(current_char(stream))) sawchar = 1; + if (current_char(stream) == '\'') {} + if (match_consume("'", stream)) {mode = NORMAL; } + else if (match_consume("\\\\", stream) || match_consume("\\\'", stream)) { } + else { consume_char(stream); } + } else if (mode == INCOMMENT) { + if ((comment_type == ANSIC_STYLE) && match_consume("*/", stream)) { + mode = NORMAL; } + /* Note: in PHP, must accept ending markers, even in a comment: */ + else if ((expected_end==NORMAL_END) && match_consume("?>", stream)) + { mode = NONE; } + else if ((expected_end==ASP_END) && match_consume("%>", stream)) { mode = NONE; } + else if ((expected_end==SCRIPT_END) && match_consume("</script>", stream)) + { mode = NONE; } + else if ( ((comment_type == CPP_STYLE) || (comment_type == SH_STYLE)) && + match_consume("\n", stream)) { mode = NORMAL; } + else consume_char(stream); + } else if (mode == HEREDOC) { + if (!isspace(current_char(stream))) sawchar = 1; + if (beginning_of_line && match_consume(heredoc_end, stream)) { + mode=NORMAL; + } else { + consume_char(stream); + } + } else { + fprintf(stderr, "Warning! Unknown mode in PHP file %s, mode=%d\n", + filename, mode); + consume_char(stream); + } + } + if (mode != NONE) { + fprintf(stderr, "Warning! Unclosed PHP file %s, mode=%d\n", filename, mode); + } + + return sloc; +} + + +void count_file(char *filename) { + long sloc; + FILE *stream; + + stream = fopen(filename, "r"); + line_number = 0; + init_input(stream); + sloc = sloc_count(filename, stream); + fclose (stream); + total_sloc += sloc; + printf("%ld %s\n", sloc, filename); +} + +char *read_a_line(FILE *file) { + /* Read a line in, and return a malloc'ed buffer with the line contents. + Any newline at the end is stripped. + If there's nothing left to read, returns NULL. 
*/ + + /* We'll create a monstrously long buffer to make life easy for us: */ + char buffer[10000]; + char *returnval; + char *newlinepos; + + returnval = fgets(buffer, sizeof(buffer), file); + if (returnval) { + newlinepos = buffer + strlen(buffer) - 1; + if (*newlinepos == '\n') {*newlinepos = '\0';}; + return strdup(buffer); + } else { + return NULL; + } +} + + +int main(int argc, char *argv[]) { + long sloc; + int i; + FILE *file_list; + char *s; + + total_sloc = 0; + line_number = 0; + + if (argc <= 1) { + init_input(stdin); + sloc = sloc_count("-", stdin); + printf("%ld %s\n", sloc, "-"); + total_sloc += sloc; + } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { + if (!strcmp (argv[2], "-")) { + file_list = stdin; + } else { + file_list = fopen(argv[2], "r"); + } + if (file_list) { + while ((s = read_a_line(file_list))) { + count_file(s); + free(s); + } + } + } else { + for (i=1; i < argc; i++) { count_file(argv[i]); } + } + printf("Total:\n"); + printf("%ld\n", total_sloc); + exit(0); +} + diff --git a/print_sum b/print_sum new file mode 100755 index 0000000..f0ef453 --- /dev/null +++ b/print_sum @@ -0,0 +1,40 @@ +#!/usr/bin/perl +# print_sum - read from stdin and print the sum. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total = 0.0; + +while (<>) { + if (m/^\s*([\d.]+)/) { + $maybe_number = $1; + $maybe_number =~ s/\.*$//; # chop trailing ".". + if ($maybe_number =~ m/\d/) { + $total += $maybe_number; + } + } +} + +print "$total\n"; + diff --git a/print_sum_subset b/print_sum_subset new file mode 100755 index 0000000..2db2496 --- /dev/null +++ b/print_sum_subset @@ -0,0 +1,41 @@ +#!/bin/sh + +# print the sum of SLOC for a subset of a package. +# The subset is specified using a regular expression. + +# To use, "cd" into the package data directory (with the "_outfile.dat" files), +# then specify as the first parameter the pattern defining the subset. + +# E.G.: +# cd ../data/linux +# print_sum_subset 'BUILD\/linux\/drivers\/' +# +# will show the SLOC total for the "drivers" directory & subdirs +# of the "linux" data subdirectory. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. 
Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +echo -n "$1 " +grep -h "$1" *_outfile.dat | grep '^[1-9][0-9]* [^ ]' | print_sum + diff --git a/python_count b/python_count new file mode 100755 index 0000000..ab8c99e --- /dev/null +++ b/python_count @@ -0,0 +1,120 @@ +#!/usr/bin/perl -w +# python_count - count physical lines of code in Python programs. +# Usage: python_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both +# This is a trivial/naive program. + +# Comments begin with "#". +# Python supports multi-line strings using """, which matches another """. +# When not inside a multi-line string, a line whose first non-whitespace +# is """ almost always indicates a programming comment; +# this is also true for lines beginning with '"' +# This means that a string which is part of an expression but which begins +# a new line won't be counted; this problem is rare in practice. +# This code DOES count _data_ inside a triple-quote (that's not a comment). +# Note that this isn't true for single-quote, which is used in case +# statements (etc.) but not in this context. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + my $isintriple = 0; # A triple-quote is in effect. + my $isincomment = 0; # We are in a multiline (triple-quoted) comment. + + open (FILE, $file); + while (<FILE>) { + if (! $isintriple) { # Normal case: + s/""".*"""//; # Delete triple-quotes that begin & end on the line. + s/^\s*"([^"]|(\\"))+"//; # Delete lonely strings starting on BOL. + s/#.*//; # Delete "#" comments. + if (m/"""/) { # Does a multiline triple-quote begin here? + $isintriple = 1; + if (m/^\s*"""/) {$isincomment = 1;} # It's a comment if at BOL. + } + } else { # we ARE in a triple. + if (m/"""/) { + if ($isincomment) { + s/.*?"""//; # Delete string text if it's a comment (not if data) + } else { + s/.*?"""/x/; # Leave something there to count. + } + # But wait! Another triple might start on this line! + # (see Python-1.5.2/Tools/freeze/makefreeze.py for an example) + if (m/"""/) { + # It did! No change in state! 
+ } else { + $isintriple = 0; + $isincomment = 0; + } + } + } + # TO DEBUG: + # print "cmmnt=${isincomment} trp=${isintriple}: $_\n"; + if ( (!$isincomment) && m/\S/) {$sloc++;}; + } + print "$sloc $file\n"; + $total_sloc += $sloc; + $sloc = 0; + if ($isintriple) { + print STDERR "No closing triple-doublequote-marks in file $file\n"; + } + # Reset rest of state: + $isintriple = 0; + $isincomment = 0; + close (FILE); # Reset $. (line count) each time. +} diff --git a/redo_licenses b/redo_licenses new file mode 100755 index 0000000..8580b38 --- /dev/null +++ b/redo_licenses @@ -0,0 +1,42 @@ +#!/bin/sh + +# redo_licenses -- recompute licenses. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +SPECS=/usr/src/redhat/SPECS +BUILD=/usr/src/redhat/BUILD + +cd $BUILD +for builddir in * +do + specfile=`cat ${builddir}/ORIGINAL_SPEC_FILE | head -1` + specfile=${SPECS}/$specfile + echo "builddir=${builddir}, specfile=${specfile}" + /root/extract_license "$builddir" "$specfile" > ${builddir}/PROGRAM_LICENSE + license=`cat ${builddir}/PROGRAM_LICENSE | head -1` + echo " $license" + +done + diff --git a/rpm_unpacker b/rpm_unpacker new file mode 100755 index 0000000..1312066 --- /dev/null +++ b/rpm_unpacker @@ -0,0 +1,71 @@ +#!/bin/sh + +# unpacker -- unpacks RPMs into the BUILD directory. +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +SPECS=/usr/src/redhat/SPECS +BUILD=/usr/src/redhat/BUILD + +BUILD_LIST=/root/build_list +OLD_BUILD_LIST=${BUILD_LIST}.old + +echo "lost+found" > $OLD_BUILD_LIST + +cd $SPECS +for specfile in *.spec +do + cd $SPECS + # The "yes" is to give "yes" to "do you want to run this patch" requests - + # in particular this is needed to unpack samba.2.0.7 in Red Hat 7.1. + if yes | rpm -bp $specfile + then + # Things were fine, do nothing. 
+ echo "UNPACKER: Successfully performed rpm -bp $specfile" + else + echo "UNPACKER WARNING - ERROR in rpm -bp $specfile" + fi + + # Find the new BUILD entries, and create cross-references to the old. + cd $BUILD + ls | sort > $BUILD_LIST + CHANGES=`comm -13 $OLD_BUILD_LIST $BUILD_LIST` + anychange="0" + for newbuild in $CHANGES + do + anychange=1 + echo $specfile > ${newbuild}/ORIGINAL_SPEC_FILE + echo "UNPACKER: added build $newbuild from $specfile" + extract_license "$newbuild" "${SPECS}/$specfile" > ${newbuild}/PROGRAM_LICENSE + # For disk space, erase all HTML files. + # If disk space is available, REMOVE THIS LINE: + # find "$newbuild" -type f -name "*.html" -exec rm {} \; + done + if [ $anychange == 0 ] + then + echo "UNPACKER: did not add a build directory for spec file $specfile" + fi + mv $BUILD_LIST $OLD_BUILD_LIST + +done + diff --git a/ruby_count b/ruby_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/ruby_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +generic_count '#' $@ + diff --git a/sed_count b/sed_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/sed_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/sh_count b/sh_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/sh_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/show_filecount b/show_filecount new file mode 100755 index 0000000..95f9707 --- /dev/null +++ b/show_filecount @@ -0,0 +1,58 @@ +#!/bin/sh +# given a list of data subdirs, show how many files of each type +# are in each subdir. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +echo "Dir C C++ Python Asm sh csh Java Lisp Tcl Expect Perl ML Modula3 awk sed make not auto unknown" + +for dir in $@ +do + if [ -d "$dir" ] + then + echo $dir | tr -d '\n '; echo -n " " + wc -l < $dir/ansic_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/cpp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/python_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/asm_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/sh_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/csh_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/java_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/lisp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/tcl_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/exp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/perl_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/ml_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/modula3_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/awk_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/sed_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/makefile_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/not_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/auto_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/unknown_list.dat | tr -d '\n '; echo -n " " + echo + fi +done + + diff --git a/simplecount b/simplecount new file mode 100755 index 0000000..4c9b125 --- /dev/null +++ b/simplecount @@ -0,0 +1,84 @@ +#!/usr/bin/perl -w + +# simplecount +# Usage: simple_count commentstart [-f file] [list_of_files] +# commentstart: string that begins a comment (continuing til end-of-line) +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# prints the number of nonblank lines after stripping comments +# (comments begin with comment-char and continue to end-of-line +# This is naive, and can be fooled by comment chars in strings, but +# that's not a significant problem. 
+ +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$commentchar = shift; +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + open (FILE, $file); + while(<FILE>) { + s/${commentchar}.*//; # Strip away any commments. + if (m/\S/) { + $sloc++ + }; # Nonwhitespace in the line, count it! 
+ } + print "$sloc $file\n"; + $total_sloc += $sloc; + close (FILE); +} diff --git a/sloccount b/sloccount new file mode 100755 index 0000000..9491227 --- /dev/null +++ b/sloccount @@ -0,0 +1,258 @@ +#!/bin/sh + +# This is the front-end program "sloccount", part of the +# SLOCCount tool suite by David A. Wheeler. +# Given a list of directories, compute the SLOC count, +# automatically creating the directory $HOME/.slocdata. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + +# See the SLOCCount documentation if you need +# more details about the license. + +version=2.26 + +if [ "$#" -eq 0 ] +then + echo "Error: You must provide a directory or directories of source code." + exit 1 +fi + +startingdir=`pwd` + + +# "datadir" is some suitable safe place for the data; here's the default: +datadir=${HOME}/.slocdata + +details="n" +cached="n" +append="n" # If "append", then don't delete datadir, just add to it. 
+oneprogram="--oneprogram" +display_results="n" +duplicate_control="" +autogen="" +filecount="" +wide="" +effort_model="" +personcost="" +overhead="" +follow="" +addlang="" +notopfollow="" +showother="" + +# Perl 5.8.0 handles the "LANG" environment variable oddly; +# if it includes ".UTF-8" (which is does in Red Hat Linux 9 and others) +# then it will bitterly complain about ordinary text. +# So, we'll need to filter ".UTF-8" out of LANG. +if [ x"$LANG" != x ] +then + LANG=`echo "$LANG" | sed -e 's/\.UTF-8//'` + export LANG + # echo "New LANG variable: $LANG" +fi + +while [ "$#" -gt 0 ] +do + case "$1" + in + --version) echo "$version" + exit 0;; + --cached) cached="y" + shift;; + --append) append="y" + shift;; + --follow) follow="--follow" + shift;; + --notopfollow) notopfollow="--notopfollow" + shift;; + --datadir) shift + if [ ! -d "$1" ] + then + echo "Error: $1 is not a directory" + exit 1 + fi + cd "$1" + datadir=`pwd` + cd $startingdir + shift;; + --duplicates) duplicate_control="$1" + shift;; + --crossdups) duplicate_control="$1" + shift;; + --autogen) autogen="$1" + shift;; + --multiproject) oneprogram="" + shift;; + --filecount) filecount="$1" + shift;; + --filecounts) filecount="$1" + shift;; + --wide) wide="$1" + shift;; + --details) details="y" + shift;; + --addlang) addlang="$addlang $1 $2" + shift; shift;; + --addlangall) addlang="--addlangall" + shift;; + --showother) showother="--showother" + shift;; + --effort) effort_model="$1 $2 $3" + shift; shift; shift;; + --schedule) schedule_model="$1 $2 $3" + shift; shift; shift;; + --personcost) personcost="$1 $2" + shift; shift;; + --overhead) overhead="$1 $2" + shift; shift;; + --) break;; + --*) echo "Error: no such option $1" + exit 1;; + *) break;; + esac +done + +# --duplicates) duplicate_control="$1" +# --autogen) autogen="$1" +# $follow + +case "$cached" +in + y) + if [ -n "$duplicate_control" -o -n "$autogen" -o -n "$follow" ] + then + echo "Warning: You cannot control what files are selected in 
a cache." + echo "The option '--cached' disables --duplicates, --crossdups," + echo "--autogen, and --follow. Remove the --cached option if you" + echo "are changing what you wish to include in your calculations." + echo + fi + if [ -d "$datadir" ] + then + display_results="y" + else + echo "Sorry, data directory $datadir does not exist." + exit 1 + fi;; + n) # Not cached -- need to compute the results. + + if [ "$append" = "n" ]; then + if [ -r "${datadir}/sloc_noerase" ]; then + echo "ERROR! This data directory is marked as do-not-erase." + echo "Remove the file ${datadir}/sloc_noerase to erase it." + exit 1 + fi + if [ "$#" -eq 0 ]; then + echo "ERROR! No directories for initial analysis supplied." + exit 1 + fi + rm -fr "$datadir" + mkdir "$datadir" + fi + + # Now that "datadir" exists, first test to make sure wc -l works. + wctestfile=${datadir}/.wctestfile + echo "" > $wctestfile + echo "line two" >> $wctestfile + echo "line three" >> $wctestfile + echo "line four" >> $wctestfile + testcount=`wc -l < ${wctestfile} | sed -e 's/ //g'` + if [ "$testcount" -ne 4 ] + then + echo "FAILURE! Your wc program's -l option produces wrong results." + echo "Update your wc (probably in a textutils package) to a correct version." + exit 1 + fi + + + # Split things up if we're given only one directory on the argument line + # and that directory has more than one subdirectory: + split_things_up="n" + if [ "$#" -eq 1 ] + then + count=0 + for x in $1/* + do + if [ -d "$x" ] + then + count=`expr $count + 1` + if [ $count -gt 1 ] + then + split_things_up="y" + break + fi + fi + done + fi + # If we're appending, don't split things up. 
+ if [ "$append" = "y" ] + then + split_things_up="n" + fi + + case $split_things_up + in + y) make_filelists $follow $notopfollow --datadir "$datadir" --skip src "$1"/* || + exit 1 + if [ -d "$1"/src ] + then + make_filelists $notopfollow --datadir "$datadir" --prefix "src_" "$1"/src/* || + exit 1 + fi + ;; + *) make_filelists $follow $notopfollow --datadir "$datadir" "$@" || exit 1 + ;; + esac + + cd $datadir + if echo "Categorizing files." && + break_filelist --duplistfile sloc_hashes $duplicate_control $autogen * && + echo "Computing results." && + compute_all * + then + display_results=y + fi + echo + echo + ;; +esac + +# If we're appending, don't display results. +if [ "$append" = "y" ] +then + display_results="n" + echo "To display results, use the --cached option." +fi + + +case $display_results +in + y) + cd $datadir + case $details + in + y) get_sloc_details * ;; + *) get_sloc $addlang $showother $filecount $oneprogram $effort_model $schedule_model $personcost $overhead * ;; + esac;; +esac + diff --git a/sloccount.1 b/sloccount.1 new file mode 100644 index 0000000..8a5820c --- /dev/null +++ b/sloccount.1 @@ -0,0 +1,235 @@ +'\" +.\" (C) Copyright 2001-2004 David A. Wheeler (dwheeler at dwheeler.com) +.\" +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" This program is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with this program; if not, write to the Free Software +.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +.\" +.\" David A. 
Wheeler's website is http://www.dwheeler.com +.\" +.\" Created Mon Jan 08 23:00:00 2001, David A. Wheeler (dwheeler at dwheeler.com) +.\" +.TH SLOCCOUNT 1 "31 July 2004" "SLOCCount" "SLOCCount" +.SH NAME +sloccount \- count source lines of code (SLOC) +.SH SYNOPSIS +.B sloccount +.RB [ --version ] +.RB [ --cached ] +.RB [ --append ] +[ \fB--datadir\fR \fIdirectory\fR ] +.RB [ --follow ] +.RB [ --duplicates ] +.RB [ --crossdups ] +.RB [ --autogen ] +.RB [ --multiproject ] +.RB [ --filecount ] +.RB [ --wide ] +.RB [ --details ] +[ \fB--effort\fR \fIF\fR \fIE\fR ] +[ \fB--schedule\fR \fIF\fR \fIE\fR ] +[ \fB--personcost\fR \fIcost\fR ] +[ \fB--overhead\fR \fIoverhead\fR ] +[ \fB--addlang\fR \fIlanguage\fR ] +[ \fB--addlangall\fR ] +.RB [ -- ] +.I directories +.SH DESCRIPTION +.PP +sloccount counts the physical source lines of code (SLOC) +contained in descendants of the specified set of directories. +It automatically determines which files are source code files, +and it automatically determines the computer language used in each file. +By default it summarizes the SLOC results and presents various estimates +(such as effort and cost to develop), +but its output can be controlled by various options. +.PP +If you give sloccount a list of two or more directories, the counts +will be broken up according to that list. +There is one important limitation: +the basenames of the directories given as parameters must be different, +because those names are used to group summary information. +Thus, you can't run "sloccount /usr/src/hello /usr/local/src/hello". +This limitation only applies to parameters of sloccount - subdirectories +descended from the top directories can have the same basename. +.PP +If you give sloccount only a single directory, sloccount tries +to automatically find a reasonable breakdown for purposes of reporting +(so it'll produce a useful report). 
+In this case, if the directory has at least +two subdirectories, then those subdirectories will be used as the +breakdown. +If the single directory contains files as well as directories +(or if you give sloccount some files as parameters), those files will +be assigned to the directory "top_dir" so you can tell them apart +from other contents. +Finally, if there's a subdirectory named "src", then that subdirectory is +again broken down, with all the further subdirectories prefixed with "src_". +Thus, if directory "X" has a subdirectory "src", which contains subdirectory +"modules", sloccount will report a separate count for "src_modules". +.PP +sloccount normally considers all descendants of these directories, +though unless told otherwise it ignores symbolic links. +.PP +sloccount is the usual front-end of the package of tools named "SLOCCount". +Note that the name of the entire package has capital letters, while +the name of this front-end program does not. +.PP +sloccount will normally report estimates of schedule time, effort, and +cost, and for single projects it also estimates +the average number of active developers. +These are merely estimates, not etched in stone; you can modify the +parameters used to improve the estimates. + + +.SH OPTIONS +.TP 12 +.BI --version +Report the version number of SLOCCount and immediately exit. +This option can't be usefully combined with any other option. + +.TP 12 +.BI --cached +Do not recalculate; instead, use cached results from a previous execution. +Without the --cached or --append option, +sloccount automatically removes the data directory +and recreates it. + +.TP 12 +.BI --append +Do not remove previous calculations from the data directory; +instead, add the analysis to the current contents of the data directory. + +.TP +.BI --datadir " directory" +Store or use cached data in the given data directory; default value +is "~/.slocdata". + +.TP +.BI --follow +Follow symbolic links. 
+ +.TP +.BI --duplicates +Count all duplicates. +Normally, if files have equal content (as determined using +MD5 hash values), only one is counted. + +.TP +.BI --crossdups +Count duplicates if they occur in different portions of the breakdown. +Thus, if the top directory contains many different projects, and you +want the duplicates in different projects to count in each project, +choose this option. + +.TP +.BI --autogen +Count source code files that appear to be automatically generated. +Normally these are excluded. + +.TP +.BI --multiproject +The different directories represent different projects; +otherwise, it's assumed that all of the source code belongs +to a single project. +This doesn't change the total number of files or SLOC values, but +it does affect the effort and schedule estimates. +Given this option, +effort is computed separately for each project (and then summed), +and the schedule is the estimated schedule of the largest project. + +.TP +.BI --filecount +Display counts of files instead of SLOC. + +.TP +.BI --wide +Display in the "wide" (tab-separated) format. + +.TP +.BI --details +Display details, that is, results for every source code file. + +.TP +.BI --effort " F E" +Change the factor and exponent for the effort model. +Effort (in person-months) is computed as F*(SLOC/1000)^E. + +.TP +.BI --schedule " F E" +Change the factor and exponent for the schedule model. +Schedule (in months) is computed as F*(effort)^E. + +.TP +.BI --personcost " cost" +Change the average annual salary to +.IR cost . + +.TP +.BI --overhead " overhead" +Change the overhead value to +.IR overhead . +Estimated cost is computed as effort * personcost * overhead. + +.TP +.BI --addlang " language" +Add a language not considered by default to be a ``language'' to be +reported. +Currently the only legal values for language are "makefile", "sql", +and "html". 
+These files +are not normally included in the SLOC counts, although their SLOCs are +internally calculated and they are shown in the file counts. +If you want to include more than one such language, do it by +passing --addlang more than once, e.g., --addlang makefile --addlang sql. + +.TP +.BI --addlangall +Add all languages not normally included in final reports. + +.SH "NOTES" +As with many other programs using Unix-like options, +directories whose names begin with a +dash (``-'') can be misinterpreted as options. +If the directories to be analyzed might begin with a dash, use the +double-dash (``-\ -'') to indicate the end of the option list +before listing the directories. + +.SH "BUGS" +Filenames with embedded newlines (in the directories or their +descendants) won't be handled correctly; they will be interpreted +as separate filenames where the newlines are inserted. +An attacker could prevent sloccount from working by +creating filenames of the form /normal/directory ... NEWLINE/dev/zero. +Such filenames are exceedingly rare in source code because they're a pain +to work with using other tools, too. +Future versions of sloccount may internally use NUL-separated filenames +(like GNU find's -print0 command) to fix this. + +There are many more languages not yet handled by SLOCCount. + +SLOCCount only reports physical source lines of code. +It would be +very useful if it could also report logical lines of code, and perhaps +other common metrics such as McCabe's complexity measures +and complexity density (complexity/SLOC for each function or procedure). + + +.SH "SEE ALSO" +See the SLOCCount website at http://www.dwheeler.com/sloccount. +Note that more detailed documentation is available both on the website +and with the SLOCCount package. + +.SH AUTHOR +David A. Wheeler (dwheeler@dwheeler.com). 
+ diff --git a/sloccount.1.gz b/sloccount.1.gz Binary files differnew file mode 100644 index 0000000..33d29e9 --- /dev/null +++ b/sloccount.1.gz diff --git a/sloccount.html b/sloccount.html new file mode 100644 index 0000000..233ae9a --- /dev/null +++ b/sloccount.html @@ -0,0 +1,2464 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SLOCCount User's Guide</title> +</head> +<body bgcolor="#FFFFFF"> +<center> +<font size="+3"><b><span class="title">SLOCCount User's Guide</span></b></font> +<br> +<font size="+2"><span class="author">by David A. Wheeler (dwheeler, at, dwheeler.com)</span></font> +<br> +<font size="+2"><span class="pubdate">August 1, 2004</span></font> +<br> +<font size="+2"><span class="version">Version 2.26</span></font> +</center> +<p> +<h1><a name="introduction">Introduction</a></h1> +<p> +SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems. +Thus, SLOCCount is a "software metrics tool" or "software measurement tool". +SLOCCount was developed by David A. Wheeler, +originally to count SLOC in a GNU/Linux distribution, but it can be +used for counting the SLOC of arbitrary software systems. +<p> +SLOCCount is known to work on Linux systems, and has been tested +on Red Hat Linux versions 6.2, 7, and 7.1. +SLOCCount should run on many other Unix-like systems (if Perl is installed), +in particular, I would expect a *BSD system to work well. +Windows users can run sloccount by first installing +<a href="http://sources.redhat.com/cygwin">Cygwin</a>. +SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install +or use on Windows, but it works. +Of course, feel free to upgrade to an open source Unix-like system +(such as Linux or *BSD) instead :-). +<p> +SLOCCount can count physical SLOC for a wide number of languages. 
+Listed alphabetically, they are +Ada, Assembly (for many machines and assemblers), +awk (including gawk and nawk), +Bourne shell (and relatives such as bash, ksh, zsh, and pdksh), +C, C++, C# (also called C-sharp or cs), C shell (including tcsh), +COBOL, Expect, Fortran (including Fortran 90), Haskell, +Java, lex (including flex), +LISP (including Scheme), +makefiles (though they aren't usually shown in final reports), +Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed, +SQL (normally not shown), +TCL, and Yacc. +It can gracefully handle awkward situations in many languages, +for example, it can determine the +syntax used in different assembly language files and adjust appropriately, +it knows about Python's use of string constants as comments, and it +can handle various Perl oddities (e.g., perlpods, here documents, +and Perl's _ _END_ _ marker). +It even has a "generic" SLOC counter that you may be able to use count the +SLOC of other languages (depending on the language's syntax). +<p> +SLOCCount can also take a large list of files and automatically categorize +them using a number of different heuristics. +The heuristics automatically determine if a file +is a source code file or not, and if so, which language it's written in. +For example, +it knows that ".pc" is usually a C source file for an Oracle preprocessor, +but it can detect many circumstances where it's actually a file about +a "PC" (personal computer). +For another example, it knows that ".m" is the standard extension for +Objective-C, but it will check the file contents to +see if really is Objective-C. +It will even examine file headers to attempt to accurately determine +the file's true type. +As a result, you can analyze large systems completely automatically. +<p> +Finally, SLOCCount has some report-generating tools +to collect the data generated, +and then present it in several different formats and sorted different ways. 
+The report-generating tool can also generate simple tab-separated files +so data can be passed on to other analysis tools (such as spreadsheets +and database systems). +<p> +SLOCCount will try to quickly estimate development time and effort given only +the lines of code it computes, using the original Basic COCOMO model. +This estimate can be improved if you can give more information about the project. +See the +<a href="#cocomo">discussion below about COCOMO, including intermediate COCOMO</a>, +if you want to improve the estimates by giving additional information about +the project. +<p> +SLOCCount is open source software/free software (OSS/FS), +released under the GNU General Public License (GPL), version 2; +see the <a href="#license">license below</a>. +The master web site for SLOCCount is +<a href="http://www.dwheeler.com/sloccount">http://www.dwheeler.com/sloccount</a>. +You can learn a lot about SLOCCount by reading the paper that caused its +creation, available at +<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>. +Feel free to see my master web site at +<a href="http://www.dwheeler.com">http://www.dwheeler.com</a>, which has +other material such as the +<a href="http://www.dwheeler.com/secure-programs"><i>Secure Programming +for Linux and Unix HOWTO</i></a>, +my <a href="http://www.dwheeler.com/oss_fs_refs.html">list of +OSS/FS references</a>, and my paper +<a href="http://www.dwheeler.com/oss_fs_why.html"><i>Why OSS/FS? Look at +the Numbers!</i></a> +Please send improvements by email +to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the +commas, remove the spaces, and change the word "at" into the at symbol). +<p> +The following sections first give a "quick start" +(discussing how to use SLOCCount once it's installed), +discuss basic SLOCCount concepts, +how to install it, how to set your PATH, +how to install source code on RPM-based systems if you wish, and +more information on how to use the "sloccount" front-end. 
+This is followed by material for advanced users:
+how to use SLOCCount tools individually (for when you want more control
+than the "sloccount" tool gives you), designer's notes,
+the definition of SLOC, and miscellaneous notes.
+The last sections state the license used (GPL) and give
+hints on how to submit changes to SLOCCount (if you decide to make changes
+to the program).
+
+
+<p>
+<h1><a name="quick-start">Quick Start</a></h1>
+<p>
+Once you've installed SLOCCount (discussed below),
+you can measure an arbitrary program by typing everything
+after the dollar sign into a terminal session:
+<pre>
+ $ sloccount <i>topmost-source-code-directory</i>
+</pre>
+<p>
+The directory listed and all its descendants will be examined.
+You'll see output while it calculates,
+culminating with physical SLOC totals and
+estimates of development time, schedule, and cost.
+If the directory contains a set of directories, each of which is
+a different project developed independently,
+use the "--multiproject" option so the effort estimations
+can correctly take this into account.
+<p>
+You can redisplay the data different ways by using the "--cached"
+option, which skips the calculation stage and re-prints previously
+computed information.
+You can use other options to control what's displayed:
+"--filecount" shows counts of files instead of SLOC, and
+"--details" shows the detailed information about every source code file.
+So, to display all the details of every file once you've previously
+calculated the results, just type:
+<pre>
+ sloccount --cached --details
+</pre>
+<p>
+You'll notice that the default output ends with a request.
+If you use this data (e.g., in a report), please
+credit that data as being "generated using 'SLOCCount' by David A. Wheeler."
+I make no money from this program, so at least please give me some credit.
+<p>
+SLOCCount tries to ignore all automatically generated files, but its
+heuristics to detect this are necessarily imperfect (after all, even humans
+sometimes have trouble determining if a file was automatically generated).
+If possible, try to clean out automatically generated files from
+the source directories --
+in many situations "make clean" does this.
+<p>
+There's more to SLOCCount than this, but first we'll need to
+explain some basic concepts, then we'll discuss other options
+and advanced uses of SLOCCount.
+
+<p>
+<h1><a name="concepts">Basic Concepts</a></h1>
+<p>
+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines".
+More formally, physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+In SLOCCount, there are 3 different directories:
+<ol>
+<li>The "source code directory", a directory containing the source code
+ being measured
+ (possibly in recursive subdirectories). The directories immediately
+ contained in the source code directory will normally be counted separately,
+ so it helps if your system is designed so that this top set of directories
+ roughly represents the system's major components.
+ If it doesn't, there are various tricks you can use to group source
+ code into components, but it's more work.
+ You don't need write access to the source code directory, but
+ you do need read access to all files, and read and search (execute) access
+ to all subdirectories.
+<li>The "bin directory", the directory containing the SLOCCount executables.
+ By default, installing the program creates a subdirectory + named "sloccount-VERSION" which is the bin directory. + The bin directory must be part of your PATH. +<li>The "data directory", which stores the analysis results. + When measuring programs using "sloccount", by default + this is the directory ".slocdata" inside your home directory. + When you use the advanced SLOCCount tools directly, + in many cases this must be your "current" directory. + Inside the data directory are "data directory children" - these are + subdirectories that contain a file named "filelist", and each child + is used to represent a different project or a different + major component of a project. +</ol> +<p> +SLOCCount can handle many different programming languages, and separate +them by type (so you can compare the use of each). +Here is the set of languages, sorted alphabetically; +common filename extensions are in +parentheses, with SLOCCount's ``standard name'' for the language +listed in brackets: +<ol> +<li>Ada (.ada, .ads, .adb, .pad) [ada] +<li>Assembly for many machines and assemblers (.s, .S, .asm) [asm] +<li>awk (.awk) [awk] +<li>Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh] +<li>C (.c, .pc, .ec, .ecp) [ansic] +<li>C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp] +<li>C# (.cs) [cs] +<li>C shell including tcsh (.csh) [csh] +<li>COBOL (.cob, .cbl, .COB, .CBL) [cobol] +<li>Expect (.exp) [exp] +<li>Fortran 77 (.f, .f77, .F, .F77) [fortran] +<li>Fortran 90 (.f90, .F90) [f90] +<li>Haskell (.hs, .lhs) [haskell]; deals with both types of literate files. 
+<li>Java (.java) [java] +<li>lex (.l) [lex] +<li>LISP including Scheme (.cl, .el, .scm, .lsp, .jl) [lisp] +<li>makefiles (makefile) [makefile] +<li>ML (.ml, .ml3) [ml] +<li>Modula3 (.m3, .mg, .i3, .ig) [modula3] +<li>Objective-C (.m) [objc] +<li>Pascal (.p, .pas) [pascal] +<li>Perl (.pl, .pm, .perl) [perl] +<li>PHP (.php, .php[3456], .inc) [php] +<li>Python (.py) [python] +<li>Ruby (.rb) [ruby] +<li>sed (.sed) [sed] +<li>sql (.sql) [sql] +<li>TCL (.tcl, .tk, .itk) [tcl] +<li>Yacc (.y) [yacc] +</ol> + +<p> +<h1><a name="installing">Installing SLOCCount</a></h1> +<p> +Obviously, before using SLOCCount you'll need to install it. +SLOCCount depends on other programs, in particular perl, bash, +a C compiler (gcc will do), and md5sum +(you can get a useful md5sum program in the ``textutils'' package +on many Unix-like systems), so you'll need to get them installed +if they aren't already. +<p> +If your system uses RPM version 4 or greater to install software +(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM +and install it using a normal installation command; from the text line +you can use: +<pre> + rpm -Uvh sloccount*.rpm +</pre> +<p> +Everyone else will need to install from a tar file, and Windows users will +have to install Cygwin before installing sloccount. +<p> +If you're using Windows, you'll need to first install +<a href="http://sources.redhat.com/cygwin">Cygwin</a>. +By installing Cygwin, you'll install an environment and a set of +open source Unix-like tools. +Cygwin essentially creates a Unix-like environment in which sloccount can run. +You may be able to run parts of sloccount without Cygwin, in particular, +the perl programs should run in the Windows port of Perl, but you're +on your own - many of the sloccount components expect a Unix-like environment. +If you want to install Cygwin, go to the +<a href="http://sources.redhat.com/cygwin">Cygwin main page</a> +and install it. 
+If you're using Cygwin, <b>install it to use Unix newlines, not +DOS newlines</b> - DOS newlines will cause odd errors in SLOCCount +(and probably other programs, too). +I have only tested a "full" Cygwin installation, so I suggest installing +everything. +If you're short on disk space, at least install +binutils, bash, fileutils, findutils, +gcc, grep, gzip, make, man, perl, readline, +sed, sh-utils, tar, textutils, unzip, and zlib; +you should probably install vim as well, +and there may be other dependencies as well. +By default Cygwin will create a directory C:\cygwin\home\NAME, +and will set up the ability to run Unix programs +(which will think that the same directory is called /home/NAME). +Now double-click on the Cygwin icon, or select from the Start menu +the selection Programs / Cygnus Solutions / Cygwin Bash shell; +you'll see a terminal screen with a Unix-like interface. +Now follow the instructions (next) for tar file users. +<p> +If you're installing from the tar file, download the file +(into your home directory is fine). +Unpacking the file will create a subdirectory, so if you want the +unpacked subdirectory to go somewhere special, "cd" to where you +want it to go. +Most likely, your home directory is just fine. +Now gunzip and untar SLOCCount (the * replaces the version #) by typing +this at a terminal session: +<pre> + gunzip -c sloccount*.tar.gz | tar xvf - +</pre> +Replace "sloccount*.tar.gz" shown above +with the full path of the downloaded file, wherever that is. +You've now created the "bin directory", which is simply the +"sloccount-VERSION" subdirectory created by the tar command +(where VERSION is the version number). +<p> +Now you need to compile the few compiled programs in the "bin directory" so +SLOCCount will be ready to go. +First, cd into the newly-created bin directory, by typing: +<pre> + cd sloccount* +</pre> +<p> +You may then need to override some installation settings. 
+You can do this by editing the supplied makefile, or alternatively,
+by providing options to "make" whenever you run make.
+The supplied makefile assumes your C compiler is named "gcc", which
+is true for most Linux systems, *BSD systems, and Windows systems using Cygwin.
+If this isn't true, you'll need to set
+the "CC" variable to the correct value (e.g., "cc").
+You can also modify where the files are stored; this variable is
+called PREFIX and its default is /usr/local
+(older versions of sloccount defaulted to /usr).
+<p>
+If you're using Windows and Cygwin, you
+<b>must</b> override one of the installation
+settings, EXE_SUFFIX, for installation to work correctly.
+One way to set this value is to edit the "makefile" file so that
+the line beginning with "EXE_SUFFIX" reads as follows:
+<pre>
+ EXE_SUFFIX=.exe
+</pre>
+If you're using Cygwin and you choose to modify the "makefile", you
+can use any text editor on the Cygwin side, or you can use a
+Windows text editor if it can read and write Unix-formatted text files.
+Cygwin users are free to use vim, for example.
+If you're installing into your home directory and using the default locations,
+Windows text editors will see the makefile as file
+C:\cygwin\home\NAME\sloccount-VERSION\makefile.
+Note that the Windows "Notepad" application doesn't work well, because it's not
+able to handle Unix text files correctly.
+Since this can be quite a pain, Cygwin users may instead decide to override
+the makefile values during installation.
+<p>
+Finally, compile the few compiled programs in it by typing "make":
+<pre>
+ make
+</pre>
+If you didn't edit the makefile in the previous step, you
+need to provide options to make invocations to set the correct values.
+This is done by simply saying (after "make") the name of the variable,
+an equal sign, and its correct value.
+Thus, to compile the program on a Windows system using Cygwin, you can
+skip modifying the makefile file by typing this instead of just "make":
+<pre>
+ make EXE_SUFFIX=.exe
+</pre>
+<p>
+If you want, you can install sloccount for system-wide use without
+using the RPM version.
+Windows users using Cygwin should probably do this, particularly
+if they chose a "local" installation.
+To do this, first log in as root (Cygwin users don't need to do this
+for local installation).
+Edit the makefile to match your system's conventions, if necessary,
+and then type "make install":
+<pre>
+ make install
+</pre>
+If you need to set some make options, remember to do that here too.
+If you use "make install", you can uninstall it later using
+"make uninstall".
+Installing sloccount for system-wide use is optional;
+SLOCCount works without a system-wide installation.
+However, if you don't install sloccount system-wide, you'll need to
+set up your PATH variable; see the section on
+<a href="#path">setting your path</a>.
+<p>
+A note for Cygwin users (and some others): some systems, including Cygwin,
+don't set up the environment quite right and thus can't display the manual
+pages as installed.
+The problem is that they forget to search /usr/local/share/man for
+manual pages.
+If you want to read the installed manual pages, type this
+into a Bourne-like shell:
+<pre>
+ MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+ export MANPATH
+</pre>
+Or, if you use a C shell:
+<pre>
+ setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+</pre>
+From then on, you'll be able to view the reference manual pages
+by typing "man sloccount" (or by using whatever manual page display system
+you prefer).
+<p>
+
+<p>
+<h1><a name="installing-source">Installing The Source Code To Measure</a></h1>
+<p>
+Obviously, you must install the software source code you're counting,
+so somehow you must create the "source directory"
+with the source code to measure.
+You must also make sure that permissions are set so the software can +read these directories and files. +<p> +For example, if you're trying to count the SLOC for an RPM-based Linux system, +install the software source code by doing the following as root +(which will place all source code into the source directory +/usr/src/redhat/BUILD): +<ol> +<li>Install all source rpm's: +<pre> + mount /mnt/cdrom + cd /mnt/cdrom/SRPMS + rpm -ivh *.src.rpm +</pre> +<li>Remove RPM spec files you don't want to count: +<pre> + cd ../SPECS + (look in contents of spec files, removing what you don't want) +</pre> +<li>build/prep all spec files: +<pre> + rpm -bp *.spec +</pre> +<li>Set permissions so the source files can be read by all: +<pre> + chmod -R a+rX /usr/src/redhat/BUILD +</pre> +</ol> +<p> +Here's an example of how to download source code from an +anonymous CVS server. +Let's say you want to examine the source code in GNOME's "gnome-core" +directory, as stored at the CVS server "anoncvs.gnome.org". +Here's how you'd do that: +<ol> +<li>Set up site and login parameters: +<pre> + export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome' +</pre> +<li>Log in: +<pre> + cvs login +</pre> +<li>Check out the software (copy it to your local directory), using +mild compression to save on bandwidth: +<pre> + cvs -z3 checkout gnome-core +</pre> +</ol> +<p> +Of course, if you have a non-anonymous account, you'd set CVSROOT +to reflect this. For example, to log in using the "pserver" +protocol as ACCOUNT_NAME, do: +<pre> + export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome' +</pre> +<p> +You may need root privileges to install the source code and to give +another user permission to read it, but please avoid running the +sloccount program as root. +Although I know of no specific reason this would be a problem, +running any program as root turns off helpful safeguards. 
+<p> +Although SLOCCount tries to detect (and ignore) many cases where +programs are automatically generated, these heuristics are necessarily +imperfect. +So, please don't run any programs that generate other programs - just +do enough to get the source code prepared for counting. +In general you shouldn't run "make" on the source code, and if you have, +consider running "make clean" or "make really_clean" on the source code first. +It often doesn't make any difference, but identifying those circumstances +is difficult. +<p> +SLOCCount will <b>not</b> automatically uncompress files that are +compressed/archive files (such as .zip, .tar, or .tgz files). +Often such files are just "left over" old versions or files +that you're already counting. +If you want to count the contents of compressed files, uncompress them first. +<p> +SLOCCount also doesn't delve into files using "literate programming" +techniques, in part because there are too many incompatible formats +that implement it. +Thus, run the tools to extract the code from the literate programming files +before running SLOCCount. Currently, the only exception to this rule is +Haskell. + + +<h1><a name="path">Setting your PATH</a></h1> +Before you can run SLOCCount, you'll need to make sure +the SLOCCount "bin directory" is in your PATH. +If you've installed SLOCCount in a system-wide location +such as /usr/bin, then you needn't do more; the RPMs and "make install" +commands essentially do this. +<p> +Otherwise, in Bourne-shell variants, type: +<pre> + PATH="$PATH:<i>the directory with SLOCCount's executable files</i>" + export PATH +</pre> +Csh users should instead type: +<pre> + setenv PATH "$PATH:<i>the directory with SLOCCount's executable files</i>" +</pre> + +<h1><a name="using-basics">Using SLOCCount: The Basics</a></h1> + +Normal use of SLOCCount is very simple. +In a terminal window just type "sloccount", followed by a +list of the source code directories to count. 
+If you give it only a single directory, SLOCCount tries to be +a little clever and break the source code into +subdirectories for purposes of reporting: +<ol> +<li>if directory has at least +two subdirectories, then those subdirectories will be used as the +breakdown (see the example below). +<li>If the single directory contains files as well as directories +(or if you give sloccount some files as parameters), those files will +be assigned to the directory "top_dir" so you can tell them apart +from other directories. +<li>If there's a subdirectory named "src", then that subdirectory is again +broken down, with all the further subdirectories prefixed with "src_". +So if directory "X" has a subdirectory "src", which contains subdirectory +"modules", the program will report a separate count from "src_modules". +</ol> +In the terminology discussed above, each of these directories would become +"data directory children." +<p> +You can also give "sloccount" a list of directories, in which case the +report will be broken down by these directories +(make sure that the basenames of these directories differ). +SLOCCount normally considers all descendants of these directories, +though unless told otherwise it ignores symbolic links. +<p> +This is all easier to explain by example. +Let's say that we want to measure Apache 1.3.12 as installed using an RPM. 
+Once it's installed, we just type: +<pre> + sloccount /usr/src/redhat/BUILD/apache_1.3.12 +</pre> +The output we'll see shows status reports while it analyzes things, +and then it prints out: + +<pre> +SLOC Directory SLOC-by-Language (Sorted) +24728 src_modules ansic=24728 +19067 src_main ansic=19067 +8011 src_lib ansic=8011 +5501 src_os ansic=5340,sh=106,cpp=55 +3886 src_support ansic=2046,perl=1712,sh=128 +3823 src_top_dir sh=3812,ansic=11 +3788 src_include ansic=3788 +3469 src_regex ansic=3407,sh=62 +2783 src_ap ansic=2783 +1378 src_helpers sh=1345,perl=23,ansic=10 +1304 top_dir sh=1304 +104 htdocs perl=104 +31 cgi-bin sh=24,perl=7 +0 icons (none) +0 conf (none) +0 logs (none) + + +ansic: 69191 (88.85%) +sh: 6781 (8.71%) +perl: 1846 (2.37%) +cpp: 55 (0.07%) + + +Total Physical Source Lines of Code (SLOC) = 77873 +Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36) + (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05)) +Estimated Schedule in Years (Months) = 1.65 (19.82) + (Basic COCOMO model, Months = 2.5 * (person-months**0.38)) +Estimated Average Number of Developers (Effort/Schedule) = 11.72 +Total Estimated Cost to Develop = $ 2615760 + (average salary = $56286/year, overhead = 2.4). + +Please credit this data as "generated using 'SLOCCount' by David A. Wheeler." +</pre> +<p> +Interpreting this should be straightforward. +The Apache directory has several subdirectories, including "htdocs", "cgi-bin", +and "src". +The "src" directory has many subdirectories in it +("modules", "main", and so on). +Code files directly +contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12 +is labelled "top_dir", while +code directly contained in the src subdirectory is labelled "src_top_dir". +Code in the "src/modules" directory is labelled "src_modules" here. +The output shows each major directory broken +out, sorted from largest to smallest. 
+Thus, the "src/modules" directory had the most code of the directories, +24728 physical SLOC, all of it in C. +The "src/helpers" directory had a mix of shell, perl, and C; note that +when multiple languages are shown, the list of languages in that child +is also sorted from largest to smallest. +<p> +Below the per-component set is a list of all languages used, +with their total SLOC shown, sorted from most to least. +After this is the total physical SLOC (77,873 physical SLOC in this case). +<p> +Next is an estimation of the effort and schedule (calendar time) +it would take to develop this code. +For effort, the units shown are person-years (with person-months +shown in parentheses); for schedule, total years are shown first +(with months in parentheses). +When invoked through "sloccount", the default assumption is that all code is +part of a single program; the "--multiproject" option changes this +to assume that all top-level components are independently developed +programs. +When "--multiproject" is invoked, each project's efforts are estimated +separately (and then summed), and the schedule estimate presented +is the largest estimated schedule of any single component. +<p> +By default the "Basic COCOMO" model is used for estimating +effort and schedule; this model +includes design, code, test, and documentation time (both +user/admin documentation and development documentation). +<a href="#cocomo">See below for more information on COCOMO</a> +as it's used in this program. +<p> +Next are several numbers that attempt to estimate what it would have cost +to develop this program. +This is simply the amount of effort, multiplied by the average annual +salary and by the "overhead multiplier". +The default annual salary is +$56,286 per year; this value was from the +<i>ComputerWorld</i>, September 4, 2000's Salary Survey +of an average U.S. programmer/analyst salary in the year 2000. 
+You might consider using other numbers +(<i>ComputerWorld</i>'s September 3, 2001 Salary Survey found +an average U.S. programmer/analyst salary making $55,100, senior +systems programmers averaging $68,900, and senior systems analysts averaging +$72,300). + +<p> +Overhead is much harder to estimate; I did not find a definitive source +for information on overheads. +After informal discussions with several cost analysts, +I determined that an overhead of 2.4 +would be representative of the overhead sustained by +a typical software development company. +As discussed in the next section, you can change these numbers too. + +<p> +You may be surprised by the high cost estimates, but remember, +these include design, coding, testing, documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high figures. +If you only wanted to know the costs of the coding, you'd need to get +those figures. + + +<p> +Note that if any top-level directory has a file named PROGRAM_LICENSE, +that file is assumed to contain the name of the license +(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on). +If there is at least one such file, sloccount will also report statistics +on licenses. + +<p> +Note: sloccount internally uses MD5 hashes to detect duplicate files, +and thus needs some program that can compute MD5 hashes. +Normally it will use "md5sum" (available, for example, as a GNU utility). +If that doesn't work, it will try to use "md5" and "openssl", and you may +see error messages in this format: +<pre> + Can't exec "md5sum": No such file or directory at + /usr/local/bin/break_filelist line 678, <CODE_FILE> line 15. + Can't exec "md5": No such file or directory at + /usr/local/bin/break_filelist line 678, <CODE_FILE> line 15. 
+</pre> +You can safely ignore these error messages; these simply show that +SLOCCount is probing for a working program to compute MD5 hashes. +For example, Mac OS X users normally don't have md5sum installed, but +do have md5 installed, so they will probably see the first error +message (because md5sum isn't available), followed by a note that a +working MD5 program was found. + + +<h1><a name="options">Options</a></h1> +The program "sloccount" has a large number of options +so you can control what is selected for counting and how the +results are displayed. +<p> +There are several options that control which files are selected +for counting: +<pre> + --duplicates Count all duplicate files as normal files + --crossdups Count duplicate files if they're in different data directory + children. + --autogen Count automatically generated files + --follow Follow symbolic links (normally they're ignored) + --addlang Add languages to be counted that normally aren't shown. + --append Add more files to the data directory +</pre> +Normally, files which have exactly the same content are counted only once +(data directory children are counted alphabetically, so the child +"first" in the alphabet will be considered the owner of the master copy). +If you want them all counted, use "--duplicates". +Sometimes when you use sloccount, each directory represents a different +project, in which case you might want to specify "--crossdups". +The program tries to reject files that are automatically generated +(e.g., a C file generated by bison), but you can disable this as well. +You can use "--addlang" to show makefiles and SQL files, which aren't +usually counted. +<p> +Possibly the most important option is "--cached". +Normally, when sloccount runs, it computes a lot of information and +stores this data in a "data directory" (by default, "~/.slocdata"). +The "--cached" option tells sloccount to use data previously computed, +greatly speeding up use once you've done the computation once. 
+The "--cached" option can't be used along with the options used to +select what files should be counted. +You can also select a different data directory by using the +"--datadir" option. +<p> +There are many options for controlling the output: +<pre> + --filecount Show counts of files instead of SLOC. + --details Present details: present one line per source code file. + --wide Show "wide" format. Ignored if "--details" selected + --multiproject Assume each directory is for a different project + (this modifies the effort estimation calculations) + --effort F E Change the effort estimation model, so that it uses + F as the factor and E as the exponent. + --schedule F E Change the schedule estimation model, so that it uses + F as the factor and E as the exponent. + --personcost P Change the average annual salary to P. + --overhead O Change the annual overhead to O. + -- End of options +</pre> +<p> +Basically, the first time you use sloccount, if you're measuring +a set of projects (not a single project) you might consider +using "--crossdups" instead of the defaults. +Then, you can redisplay data quickly by using "--cached", +combining it with options such as "--filecount". +If you want to send the data to another tool, use "--details". +<p> +If you're measuring a set of projects, you probably ought to pass +the option "--multiproject". +When "--multiproject" is used, efforts are computed for each component +separately and summed, and the time estimate used is the maximum +single estimated time. +<p> +The "--details" option dumps the available data in 4 columns, +tab-separated, where each line +represents a source code file in the data directory children identified. +The first column is the SLOC, the second column is the language type, +the third column is the name of the data directory child +(as it was given to get_sloc_details), +and the last column is the absolute pathname of the source code file. 
+You can then pipe this output to "sort" or some other tool for further +analysis (such as a spreadsheet or RDBMS). +<p> +You can change the parameters used to estimate effort using "--effort". +For example, if you believe that in the environment being used +you can produce 2 KSLOC/month scaling linearly, then +that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC, +and the exponent for effort is 1 (linear). +Thus, you can use "--effort 0.5 1". +<p> +You can also set the annual salary and overheads used to compute +estimated development cost. +While "$" is shown, there's no reason you have to use dollars; +the unit of development cost is the same unit as the unit used for +"--personcost". + +<h1><a name="cocomo">More about COCOMO</a></h1> + +<p> +By default SLOCCount uses a very simple estimating model for effort and schedule: +the basic COCOMO model in the "organic" mode (modes are more fully discussed below). +This model estimates effort and schedule, including design, code, test, +and documentation time (both user/admin documentation and development documentation). +Basic COCOMO is a nice simple model, and it's used as the default because +it doesn't require any information about the code other than the SLOC count +already computed. +<p> +However, basic COCOMO's accuracy is limited for the same reason - +basic COCOMO doesn't take a number of important factors into account. +If you have the necessary information, you can improve the model's accuracy +by taking these factors into account. You can at least quickly determine +if the right "mode" is being used to improve accuracy. You can also +use the "Intermediate COCOMO" and "Detailed COCOMO" models that take more +factors into account, and are likely to produce more accurate estimates as +a result. Take these estimates as just that - estimates - they're not grand truths. 
+If you have the necessary information,
+you can improve the model's accuracy by taking these factors into account, and
+pass this additional information to sloccount using its
+"--effort" and "--schedule" options (as discussed in
+<a href="#options">options</a>).
+<p>
+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "embedded", or "semidetached".
+Most software is "organic" (which is why it's the default).
+Here are simple definitions of these modes:
+<ul>
+<li>Organic: Relatively small software teams develop software in a highly
+familiar, in-house environment. It has a generally stable development
+environment, minimal need for innovative algorithms, and requirements can
+be relaxed to avoid extensive rework.</li>
+<li>Semidetached: This is an intermediate
+step between organic and embedded. This is generally characterized by reduced
+flexibility in the requirements.</li>
+<li>Embedded: The project must operate
+within tight (hard-to-meet) constraints, and requirements
+and interface specifications are often non-negotiable.
+The software will be embedded in a complex environment that the
+software must deal with as-is.</li>
+</ul>
+By default, SLOCCount uses the basic COCOMO model in the organic mode.
+For the basic COCOMO model, here are the critical factors for --effort and --schedule:<br>
+<ul>
+<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+<li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+<li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li>
+</ul>
+Thus, if you want to use SLOCCount but the project is actually semidetached,
+you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35"
+to get a more accurate estimate.
+<br>
+For more accurate estimates, you can use the intermediate COCOMO models.
+For intermediate COCOMO, use the following figures:<br> +<ul> + <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li> + <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li> + <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li> +</ul> +The intermediate COCOMO values for schedule are exactly the same as the basic +COCOMO model; the starting effort values are not quite the same, as noted +in Boehm's book. However, in the intermediate COCOMO model, you don't +normally use the effort factors as-is, you use various corrective factors +(called cost drivers). To use these corrections, you consider +all the cost drivers, determine what best describes them, +and multiply their corrective values by the effort base factor. +The result is the final effort factor. +Here are the cost drivers (from Boehm's book, table 8-2 and 8-3): + +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + <tbody> + <tr> + <th rowspan="1" colspan="2">Cost Drivers + </th> + <th rowspan="1" colspan="6">Ratings + </th> + </tr> + <tr> + <th>ID + </th> + <th>Driver Name + </th> + <th>Very Low + </th> + <th>Low + </th> + <th>Nominal + </th> + <th>High + </th> + <th>Very High + </th> + <th>Extra High + </th> + </tr> + <tr> + <td>RELY + </td> + <td>Required software reliability + </td> + <td>0.75 (effect is slight inconvenience) + </td> + <td>0.88 (easily recovered losses) + </td> + <td>1.00 (recoverable losses) + </td> + <td>1.15 (high financial loss) + </td> + <td>1.40 (risk to human life) + </td> + <td> + </td> + </tr> + <tr> + <td>DATA + </td> + <td>Database size + </td> + <td> + </td> + <td>0.94 (database bytes/SLOC < 10) + </td> + <td>1.00 (D/S between 10 and 100) + </td> + <td>1.08 (D/S between 100 and 1000) + </td> + <td>1.16 (D/S > 1000) + </td> + <td> + </td> + </tr> + <tr> + <td>CPLX + </td> + <td>Product complexity + </td> + <td>0.70 
(mostly straightline code, simple arrays, simple expressions) + </td> + <td>0.85 + </td> + <td>1.00 + </td> + <td>1.15 + </td> + <td>1.30 + </td> + <td>1.65 (microcode, multiple resource scheduling, device timing dependent coding) + </td> + </tr> + <tr> + <td>TIME + </td> + <td>Execution time constraint + </td> + <td> + </td> + <td> + </td> + <td>1.00 (<50% use of available execution time) + </td> + <td>1.11 (70% use) + </td> + <td>1.30 (85% use) + </td> + <td>1.66 (95% use) + </td> + </tr> + <tr> + <td>STOR + </td> + <td>Main storage constraint + </td> + <td> + </td> + <td> + </td> + <td>1.00 (<50% use of available storage)</td> + <td>1.06 (70% use) + </td> + <td>1.21 (85% use) + </td> + <td>1.56 (95% use) + </td> + </tr> + <tr> + <td>VIRT + </td> + <td>Virtual machine (HW and OS) volatility + </td> + <td> + </td> + <td>0.87 (major change every 12 months, minor every month) + </td> + <td>1.00 (major change every 6 months, minor every 2 weeks)</td> + <td>1.15 (major change every 2 months, minor changes every week) + </td> + <td>1.30 (major changes every 2 weeks, minor changes every 2 days) + </td> + <td> + </td> + </tr> + <tr> + <td>TURN + </td> + <td>Computer turnaround time + </td> + <td> + </td> + <td>0.87 (interactive) + </td> + <td>1.00 (average turnaround < 4 hours) + </td> + <td>1.07 + </td> + <td>1.15 + </td> + <td> + </td> + </tr> + <tr> + <td>ACAP + </td> + <td>Analyst capability + </td> + <td>1.46 (15th percentile) + </td> + <td>1.19 (35th percentile) + </td> + <td>1.00 (55th percentile) + </td> + <td>0.86 (75th percentile) + </td> + <td>0.71 (90th percentile) + </td> + <td> + </td> + </tr> + <tr> + <td>AEXP + </td> + <td>Applications experience + </td> + <td>1.29 (<= 4 months experience) + </td> + <td>1.13 (1 year) + </td> + <td>1.00 (3 years) + </td> + <td>0.91 (6 years) + </td> + <td>0.82 (12 years) + </td> + <td> + </td> + </tr> + <tr> + <td>PCAP + </td> + <td>Programmer capability + </td> + <td>1.42 (15th percentile) + </td> + <td>1.17 (35th 
percentile) + </td> + <td>1.00 (55th percentile) + </td> + <td>0.86 (75th percentile) + </td> + <td>0.70 (90th percentile) + </td> + <td> + </td> + </tr> + <tr> + <td>VEXP + </td> + <td>Virtual machine experience + </td> + <td>1.21 (<= 1 month experience) + </td> + <td>1.10 (4 months) + </td> + <td>1.00 (1 year) + </td> + <td>0.90 (3 years) + </td> + <td> + </td> + <td> + </td> + </tr> + <tr> + <td>LEXP + </td> + <td>Programming language experience + </td> + <td>1.14 (<= 1 month experience) + </td> + <td>1.07 (4 months) + </td> + <td>1.00 (1 year) + </td> + <td>0.95 (3 years) + </td> + <td> + </td> + <td> + </td> + </tr> + <tr> + <td>MODP + </td> + <td>Use of "modern" programming practices (e.g. structured programming) + </td> + <td>1.24 (No use) + </td> + <td>1.10 + </td> + <td>1.00 (some use) + </td> + <td>0.91 + </td> + <td>0.82 (routine use) + </td> + <td> + </td> + </tr> + <tr> + <td>TOOL + </td> + <td>Use of software tools + </td> + <td>1.24 + </td> + <td>1.10 + </td> + <td>1.00 (basic tools) + </td> + <td>0.91 (test tools) + </td> + <td>0.83 (requirements, design, management, documentation tools) + </td> + <td> + </td> + </tr> + <tr> + <td>SCED + </td> + <td>Required development schedule + </td> + <td>1.23 (75% of nominal) + </td> + <td>1.08 (85% of nominal) + </td> + <td>1.00 (nominal) + </td> + <td>1.04 (130% of nominal) + </td> + <td>1.10 (160% of nominal) + </td> + <td> + </td> + </tr> + </tbody> +</table> +<br> +<br> +<br> +So, once all of the factors have been multiplied together, you can +then use the "--effort" flag to set more accurate factors and exponents. +Note that some factors will probably not be "nominal" simply because +times have changed since COCOMO was originally developed, so a few regions +that were desirable have become more common today. 
+For example, +for many software projects of today, virtual machine volatility tends to +be low, and the +use of "modern" programming practices (structured programming, +object-oriented programming, abstract data types, etc.) tends to be high. +COCOMO automatically handles these differences. +<p> +For example, imagine that you're examining a fairly simple application that +meets the "organic" requirements. Organic projects have a base factor +of 2.3 and exponents of 1.05, as noted above. +We then examine all the factors to determine a corrected base factor. +For this example, imagine +that we determine the values of these cost drivers are as follows:<br> +<br> +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + + <tbody> + <tr> + <td rowspan="1" colspan="2">Cost Drivers<br> + </td> + <td rowspan="1" colspan="2">Ratings<br> + </td> + </tr> + <tr> + <td>ID<br> + </td> + <td>Driver Name<br> + </td> + <td>Rating<br> + </td> + <td>Multiplier<br> + </td> + </tr> + <tr> + <td>RELY<br> + </td> + <td>Required software reliability<br> + </td> + <td>Low - easily recovered losses<br> + </td> + <td>0.88<br> + </td> + </tr> + <tr> + <td>DATA<br> + </td> + <td>Database size<br> + </td> + <td>Low<br> + </td> + <td>0.94<br> + </td> + </tr> + <tr> + <td>CPLX<br> + </td> + <td>Product complexity<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>TIME<br> + </td> + <td>Execution time constraint<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>STOR<br> + </td> + <td>Main storage constraint<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>VIRT<br> + </td> + <td>Virtual machine (HW and OS) volatility<br> + </td> + <td>Low (major change every 12 months, minor every month)<br> + </td> + <td>0.87<br> + </td> + </tr> + <tr> + <td>TURN<br> + </td> + <td>Computer turnaround time<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>ACAP<br> + </td> + 
<td>Analyst capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>AEXP<br>
+ </td>
+ <td>Applications experience<br>
+ </td>
+ <td>Nominal (3 years)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>PCAP<br>
+ </td>
+ <td>Programmer capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>VEXP<br>
+ </td>
+ <td>Virtual machine experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.90<br>
+ </td>
+ </tr>
+ <tr>
+ <td>LEXP<br>
+ </td>
+ <td>Programming language experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.95<br>
+ </td>
+ </tr>
+ <tr>
+ <td>MODP<br>
+ </td>
+ <td>Use of "modern" programming practices (e.g. structured programming)<br>
+ </td>
+ <td>Very High (routine use)<br>
+ </td>
+ <td>0.82<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TOOL<br>
+ </td>
+ <td>Use of software tools<br>
+ </td>
+ <td>Nominal (basic tools)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>SCED<br>
+ </td>
+ <td>Required development schedule<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+
+
+
+
+ </tbody>
+</table>
+<p>
+So, starting with the base factor (2.3 in this case) and then multiplying
+by the driver values in this example, we compute:<br>
+<pre>2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1</pre>
+For this
+example, the final factor for the effort calculation is 1.1605. You would then
+invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor
+and exponent for the effort estimation.
+You don't need to use "--schedule" to set the factors when you're using the
+organic model, because in SLOCCount
+the default values are the values for the organic model.
+You can set scheduling parameters manually
+anyway by setting "--schedule 2.5 0.38".
+You <i>do</i> need to use the --schedule option for +embedded and semidetached projects, because those modes have different +schedule parameters. The final command would be:<br> +<br> +sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project<br> +<p> +The detailed COCOMO model requires breaking information down further. +<p> +For more information about the original COCOMO model, including the detailed +COCOMO model, see the book +<i>Software Engineering Economics</i> by Barry Boehm. +<p> +You may be surprised by the high cost estimates, but remember, +these include design, coding, testing (including +integration and testing), documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high cost +estimates. +<p> +If you want to know a subset of this cost, you'll need to isolate +just those figures that you're trying to measure. +For example, let's say you want to find the money a programmer would receive +to do just the coding of the units of the program +(ignoring wrap rate, design, testing, integration, and so on). +According to Boehm's book (page 65, table 5-2), +the percentage varies by product size. +For effort, code and unit test takes 42% for small (2 KSLOC), 40% for +intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large +(128 KSLOC). +Sadly, Boehm doesn't separate coding from unit test; perhaps +50% of the time is spent in unit test in traditional proprietary +development (including fixing bugs found from unit test). +If you want to know the income to the programmer (instead of cost to +the company), you'll also want to remove the wrap rate. +Thus, a programmer's income to <i>only</i> write the code for a +small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4)) +of the default figure computed by SLOCCount. 
+<p> +In other words, less than one-tenth of the cost as computed by SLOCCount +is what actually would be made by a programmer for a small program for +just the coding task. +Note that a proprietary commercial company that bid using +this lower figure would rapidly go out of business, since this figure +ignores the many other costs they have to incur to actually develop +working products. +Programs don't arrive out of thin air; someone needs to determine what +the requirements are, how to design it, and perform at least +some testing of it. +<p> +There's another later estimation model for effort and schedule +called "COCOMO II", but COCOMO II requires logical SLOC instead +of physical SLOC. +SLOCCount doesn't currently measure logical SLOC, so +SLOCCount doesn't currently use COCOMO II. +Contributions of code to compute logical SLOC and then optionally +use COCOMO II will be gratefully accepted. + +<h1><a name="specific-files">Counting Specific Files</a></h1> +<p> +If you want to count a specific subset, you can use the "--details" +option to list individual files, pipe this into "grep" to select the +files you're interested in, and pipe the result to +my tool "print_sum" (which reads lines beginning with numbers, and +returns the total of those numbers). +If you've already done the analysis, an example would be: +<pre> + sloccount --cached --details | grep "/some/subdirectory/" | print_sum +</pre> +<p> +If you just want to count specific files, and you know what language +they're in, you +can just invoke the basic SLOC counters directly. +By convention the simple counters are named "LANGUAGE_count", +and they take on the command line a list of the +source files to count. +Here are some examples: +<pre> + c_count *.c *.cpp *.h # Count C and C++ in current directory. + asm_count *.S # Count assembly. 
+</pre>
+All the counter (*_count) programs accept a "-f FILENAME" option, where FILENAME
+is a file containing the names of all the source files to count
+(one file per text line). If FILENAME is "-", the
+ list of file names is taken from the standard input.
+The "c_count" program handles both C and C++ (but not objective-C;
+for that use objc_count).
+The available counters are
+ada_count,
+asm_count,
+awk_count,
+c_count,
+csh_count,
+exp_count,
+fortran_count,
+f90_count,
+java_count,
+lex_count,
+lisp_count,
+ml_count,
+modula3_count,
+objc_count,
+pascal_count,
+perl_count,
+python_count,
+sed_count,
+sh_count,
+sql_count, and
+tcl_count.
+<p>
+There is also "generic_count", which takes as its first parameter
+the ``comment string'', followed by a list of files.
+The comment string begins a comment that ends at the end of the line.
+Sometimes, if you have source for a language not listed, generic_count
+will be sufficient.
+<p>
+The basic SLOC counters will send output to standard out, one line per file
+(showing the SLOC count and filename).
+The assembly counter shows some additional information about each file.
+The basic SLOC counters always complete their output with a line
+saying "Total:", followed by a line with the
+total SLOC count.
+
+<h1><a name="errors">Countering Problems and Handling Errors</a></h1>
+
+If you're analyzing unfamiliar code, there's always the possibility
+that it uses languages not processed by SLOCCount.
+To counter this, after running SLOCCount, run the following program:
+<pre>
+ count_unknown_ext
+</pre>
+This will look at the resulting data (in its default data directory
+location, ~/.slocdata) and report a sorted list of the file extensions
+for uncategorized ("unknown") files.
+The list will show every file extension and how many files had that
+extension, and is sorted by most common first.
+It's not a problem if an "unknown" type isn't a source code file, but
+if there are a significant number of source files in this category,
+you'll need to change SLOCCount to get an accurate result.
+
+<p>
+One error report that you may see is:
+<pre>
+ c_count ERROR - terminated in string in (filename)
+</pre>
+
+The cause of this is that c_count (the counter for C-like languages)
+keeps track of whether or not it's in a string, and when the counter
+reached the end of the file, it still thought it was in a string.
+
+<p>
+Note that c_count really does have to keep track of whether or
+not it's in a string.
+For example, this is three lines of code, not two, because the
+``comment'' is actually in string data:
+
+<pre>
+ a = "hello
+ /* this is not a comment */
+ bye";
+</pre>
+<p>
+Usually this error means you have code that won't compile
+given certain #define settings. E.G., XFree86 has a line of code that's
+actually wrong (it has a string that's not terminated), but people
+don't notice because the #define to enable it is not usually set.
+Legitimate code can trigger this message, but code that triggers
+this message is horrendously formatted and is begging for problems.
+
+<p>
+In either case, the best way to handle the situation
+is to modify the source code (slightly) so that the code's intent is clear
+(by making sure that double-quotes balance).
+If it's your own code, you definitely should fix this anyway.
+You need to look at the double-quote (") characters. One approach is to
+just grep for double-quote, and look at every line for text that isn't
+terminated, e.g., printf("hello %s, myname);
+
+<p>
+SLOCcount reports warnings when an unusually
+large number of duplicate files are reported.
+A large number of duplicates <i>may</i> suggest that you're counting
+two different versions of the same program as though they were
+independently developed.
+You may want to cd into the data directory (usually ~/.slocdata), cd into +the child directories corresponding to each component, and then look +at their dup_list.dat files, which list the filenames that appeared +to be duplicated (and what they duplicate with). + + +<h1><a name="adding">Adding Support for New Languages</a></h1> +SLOCcount handles many languages, but if it doesn't support one you need, +you'll need to give the language a standard (lowercase ASCII) name, +then modify SLOCcount to (1) detect and (2) count code in that language. + +<ol> +<li> +To detect a new language, you'll need to modify the program break_filelist. +If the filename extension is reliable, you can modify the array +%file_extensions, which maps various filename extensions into languages. +If your needs are more complex, you'll need to modify the code +(typically in functions get_file_type or file_type_from_contents) +so that the correct file type is determined. +For example, if a file with a given filename extension is only +<i>sometimes</i> that type, you'll need to write code to examine the +file contents. +<li> +You'll need to create a SLOC counter for that language type. +It must have the name XYZ_count, where XYZ is the standard name for the +language. +<p> +For some languages, you may be able to use the ``generic_count'' program +to implement your counter - generic_count takes as its first argument +the pattern which +identifies comment begins (which continue until the end of the line); +the other arguments are the files to count. +Thus, the LISP counter looks like this: +<pre> + #!/bin/sh + generic_count ';' $@ +</pre> +The generic_count program won't work correctly if there are multiline comments +(e.g., C) or multiline string constants. +If your language is identical to C/C++'s syntax in terms of +string constant definitions and commenting syntax +(using // or /* .. 
*/), then you can use the c_count program - in this case, +modify compute_sloc_lang so that the c_count program is used. +<p> +Otherwise, you'll have to devise your own counting program. +The program must generate files with the same format, e.g., +for every filename passed as an argument, it needs to return separate lines, +where each line presents the SLOC +for that file, a space, and the filename. +(Note: the assembly language counter produces a slightly different format.) +After that, print "Total:" on its own line, and the actual SLOC total +on the following (last) line. +</ol> + +<h1><a name="advanced">Advanced SLOCCount Use</a></h1> +For most people, the previous information is enough. +However, if you're measuring a large set of programs, or have unusual needs, +those steps may not give you enough control. +In that case, you may need to create your own "data directory" +by hand and separately run the SLOCCount tools. +Basically, "sloccount" (note the lower case) is the name for +a high-level tool which invokes many other tools; this entire +suite is named SLOCCount (note the mixed case). +The next section will describe how to invoke the various tools "manually" +so you can gain explicit control over the measuring process when +the defaults are not to your liking, along with various suggestions +for how to handle truly huge sets of data. +<p> +Here's how to manually create a "data directory" to hold +intermediate results, and how to invoke each tool in sequence +(with discussion of options): +<ol> +<li>Set your PATH to include the SLOCCount "bin directory", as discussed above. +<li>Make an empty "data directory" +(where all intermediate results will be stored); +you can pick any name and location you like for this directory. 
+Here, I'll use the name "data":
+<pre>
+ mkdir ~/data
+</pre>
+<li>Change your current directory to this "data directory":
+<pre>
+ cd ~/data
+</pre>
+The rest of these instructions assume that your current directory
+is the data directory.
+You can set up many different data directories if you wish, to analyze
+different source programs or analyze the programs in different ways;
+just "cd" to the one you want to work with.
+<li>(Optional) Some of the later steps will produce
+a lot of output while they're running.
+If you want to capture this information into a file, use the standard
+"script" command to do so.
+For example, "script run1" will save the output of everything you do into
+file "run1" (until you type control-D to stop saving the information).
+Don't forget that you're creating such a file, or it will become VERY large,
+and in particular don't type any passwords into such a session.
+You can store the script in the data directory, or create a subdirectory
+for such results - any data directory subdirectory that doesn't have the
+special file "filelist" is not a "data directory child" and is thus
+ignored by the later SLOCCount analysis routines.
+<li>Now initialize the "data directory".
+ In particular, initialization will create the "data directory children",
+ a set of subdirectories equivalent to the source code directory's
+ top directories. Each of these data directory children (subdirectories)
+ will contain a file named "filelist", which
+ lists all filenames in the corresponding source code directory.
+ These data directory children
+ will also eventually contain intermediate results
+ of analysis, which you can check for validity
+ (also, having a cache of these values speeds later analysis steps).
+ <p>
+ You use the "make_filelists" command to initialize a data directory.
+ For example, if your source code is in /usr/src/redhat/BUILD, run: +<pre> + make_filelists /usr/src/redhat/BUILD/* +</pre> +<p> + Internally, make_filelists uses "find" to create the list of files, and + by default it ignores all symbolic links. However, you may need to + follow symbolic links; if you do, give make_filelists the + "--follow" option (which will use find's "-follow" option). + Here are make_filelists' options: +<pre> + --follow Follow symbolic links + --datadir D Use this data directory + --skip S Skip basenames named S + --prefix P When creating children, prepend P to their name. + -- No more options +</pre> +<p> + Although you don't normally need to do so, if you want certain files to + not be counted at all in your analysis, you can remove + data directory children or edit the "filelist" files to do so. + There's no need to remove files which aren't source code files normally; + this is handled automatically by the next step. +<p> + If you don't have a single source code directory where the subdirectories + represent the major components you want to count separately, you can + still use the tool but it's more work. + One solution is to create a "shadow" directory with the structure + you wish the program had, using symbolic links (you must use "--follow" + for this to work). + You can also just invoke make_filelists multiple times, with parameters + listing the various top-level directories you wish to include. + Note that the basenames of the directories must be unique. +<p> + If there are so many directories (e.g., a massive number of projects) + that the command line is too long, + you can run make_filelists multiple times in the same + directory with different arguments to create them. + You may find "find" and/or "xargs" helpful in doing this automatically. 
+ For example, here's how to do the same thing using "find": +<pre> + find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \ + -exec make_filelists {} \; +</pre> +<li>Categorize each file. +This means that we must determine which +files contain source code (eliminating auto-generated and duplicate files), +and of those files which language each file contains. +The result will be a set of files in each subdirectory of the data directory, +where each file represents a category (e.g., a language). +<pre> + break_filelist * +</pre> + At this point you might want to examine the data directory subdirectories + to ensure that "break_filelist" has correctly determined the types of + the various files. + In particular, the "unknown" category may have source files in a language + SLOCCount doesn't know about. + If the heuristics got some categorization wrong, you can modify the + break_filelist program and re-run break_filelist. +<p> + By default break_filelist removes duplicates, doesn't count + automatically generated files as normal source code files, and + only gives some feedback. You can change these defaults with the + following options: +<pre> + --duplicates Count all duplicate files as normal files + --crossdups Count duplicate files if they're in different data directory + children (i.e., in different "filelists") + --autogen Count automatically generated files + --verbose Present more verbose status information while processing. +</pre> +<p> + Duplicate control in particular is an issue; you probably don't want + duplicates counted, so that's the default. + Duplicate files are detected by determining if their MD5 checksums + are identical; the "first" duplicate encountered is the only one kept. + Normally, since shells sort directory names, this means that the + file in the alphabetically first child directory is the one counted. 
+ You can change this around by listing directories in the sort order you + wish followed by "*"; if the same data directory child + is requested for analysis more + than once in a given execution, it's skipped after the first time. + So, if you want any duplicate files with child directory "glibc" to + count as part of "glibc", then you should provide the data directory children + list as "glibc *". +<p> + Beware of choosing something other than "*" as the parameter here, + unless you use the "--duplicates" or "--crossdups" options. + The "*" represents the list of data directory children to examine. + Since break_filelist skips duplicate files identified + in a particular run, if you run break_filelist + on only certain children, some duplicate files won't be detected. + If you're allowing duplicates (via "--duplicates" or + "--crossdups"), then this isn't a problem. + Or, you can use the ``--duplistfile'' option to store and retrieve + hashes of files, so that additional files can be handled. +<p> + If there are so many directories that the command line is too long, + you can run break_filelist multiple times and give it + a subset of the directories each time. + You'll need to use one of the duplicate control options to do this. + I would suggest using "--crossdups", which + means that duplicates inside a child will only be counted once, + eliminating at least some of the problems of duplicates. + Here's the equivalent of "break_filelist *" when there are a large + number of subdirectories: +<pre> + find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \; +</pre> + Indeed, for all of the later commands where "*" is listed as the parameter + in these instructions + (for the list of data directory children), just run the above "find" + command and replace "break_filelist --crossdups" with the command shown. 
+<li>(Optional) +If you're not very familiar with the program you're analyzing, you +might not be sure that "break_filelist" has correctly identified +all of the files. +In particular, the system might be using an unexpected +programming language or extension not handled by SLOCCount. +If this is your circumstance, you can just run the command: +<pre> + count_unknown_ext +</pre> +(note that this command is unusual - it doesn't take any arguments, +since it's hard to imagine a case where you wouldn't want every +directory examined). +Unlike the other commands discussed, this one specifically looks at +${HOME}/.slocdata. +This command presents a list of extensions which are unknown to break_filelist, +with the most common ones listed first. +The output format is a name, followed by the number of instances; +the name begins with a "." if it's an extension, or, if there's no +extension, it begins with "/" followed by the base name of the file. +break_filelist already knows about common extensions such as ".gif" and ".png", +as well as common filenames like "README". +You can also view the contents of each of the data directory children's +files to see if break_filelist has correctly categorized the files. +<li>Now compute SLOC and filecounts for each language; you can compute for all + languages at once by calling: +<pre> + compute_all * +</pre> +If you only want to compute SLOC for a specific language, +you can invoke compute_sloc_lang, which takes as its first parameter +the SLOCCount name of the language ("ansic" for C, "cpp" for C++, +"ada" for Ada, "asm" for assembly), followed by the list +of data directory children. +Note that these names are a change from version 1.0, which +called the master program "compute_all", +and had "compute_*" programs for each language. +<p> +Notice the "*"; you can replace the "*" with just the list of +data directory children (subdirectories) to compute, if you wish. 
+Indeed, you'll notice that nearly all of the following commands take a
+list of data directory children as arguments; when you want all of them, use
+"*" (as shown in these instructions), otherwise, list the ones you want.
+<p>
+When you run compute_all or compute_sloc_lang, each data directory
+child (subdirectory)
+is consulted in turn for a list of the relevant files, and the
+SLOC results are placed in that data directory child.
+In each child,
+the file "LANGUAGE-outfile.dat" lists the information from the
+basic SLOC counters.
+That is, the outfile lists the SLOC and filename
+(the assembly outfile has additional information), and ends with
+a line saying "Total:" followed by a line showing the total SLOC of
+that language in that data directory child.
+The file "all-physical.sloc" has the final total SLOC for every language
+in that child directory (i.e., it's the last line of the outfile).
+<li>(Optional) If you want, you can also use USC's CodeCount.
+I've had trouble with these programs, so I don't do this normally.
+However, you're welcome to try - they support logical SLOC measures
+as well as physical ones (though not for most of the languages
+supported by SLOCCount).
+Sadly, they don't seem to compile in gcc without a lot of help, they
+used fixed-width buffers that make me nervous, and I found a
+number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in
+C code, a format that's legal and used often in the Linux kernel).
+If you want to do this,
+modify the files compute_c_usc and compute_java_usc so they point to the
+right directories, and type:
+<pre>
+ compute_c_usc *
+</pre>
+<li>Now you can analyze the results.
The main tool for
+presenting SLOCCount results is "get_sloc", e.g.:
+<pre>
+ get_sloc * | less
+</pre>
+The get_sloc program takes many options, including:
+<pre>
+ --filecount Display number of files instead of SLOC (SLOC is the default)
+ --wide Use "wide" format instead (tab-separated columns)
+ --nobreak Don't insert breaks in long lines
+ --sort X Sort by "X", where "X" is the name of a language
+ ("ansic", "cpp", "fortran", etc.), or "total".
+ By default, get_sloc sorts by "total".
+ --nosort Don't sort - just present results in order of directory
+ listing given.
+ --showother Show non-language totals (e.g., # duplicate files).
+ --oneprogram When computing effort, assume that all files are part of
+ a single program. By default, each subdirectory specified
+ is assumed to be a separate, independently-developed program.
+ --noheader Don't show the header
+ --nofooter Don't show the footer (the per-language values and totals)
+</pre>
+<p>
+Note that unlike the "sloccount" tool, get_sloc requires the current
+directory to be the data directory.
+<p>
+If you're displaying SLOC, get_sloc will also estimate the time it
+would take to develop the software using COCOMO (using its "basic" model).
+By default, this figure assumes that each of the major subdirectories was
+developed independently of the others;
+you can use "--oneprogram" to make the assumption that all files are
+part of the same program.
+The COCOMO model makes many other assumptions; see the paper at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>
+for more information.
+<p>
+If you need to do more analysis, you might want to use the "--wide"
+option and send the data to another tool such as a spreadsheet
+(e.g., gnumeric) or RDBMS (e.g., PostgreSQL).
+Using the "--wide" option creates tab-separated data, which is easier to
+import.
+You may also want to use the "--noheader" and/or "--nofooter" options to
+simplify porting the data to another tool.
+<p>
+Note that in version 1.0, "get_sloc" was called "get_data".
+<p>
+If you have so many data directory children that you can't use "*"
+on the command line, get_sloc won't be as helpful.
+Feel free to patch get_sloc to add this capability (as another option),
+or use get_sloc_details (discussed next) to feed the data into another tool.
+<li>(Optional) If you just can't get the information you need from get_sloc,
+then you can get the raw results of everything and process the data
+yourself.
+I have a little tool to do this, called get_sloc_details.
+You invoke it in a similar manner:
+<pre>
+get_sloc_details *
+</pre>
+</ol>
+
+<p>
+<h1><a name="designer-notes">Designer's Notes</a></h1>
+<p>
+Here are some ``designer's notes'' on how SLOCCount works,
+including what it can handle.
+<p>
+The program break_filelist
+has categories for each programming language it knows about,
+plus the special categories ``not'' (not a source code file),
+``auto'' (an automatically-generated file and thus not to be counted),
+``zero'' (a zero-length file),
+``dup'' (a duplicate of another file as determined by an md5 checksum),
+and
+``unknown'' (a file which doesn't seem to be a source code file
+nor any of these other categories).
+It's a good idea to examine
+the ``unknown'' items later, checking the common extensions
+to ensure you have not missed any common types of code.
+<p>
+The program break_filelist uses lots of heuristics to correctly
+categorize files.
+Here are a few notes about its heuristics:
+<ol>
+<li>
+break_filelist first checks for well-known extensions (such as .gif) that
+cannot be program files, and for a number of common generated filenames.
+<li>
+It then peeks at the first few lines for "#!" followed by a legal script
+name.
+Sometimes it looks further, for example, many Python programs
+invoke "env" and then use it to invoke python.
+<li>
+If that doesn't work, it uses the extension to try to determine the category. 
+For a number of languages, the extension is not reliable, so for those +languages it examines the file contents and uses a set of heuristics +to determine if the file actually belongs to that category. +<li> +Detecting automatically generated files is not easy, and it's +quite conceivable that it won't detect some automatically generated files. +The first 15 lines are examined, to determine if any of them +include at the beginning of the line (after spaces and +possible comment markers) one of the following phrases (ignoring +upper and lower case distinctions): +``generated automatically'', +``automatically generated'', +``this is a generated file'', +``generated with the (something) utility'', +or ``do not edit''. +<li>A number of filename conventions are used, too. +For example, +any ``configure'' file is presumed to be automatically generated if +there's a ``configure.in'' file in the same directory. +<li> +To eliminate duplicates, +the program keeps md5 checksums of each program file. +Any given md5 checksum is only counted once. +Build directories are processed alphabetically, so +if the same file content is in both directories ``a'' and ``b'', +it will be counted only once as being part of ``a'' unless you make +other arrangements. +Thus, some data directory children with names later in the alphabet may appear +smaller than would make sense at first glance. +It is very difficult to eliminate ``almost identical'' files +(e.g., an older and newer version of the same code, included in two +separate packages), because +it is difficult to determine when two ``similar'' files are essentially +the same file. +Changes such as the use of pretty-printers and massive renaming of variables +could make small changes seem large, while the small files +might easily appear to be the ``same''. +Thus, files with different contents are simply considered different. +<li> +If all else fails, the file is placed in the ``unknown'' category for +later analysis. 
+</ol> +<p> +One complicating factor is that I wished to separate C, C++, and +Objective-C code, but a header file ending with +``.h'' or ``.hpp'' file could be any of these languages. +In theory, ``.hpp'' is only C++, but I found that in practice this isn't true. +I developed a number of heuristics to determine, for each file, +what language a given header belonged to. +For example, if a given directory has exactly one of these languages +(ignoring header files), +the header is assumed to belong to that category as well. +Similarly, if there is a body file (e.g., ".c") that has the same name +as the header file, then presumably the header file is of the same language. +Finally, a header file with the keyword ``class'' is almost certainly not a +C header file, but a C++ header file; otherwise it's assumed to +be a C file. +<p> +None of the SLOC counters fully parse the source code; they just examine +the code using simple text processing patterns to count the SLOC. +In practice, by handling a number of special cases this seems to be fine. +Here are some notes on some of the language counters; +the language name is followed by common extensions in parentheses +and the SLOCCount name of the language in brackets: +<ol> +<li>Ada (.ada, .ads, .adb) [ada]: Comments begin with "--". +<li>Assembly (.s, .S, .asm) [asm]: +Assembly languages vary greatly in the comment character they use, +so my counter had to handle this variance. +The assembly language counter (asm_count) +first examines the file to determine if +C-style ``/*'' comments and C preprocessor commands +(e.g., ``#include'') are used. +If both ``/*'' and ``*/'' are in the file, it's assumed that +C-style comments are being used +(since it is unlikely that <i>both</i> would be used +as something else, say as string data, in the same assembly language file). 
+Determining if a file used the C preprocessor was trickier, since +many assembly files do use ``#'' as a comment character and some +preprocessor directives are ordinary words that might be included +in a human comment. +The heuristic used is as follows: if #ifdef, #endif, or #include are used, the +C preprocessor is used; or if at least three lines have either #define or #else, +then the C preprocessor is used. +No doubt other heuristics are possible, but this at least seems to produce +reasonable results. +The program then determines what the comment character is by identifying +which punctuation mark (from a set of possible marks) +is the most common non-space initial character on a line +(ignoring ``/'' and ``#'' if C comments or preprocessor commands, +respectively, are used). +Once the comment character has been determined, and it's been determined +if C-style comments are allowed, the lines of code +are counted in the file. +<li>awk (.awk) [awk]: Comments begin with "#". +<li>C (.c) [ansic]: Both traditional C comments (/* .. */) and C++ +(//) style comments are supported. +Although the older ANSI and ISO C standards didn't support // style +comments, in practice many C programs have used them for some time, and +the C99 standard includes them. +The C counter understands multi-line strings, so +comment characters (/* .. */ and //) are treated as data inside strings. +Conversely, the counter knows that any double-quote characters inside a +comment does not begin a C/C++ string. +<li>C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for +both C and C++. +Note that break_filelist does try to separate C from C++ for purposes +of accounting between them. +<li>C# (.cs): The same counter is used as for C and C++. +Note that there are no "header" filetypes in C#. +<li>C shell (.csh) [csh]: Comments begin with "#". 
+<li>COBOL (.cob, .cbl) [cobol]: SLOCCount
+detects if a "freeform" command has been given; until such a command is
+given, fixed format is assumed.
+In fixed format, comments have a "*" or "/" in column 7 or column 1;
+any line that's not a comment, and has a nonwhitespace character after column 7
+(the indicator area) is counted as a source line of code.
+In a freeform style, any line beginning with optional whitespace and
+then "*" or "/" is considered a comment; any noncomment line
+with a nonwhitespace character is counted as SLOC.
+<li>Expect (.exp) [exp]: Comments begin with "#".
+<li>Fortran 77 (.f, .f77, .F, .F77) [fortran]: Comment-only lines are lines
+where column 1 character = C, c, *, or !, or
+where ! is preceded only by white space.
+<li>Fortran 90 (.f90, .F90) [f90]: Comment-only lines are lines
+where ! is preceded only by white space.
+<li>Haskell (.hs) [haskell]:
+This counter handles block comments {- .. -} and single line comments (--);
+pragmas {-# .. -} are counted as SLOC.
+This is a simplistic counter,
+and can be fooled by certain unlikely combinations of block comments
+and other syntax (line-ending comments or strings).
+In particular, "Hello {-" will be incorrectly interpreted as a
+comment block begin, and "{- -- -}" will be incorrectly interpreted as a
+comment block begin without an end. Literate files are detected by
+their extension, and the style (TeX or plain text) is determined by
+searching for a \begin{code} or ">" at the beginning of lines.
+See the <a
+ href="http://www.haskell.org/onlinereport/literate.html">Haskell 98
+ report section on literate Haskell</a> for more information.
+<li>Java (.java) [java]: Java is counted using the same counter as C and C++.
+<li>lex (.l) [lex]: Uses traditional C /* .. */ comments.
+Note that this does not use the same counter as C/C++ internally, since
+it's quite legal in lex to have "//" (where it is NOT a comment).
+<li>LISP (.cl, .el, .scm, .lsp, .jl) [lisp]: Comments begin with ";". 
+<li>ML (.ml, .mli, .mll, mly) [ml]: Comments nest and are enclosed in (* .. *). +<li>Modula3 (.m3, .mg, .i3, .ig) [modula3]: Comments are enclosed in (* .. *). +<li>Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments. +<li>Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {} +or (*..*). This counter has known weaknesses; see the BUGS section of +the manual page for more information. +<li>Perl (.pl, .pm, .perl) [perl]: +Comments begin with "#". +Perl permits in-line ``perlpod'' documents, ``here'' documents, and an +__END__ marker that complicate code-counting. +Perlpod documents are essentially comments, but a ``here'' document +may include text to generate them (in which case the perlpod document +is data and should be counted). +The __END__ marker indicates the end of the file from Perl's +viewpoint, even if there's more text afterwards. +<li>PHP (.php, .php[3456], .inc) [php]: +Code is counted as PHP code if it has a .php file extension; +it's also counted if it has an .inc extension and looks like PHP code. +SLOCCount does <b>not</b> count PHP code embedded in HTML files normally, +though its lower-level routines can do so if you want to +(use php_count to do this). +Any of the various ways to begin PHP code can be used +(<? .. ?>, +<?php .. ?>, +<script language="php"> .. </script>, +or even <% .. %>). +Any of the PHP comment formats (C, C++, and shell) can be used, and +any string constant formats ("here document", double quote, and single +quote) can be used as well. +<li>Python (.py) [python]: +Comments begin with "#". +Python has a convention that, at the beginning of a definition +(e.g., of a function, method, or class), an unassigned string can be +placed to describe what's being defined. Since this is essentially +a comment (though it doesn't syntactically look like one), the counter +avoids counting such strings, which may have multiple lines. 
+To handle this,
+strings which started the beginning of a line were not counted.
+Python also has the ``triple quote'' operator, permitting multiline
+strings; these needed to be handled specially.
+Triple quote strings are normally considered as data, regardless of
+content, unless they were used as a comment about a definition.
+<li>Ruby (.rb) [ruby]: Comments begin with "#".
+<li>sed (.sed) [sed]: Comments begin with "#".
+Note that these are "sed-only" files; many uses of sed are embedded in
+shell scripts (and are categorized as shell scripts in those cases).
+<li>shell (.sh) [sh]: Comments begin with "#".
+Note that I classify ksh, bash, and the original Bourne shell sh together,
+because they have very similar syntaxes.
+For example, in all of these shells,
+setting a variable is expressed as "varname=value",
+while C shells use "set varname=value".
+<li>TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#".
+<li>Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++.
+</ol>
+<p>
+Much of the code is written in Perl, since it's primarily a text processing
+problem and Perl is good at that.
+Many short scripts are Bourne shell scripts (it's good at
+short scripts for calling other programs), and the
+basic C/C++ SLOC counter is written in C for speed.
+<p>
+I originally named it "SLOC-Count", but I found that some web search
+engines (notably Google) treated that as two words.
+By naming it "SLOCCount", it's easier to find by those who know
+the name of the program.
+<p>
+SLOCCount only counts physical SLOC, not logical SLOC.
+Logical SLOC counting requires much more code to implement,
+and I needed to cover a large number of programming languages. 
+
+
+<p>
+<h1><a name="sloc-definition">Definition of SLOC</a></h1>
+<p>
+This tool measures ``physical SLOC.''
+Physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+To make this concrete, here's an example of a simple C program
+(it strips ANSI C comments out).
+On the left side is the running SLOC total, where "-" indicates a line
+that is not considered a physical "source line of code":
+<pre>
+ 1 #include <stdio.h>
+ -
+ - /* peek at the next character in stdin, but don't get it */
+ 2 int peek() {
+ 3 int c = getchar();
+ 4 ungetc(c, stdin);
+ 5 return c;
+ 6 }
+ -
+ 7 main() {
+ 8 int c;
+ 9 int incomment = 0; /* 1 = we are inside a comment */
+ -
+10 while ( (c = getchar()) != EOF) {
+11 if (!incomment) {
+12 if ((c == '/') && (peek() == '*')) {incomment=1;}
+13 } else {
+14 if ((c == '*') && (peek() == '/')) {
+15 c= getchar(); c=getchar(); incomment=0;
+16 }
+17 }
+18 if ((c != EOF) && !incomment) {putchar(c);}
+19 }
+20 }
+</pre>
+<p>
+<a href="http://www.sei.cmu.edu/publications/documents/92.reports/92.tr.020.html">Robert E. Park et al.'s
+<i>Software Size Measurement:
+A Framework for Counting Source Statements</i></a>
+(Technical Report CMU/SEI-92-TR-20)
+presents a set of issues to be decided when trying to count code.
+The paper's abstract states:
+<blockquote><i>
+This report presents guidelines for defining, recording, and reporting
+two frequently used measures of software size&mdash;physical source lines
+and logical source statements. 
+We propose a general framework for constructing size
+definitions and use it to derive operational methods for
+reducing misunderstandings in measurement results.
+</i></blockquote>
+<p>
+Using Park's framework, here is how physical lines of code are counted:
+<ol>
+<li>Statement Type: I used a physical line-of-code as my basis.
+I included executable statements, declarations
+(e.g., data structure definitions), and compiler directives
+(e.g., preprocessor commands such as #define).
+I excluded all comments and blank lines.
+<li>How Produced:
+I included all programmed code, including any files that had been modified.
+I excluded code generated with source code generators, converted with
+automatic translators, and those copied or reused without change.
+If a file was in the source package, I included it; if the file had
+been removed from a source package (including via a patch), I did
+not include it.
+<li>Origin: You select the files (and thus their origin).
+<li>Usage: You select the files (and thus their usage), e.g.,
+you decide if you're going to
+include additional applications able to run on the system but not
+included with the system.
+<li>Delivery: You'll decide what code to include, but of course,
+if you don't have the code you can't count it.
+<li>Functionality: This tool will include both operative and inoperative code
+if they're mixed together.
+An example of intentionally ``inoperative'' code is
+code turned off by #ifdef commands; since it could be
+turned on for special purposes, it made sense to count it.
+An example of unintentionally ``inoperative'' code is dead or unused code.
+<li>Replications:
+Normally, duplicate files are ignored, unless you use
+the "--duplicates" or "--crossdups" option.
+The tool will count
+``physical replicates of master statements stored in
+the master code''. 
+This is simply code cut and pasted from one place to another to reuse code; +it's hard to tell where this happens, and since it has to be maintained +separately, it's fair to include this in the measure. +I excluded copies inserted, instantiated, or expanded when compiling +or linking, and I excluded postproduction replicates +(e.g., reparameterized systems). +<li>Development Status: You'll decide what code +should be included (and thus the development status of the code that +you'll accept). +<li>Languages: You can see the language list above. +<li>Clarifications: I included all statement types. +This included nulls, continues, no-ops, lone semicolons, +statements that instantiate generics, +lone curly braces ({ and }), and labels by themselves. +</ol> +<p> +Thus, SLOCCount generally follows Park's ``basic definition'', +but with the following exceptions depending on how you use it: +<ol> +<li>How Produced: +By default, this tool excludes duplicate files and +code generated with source code generators. +After all, the COCOMO model states that the +only code that should be counted is code +``produced by project personnel'', whereas these kinds of files are +instead the output of ``preprocessors and compilers.'' +If code is always maintained as the input to a code generator, and then +the code generator is re-run, it's only the code generator input's size that +validly measures the size of what is maintained. +Note that while I attempted to exclude generated code, this exclusion +is based on heuristics which may have missed some cases. +If you want to count duplicates, use the +"--autogen", "--duplicates", and/or "--crossdups" options. +If you want to count automatically generated files, pass +the "--autogen" option mentioned above. +<li>Origin: +You can choose what source code you'll measure. +Normally physical SLOC doesn't include an unmodified +``vendor-supplied language support library'' nor a +``vendor-supplied system or utility''. 
+However, if this is what you are measuring, then you need to include it. +If you include such code, your set will be different +than the usual ``basic definition.'' +<li>Functionality: I included counts of unintentionally inoperative code +(e.g., dead or unused code). +It is very difficult to automatically detect such code +in general for many languages. +For example, a program not directly invoked by anything else nor +installed by the installer is much more likely to be a test program, +which you may want to include in the count (you often would include it +if you're estimating effort). +Clearly, discerning human ``intent'' is hard to automate. +</ol> +<p> +Otherwise, this counter follows Park's +``basic definition'' of a physical line of code, even down to Park's +language-specific definitions where Park defined them for a language. + + +<p> +<h1><a name="miscellaneous">Miscellaneous Notes</a></h1> +<p> +There are other undocumented analysis tools in the original tar file. +Most of them are specialized scripts for my circumstances, but feel +free to use them as you wish. +<p> +If you're packaging this program, don't just copy every executable +into the system "bin" directory - many of the files are those +specialized scripts. +Just put in the bin directory every executable documented here, plus the +the files they depend on (there aren't that many). +See the RPM specification file to see what's actually installed. +<p> +You have to take any measure of SLOC (including this one) with a +large grain of salt. +Physical SLOC is sensitive to the format of source code. +There's a correlation between SLOC and development effort, and some +correlation between SLOC and functionality, +but there's absolutely no correlation between SLOC +and either "quality" or "value". +<p> +A problem of physical SLOC is that it's sensitive to formatting, +and that's a legitimate (and known) problem with the measure. +However, to be fair, logical SLOC is influenced by coding style too. 
+For example, the following two phrases are semantically identical, +but will have different logical SLOC values: +<pre> + int i, j; /* 1 logical SLOC */ + + int i; /* 2 logical SLOC, but it does the same thing */ + int j; +</pre> +<p> +If you discover other information that can be divided up by +data directory children (e.g., the license used), it's probably best +to add that to each subdirectory (e.g., as a "license" file in the +subdirectory). +Then you can modify tools like get_sloc +to add them to their display. +<p> +I developed SLOCCount for my own use, not originally as +a community tool, so it's certainly not beautiful code. +However, I think it's serviceable - I hope you find it useful. +Please send me patches for any improvements you make! +<p> +You can't use this tool as-is with some estimation models, such as COCOMO II, +because this tool doesn't compute logical SLOC. +I certainly would accept code contributions to add the ability to +measure logical SLOC (or related measures such as +Cyclomatic Complexity and Cyclomatic density); +selecting them could be a compile-time option. +However, measuring logical SLOC takes more development effort, so I +haven't done so; see USC's "CodeCount" for a set of code that +measures logical SLOC for some languages +(though I've had trouble with CodeCount - in particular, its C counter +doesn't correctly handle large programs like the Linux kernel). + + +<p> +<h1><a name="license">SLOCCount License</a></h1> +<p> +Here is the SLOCCount License; the file COPYING contains the standard +GPL version 2 license: +<pre> +===================================================================== +SLOCCount +Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com) + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +===================================================================== +</pre> +<p> +While it's not formally required by the license, please give credit +to me and this software in any report that uses results generated by it. +<p> +This document was written by David A. Wheeler (dwheeler, at, dwheeler.com), +and is +(C) Copyright 2001 David A. Wheeler. +This document is covered by the license (GPL) listed above. +<p> +The license <i>does</i> give you the right to +use SLOCCount to analyze proprietary programs. + +<p> +<h1><a name="related-tools">Related Tools</a></h1> +<p> +One available toolset is +<a href="http://sunset.usc.edu/research/CODECOUNT">CodeCount</a>. +I tried using this toolset, but I eventually gave up. +It had too many problems handling the code I was trying to analyze, and it +does a poor job automatically categorizing code. +It also has no support for many of today's languages (such as Python, +Perl, Ruby, PHP, and so on). +However, it does a lot of analysis and measurements that SLOCCount +doesn't do, so it all depends on your need. +Its license appeared to be open source, but it's quite unusual and +I'm not enough of a lawyer to be able to confirm that. +<p> +Another tool that's available is <a href="http://csdl.ics.hawaii.edu/Research/LOCC/LOCC.html">LOCC</a>. +It's available under the GPL. +It can count Java code, and there's experimental support for C++. 
+LOCC is really intended for more deeply analyzing each Java file;
+what's particularly interesting about it is that it can measure
+"diffs" (how much has changed).
+See
+<a href="http://csdl.ics.hawaii.edu/Publications/MasterList.html#csdl2-00-10">
+A comparative review of LOCC and CodeCount</a>.
+<p>
+<a href="http://sourceforge.net/projects/cccc">
+CCCC</a> is a tool which analyzes C++ and Java files
+and generates a report on various metrics of the code.
+Metrics supported include lines of code, McCabe's complexity,
+and metrics proposed by Chidamber & Kemerer and Henry & Kafura.
+(You can see
+<a href="http://cccc.sourceforge.net/">Tim Littlefair's comments</a>).
+CCCC is in the public domain.
+It reports on metrics that sloccount doesn't, but sloccount can handle
+far more computer languages.
+
+<p>
+<h1><a name="submitting-changes">Submitting Changes</a></h1>
+<p>
+The GPL license doesn't require you to submit changes you make back to
+its maintainer (currently me),
+but it's highly recommended and wise to do so.
+Because others <i>will</i> send changes to me, a version you make on your
+own will slowly become obsolete and incompatible.
+Rather than allowing this to happen, it's better to send changes in to me
+so that the latest version of SLOCCount also has the
+features you're looking for.
+If you're submitting support for new languages, be sure that your
+change correctly ignores files that aren't in that new language
+(some filename extensions have multiple meanings).
+You might want to look at the <a href="TODO">TODO</a> file first.
+<p>
+When you send changes to me, send them as "diff" results so that I can
+use the "patch" program to install them.
+If you can, please send ``unified diffs'' -- GNU's diff can create these
+using the "-u" option. 
+</body> + diff --git a/sloccount.html.orig b/sloccount.html.orig new file mode 100644 index 0000000..dd0ad54 --- /dev/null +++ b/sloccount.html.orig @@ -0,0 +1,2440 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SLOCCount User's Guide</title> +</head> +<body bgcolor="#FFFFFF"> +<center> +<font size="+3"><b><span class="title">SLOCCount User's Guide</span></b></font> +<br> +<font size="+2"><span class="author">by David A. Wheeler (dwheeler, at, dwheeler.com)</span></font> +<br> +<font size="+2"><span class="pubdate">December 2, 2002</span></font> +<br> +<font size="+2"><span class="version">Version 2.20</span></font> +</center> +<p> +<h1><a name="introduction">Introduction</a></h1> +<p> +SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems. +Thus, SLOCCount is a "software metrics tool" or "software measurement tool". +SLOCCount was developed by David A. Wheeler, +originally to count SLOC in a GNU/Linux distribution, but it can be +used for counting the SLOC of arbitrary software systems. +<p> +SLOCCount is known to work on Linux systems, and has been tested +on Red Hat Linux versions 6.2, 7, and 7.1. +SLOCCount should run on many other Unix-like systems (if Perl is installed), +in particular, I would expect a *BSD system to work well. +Windows users can run sloccount by first installing +<a href="http://sources.redhat.com/cygwin">Cygwin</a>. +SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install +or use on Windows, but it works. +Of course, feel free to upgrade to an open source Unix-like system +(such as Linux or *BSD) instead :-). +<p> +SLOCCount can count physical SLOC for a wide number of languages. 
+Listed alphabetically, they are
+Ada, Assembly (for many machines and assemblers),
+awk (including gawk and nawk),
+Bourne shell (and relatives such as bash, ksh, zsh, and pdksh),
+C, C++, C# (also called C-sharp or cs), C shell (including tcsh),
+COBOL, Expect, Fortran, Haskell,
+Java, lex (including flex),
+LISP (including Scheme),
+makefiles (though they aren't usually shown in final reports),
+Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed,
+SQL (normally not shown),
+TCL, and Yacc.
+It can gracefully handle awkward situations in many languages,
+for example, it can determine the
+syntax used in different assembly language files and adjust appropriately,
+it knows about Python's use of string constants as comments, and it
+can handle various Perl oddities (e.g., perlpods, here documents,
+and Perl's _ _END_ _ marker).
+It even has a "generic" SLOC counter that you may be able to use to count the
+SLOC of other languages (depending on the language's syntax).
+<p>
+SLOCCount can also take a large list of files and automatically categorize
+them using a number of different heuristics.
+The heuristics automatically determine if a file
+is a source code file or not, and if so, which language it's written in.
+For example,
+it knows that ".pc" is usually a C source file for an Oracle preprocessor,
+but it can detect many circumstances where it's actually a file about
+a "PC" (personal computer).
+For another example, it knows that ".m" is the standard extension for
+Objective-C, but it will check the file contents to
+see if it really is Objective-C.
+It will even examine file headers to attempt to accurately determine
+the file's true type.
+As a result, you can analyze large systems completely automatically.
+<p>
+Finally, SLOCCount has some report-generating tools
+to collect the data generated,
+and then present it in several different formats and sorted different ways. 
+The report-generating tool can also generate simple tab-separated files +so data can be passed on to other analysis tools (such as spreadsheets +and database systems). +<p> +SLOCCount will try to quickly estimate development time and effort given only +the lines of code it computes, using the original Basic COCOMO model. +This estimate can be improved if you can give more information about the project. +See the +<a href="#cocomo">discussion below about COCOMO, including intermediate COCOMO</a>, +if you want to improve the estimates by giving additional information about +the project. +<p> +SLOCCount is open source software/free software (OSS/FS), +released under the GNU General Public License (GPL), version 2; +see the <a href="#license">license below</a>. +The master web site for SLOCCount is +<a href="http://www.dwheeler.com/sloccount">http://www.dwheeler.com/sloccount</a>. +You can learn a lot about SLOCCount by reading the paper that caused its +creation, available at +<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>. +Feel free to see my master web site at +<a href="http://www.dwheeler.com">http://www.dwheeler.com</a>, which has +other material such as the +<a href="http://www.dwheeler.com/secure-programs"><i>Secure Programming +for Linux and Unix HOWTO</i></a>, +my <a href="http://www.dwheeler.com/oss_fs_refs.html">list of +OSS/FS references</a>, and my paper +<a href="http://www.dwheeler.com/oss_fs_why.html"><i>Why OSS/FS? Look at +the Numbers!</i></a> +Please send improvements by email +to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the +commas, remove the spaces, and change the word "at" into the at symbol). +<p> +The following sections first give a "quick start" +(discussing how to use SLOCCount once it's installed), +discuss basic SLOCCount concepts, +how to install it, how to set your PATH, +how to install source code on RPM-based systems if you wish, and +more information on how to use the "sloccount" front-end. 
+This is followed by material for advanced users:
+how to use SLOCCount tools individually (for when you want more control
+than the "sloccount" tool gives you), designer's notes,
+the definition of SLOC, and miscellaneous notes.
+The last sections state the license used (GPL) and give
+hints on how to submit changes to SLOCCount (if you decide to make changes
+to the program).
+
+
+<p>
+<h1><a name="quick-start">Quick Start</a></h1>
+<p>
+Once you've installed SLOCCount (discussed below),
+you can measure an arbitrary program by typing everything
+after the dollar sign into a terminal session:
+<pre>
+  $ sloccount <i>topmost-source-code-directory</i>
+</pre>
+<p>
+The directory listed and all its descendants will be examined.
+You'll see output while it calculates,
+culminating with physical SLOC totals and
+estimates of development time, schedule, and cost.
+If the directory contains a set of directories, each of which is
+a different project developed independently,
+use the "--multiproject" option so the effort estimations
+can correctly take this into account.
+<p>
+You can redisplay the data different ways by using the "--cached"
+option, which skips the calculation stage and re-prints previously
+computed information.
+You can use other options to control what's displayed:
+"--filecount" shows counts of files instead of SLOC, and
+"--details" shows the detailed information about every source code file.
+So, to display all the details of every file once you've previously
+calculated the results, just type:
+<pre>
+ sloccount --cached --details
+</pre>
+<p>
+You'll notice that the default output ends with a request.
+If you use this data (e.g., in a report), please
+credit that data as being "generated using 'SLOCCount' by David A. Wheeler."
+I make no money from this program, so at least please give me some credit.
+<p>
+SLOCCount tries to ignore all automatically generated files, but its
+heuristics to detect this are necessarily imperfect (after all, even humans
+sometimes have trouble determining if a file was automatically generated).
+If possible, try to clean out automatically generated files from
+the source directories --
+in many situations "make clean" does this.
+<p>
+There's more to SLOCCount than this, but first we'll need to
+explain some basic concepts, then we'll discuss other options
+and advanced uses of SLOCCount.
+
+<p>
+<h1><a name="concepts">Basic Concepts</a></h1>
+<p>
+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines".
+More formally, physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+In SLOCCount, there are 3 different directories:
+<ol>
+<li>The "source code directory", a directory containing the source code
+    being measured
+    (possibly in recursive subdirectories). The directories immediately
+    contained in the source code directory will normally be counted separately,
+    so it helps if your system is designed so that this top set of directories
+    roughly represents the system's major components.
+    If it doesn't, there are various tricks you can use to group source
+    code into components, but it's more work.
+    You don't need write access to the source code directory, but
+    you do need read access to all files, and read and search (execute) access
+    to all subdirectories.
+<li>The "bin directory", the directory containing the SLOCCount executables.
+ By default, installing the program creates a subdirectory + named "sloccount-VERSION" which is the bin directory. + The bin directory must be part of your PATH. +<li>The "data directory", which stores the analysis results. + When measuring programs using "sloccount", by default + this is the directory ".slocdata" inside your home directory. + When you use the advanced SLOCCount tools directly, + in many cases this must be your "current" directory. + Inside the data directory are "data directory children" - these are + subdirectories that contain a file named "filelist", and each child + is used to represent a different project or a different + major component of a project. +</ol> +<p> +SLOCCount can handle many different programming languages, and separate +them by type (so you can compare the use of each). +Here is the set of languages, sorted alphabetically; +common filename extensions are in +parentheses, with SLOCCount's ``standard name'' for the language +listed in brackets: +<ol> +<li>Ada (.ada, .ads, .adb, .pad) [ada] +<li>Assembly for many machines and assemblers (.s, .S, .asm) [asm] +<li>awk (.awk) [awk] +<li>Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh] +<li>C (.c, .pc, .ec, .ecp) [ansic] +<li>C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp] +<li>C# (.cs) [cs] +<li>C shell including tcsh (.csh) [csh] +<li>COBOL (.cob, .cbl, .COB, .CBL) [cobol] +<li>Expect (.exp) [exp] +<li>Fortran (.f, .f77, .F) [fortran] +<li>Haskell (.hs) [haskell]; please preprocess .lhs files. 
+<li>Java (.java) [java] +<li>lex (.l) [lex] +<li>LISP including Scheme (.el, .scm, .lsp, .jl) [lisp] +<li>makefiles (makefile) [makefile] +<li>ML (.ml, .ml3) [ml] +<li>Modula3 (.m3, .i3) [modula3] +<li>Objective-C (.m) [objc] +<li>Pascal (.p, .pas) [pascal] +<li>Perl (.pl, .pm, .perl) [perl] +<li>PHP (.php, .php[3456], .inc) [php] +<li>Python (.py) [python] +<li>Ruby (.rb) [ruby] +<li>sed (.sed) [sed] +<li>sql (.sql) [sql] +<li>TCL (.tcl, .tk, .itk) [tcl] +<li>Yacc (.y) [yacc] +</ol> + +<p> +<h1><a name="installing">Installing SLOCCount</a></h1> +<p> +Obviously, before using SLOCCount you'll need to install it. +SLOCCount depends on other programs, in particular perl, bash, +a C compiler (gcc will do), and md5sum +(you can get a useful md5sum program in the ``textutils'' package +on many Unix-like systems), so you'll need to get them installed +if they aren't already. +<p> +If your system uses RPM version 4 or greater to install software +(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM +and install it using a normal installation command; from the text line +you can use: +<pre> + rpm -Uvh sloccount*.rpm +</pre> +<p> +Everyone else will need to install from a tar file, and Windows users will +have to install Cygwin before installing sloccount. +<p> +If you're using Windows, you'll need to first install +<a href="http://sources.redhat.com/cygwin">Cygwin</a>. +By installing Cygwin, you'll install an environment and a set of +open source Unix-like tools. +Cygwin essentially creates a Unix-like environment in which sloccount can run. +You may be able to run parts of sloccount without Cygwin, in particular, +the perl programs should run in the Windows port of Perl, but you're +on your own - many of the sloccount components expect a Unix-like environment. +If you want to install Cygwin, go to the +<a href="http://sources.redhat.com/cygwin">Cygwin main page</a> +and install it. 
+If you're using Cygwin, <b>install it to use Unix newlines, not +DOS newlines</b> - DOS newlines will cause odd errors in SLOCCount +(and probably other programs, too). +I have only tested a "full" Cygwin installation, so I suggest installing +everything. +If you're short on disk space, at least install +binutils, bash, fileutils, findutils, +gcc, grep, gzip, make, man, perl, readline, +sed, sh-utils, tar, textutils, unzip, and zlib; +you should probably install vim as well, +and there may be other dependencies as well. +By default Cygwin will create a directory C:\cygwin\home\NAME, +and will set up the ability to run Unix programs +(which will think that the same directory is called /home/NAME). +Now double-click on the Cygwin icon, or select from the Start menu +the selection Programs / Cygnus Solutions / Cygwin Bash shell; +you'll see a terminal screen with a Unix-like interface. +Now follow the instructions (next) for tar file users. +<p> +If you're installing from the tar file, download the file +(into your home directory is fine). +Unpacking the file will create a subdirectory, so if you want the +unpacked subdirectory to go somewhere special, "cd" to where you +want it to go. +Most likely, your home directory is just fine. +Now gunzip and untar SLOCCount (the * replaces the version #) by typing +this at a terminal session: +<pre> + gunzip -c sloccount*.tar.gz | tar xvf - +</pre> +Replace "sloccount*.tar.gz" shown above +with the full path of the downloaded file, wherever that is. +You've now created the "bin directory", which is simply the +"sloccount-VERSION" subdirectory created by the tar command +(where VERSION is the version number). +<p> +Now you need to compile the few compiled programs in the "bin directory" so +SLOCCount will be ready to go. +First, cd into the newly-created bin directory, by typing: +<pre> + cd sloccount* +</pre> +<p> +You may then need to override some installation settings. 
+You can do this by editing the supplied makefile, or alternatively,
+by providing options to "make" whenever you run make.
+The supplied makefile assumes your C compiler is named "gcc", which
+is true for most Linux systems, *BSD systems, and Windows systems using Cygwin.
+If this isn't true, you'll need to set
+the "CC" variable to the correct value (e.g., "cc").
+You can also modify where the files are stored; this variable is
+called PREFIX and its default is /usr/local
+(older versions of sloccount defaulted to /usr).
+<p>
+If you're using Windows and Cygwin, you
+<b>must</b> override one of the installation
+settings, EXE_SUFFIX, for installation to work correctly.
+One way to set this value is to edit the "makefile" file so that
+the line beginning with "EXE_SUFFIX" reads as follows:
+<pre>
+  EXE_SUFFIX=.exe
+</pre>
+If you're using Cygwin and you choose to modify the "makefile", you
+can use any text editor on the Cygwin side, or you can use a
+Windows text editor if it can read and write Unix-formatted text files.
+Cygwin users are free to use vim, for example.
+If you're installing into your home directory and using the default locations,
+Windows text editors will see the makefile as file
+C:\cygwin\home\NAME\sloccount-VERSION\makefile.
+Note that the Windows "Notepad" application doesn't work well, because it's not
+able to handle Unix text files correctly.
+Since this can be quite a pain, Cygwin users may instead decide to override
+the makefile values during installation.
+<p>
+Finally, compile the few compiled programs in it by typing "make":
+<pre>
+  make
+</pre>
+If you didn't edit the makefile in the previous step, you
+need to provide options to make invocations to set the correct values.
+This is done by simply saying (after "make") the name of the variable,
+an equal sign, and its correct value.
+Thus, to compile the program on a Windows system using Cygwin, you can
+skip modifying the makefile file by typing this instead of just "make":
+<pre>
+  make EXE_SUFFIX=.exe
+</pre>
+<p>
+If you want, you can install sloccount for system-wide use without
+using the RPM version.
+Windows users using Cygwin should probably do this, particularly
+if they chose a "local" installation.
+To do this, first log in as root (Cygwin users don't need to do this
+for local installation).
+Edit the makefile to match your system's conventions, if necessary,
+and then type "make install":
+<pre>
+  make install
+</pre>
+If you need to set some make options, remember to do that here too.
+If you use "make install", you can uninstall it later using
+"make uninstall".
+Installing sloccount for system-wide use is optional;
+SLOCCount works without a system-wide installation.
+However, if you don't install sloccount system-wide, you'll need to
+set up your PATH variable; see the section on
+<a href="#path">setting your path</a>.
+<p>
+A note for Cygwin users (and some others): some systems, including Cygwin,
+don't set up the environment quite right and thus can't display the manual
+pages as installed.
+The problem is that they forget to search /usr/local/share/man for
+manual pages.
+If you want to read the installed manual pages, type this
+into a Bourne-like shell:
+<pre>
+  MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+  export MANPATH
+</pre>
+Or, if you use a C shell:
+<pre>
+  setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+</pre>
+From then on, you'll be able to view the reference manual pages
+by typing "man sloccount" (or by using whatever manual page display system
+you prefer).
+<p>
+
+<p>
+<h1><a name="installing-source">Installing The Source Code To Measure</a></h1>
+<p>
+Obviously, you must install the software source code you're counting,
+so somehow you must create the "source directory"
+with the source code to measure.
+You must also make sure that permissions are set so the software can +read these directories and files. +<p> +For example, if you're trying to count the SLOC for an RPM-based Linux system, +install the software source code by doing the following as root +(which will place all source code into the source directory +/usr/src/redhat/BUILD): +<ol> +<li>Install all source rpm's: +<pre> + mount /mnt/cdrom + cd /mnt/cdrom/SRPMS + rpm -ivh *.src.rpm +</pre> +<li>Remove RPM spec files you don't want to count: +<pre> + cd ../SPECS + (look in contents of spec files, removing what you don't want) +</pre> +<li>build/prep all spec files: +<pre> + rpm -bp *.spec +</pre> +<li>Set permissions so the source files can be read by all: +<pre> + chmod -R a+rX /usr/src/redhat/BUILD +</pre> +</ol> +<p> +Here's an example of how to download source code from an +anonymous CVS server. +Let's say you want to examine the source code in GNOME's "gnome-core" +directory, as stored at the CVS server "anoncvs.gnome.org". +Here's how you'd do that: +<ol> +<li>Set up site and login parameters: +<pre> + export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome' +</pre> +<li>Log in: +<pre> + cvs login +</pre> +<li>Check out the software (copy it to your local directory), using +mild compression to save on bandwidth: +<pre> + cvs -z3 checkout gnome-core +</pre> +</ol> +<p> +Of course, if you have a non-anonymous account, you'd set CVSROOT +to reflect this. For example, to log in using the "pserver" +protocol as ACCOUNT_NAME, do: +<pre> + export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome' +</pre> +<p> +You may need root privileges to install the source code and to give +another user permission to read it, but please avoid running the +sloccount program as root. +Although I know of no specific reason this would be a problem, +running any program as root turns off helpful safeguards. 
+<p> +Although SLOCCount tries to detect (and ignore) many cases where +programs are automatically generated, these heuristics are necessarily +imperfect. +So, please don't run any programs that generate other programs - just +do enough to get the source code prepared for counting. +In general you shouldn't run "make" on the source code, and if you have, +consider running "make clean" or "make really_clean" on the source code first. +It often doesn't make any difference, but identifying those circumstances +is difficult. +<p> +SLOCCount will <b>not</b> automatically uncompress files that are +compressed/archive files (such as .zip, .tar, or .tgz files). +Often such files are just "left over" old versions or files +that you're already counting. +If you want to count the contents of compressed files, uncompress them first. +<p> +SLOCCount also doesn't delve into files using "literate programming" +techniques, in part because there are too many incompatible formats +that implement it. +Thus, run the tools to extract the code from the literate programming files +before running SLOCCount. +For example, if you have many literate Haskell files (.lhs), please +extract them. + + +<h1><a name="path">Setting your PATH</a></h1> +Before you can run SLOCCount, you'll need to make sure +the SLOCCount "bin directory" is in your PATH. +If you've installed SLOCCount in a system-wide location +such as /usr/bin, then you needn't do more; the RPMs and "make install" +commands essentially do this. +<p> +Otherwise, in Bourne-shell variants, type: +<pre> + PATH="$PATH:<i>the directory with SLOCCount's executable files</i>" + export PATH +</pre> +Csh users should instead type: +<pre> + setenv PATH "$PATH:<i>the directory with SLOCCount's executable files</i>" +</pre> + +<h1><a name="using-basics">Using SLOCCount: The Basics</a></h1> + +Normal use of SLOCCount is very simple. +In a terminal window just type "sloccount", followed by a +list of the source code directories to count. 
+If you give it only a single directory, SLOCCount tries to be
+a little clever and break the source code into
+subdirectories for purposes of reporting:
+<ol>
+<li>If the directory has at least
+two subdirectories, then those subdirectories will be used as the
+breakdown (see the example below).
+<li>If the single directory contains files as well as directories
+(or if you give sloccount some files as parameters), those files will
+be assigned to the directory "top_dir" so you can tell them apart
+from other directories.
+<li>If there's a subdirectory named "src", then that subdirectory is again
+broken down, with all the further subdirectories prefixed with "src_".
+So if directory "X" has a subdirectory "src", which contains subdirectory
+"modules", the program will report a separate count from "src_modules".
+</ol>
+In the terminology discussed above, each of these directories would become
+"data directory children."
+<p>
+You can also give "sloccount" a list of directories, in which case the
+report will be broken down by these directories
+(make sure that the basenames of these directories differ).
+SLOCCount normally considers all descendants of these directories,
+though unless told otherwise it ignores symbolic links.
+<p>
+This is all easier to explain by example.
+Let's say that we want to measure Apache 1.3.12 as installed using an RPM.
+Once it's installed, we just type:
+<pre>
+  sloccount /usr/src/redhat/BUILD/apache_1.3.12
+</pre>
+The output we'll see shows status reports while it analyzes things,
+and then it prints out:
+
+<pre>
+SLOC Directory SLOC-by-Language (Sorted)
+24728 src_modules ansic=24728
+19067 src_main ansic=19067
+8011 src_lib ansic=8011
+5501 src_os ansic=5340,sh=106,cpp=55
+3886 src_support ansic=2046,perl=1712,sh=128
+3823 src_top_dir sh=3812,ansic=11
+3788 src_include ansic=3788
+3469 src_regex ansic=3407,sh=62
+2783 src_ap ansic=2783
+1378 src_helpers sh=1345,perl=23,ansic=10
+1304 top_dir sh=1304
+104 htdocs perl=104
+31 cgi-bin sh=24,perl=7
+0 icons (none)
+0 conf (none)
+0 logs (none)
+
+
+ansic: 69191 (88.85%)
+sh: 6781 (8.71%)
+perl: 1846 (2.37%)
+cpp: 55 (0.07%)
+
+
+Total Physical Source Lines of Code (SLOC) = 77873
+Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36)
+ (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
+Estimated Schedule in Years (Months) = 1.65 (19.82)
+ (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
+Estimated Average Number of Developers (Effort/Schedule) = 11.72
+Total Estimated Cost to Develop = $ 2615760
+ (average salary = $56286/year, overhead = 2.4).
+
+Please credit this data as "generated using 'SLOCCount' by David A. Wheeler."
+</pre>
+<p>
+Interpreting this should be straightforward.
+The Apache directory has several subdirectories, including "htdocs", "cgi-bin",
+and "src".
+The "src" directory has many subdirectories in it
+("modules", "main", and so on).
+Code directly
+contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12
+is labelled "top_dir", while
+code directly contained in the src subdirectory is labelled "src_top_dir".
+Code in the "src/modules" directory is labelled "src_modules" here.
+The output shows each major directory broken
+out, sorted from largest to smallest.
+Thus, the "src/modules" directory had the most code of the directories, +24728 physical SLOC, all of it in C. +The "src/helpers" directory had a mix of shell, perl, and C; note that +when multiple languages are shown, the list of languages in that child +is also sorted from largest to smallest. +<p> +Below the per-component set is a list of all languages used, +with their total SLOC shown, sorted from most to least. +After this is the total physical SLOC (77,873 physical SLOC in this case). +<p> +Next is an estimation of the effort and schedule (calendar time) +it would take to develop this code. +For effort, the units shown are person-years (with person-months +shown in parentheses); for schedule, total years are shown first +(with months in parentheses). +When invoked through "sloccount", the default assumption is that all code is +part of a single program; the "--multiproject" option changes this +to assume that all top-level components are independently developed +programs. +When "--multiproject" is invoked, each project's efforts are estimated +separately (and then summed), and the schedule estimate presented +is the largest estimated schedule of any single component. +<p> +By default the "Basic COCOMO" model is used for estimating +effort and schedule; this model +includes design, code, test, and documentation time (both +user/admin documentation and development documentation). +<a href="#cocomo">See below for more information on COCOMO</a> +as it's used in this program. +<p> +Next are several numbers that attempt to estimate what it would have cost +to develop this program. +This is simply the amount of effort, multiplied by the average annual +salary and by the "overhead multiplier". +The default annual salary is +$56,286 per year; this value was from the +<i>ComputerWorld</i>, September 4, 2000's Salary Survey +of an average U.S. programmer/analyst salary in the year 2000. 
+You might consider using other numbers +(<i>ComputerWorld</i>'s September 3, 2001 Salary Survey found +an average U.S. programmer/analyst salary making $55,100, senior +systems programmers averaging $68,900, and senior systems analysts averaging +$72,300). + +<p> +Overhead is much harder to estimate; I did not find a definitive source +for information on overheads. +After informal discussions with several cost analysts, +I determined that an overhead of 2.4 +would be representative of the overhead sustained by +a typical software development company. +As discussed in the next section, you can change these numbers too. + +<p> +You may be surprised by the high cost estimates, but remember, +these include design, coding, testing, documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high figures. +If you only wanted to know the costs of the coding, you'd need to get +those figures. + + +<p> +Note that if any top-level directory has a file named PROGRAM_LICENSE, +that file is assumed to contain the name of the license +(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on). +If there is at least one such file, sloccount will also report statistics +on licenses. + + +<h1><a name="options">Options</a></h1> +The program "sloccount" has a large number of options +so you can control what is selected for counting and how the +results are displayed. +<p> +There are several options that control which files are selected +for counting: +<pre> + --duplicates Count all duplicate files as normal files + --crossdups Count duplicate files if they're in different data directory + children. + --autogen Count automatically generated files + --follow Follow symbolic links (normally they're ignored) + --addlang Add languages to be counted that normally aren't shown. 
+ --append Add more files to the data directory +</pre> +Normally, files which have exactly the same content are counted only once +(data directory children are counted alphabetically, so the child +"first" in the alphabet will be considered the owner of the master copy). +If you want them all counted, use "--duplicates". +Sometimes when you use sloccount, each directory represents a different +project, in which case you might want to specify "--crossdups". +The program tries to reject files that are automatically generated +(e.g., a C file generated by bison), but you can disable this as well. +You can use "--addlang" to show makefiles and SQL files, which aren't +usually counted. +<p> +Possibly the most important option is "--cached". +Normally, when sloccount runs, it computes a lot of information and +stores this data in a "data directory" (by default, "~/.slocdata"). +The "--cached" option tells sloccount to use data previously computed, +greatly speeding up use once you've done the computation once. +The "--cached" option can't be used along with the options used to +select what files should be counted. +You can also select a different data directory by using the +"--datadir" option. +<p> +There are many options for controlling the output: +<pre> + --filecount Show counts of files instead of SLOC. + --details Present details: present one line per source code file. + --wide Show "wide" format. Ignored if "--details" selected + --multiproject Assume each directory is for a different project + (this modifies the effort estimation calculations) + --effort F E Change the effort estimation model, so that it uses + F as the factor and E as the exponent. + --schedule F E Change the schedule estimation model, so that it uses + F as the factor and E as the exponent. + --personcost P Change the average annual salary to P. + --overhead O Change the annual overhead to O. 
+ -- End of options +</pre> +<p> +Basically, the first time you use sloccount, if you're measuring +a set of projects (not a single project) you might consider +using "--crossdups" instead of the defaults. +Then, you can redisplay data quickly by using "--cached", +combining it with options such as "--filecount". +If you want to send the data to another tool, use "--details". +<p> +If you're measuring a set of projects, you probably ought to pass +the option "--multiproject". +When "--multiproject" is used, efforts are computed for each component +separately and summed, and the time estimate used is the maximum +single estimated time. +<p> +The "--details" option dumps the available data in 4 columns, +tab-separated, where each line +represents a source code file in the data directory children identified. +The first column is the SLOC, the second column is the language type, +the third column is the name of the data directory child +(as it was given to get_sloc_details), +and the last column is the absolute pathname of the source code file. +You can then pipe this output to "sort" or some other tool for further +analysis (such as a spreadsheet or RDBMS). +<p> +You can change the parameters used to estimate effort using "--effort". +For example, if you believe that in the environment being used +you can produce 2 KSLOC/month scaling linearly, then +that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC, +and the exponent for effort is 1 (linear). +Thus, you can use "--effort 0.5 1". +<p> +You can also set the annual salary and overheads used to compute +estimated development cost. +While "$" is shown, there's no reason you have to use dollars; +the unit of development cost is the same unit as the unit used for +"--personcost". 
+
+<h1><a name="cocomo">More about COCOMO</a></h1>
+
+<p>
+By default SLOCCount uses a very simple estimating model for effort and schedule:
+the basic COCOMO model in the "organic" mode (modes are more fully discussed below).
+This model estimates effort and schedule, including design, code, test,
+and documentation time (both user/admin documentation and development documentation).
+Basic COCOMO is a nice simple model, and it's used as the default because
+it doesn't require any information about the code other than the SLOC count
+already computed.
+<p>
+However, basic COCOMO's accuracy is limited for the same reason -
+basic COCOMO doesn't take a number of important factors into account.
+If you have the necessary information, you can improve the model's accuracy
+by taking these factors into account. You can at least quickly determine
+if the right "mode" is being used to improve accuracy. You can also
+use the "Intermediate COCOMO" and "Detailed COCOMO" models that take more
+factors into account, and are likely to produce more accurate estimates as
+a result. Take these estimates as just that - estimates - they're not grand truths.
+If you have the necessary information,
+you can improve the model's accuracy by taking these factors into account, and
+pass this additional information to sloccount using its
+"--effort" and "--schedule" options (as discussed in
+<a href="#options">options</a>).
+<p>
+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "embedded", or "semidetached".
+Most software is "organic" (which is why it's the default).
+Here are simple definitions of these modes:
+<ul>
+<li>Organic: Relatively small software teams develop software in a highly
+familiar, in-house environment.
It has a generally stable development +environment, minimal need for innovative algorithms, and requirements can +be relaxed to avoid extensive rework.</li> +<li>Semidetached: This is an intermediate +step between organic and embedded. This is generally characterized by reduced +flexibility in the requirements.</li> +<li>Embedded: The project must operate +within tight (hard-to-meet) constraints, and requirements +and interface specifications are often non-negotiable. +The software will be embedded in a complex environment that the +software must deal with as-is.</li> +</ul> +By default, SLOCCount uses the basic COCOMO model in the organic mode. +For the basic COCOMO model, here are the critical factors for --effort and --schedule:<br> +<ul> +<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li> +<li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li> +<li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li> +</ul> +Thus, if you want to use SLOCCount but the project is actually semidetached, +you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35" +to get a more accurate estimate. +<br> +For more accurate estimates, you can use the intermediate COCOMO models. +For intermediate COCOMO, use the following figures:<br> +<ul> + <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li> + <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li> + <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li> +</ul> +The intermediate COCOMO values for schedule are exactly the same as the basic +COCOMO model; the starting effort values are not quite the same, as noted +in Boehm's book. 
However, in the intermediate COCOMO model, you don't +normally use the effort factors as-is, you use various corrective factors +(called cost drivers). To use these corrections, you consider +all the cost drivers, determine what best describes them, +and multiply their corrective values by the effort base factor. +The result is the final effort factor. +Here are the cost drivers (from Boehm's book, table 8-2 and 8-3): + +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + <tbody> + <tr> + <th rowspan="1" colspan="2">Cost Drivers + </th> + <th rowspan="1" colspan="6">Ratings + </th> + </tr> + <tr> + <th>ID + </th> + <th>Driver Name + </th> + <th>Very Low + </th> + <th>Low + </th> + <th>Nominal + </th> + <th>High + </th> + <th>Very High + </th> + <th>Extra High + </th> + </tr> + <tr> + <td>RELY + </td> + <td>Required software reliability + </td> + <td>0.75 (effect is slight inconvenience) + </td> + <td>0.88 (easily recovered losses) + </td> + <td>1.00 (recoverable losses) + </td> + <td>1.15 (high financial loss) + </td> + <td>1.40 (risk to human life) + </td> + <td> + </td> + </tr> + <tr> + <td>DATA + </td> + <td>Database size + </td> + <td> + </td> + <td>0.94 (database bytes/SLOC < 10) + </td> + <td>1.00 (D/S between 10 and 100) + </td> + <td>1.08 (D/S between 100 and 1000) + </td> + <td>1.16 (D/S > 1000) + </td> + <td> + </td> + </tr> + <tr> + <td>CPLX + </td> + <td>Product complexity + </td> + <td>0.70 (mostly straightline code, simple arrays, simple expressions) + </td> + <td>0.85 + </td> + <td>1.00 + </td> + <td>1.15 + </td> + <td>1.30 + </td> + <td>1.65 (microcode, multiple resource scheduling, device timing dependent coding) + </td> + </tr> + <tr> + <td>TIME + </td> + <td>Execution time constraint + </td> + <td> + </td> + <td> + </td> + <td>1.00 (<50% use of available execution time) + </td> + <td>1.11 (70% use) + </td> + <td>1.30 (85% use) + </td> + <td>1.66 (95% use) + </td> + </tr> + <tr> + <td>STOR + </td> + <td>Main storage constraint + 
</td> + <td> + </td> + <td> + </td> + <td>1.00 (<50% use of available storage)</td> + <td>1.06 (70% use) + </td> + <td>1.21 (85% use) + </td> + <td>1.56 (95% use) + </td> + </tr> + <tr> + <td>VIRT + </td> + <td>Virtual machine (HW and OS) volatility + </td> + <td> + </td> + <td>0.87 (major change every 12 months, minor every month) + </td> + <td>1.00 (major change every 6 months, minor every 2 weeks)</td> + <td>1.15 (major change every 2 months, minor changes every week) + </td> + <td>1.30 (major changes every 2 weeks, minor changes every 2 days) + </td> + <td> + </td> + </tr> + <tr> + <td>TURN + </td> + <td>Computer turnaround time + </td> + <td> + </td> + <td>0.87 (interactive) + </td> + <td>1.00 (average turnaround < 4 hours) + </td> + <td>1.07 + </td> + <td>1.15 + </td> + <td> + </td> + </tr> + <tr> + <td>ACAP + </td> + <td>Analyst capability + </td> + <td>1.46 (15th percentile) + </td> + <td>1.19 (35th percentile) + </td> + <td>1.00 (55th percentile) + </td> + <td>0.86 (75th percentile) + </td> + <td>0.71 (90th percentile) + </td> + <td> + </td> + </tr> + <tr> + <td>AEXP + </td> + <td>Applications experience + </td> + <td>1.29 (<= 4 months experience) + </td> + <td>1.13 (1 year) + </td> + <td>1.00 (3 years) + </td> + <td>0.91 (6 years) + </td> + <td>0.82 (12 years) + </td> + <td> + </td> + </tr> + <tr> + <td>PCAP + </td> + <td>Programmer capability + </td> + <td>1.42 (15th percentile) + </td> + <td>1.17 (35th percentile) + </td> + <td>1.00 (55th percentile) + </td> + <td>0.86 (75th percentile) + </td> + <td>0.70 (90th percentile) + </td> + <td> + </td> + </tr> + <tr> + <td>VEXP + </td> + <td>Virtual machine experience + </td> + <td>1.21 (<= 1 month experience) + </td> + <td>1.10 (4 months) + </td> + <td>1.00 (1 year) + </td> + <td>0.90 (3 years) + </td> + <td> + </td> + <td> + </td> + </tr> + <tr> + <td>LEXP + </td> + <td>Programming language experience + </td> + <td>1.14 (<= 1 month experience) + </td> + <td>1.07 (4 months) + </td> + <td>1.00 (1 year) + </td> 
+ <td>0.95 (3 years) + </td> + <td> + </td> + <td> + </td> + </tr> + <tr> + <td>MODP + </td> + <td>Use of "modern" programming practices (e.g. structured programming) + </td> + <td>1.24 (No use) + </td> + <td>1.10 + </td> + <td>1.00 (some use) + </td> + <td>0.91 + </td> + <td>0.82 (routine use) + </td> + <td> + </td> + </tr> + <tr> + <td>TOOL + </td> + <td>Use of software tools + </td> + <td>1.24 + </td> + <td>1.10 + </td> + <td>1.00 (basic tools) + </td> + <td>0.91 (test tools) + </td> + <td>0.83 (requirements, design, management, documentation tools) + </td> + <td> + </td> + </tr> + <tr> + <td>SCED + </td> + <td>Required development schedule + </td> + <td>1.23 (75% of nominal) + </td> + <td>1.08 (85% of nominal) + </td> + <td>1.00 (nominal) + </td> + <td>1.04 (130% of nominal) + </td> + <td>1.10 (160% of nominal) + </td> + <td> + </td> + </tr> + </tbody> +</table> +<br> +<br> +<br> +So, once all of the factors have been multiplied together, you can +then use the "--effort" flag to set more accurate factors and exponents. +Note that some factors will probably not be "nominal" simply because +times have changed since COCOMO was originally developed, so a few regions +that were desirable have become more common today. +For example, +for many software projects of today, virtual machine volatility tends to +be low, and the +use of "modern" programming practices (structured programming, +object-oriented programming, abstract data types, etc.) tends to be high. +COCOMO automatically handles these differences. +<p> +For example, imagine that you're examining a fairly simple application that +meets the "organic" requirements. Organic projects have a base factor +of 2.3 and exponents of 1.05, as noted above. +We then examine all the factors to determine a corrected base factor. 
+For this example, imagine +that we determine the values of these cost drivers are as follows:<br> +<br> +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + + <tbody> + <tr> + <td rowspan="1" colspan="2">Cost Drivers<br> + </td> + <td rowspan="1" colspan="2">Ratings<br> + </td> + </tr> + <tr> + <td>ID<br> + </td> + <td>Driver Name<br> + </td> + <td>Rating<br> + </td> + <td>Multiplier<br> + </td> + </tr> + <tr> + <td>RELY<br> + </td> + <td>Required software reliability<br> + </td> + <td>Low - easily recovered losses<br> + </td> + <td>0.88<br> + </td> + </tr> + <tr> + <td>DATA<br> + </td> + <td>Database size<br> + </td> + <td>Low<br> + </td> + <td>0.94<br> + </td> + </tr> + <tr> + <td>CPLX<br> + </td> + <td>Product complexity<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>TIME<br> + </td> + <td>Execution time constraint<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>STOR<br> + </td> + <td>Main storage constraint<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>VIRT<br> + </td> + <td>Virtual machine (HW and OS) volatility<br> + </td> + <td>Low (major change every 12 months, minor every month)<br> + </td> + <td>0.87<br> + </td> + </tr> + <tr> + <td>TURN<br> + </td> + <td>Computer turnaround time<br> + </td> + <td>Nominal<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>ACAP<br> + </td> + <td>Analyst capability<br> + </td> + <td>Nominal (55th percentile)<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>AEXP<br> + </td> + <td>Applications experience<br> + </td> + <td>Nominal (3 years)<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>PCAP<br> + </td> + <td>Programmer capability<br> + </td> + <td>Nominal (55th percentile)<br> + </td> + <td>1.00<br> + </td> + </tr> + <tr> + <td>VEXP<br> + </td> + <td>Virtual machine experience<br> + </td> + <td>High (3 years)<br> + </td> + <td>0.90<br> + </td> + </tr> + <tr> + <td>LEXP<br> + </td> + 
<td>Programming language experience<br>
+      </td>
+      <td>High (3 years)<br>
+      </td>
+      <td>0.95<br>
+      </td>
+    </tr>
+    <tr>
+      <td>MODP<br>
+      </td>
+      <td>Use of "modern" programming practices (e.g. structured programming)<br>
+      </td>
+      <td>High (Routine use)<br>
+      </td>
+      <td>0.82<br>
+      </td>
+    </tr>
+    <tr>
+      <td>TOOL<br>
+      </td>
+      <td>Use of software tools<br>
+      </td>
+      <td>Nominal (basic tools)<br>
+      </td>
+      <td>1.00<br>
+      </td>
+    </tr>
+    <tr>
+      <td>SCED<br>
+      </td>
+      <td>Required development schedule<br>
+      </td>
+      <td>Nominal<br>
+      </td>
+      <td>1.00<br>
+      </td>
+    </tr>
+
+
+
+
+  </tbody>
+</table>
+<p>
+So, starting with the base factor (2.3 in this case) and then multiplying
+the driver values together, in this example we compute:<br>
+<pre>2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1</pre>
+For this
+example, the final factor for the effort calculation is 1.1605. You would then
+invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor
+and exponent for the effort estimation.
+You don't need to use "--schedule" to set the factors when you're using
+the organic model, because in SLOCCount
+the default values are the values for the organic model.
+You can set scheduling parameters manually
+anyway by setting "--schedule 2.5 0.38".
+You <i>do</i> need to use the --schedule option for
+embedded and semidetached projects, because those modes have different
+schedule parameters. The final command would be:<br>
+<br>
+sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project<br>
+<p>
+The detailed COCOMO model requires breaking information down further.
+<p>
+For more information about the original COCOMO model, including the detailed
+COCOMO model, see the book
+<i>Software Engineering Economics</i> by Barry Boehm. 
+<p> +You may be surprised by the high cost estimates, but remember, +these include design, coding, testing (including +integration and testing), documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high cost +estimates. +<p> +If you want to know a subset of this cost, you'll need to isolate +just those figures that you're trying to measure. +For example, let's say you want to find the money a programmer would receive +to do just the coding of the units of the program +(ignoring wrap rate, design, testing, integration, and so on). +According to Boehm's book (page 65, table 5-2), +the percentage varies by product size. +For effort, code and unit test takes 42% for small (2 KSLOC), 40% for +intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large +(128 KSLOC). +Sadly, Boehm doesn't separate coding from unit test; perhaps +50% of the time is spent in unit test in traditional proprietary +development (including fixing bugs found from unit test). +If you want to know the income to the programmer (instead of cost to +the company), you'll also want to remove the wrap rate. +Thus, a programmer's income to <i>only</i> write the code for a +small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4)) +of the default figure computed by SLOCCount. +<p> +In other words, less than one-tenth of the cost as computed by SLOCCount +is what actually would be made by a programmer for a small program for +just the coding task. +Note that a proprietary commercial company that bid using +this lower figure would rapidly go out of business, since this figure +ignores the many other costs they have to incur to actually develop +working products. +Programs don't arrive out of thin air; someone needs to determine what +the requirements are, how to design it, and perform at least +some testing of it. 
+<p>
+There's another later estimation model for effort and schedule
+called "COCOMO II", but COCOMO II requires logical SLOC instead
+of physical SLOC.
+SLOCCount doesn't currently measure logical SLOC, so
+SLOCCount doesn't currently use COCOMO II.
+Contributions of code to compute logical SLOC and then optionally
+use COCOMO II will be gratefully accepted.
+
+<h1><a name="specific-files">Counting Specific Files</a></h1>
+<p>
+If you want to count a specific subset, you can use the "--details"
+option to list individual files, pipe this into "grep" to select the
+files you're interested in, and pipe the result to
+my tool "print_sum" (which reads lines beginning with numbers, and
+returns the total of those numbers).
+If you've already done the analysis, an example would be:
+<pre>
+  sloccount --cached --details | grep "/some/subdirectory/" | print_sum
+</pre>
+<p>
+If you just want to count specific files, and you know what language
+they're in, you
+can just invoke the basic SLOC counters directly.
+By convention the simple counters are named "LANGUAGE_count",
+and they take on the command line a list of the
+source files to count.
+Here are some examples:
+<pre>
+  c_count *.c *.cpp *.h  # Count C and C++ in current directory.
+  asm_count *.S  # Count assembly.
+</pre>
+All the counter programs (*_count) accept a "-f FILENAME" option, where FILENAME
+is a file containing the names of all the source files to count
+(one file per text line). If FILENAME is "-", the
+ list of file names is taken from the standard input.
+The "c_count" program handles both C and C++ (but not objective-C;
+for that use objc_count).
+The available counters are
+ada_count,
+asm_count,
+awk_count,
+c_count,
+csh_count,
+exp_count,
+fortran_count,
+java_count,
+lex_count,
+lisp_count,
+ml_count,
+modula3_count,
+objc_count,
+pascal_count,
+perl_count,
+python_count,
+sed_count,
+sh_count,
+sql_count, and
+tcl_count. 
+<p>
+There is also "generic_count", which takes as its first parameter
+the ``comment string'', followed by a list of files.
+The comment string begins a comment that ends at the end of the line.
+Sometimes, if you have source for a language not listed, generic_count
+will be sufficient.
+<p>
+The basic SLOC counters will send output to standard out, one line per file
+(showing the SLOC count and filename).
+The assembly counter shows some additional information about each file.
+The basic SLOC counters always complete their output with a line
+saying "Total:", followed by a line with the
+total SLOC count.
+
+<h1><a name="errors">Countering Problems and Handling Errors</a></h1>
+
+If you're analyzing unfamiliar code, there's always the possibility
+that it uses languages not processed by SLOCCount.
+To counter this, after running SLOCCount, run the following program:
+<pre>
+  count_unknown_ext
+</pre>
+This will look at the resulting data (in its default data directory
+location, ~/.slocdata) and report a sorted list of the file extensions
+for uncategorized ("unknown") files.
+The list will show every file extension and how many files had that
+extension, and is sorted by most common first.
+It's not a problem if an "unknown" type isn't a source code file, but
+if there are a significant number of source files in this category,
+you'll need to change SLOCCount to get an accurate result.
+
+<p>
+One error report that you may see is:
+<pre>
+  c_count ERROR - terminated in string in (filename)
+</pre>
+
+The cause of this is that c_count (the counter for C-like languages)
+keeps track of whether or not it's in a string, and when the counter
+reached the end of the file, it still thought it was in a string.
+
+<p>
+Note that c_count really does have to keep track of whether or
+not it's in a string. 
+For example, this is three lines of code, not two, because the +``comment'' is actually in string data: + +<pre> + a = "hello + /* this is not a comment */ + bye"; +</pre> +<p> +Usually this error means you have code that won't compile +given certain #define settings. E.G., XFree86 has a line of code that's +actually wrong (it has a string that's not terminated), but people +don't notice because the #define to enable it is not usually set. +Legitimate code can trigger this message, but code that triggers +this message is horrendously formatted and is begging for problems. + +<p> +In either case, the best way to handle the situation +is to modify the source code (slightly) so that the code's intent is clear +(by making sure that double-quotes balance). +If it's your own code, you definitely should fix this anyway. +You need to look at the double-quote (") characters. One approach is to +just grep for double-quote, and look at every line for text that isn't +terminated, e.g., printf("hello %s, myname); + +<p> +SLOCcount reports warnings when an unusually +large number of duplicate files are reported. +A large number of duplicates <i>may</i> suggest that you're counting +two different versions of the same program as though they were +independently developed. +You may want to cd into the data directory (usually ~/.slocdata), cd into +the child directories corresponding to each component, and then look +at their dup_list.dat files, which list the filenames that appeared +to be duplicated (and what they duplicate with). + + +<h1><a name="adding">Adding Support for New Languages</a></h1> +SLOCcount handles many languages, but if it doesn't support one you need, +you'll need to give the language a standard (lowercase ASCII) name, +then modify SLOCcount to (1) detect and (2) count code in that language. + +<ol> +<li> +To detect a new language, you'll need to modify the program break_filelist. 
+If the filename extension is reliable, you can modify the array +%file_extensions, which maps various filename extensions into languages. +If your needs are more complex, you'll need to modify the code +(typically in functions get_file_type or file_type_from_contents) +so that the correct file type is determined. +For example, if a file with a given filename extension is only +<i>sometimes</i> that type, you'll need to write code to examine the +file contents. +<li> +You'll need to create a SLOC counter for that language type. +It must have the name XYZ_count, where XYZ is the standard name for the +language. +<p> +For some languages, you may be able to use the ``generic_count'' program +to implement your counter - generic_count takes as its first argument +the pattern which +identifies comment begins (which continue until the end of the line); +the other arguments are the files to count. +Thus, the LISP counter looks like this: +<pre> + #!/bin/sh + generic_count ';' $@ +</pre> +The generic_count program won't work correctly if there are multiline comments +(e.g., C) or multiline string constants. +If your language is identical to C/C++'s syntax in terms of +string constant definitions and commenting syntax +(using // or /* .. */), then you can use the c_count program - in this case, +modify compute_sloc_lang so that the c_count program is used. +<p> +Otherwise, you'll have to devise your own counting program. +The program must generate files with the same format, e.g., +for every filename passed as an argument, it needs to return separate lines, +where each line presents the SLOC +for that file, a space, and the filename. +(Note: the assembly language counter produces a slightly different format.) +After that, print "Total:" on its own line, and the actual SLOC total +on the following (last) line. +</ol> + +<h1><a name="advanced">Advanced SLOCCount Use</a></h1> +For most people, the previous information is enough. 
+However, if you're measuring a large set of programs, or have unusual needs,
+those steps may not give you enough control.
+In that case, you may need to create your own "data directory"
+by hand and separately run the SLOCCount tools.
+Basically, "sloccount" (note the lower case) is the name for
+a high-level tool which invokes many other tools; this entire
+suite is named SLOCCount (note the mixed case).
+The next section will describe how to invoke the various tools "manually"
+so you can gain explicit control over the measuring process when
+the defaults are not to your liking, along with various suggestions
+for how to handle truly huge sets of data.
+<p>
+Here's how to manually create a "data directory" to hold
+intermediate results, and how to invoke each tool in sequence
+(with discussion of options):
+<ol>
+<li>Set your PATH to include the SLOCCount "bin directory", as discussed above.
+<li>Make an empty "data directory"
+(where all intermediate results will be stored);
+you can pick any name and location you like for this directory.
+Here, I'll use the name "data":
+<pre>
+  mkdir ~/data
+</pre>
+<li>Change your current directory to this "data directory":
+<pre>
+  cd ~/data
+</pre>
+The rest of these instructions assume that your current directory
+is the data directory.
+You can set up many different data directories if you wish, to analyze
+different source programs or analyze the programs in different ways;
+just "cd" to the one you want to work with.
+<li>(Optional) Some of the later steps will produce
+a lot of output while they're running.
+If you want to capture this information into a file, use the standard
+"script" command to do so.
+For example, "script run1" will save the output of everything you do into
+file "run1" (until you type control-D to stop saving the information).
+Don't forget that you're creating such a file, or it will become VERY large,
+and in particular don't type any passwords into such a session. 
+You can store the script in the data directory, or create a subdirectory +for such results - any data directory subdirectory that doesn't have the +special file "filelist" is not a "data directory child" and is thus +ignored by the later SLOCCount analysis routines. +<li>Now initialize the "data directory". + In particular, initialization will create the "data directory children", + a set of subdirectories equivalent to the source code directory's + top directories. Each of these data directory children (subdirectories) + will contain a file named "filelist", which + lists all filenames in the corresponding source code directory. + These data directory children + will also eventually contain intermediate results + of analysis, which you can check for validity + (also, having a cache of these values speeds later analysis steps). + <p> + You use the "make_filelists" command to initialize a data directory. + For example, if your source code is in /usr/src/redhat/BUILD, run: +<pre> + make_filelists /usr/src/redhat/BUILD/* +</pre> +<p> + Internally, make_filelists uses "find" to create the list of files, and + by default it ignores all symbolic links. However, you may need to + follow symbolic links; if you do, give make_filelists the + "--follow" option (which will use find's "-follow" option). + Here are make_filelists' options: +<pre> + --follow Follow symbolic links + --datadir D Use this data directory + --skip S Skip basenames named S + --prefix P When creating children, prepend P to their name. + -- No more options +</pre> +<p> + Although you don't normally need to do so, if you want certain files to + not be counted at all in your analysis, you can remove + data directory children or edit the "filelist" files to do so. + There's no need to remove files which aren't source code files normally; + this is handled automatically by the next step. 
+<p> + If you don't have a single source code directory where the subdirectories + represent the major components you want to count separately, you can + still use the tool but it's more work. + One solution is to create a "shadow" directory with the structure + you wish the program had, using symbolic links (you must use "--follow" + for this to work). + You can also just invoke make_filelists multiple times, with parameters + listing the various top-level directories you wish to include. + Note that the basenames of the directories must be unique. +<p> + If there are so many directories (e.g., a massive number of projects) + that the command line is too long, + you can run make_filelists multiple times in the same + directory with different arguments to create them. + You may find "find" and/or "xargs" helpful in doing this automatically. + For example, here's how to do the same thing using "find": +<pre> + find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \ + -exec make_filelists {} \; +</pre> +<li>Categorize each file. +This means that we must determine which +files contain source code (eliminating auto-generated and duplicate files), +and of those files which language each file contains. +The result will be a set of files in each subdirectory of the data directory, +where each file represents a category (e.g., a language). +<pre> + break_filelist * +</pre> + At this point you might want to examine the data directory subdirectories + to ensure that "break_filelist" has correctly determined the types of + the various files. + In particular, the "unknown" category may have source files in a language + SLOCCount doesn't know about. + If the heuristics got some categorization wrong, you can modify the + break_filelist program and re-run break_filelist. +<p> + By default break_filelist removes duplicates, doesn't count + automatically generated files as normal source code files, and + only gives some feedback. 
You can change these defaults with the + following options: +<pre> + --duplicates Count all duplicate files as normal files + --crossdups Count duplicate files if they're in different data directory + children (i.e., in different "filelists") + --autogen Count automatically generated files + --verbose Present more verbose status information while processing. +</pre> +<p> + Duplicate control in particular is an issue; you probably don't want + duplicates counted, so that's the default. + Duplicate files are detected by determining if their MD5 checksums + are identical; the "first" duplicate encountered is the only one kept. + Normally, since shells sort directory names, this means that the + file in the alphabetically first child directory is the one counted. + You can change this around by listing directories in the sort order you + wish followed by "*"; if the same data directory child + is requested for analysis more + than once in a given execution, it's skipped after the first time. + So, if you want any duplicate files with child directory "glibc" to + count as part of "glibc", then you should provide the data directory children + list as "glibc *". +<p> + Beware of choosing something other than "*" as the parameter here, + unless you use the "--duplicates" or "--crossdups" options. + The "*" represents the list of data directory children to examine. + Since break_filelist skips duplicate files identified + in a particular run, if you run break_filelist + on only certain children, some duplicate files won't be detected. + If you're allowing duplicates (via "--duplicates" or + "--crossdups"), then this isn't a problem. + Or, you can use the ``--duplistfile'' option to store and retrieve + hashes of files, so that additional files can be handled. +<p> + If there are so many directories that the command line is too long, + you can run break_filelist multiple times and give it + a subset of the directories each time. 
+ You'll need to use one of the duplicate control options to do this. + I would suggest using "--crossdups", which + means that duplicates inside a child will only be counted once, + eliminating at least some of the problems of duplicates. + Here's the equivalent of "break_filelist *" when there are a large + number of subdirectories: +<pre> + find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \; +</pre> + Indeed, for all of the later commands where "*" is listed as the parameter + in these instructions + (for the list of data directory children), just run the above "find" + command and replace "break_filelist --crossdups" with the command shown. +<li>(Optional) +If you're not very familiar with the program you're analyzing, you +might not be sure that "break_filelist" has correctly identified +all of the files. +In particular, the system might be using an unexpected +programming language or extension not handled by SLOCCount. +If this is your circumstance, you can just run the command: +<pre> + count_unknown_ext +</pre> +(note that this command is unusual - it doesn't take any arguments, +since it's hard to imagine a case where you wouldn't want every +directory examined). +Unlike the other commands discussed, this one specifically looks at +${HOME}/.slocdata. +This command presents a list of extensions which are unknown to break_filelist, +with the most common ones listed first. +The output format is a name, followed by the number of instances; +the name begins with a "." if it's an extension, or, if there's no +extension, it begins with "/" followed by the base name of the file. +break_filelist already knows about common extensions such as ".gif" and ".png", +as well as common filenames like "README". +You can also view the contents of each of the data directory children's +files to see if break_filelist has correctly categorized the files. 
+<li>Now compute SLOC and filecounts for each language; you can compute for all
+ languages at once by calling:
+<pre>
+  compute_all *
+</pre>
+If you only want to compute SLOC for a specific language,
+you can invoke compute_sloc_lang, which takes as its first parameter
+the SLOCCount name of the language ("ansic" for C, "cpp" for C++,
+"ada" for Ada, "asm" for assembly), followed by the list
+of data directory children.
+Note that these names are a change from version 1.0, which
+called the master program "compute_all",
+and had "compute_*" programs for each language.
+<p>
+Notice the "*"; you can replace the "*" with just the list of
+data directory children (subdirectories) to compute, if you wish.
+Indeed, you'll notice that nearly all of the following commands take a
+list of data directory children as arguments; when you want all of them, use
+"*" (as shown in these instructions), otherwise, list the ones you want.
+<p>
+When you run compute_all or compute_sloc_lang, each data directory
+child (subdirectory)
+is consulted in turn for a list of the relevant files, and the
+SLOC results are placed in that data directory child.
+In each child,
+the file "LANGUAGE-outfile.dat" lists the information from the
+basic SLOC counters.
+That is, the outfile lists the SLOC and filename
+(the assembly outfile has additional information), and ends with
+a line saying "Total:" followed by a line showing the total SLOC of
+that language in that data directory child.
+The file "all-physical.sloc" has the final total SLOC for every language
+in that child directory (i.e., it's the last line of the outfile).
+<li>(Optional) If you want, you can also use USC's CodeCount.
+I've had trouble with these programs, so I don't do this normally.
+However, you're welcome to try - they support logical SLOC measures
+as well as physical ones (though not for most of the languages
+supported by SLOCCount). 
+Sadly, they don't seem to compile in gcc without a lot of help, they
+used fixed-width buffers that make me nervous, and I found a
+number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in
+C code, a format that's legal and used often in the Linux kernel).
+If you want to do this,
+modify the files compute_c_usc and compute_java_usc so they point to the
+right directories, and type:
+<pre>
+  compute_c_usc *
+</pre>
+<li>Now you can analyze the results. The main tool for
+presenting SLOCCount results is "get_sloc", e.g.,:
+<pre>
+  get_sloc * | less
+</pre>
+The get_sloc program takes many options, including:
+<pre>
+ --filecount Display number of files instead of SLOC (SLOC is the default)
+ --wide Use "wide" format instead (tab-separated columns)
+ --nobreak Don't insert breaks in long lines
+ --sort X Sort by "X", where "X" is the name of a language
+ ("ansic", "cpp", "fortran", etc.), or "total".
+ By default, get_sloc sorts by "total".
+ --nosort Don't sort - just present results in order of directory
+ listing given.
+ --showother Show non-language totals (e.g., # duplicate files).
+ --oneprogram When computing effort, assume that all files are part of
+ a single program. By default, each subdirectory specified
+ is assumed to be a separate, independently-developed program.
+ --noheader Don't show the header
+ --nofooter Don't show the footer (the per-language values and totals)
+</pre>
+<p>
+Note that unlike the "sloccount" tool, get_sloc requires the current
+directory to be the data directory.
+<p>
+If you're displaying SLOC, get_sloc will also estimate the time it
+would take to develop the software using COCOMO (using its "basic" model).
+By default, this figure assumes that each of the major subdirectories was
+developed independently of the others;
+you can use "--oneprogram" to make the assumption that all files are
+part of the same program. 
+The COCOMO model makes many other assumptions; see the paper at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>
+for more information.
+<p>
+If you need to do more analysis, you might want to use the "--wide"
+option and send the data to another tool such as a spreadsheet
+(e.g., gnumeric) or RDBMS (e.g., PostgreSQL).
+Using the "--wide" option creates tab-separated data, which is easier to
+import.
+You may also want to use the "--noheader" and/or "--nofooter" options to
+simplify porting the data to another tool.
+<p>
+Note that in version 1.0, "get_sloc" was called "get_data".
+<p>
+If you have so many data directory children that you can't use "*"
+on the command line, get_sloc won't be as helpful.
+Feel free to patch get_sloc to add this capability (as another option),
+or use get_sloc_details (discussed next) to feed the data into another tool.
+<li>(Optional) If you just can't get the information you need from get_sloc,
+then you can get the raw results of everything and process the data
+yourself.
+I have a little tool to do this, called get_sloc_details.
+You invoke it in a similar manner:
+<pre>
+get_sloc_details *
+</pre>
+</ol>
+
+<p>
+<h1><a name="designer-notes">Designer's Notes</a></h1>
+<p>
+Here are some ``designer's notes'' on how SLOCCount works,
+including what it can handle.
+<p>
+The program break_filelist
+has categories for each programming language it knows about,
+plus the special categories ``not'' (not a source code file),
+``auto'' (an automatically-generated file and thus not to be counted),
+``zero'' (a zero-length file),
+``dup'' (a duplicate of another file as determined by an md5 checksum),
+and
+``unknown'' (a file which doesn't seem to be a source code file
+nor any of these other categories).
+It's a good idea to examine
+the ``unknown'' items later, checking the common extensions
+to ensure you have not missed any common types of code. 
+<p>
+The program break_filelist uses lots of heuristics to correctly
+categorize files.
+Here are a few notes about its heuristics:
+<ol>
+<li>
+break_filelist first checks for well-known extensions (such as .gif) that
+cannot be program files, and for a number of common generated filenames.
+<li>
+It then peeks at the first few lines for "#!" followed by a legal script
+name.
+Sometimes it looks further, for example, many Python programs
+invoke "env" and then use it to invoke python.
+<li>
+If that doesn't work, it uses the extension to try to determine the category.
+For a number of languages, the extension is not reliable, so for those
+languages it examines the file contents and uses a set of heuristics
+to determine if the file actually belongs to that category.
+<li>
+Detecting automatically generated files is not easy, and it's
+quite conceivable that it won't detect some automatically generated files.
+The first 15 lines are examined, to determine if any of them
+include at the beginning of the line (after spaces and
+possible comment markers) one of the following phrases (ignoring
+upper and lower case distinctions):
+``generated automatically'',
+``automatically generated'',
+``this is a generated file'',
+``generated with the (something) utility'',
+or ``do not edit''.
+<li>A number of filename conventions are used, too.
+For example,
+any ``configure'' file is presumed to be automatically generated if
+there's a ``configure.in'' file in the same directory.
+<li>
+To eliminate duplicates,
+the program keeps md5 checksums of each program file.
+Any given md5 checksum is only counted once.
+Build directories are processed alphabetically, so
+if the same file content is in both directories ``a'' and ``b'',
+it will be counted only once as being part of ``a'' unless you make
+other arrangements.
+Thus, some data directory children with names later in the alphabet may appear
+smaller than would make sense at first glance. 
+It is very difficult to eliminate ``almost identical'' files +(e.g., an older and newer version of the same code, included in two +separate packages), because +it is difficult to determine when two ``similar'' files are essentially +the same file. +Changes such as the use of pretty-printers and massive renaming of variables +could make small changes seem large, while the small files +might easily appear to be the ``same''. +Thus, files with different contents are simply considered different. +<li> +If all else fails, the file is placed in the ``unknown'' category for +later analysis. +</ol> +<p> +One complicating factor is that I wished to separate C, C++, and +Objective-C code, but a header file ending with +``.h'' or ``.hpp'' file could be any of these languages. +In theory, ``.hpp'' is only C++, but I found that in practice this isn't true. +I developed a number of heuristics to determine, for each file, +what language a given header belonged to. +For example, if a given directory has exactly one of these languages +(ignoring header files), +the header is assumed to belong to that category as well. +Similarly, if there is a body file (e.g., ".c") that has the same name +as the header file, then presumably the header file is of the same language. +Finally, a header file with the keyword ``class'' is almost certainly not a +C header file, but a C++ header file; otherwise it's assumed to +be a C file. +<p> +None of the SLOC counters fully parse the source code; they just examine +the code using simple text processing patterns to count the SLOC. +In practice, by handling a number of special cases this seems to be fine. +Here are some notes on some of the language counters; +the language name is followed by common extensions in parentheses +and the SLOCCount name of the language in brackets: +<ol> +<li>Ada (.ada, .ads, .adb) [ada]: Comments begin with "--". 
+<li>Assembly (.s, .S, .asm) [asm]: +Assembly languages vary greatly in the comment character they use, +so my counter had to handle this variance. +The assembly language counter (asm_count) +first examines the file to determine if +C-style ``/*'' comments and C preprocessor commands +(e.g., ``#include'') are used. +If both ``/*'' and ``*/'' are in the file, it's assumed that +C-style comments are being used +(since it is unlikely that <i>both</i> would be used +as something else, say as string data, in the same assembly language file). +Determining if a file used the C preprocessor was trickier, since +many assembly files do use ``#'' as a comment character and some +preprocessor directives are ordinary words that might be included +in a human comment. +The heuristic used is as follows: if #ifdef, #endif, or #include are used, the +C preprocessor is used; or if at least three lines have either #define or #else, +then the C preprocessor is used. +No doubt other heuristics are possible, but this at least seems to produce +reasonable results. +The program then determines what the comment character is by identifying +which punctuation mark (from a set of possible marks) +is the most common non-space initial character on a line +(ignoring ``/'' and ``#'' if C comments or preprocessor commands, +respectively, are used). +Once the comment character has been determined, and it's been determined +if C-style comments are allowed, the lines of code +are counted in the file. +<li>awk (.awk) [awk]: Comments begin with "#". +<li>C (.c) [ansic]: Both traditional C comments (/* .. */) and C++ +(//) comments are supported. +Technically, C doesn't support "//", but in practice many C programs use them. +The C counter understands multi-line strings, so +comment characters (/* .. */ and //) are treated as data inside strings. +Conversely, the counter knows that any double-quote characters inside a +comment does not begin a C/C++ string. 
+<li>C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for
+both C and C++.
+Note that break_filelist does try to separate C from C++ for purposes
+of accounting between them.
+<li>C# (.cs): The same counter is used as for C and C++.
+Note that there are no "header" filetypes in C#.
+<li>C shell (.csh) [csh]: Comments begin with "#".
+<li>COBOL (.cob, .cbl) [cobol]: SLOCCount
+detects if a "freeform" command has been given; until such a command is
+given, fixed format is assumed.
+In fixed format, comments have a "*" or "/" in column 7 or column 1;
+any line that's not a comment, and has a nonwhitespace character after column 7
+(the indicator area) is counted as a source line of code.
+In a freeform style, any line beginning with optional whitespace and
+then "*" or "/" is considered a comment; any noncomment line
+with a nonwhitespace character is counted as SLOC.
+<li>Expect (.exp) [exp]: Comments begin with "#".
+<li>Fortran (.f) [fortran]: Comment-only lines are lines
+where column 1 character = C, c, *, or !.
+Note that this is really only a Fortran-77 SLOC counter.
+<li>Haskell (.hs) [haskell]:
+This counter handles block comments {- .. -} and single line comments (--);
+pragmas {-# .. -} are counted as SLOC.
+This is a simplistic counter,
+and can be fooled by certain unlikely combinations of block comments
+and other syntax (line-ending comments or strings).
+In particular, "Hello {-" will be incorrectly interpreted as a
+comment block begin, and "{- -- -}" will be incorrectly interpreted as a
+comment block begin without an end.
+Note that .lhs (literate Haskell) is <i>not</i> supported; please
+preprocess .lhs files into .hs files before counting.
+See the
+<a href="http://www.haskell.org/onlinereport/literate.html">Haskell 98
+report section on literate Haskell</a> for more information.
+<li>Java (.java) [java]: Java is counted using the same counter as C and C++.
+<li>lex (.l) [lex]: Uses traditional C /* .. */ comments. 
+Note that this does not use the counter as C/C++ internally, since +it's quite legal in lex to have "//" (where it is NOT a comment). +<li>LISP (.el, .scm, .lsp, .jl) [lisp]: Comments begin with ";". +<li>ML (.ml, .mli) [ml]: Comments are enclosed in (* .. *). +<li>Modula3 (.m3, .i3) [modula3]: Comments are enclosed in (* .. *). +<li>Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments. +<li>Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {} +or (*..*). This counter has known weaknesses; see the BUGS section of +the manual page for more information. +<li>Perl (.pl, .pm, .perl) [perl]: +Comments begin with "#". +Perl permits in-line ``perlpod'' documents, ``here'' documents, and an +__END__ marker that complicate code-counting. +Perlpod documents are essentially comments, but a ``here'' document +may include text to generate them (in which case the perlpod document +is data and should be counted). +The __END__ marker indicates the end of the file from Perl's +viewpoint, even if there's more text afterwards. +<li>PHP (.php, .php[3456], .inc) [php]: +Code is counted as PHP code if it has a .php file extension; +it's also counted if it has an .inc extension and looks like PHP code. +SLOCCount does <b>not</b> count PHP code embedded in HTML files normally, +though its lower-level routines can do so if you want to +(use php_count to do this). +Any of the various ways to begin PHP code can be used +(<? .. ?>, +<?php .. ?>, +<script language="php"> .. </script>, +or even <% .. %>). +Any of the PHP comment formats (C, C++, and shell) can be used, and +any string constant formats ("here document", double quote, and single +quote) can be used as well. +<li>Python (.py) [python]: +Comments begin with "#". +Python has a convention that, at the beginning of a definition +(e.g., of a function, method, or class), an unassigned string can be +placed to describe what's being defined. 
Since this is essentially
+a comment (though it doesn't syntactically look like one), the counter
+avoids counting such strings, which may have multiple lines.
+To handle this,
+strings which started the beginning of a line were not counted.
+Python also has the ``triple quote'' operator, permitting multiline
+strings; these needed to be handled specially.
+Triple quote strings are normally considered as data, regardless of
+content, unless they were used as a comment about a definition.
+<li>Ruby (.rb) [ruby]: Comments begin with "#".
+<li>sed (.sed) [sed]: Comments begin with "#".
+Note that these are "sed-only" files; many uses of sed are embedded in
+shell scripts (and are categorized as shell scripts in those cases).
+<li>shell (.sh) [sh]: Comments begin with "#".
+Note that I classify ksh, bash, and the original Bourne shell sh together,
+because they have very similar syntaxes.
+For example, in all of these shells,
+setting a variable is expressed as "varname=value",
+while C shells use "set varname=value".
+<li>TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#".
+<li>Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++.
+</ol>
+<p>
+Much of the code is written in Perl, since it's primarily a text processing
+problem and Perl is good at that.
+Many short scripts are Bourne shell scripts (it's good at
+short scripts for calling other programs), and the
+basic C/C++ SLOC counter is written in C for speed.
+<p>
+I originally named it "SLOC-Count", but I found that some web search
+engines (notably Google) treated that as two words.
+By naming it "SLOCCount", it's easier to find by those who know
+the name of the program.
+<p>
+SLOCCount only counts physical SLOC, not logical SLOC.
+Logical SLOC counting requires much more code to implement,
+and I needed to cover a large number of programming languages. 
+
+
+<p>
+<h1><a name="sloc-definition">Definition of SLOC</a></h1>
+<p>
+This tool measures ``physical SLOC.''
+Physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+To make this concrete, here's an example of a simple C program
+(it strips ANSI C comments out).
+On the left side is the running SLOC total, where "-" indicates a line
+that is not considered a physical "source line of code":
+<pre>
+ 1 #include <stdio.h>
+ -
+ - /* peek at the next character in stdin, but don't get it */
+ 2 int peek() {
+ 3 int c = getchar();
+ 4 ungetc(c, stdin);
+ 5 return c;
+ 6 }
+ -
+ 7 main() {
+ 8 int c;
+ 9 int incomment = 0; /* 1 = we are inside a comment */
+ -
+10 while ( (c = getchar()) != EOF) {
+11 if (!incomment) {
+12 if ((c == '/') && (peek() == '*')) {incomment=1;}
+13 } else {
+14 if ((c == '*') && (peek() == '/')) {
+15 c= getchar(); c=getchar(); incomment=0;
+16 }
+17 }
+18 if ((c != EOF) && !incomment) {putchar(c);}
+19 }
+20 }
+</pre>
+<p>
+<a href="http://www.sei.cmu.edu/publications/documents/92.reports/92.tr.020.html">Robert E. Park et al.'s
+<i>Software Size Measurement:
+A Framework for Counting Source Statements</i></a>
+(Technical Report CMU/SEI-92-TR-20)
+presents a set of issues to be decided when trying to count code.
+The paper's abstract states:
+<blockquote><i>
+This report presents guidelines for defining, recording, and reporting
+two frequently used measures of software size&mdash;physical source lines
+and logical source statements. 
+We propose a general framework for constructing size
+definitions and use it to derive operational methods for
+reducing misunderstandings in measurement results.
+</i></blockquote>
+<p>
+Using Park's framework, here is how physical lines of code are counted:
+<ol>
+<li>Statement Type: I used a physical line-of-code as my basis.
+I included executable statements, declarations
+(e.g., data structure definitions), and compiler directives
+(e.g., preprocessor commands such as #define).
+I excluded all comments and blank lines.
+<li>How Produced:
+I included all programmed code, including any files that had been modified.
+I excluded code generated with source code generators, converted with
+automatic translators, and those copied or reused without change.
+If a file was in the source package, I included it; if the file had
+been removed from a source package (including via a patch), I did
+not include it.
+<li>Origin: You select the files (and thus their origin).
+<li>Usage: You select the files (and thus their usage), e.g.,
+you decide if you're going to
+include additional applications able to run on the system but not
+included with the system.
+<li>Delivery: You'll decide what code to include, but of course,
+if you don't have the code you can't count it.
+<li>Functionality: This tool will include both operative and inoperative code
+if they're mixed together.
+An example of intentionally ``inoperative'' code is
+code turned off by #ifdef commands; since it could be
+turned on for special purposes, it made sense to count it.
+An example of unintentionally ``inoperative'' code is dead or unused code.
+<li>Replications:
+Normally, duplicate files are ignored, unless you use
+the "--duplicates" or "--crossdups" option.
+The tool will count
+``physical replicates of master statements stored in
+the master code''. 
+This is simply code cut and pasted from one place to another to reuse code; +it's hard to tell where this happens, and since it has to be maintained +separately, it's fair to include this in the measure. +I excluded copies inserted, instantiated, or expanded when compiling +or linking, and I excluded postproduction replicates +(e.g., reparameterized systems). +<li>Development Status: You'll decide what code +should be included (and thus the development status of the code that +you'll accept). +<li>Languages: You can see the language list above. +<li>Clarifications: I included all statement types. +This included nulls, continues, no-ops, lone semicolons, +statements that instantiate generics, +lone curly braces ({ and }), and labels by themselves. +</ol> +<p> +Thus, SLOCCount generally follows Park's ``basic definition'', +but with the following exceptions depending on how you use it: +<ol> +<li>How Produced: +By default, this tool excludes duplicate files and +code generated with source code generators. +After all, the COCOMO model states that the +only code that should be counted is code +``produced by project personnel'', whereas these kinds of files are +instead the output of ``preprocessors and compilers.'' +If code is always maintained as the input to a code generator, and then +the code generator is re-run, it's only the code generator input's size that +validly measures the size of what is maintained. +Note that while I attempted to exclude generated code, this exclusion +is based on heuristics which may have missed some cases. +If you want to count duplicates, use the +"--autogen", "--duplicates", and/or "--crossdups" options. +If you want to count automatically generated files, pass +the "--autogen" option mentioned above. +<li>Origin: +You can choose what source code you'll measure. +Normally physical SLOC doesn't include an unmodified +``vendor-supplied language support library'' nor a +``vendor-supplied system or utility''. 
+However, if this is what you are measuring, then you need to include it. +If you include such code, your set will be different +than the usual ``basic definition.'' +<li>Functionality: I included counts of unintentionally inoperative code +(e.g., dead or unused code). +It is very difficult to automatically detect such code +in general for many languages. +For example, a program not directly invoked by anything else nor +installed by the installer is much more likely to be a test program, +which you may want to include in the count (you often would include it +if you're estimating effort). +Clearly, discerning human ``intent'' is hard to automate. +</ol> +<p> +Otherwise, this counter follows Park's +``basic definition'' of a physical line of code, even down to Park's +language-specific definitions where Park defined them for a language. + + +<p> +<h1><a name="miscellaneous">Miscellaneous Notes</a></h1> +<p> +There are other undocumented analysis tools in the original tar file. +Most of them are specialized scripts for my circumstances, but feel +free to use them as you wish. +<p> +If you're packaging this program, don't just copy every executable +into the system "bin" directory - many of the files are those +specialized scripts. +Just put in the bin directory every executable documented here, plus the +the files they depend on (there aren't that many). +See the RPM specification file to see what's actually installed. +<p> +You have to take any measure of SLOC (including this one) with a +large grain of salt. +Physical SLOC is sensitive to the format of source code. +There's a correlation between SLOC and development effort, and some +correlation between SLOC and functionality, +but there's absolutely no correlation between SLOC +and either "quality" or "value". +<p> +A problem of physical SLOC is that it's sensitive to formatting, +and that's a legitimate (and known) problem with the measure. +However, to be fair, logical SLOC is influenced by coding style too. 
+For example, the following two phrases are semantically identical, +but will have different logical SLOC values: +<pre> + int i, j; /* 1 logical SLOC */ + + int i; /* 2 logical SLOC, but it does the same thing */ + int j; +</pre> +<p> +If you discover other information that can be divided up by +data directory children (e.g., the license used), it's probably best +to add that to each subdirectory (e.g., as a "license" file in the +subdirectory). +Then you can modify tools like get_sloc +to add them to their display. +<p> +I developed SLOCCount for my own use, not originally as +a community tool, so it's certainly not beautiful code. +However, I think it's serviceable - I hope you find it useful. +Please send me patches for any improvements you make! +<p> +You can't use this tool as-is with some estimation models, such as COCOMO II, +because this tool doesn't compute logical SLOC. +I certainly would accept code contributions to add the ability to +measure logical SLOC (or related measures such as +Cyclomatic Complexity and Cyclomatic density); +selecting them could be a compile-time option. +However, measuring logical SLOC takes more development effort, so I +haven't done so; see USC's "CodeCount" for a set of code that +measures logical SLOC for some languages +(though I've had trouble with CodeCount - in particular, its C counter +doesn't correctly handle large programs like the Linux kernel). + + +<p> +<h1><a name="license">SLOCCount License</a></h1> +<p> +Here is the SLOCCount License; the file COPYING contains the standard +GPL version 2 license: +<pre> +===================================================================== +SLOCCount +Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com) + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +===================================================================== +</pre> +<p> +While it's not formally required by the license, please give credit +to me and this software in any report that uses results generated by it. +<p> +This document was written by David A. Wheeler (dwheeler, at, dwheeler.com), +and is +(C) Copyright 2001 David A. Wheeler. +This document is covered by the license (GPL) listed above. +<p> +The license <i>does</i> give you the right to +use SLOCCount to analyze proprietary programs. + +<p> +<h1><a name="related-tools">Related Tools</a></h1> +<p> +One available toolset is +<a href="http://sunset.usc.edu/research/CODECOUNT">CodeCount</a>. +I tried using this toolset, but I eventually gave up. +It had too many problems handling the code I was trying to analyze, and it +does a poor job automatically categorizing code. +It also has no support for many of today's languages (such as Python, +Perl, Ruby, PHP, and so on). +However, it does a lot of analysis and measurements that SLOCCount +doesn't do, so it all depends on your need. +Its license appeared to be open source, but it's quite unusual and +I'm not enough of a lawyer to be able to confirm that. +<p> +Another tool that's available is <a href="http://csdl.ics.hawaii.edu/Research/LOCC/LOCC.html">LOCC</a>. +It's available under the GPL. +It can count Java code, and there's experimental support for C++. 
+LOCC is really intended for more deeply analyzing each Java file;
+what's particularly interesting about it is that it can measure
+"diffs" (how much has changed).
+See
+<a href="http://csdl.ics.hawaii.edu/Publications/MasterList.html#csdl2-00-10">
+A comparative review of LOCC and CodeCount</a>.
+<p>
+<a href="http://sourceforge.net/projects/cccc">
+CCCC</a> is a tool which analyzes C++ and Java files
+and generates a report on various metrics of the code.
+Metrics supported include lines of code, McCabe's complexity,
+and metrics proposed by Chidamber & Kemerer and Henry & Kafura.
+(You can see
+<a href="http://cccc.sourceforge.net/">Tim Littlefair's comments</a>).
+CCCC is in the public domain.
+It reports on metrics that sloccount doesn't, but sloccount can handle
+far more computer languages.
+
+<p>
+<h1><a name="submitting-changes">Submitting Changes</a></h1>
+<p>
+The GPL license doesn't require you to submit changes you make back to
+its maintainer (currently me),
+but it's highly recommended and wise to do so.
+Because others <i>will</i> send changes to me, a version you make on your
+own will slowly become obsolete and incompatible.
+Rather than allowing this to happen, it's better to send changes in to me
+so that the latest version of SLOCCount also has the
+features you're looking for.
+If you're submitting support for new languages, be sure that your
+change correctly ignores files that aren't in that new language
+(some filename extensions have multiple meanings).
+You might want to look at the <a href="TODO">TODO</a> file first.
+<p>
+When you send changes to me, send them as "diff" results so that I can
+use the "patch" program to install them.
+If you can, please send ``unified diffs'' -- GNU's diff can create these
+using the "-u" option.
+</body>
+
diff --git a/sloccount.spec b/sloccount.spec
new file mode 100644
index 0000000..62dd7b4
--- /dev/null
+++ b/sloccount.spec
@@ -0,0 +1,56 @@
+#
+# RPM spec file for "sloccount". 
+# +%define PKG_VERSION 2.26 + +Name: sloccount +Summary: Measures source lines of code (SLOC) in programs +Version: %{PKG_VERSION} +Release: 1 +Copyright: GPL +Group: Development/Tools +Source: http://www.dwheeler.com/sloccount/sloccount-%{PKG_VERSION}.tar.gz +URL: http://www.dwheeler.com/sloccount +Vendor: David A. Wheeler +Packager: David A. Wheeler <dwheeler@dwheeler.com> +Prefix: /usr +BuildRoot: /var/tmp/%name-buildroot + +%description +SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems +(thus, SLOCCount is a "software metrics tool" or "software measurement tool"). +SLOCCount can count physical SLOC for a wide number of languages; +listed alphabetically, they are: Ada, Assembly, awk, Bourne shell, C, C++, +C shell, COBOL, Expect, Fortran, Java, lex/flex, LISP (including Scheme), +Modula-3, Objective-C, Pascal, Perl, PHP, Python, sed, TCL, and Yacc. +SLOCCount can automatically determine if a file +is a source code file or not, and if so, which language it's written in. +As a result, you can analyze large systems completely automatically; +it's been used to examine entire GNU/Linux distributions, for example. +SLOCCount also includes some report-generating tools +to collect the data generated and present it in several different formats. +Normally you can just run "sloccount DIRECTORY" and all the source code +in the directory and its descendants will be counted. 
+ +%prep +%setup + +%build +make + +%install +rm -rf ${RPM_BUILD_ROOT} +mkdir -p ${RPM_BUILD_ROOT}%{_bindir} +mkdir -p ${RPM_BUILD_ROOT}%{_mandir}/man1 +make install_programs PREFIX=${RPM_BUILD_ROOT}%{_prefix} +make install_man PREFIX=${RPM_BUILD_ROOT}%{_prefix} + +%clean +rm -rf ${RPM_BUILD_ROOT} + +%files +%defattr(-, root, root) +%doc sloccount.html README ChangeLog COPYING TODO +%{_bindir}/* +%{_mandir}/*/* diff --git a/sql_count b/sql_count new file mode 100755 index 0000000..8240fd9 --- /dev/null +++ b/sql_count @@ -0,0 +1,76 @@ +#!/usr/bin/perl +# sql_count - count physical lines of code in SQL. + +# SQL is really screwed up in its commenting system. +# In ANSI, "--" means start of comment, but this causes many problems +# with automatically generated SQL queries. For example, given: +# UPDATE tbl_name SET credit=credit-!payment! +# If !payment! is automatically substituted for a negative number, +# a comment is unexpectedly generated. + +# So, this program accepts "-- " (dash-dash-space) as a comment character. +# It also supports "#" and /* .. */, which are supported by MySQL. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. 
Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while (<STDIN>) { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $filewithlist\n"; + while (<FILEWITHLIST>) { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + $result = `sed -e "s/#.*//" -e "s/-- .*//" < "$file" | c_count`; + $result =~ m/^\s*([0-9]+)/; + $sloc = $1; + print "$sloc $file\n"; + $total_sloc += $sloc; +} diff --git a/stripccomments.c b/stripccomments.c new file mode 100644 index 0000000..187659c --- /dev/null +++ b/stripccomments.c @@ -0,0 +1,50 @@ +/* +stripcomments - a simple program to remove C comments. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + + +*/ + +#include <stdio.h> + +int peek() { + int c = getchar(); + ungetc(c, stdin); + return c; +} + +main() { + int c; + int incomment = 0; + + while ( (c = getchar()) != EOF) { + if (!incomment) { + if ((c == '/') && (peek() == '*')) {incomment=1;} + } else { + if ((c == '*') && (peek() == '/')) { + c= getchar(); c=getchar(); incomment=0; + } + } + if ((c != EOF) && !incomment) {putchar(c);} + } +} + @@ -0,0 +1,22 @@ + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + + @@ -0,0 +1,23 @@ +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# diff --git a/table.html b/table.html new file mode 100644 index 0000000..81474a3 --- /dev/null +++ b/table.html @@ -0,0 +1,569 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head><title>table</title><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"></head><body><br> +By default SLOCCount uses a very simple estimating model for effort and schedule: +the basic COCOMO model in the "organic" mode (see below for more about this). + This model estimates effort and schedule, including design, code, test, +and documentation time (both user/admin documentation and development documentation). +Basic COCOMO is a nice simple model, and it's used as the default because +it doesn't require any information about the code other than the SLOC count +already computed.<br> +<br> +However, basic COCOMO's accuracy is limited in part for the same reason - +basic COCOMO doesn't take a number of important factors into account.<br> +If you have the necessary information, you can improve the model's accuracy +by taking these factors into account. You can at least quickly determine +if the right "mode" is being used to improve accuracy. 
You can also +use the "Intermediate COCOMO" and "Detailed COCOMO" models that take more +factors into account, and are likely to produce more accurate estimates as +a result. Take these estimates as just that - estimates - they're not grand +truths.<br> +<br> +To use the COCOMO model, you first need to determine if your application's +mode, which can be "Organic", "embedded", or "semidetached". Most software +is "organic" (which is why it's the default). Here are simple definitions +of these modes:<br> +<ul> +<li>Organic: Relatively small software teams develop software in a highly +familiar, in-house environment. It has a generally stable development +environment, minimal need for innovative algorithms, and requirements can +be relaxed to avoid extensive rework.</li><li>Semidetached: This is an intermediate +step between organic and embedded. This is generally characterized by reduced +flexibility in the requirements.</li><li>Embedded: The project must operate +within tight (hard-to-meet) constraints, and requirements and interface specifications +are often non-negotiable. The software will be embedded in a complex environment that the software must deal with as-is.<br> + </li> +</ul> +By default, SLOCCount uses the basic COCOMO model in the organic mode. For +the basic COCOMO model, here are the critical factors for --effort and --schedule:<br> +<ul> +<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li><li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li><li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32<br> + </li> +</ul> +Thus, if you want to use sloccount but the project is actually semidetached, +you can use "--effort 3.0 1.12 --schedule 2.5 0.35" as options to sloccount +to get a more accurate estimate.<br> +<br> +For more accurate estimates, you can use the intermediate COCOMO models. 
+ For intermediate COCOMO, use the following figures:<br> +<ul> + <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li> + <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li> + <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32<br> + </li> +</ul> +The intermediate COCOMO values for schedule are exactly the same as the basic +COCOMO model; the starting effort values are not quite the same, as discussed +in Boehm's book. However, in the intermediate COCOMO model, you don't +normally use the effort factors as-is, you use various corrective factors +(called cost drivers). To use these corrections, you then consider +all the cost drivers, and multiply them by the effort base factor, to determine +the final effort factor. Here are the cost drivers (from Boehm's book, +table 8-2 and 8-3):<br> +<br> +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + + + <tbody> + <tr> + <td valign="top" rowspan="1" colspan="2">Cost Drivers<br> + </td> + <td valign="top" rowspan="1" colspan="6">Ratings<br> + </td> + </tr> + <tr> + <td valign="top">ID<br> + </td> + <td valign="top">Driver Name<br> + </td> + <td valign="top">Very Low<br> + </td> + <td valign="top">Low<br> + </td> + <td valign="top">Nominal<br> + </td> + <td valign="top">High<br> + </td> + <td valign="top">Very High<br> + </td> + <td valign="top">Extra High<br> + </td> + </tr> + <tr> + <td valign="top">RELY<br> + </td> + <td valign="top">Required software reliability<br> + </td> + <td valign="top">0.75 (effect is slight inconvenience)<br> + </td> + <td valign="top">0.88 (easily recovered losses)<br> + </td> + <td valign="top">1.00 (recoverable losses)<br> + </td> + <td valign="top">1.15 (high financial loss)<br> + </td> + <td valign="top">1.40 (risk to human life)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">DATA<br> + </td> + <td 
valign="top">Database size<br> + </td> + <td valign="top"><br> + </td> + <td valign="top">0.94 (database bytes/SLOC < 10)<br> + </td> + <td valign="top">1.00 (D/S between 10 and 100)<br> + </td> + <td valign="top">1.08 (D/S between 100 and 1000)<br> + </td> + <td valign="top">1.16 (D/S > 1000)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">CPLX<br> + </td> + <td valign="top">Product complexity<br> + </td> + <td valign="top">0.70 (mostly straightline code, simple arrays, simple expressions)<br> + </td> + <td valign="top">0.85<br> + </td> + <td valign="top">1.00<br> + </td> + <td valign="top">1.15<br> + </td> + <td valign="top">1.30<br> + </td> + <td valign="top">1.65 (microcode, multiple resource scheduling, device timing dependent coding)<br> + </td> + </tr> + <tr> + <td valign="top">TIME<br> + </td> + <td valign="top">Execution time constraint<br> + </td> + <td valign="top"><br> + </td> + <td valign="top"><br> + </td> + <td valign="top">1.00 (<50% use of available execution time)<br> + </td> + <td valign="top">1.11 (70% use)<br> + </td> + <td valign="top">1.30 (85% use)<br> + </td> + <td valign="top">1.66 (95% use)<br> + </td> + </tr> + <tr> + <td valign="top">STOR<br> + </td> + <td valign="top">Main storage constraint<br> + </td> + <td valign="top"><br> + </td> + <td valign="top"><br> + </td> + <td valign="top">1.00 (<50% use of available storage)</td> + <td valign="top">1.06 (70% use)<br> + </td> + <td valign="top">1.21 (85% use)<br> + </td> + <td valign="top">1.56 (95% use)<br> + </td> + </tr> + <tr> + <td valign="top">VIRT<br> + </td> + <td valign="top">Virtual machine (HW and OS) volatility<br> + </td> + <td valign="top"><br> + </td> + <td valign="top">0.87 (major change every 12 months, minor every month)<br> + </td> + <td valign="top">1.00 (major change every 6 months, minor every 2 weeks)</td> + <td valign="top">1.15 (major change every 2 months, minor changes every week)<br> + </td> + <td valign="top">1.30 (major changes 
every 2 weeks, minor changes every 2 days)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">TURN<br> + </td> + <td valign="top">Computer turnaround time<br> + </td> + <td valign="top"><br> + </td> + <td valign="top">0.87 (interactive)<br> + </td> + <td valign="top">1.00 (average turnaround < 4 hours)<br> + </td> + <td valign="top">1.07<br> + </td> + <td valign="top">1.15<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">ACAP<br> + </td> + <td valign="top">Analyst capability<br> + </td> + <td valign="top">1.46 (15th percentile)<br> + </td> + <td valign="top">1.19 (35th percentile)<br> + </td> + <td valign="top">1.00 (55th percentile)<br> + </td> + <td valign="top">0.86 (75th percentile)<br> + </td> + <td valign="top">0.71 (90th percentile)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">AEXP<br> + </td> + <td valign="top">Applications experience<br> + </td> + <td valign="top">1.29 (<= 4 months experience)<br> + </td> + <td valign="top">1.13 (1 year)<br> + </td> + <td valign="top">1.00 (3 years)<br> + </td> + <td valign="top">0.91 (6 years)<br> + </td> + <td valign="top">0.82 (12 years)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">PCAP<br> + </td> + <td valign="top">Programmer capability<br> + </td> + <td valign="top">1.42 (15th percentile)<br> + </td> + <td valign="top">1.17 (35th percentile)<br> + </td> + <td valign="top">1.00 (55th percentile)<br> + </td> + <td valign="top">0.86 (75th percentile)<br> + </td> + <td valign="top">0.70 (90th percentile)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">VEXP<br> + </td> + <td valign="top">Virtual machine experience<br> + </td> + <td valign="top">1.21 (<= 1 month experience)<br> + </td> + <td valign="top">1.10 (4 months)<br> + </td> + <td valign="top">1.00 (1 year)<br> + </td> + <td valign="top">0.90 (3 years)<br> + </td> + <td valign="top"><br> + </td> + <td 
valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">LEXP<br> + </td> + <td valign="top">Programming language experience<br> + </td> + <td valign="top">1.14 (<= 1 month experience)<br> + </td> + <td valign="top">1.07 (4 months)<br> + </td> + <td valign="top">1.00 (1 year)<br> + </td> + <td valign="top">0.95 (3 years)<br> + </td> + <td valign="top"><br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">MODP<br> + </td> + <td valign="top">Use of "modern" programming practices (e.g. structured programming)<br> + </td> + <td valign="top">1.24 (No use)<br> + </td> + <td valign="top">1.10<br> + </td> + <td valign="top">1.00 (some use)<br> + </td> + <td valign="top">0.91<br> + </td> + <td valign="top">0.82 (routine use)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">TOOL<br> + </td> + <td valign="top">Use of software tools<br> + </td> + <td valign="top">1.24<br> + </td> + <td valign="top">1.10<br> + </td> + <td valign="top">1.00 (basic tools)<br> + </td> + <td valign="top">0.91 (test tools)<br> + </td> + <td valign="top">0.83 (requirements, design, management, documentation tools)<br> + </td> + <td valign="top"><br> + </td> + </tr> + <tr> + <td valign="top">SCED<br> + </td> + <td valign="top">Required development schedule<br> + </td> + <td valign="top">1.23 (75% of nominal)<br> + </td> + <td valign="top">1.08 (85% of nominal)<br> + </td> + <td valign="top">1.00 (nominal)<br> + </td> + <td valign="top">1.04 (130% of nominal)<br> + </td> + <td valign="top">1.10 (160% of nominal)<br> + </td> + <td valign="top"><br> + </td> + </tr> + + + + + </tbody> +</table> +<br> +<br> +<br> +So, once all of the factors have been multiplied together, you can +then use the "--effort" flag to set more accurate factors and exponents.<br> +<br> +For example, imagine that you're examining a fairly simple application that +meets the "organic" requirements. 
Organic projects have a base factor +of 2.3 and exponents of 1.05, as noted above. We then examine all the +factors to determine a corrected base factor. For this example, imagine +that we determine the values of these cost drivers are as follows:<br> +<br> +<table cellpadding="2" cellspacing="2" border="1" width="100%"> + + <tbody> + <tr> + <td valign="top" rowspan="1" colspan="2">Cost Drivers<br> + </td> + <td valign="top" rowspan="1" colspan="2">Ratings<br> + </td> + </tr> + <tr> + <td valign="top">ID<br> + </td> + <td valign="top">Driver Name<br> + </td> + <td valign="top">Rating<br> + </td> + <td valign="top">Multiplier<br> + </td> + </tr> + <tr> + <td valign="top">RELY<br> + </td> + <td valign="top">Required software reliability<br> + </td> + <td valign="top">Low - easily recovered losses<br> + </td> + <td valign="top">0.88<br> + </td> + </tr> + <tr> + <td valign="top">DATA<br> + </td> + <td valign="top">Database size<br> + </td> + <td valign="top">Low<br> + </td> + <td valign="top">0.94<br> + </td> + </tr> + <tr> + <td valign="top">CPLX<br> + </td> + <td valign="top">Product complexity<br> + </td> + <td valign="top">Nominal<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">TIME<br> + </td> + <td valign="top">Execution time constraint<br> + </td> + <td valign="top">Nominal<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">STOR<br> + </td> + <td valign="top">Main storage constraint<br> + </td> + <td valign="top">Nominal<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">VIRT<br> + </td> + <td valign="top">Virtual machine (HW and OS) volatility<br> + </td> + <td valign="top">Low (major change every 12 months, minor every month)<br> + </td> + <td valign="top">0.87<br> + </td> + </tr> + <tr> + <td valign="top">TURN<br> + </td> + <td valign="top">Computer turnaround time<br> + </td> + <td valign="top">Low (interactive)<br> + </td> + <td valign="top">0.87<br> + </td> + 
</tr> + <tr> + <td valign="top">ACAP<br> + </td> + <td valign="top">Analyst capability<br> + </td> + <td valign="top">Nominal (55th percentile)<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">AEXP<br> + </td> + <td valign="top">Applications experience<br> + </td> + <td valign="top">Nominal (3 years)<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">PCAP<br> + </td> + <td valign="top">Programmer capability<br> + </td> + <td valign="top">Nominal (55th percentile)<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">VEXP<br> + </td> + <td valign="top">Virtual machine experience<br> + </td> + <td valign="top">High (3 years)<br> + </td> + <td valign="top">0.90<br> + </td> + </tr> + <tr> + <td valign="top">LEXP<br> + </td> + <td valign="top">Programming language experience<br> + </td> + <td valign="top">High (3 years)<br> + </td> + <td valign="top">0.95<br> + </td> + </tr> + <tr> + <td valign="top">MODP<br> + </td> + <td valign="top">Use of "modern" programming practices (e.g. structured programming)<br> + </td> + <td valign="top">High (Routine use)<br> + </td> + <td valign="top">0.82<br> + </td> + </tr> + <tr> + <td valign="top">TOOL<br> + </td> + <td valign="top">Use of software tools<br> + </td> + <td valign="top">Nominal (basic tools)<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + <tr> + <td valign="top">SCED<br> + </td> + <td valign="top">Required development schedule<br> + </td> + <td valign="top">Nominal<br> + </td> + <td valign="top">1.00<br> + </td> + </tr> + + + + + </tbody> +</table> +<br> +By multiplying these driver values together in this example, we compute:<br> +<pre>0.88*0.94*1*1*1*0.87*0.87*1*1*1*0.90*0.95*0.82*1*1</pre> +The correction from these is 0.438964094, which you multiply by the base +factor (2.3 in this case) to determine a final effort factor. For this +example, the final factor for the effort calculation is 1.01. 
You would then +invoke sloccount with "--effort 1.01 1.05" to pass in the corrected factor +and exponent. You don't need to use "--schedule" to set the factors +(they default to the values for organic model), but you can set them manually +anyway by setting "--schedule 2.5 0.38". You <i>do</i> need to use the --schedule option for embedded and semidetached projects. The final command would be:<br> +<br> +sloccount --effort 1.01 1.05 --schedule 2.5 0.38 my_project<br> +<br> +<br> +<br> +<br> +<br> +<br> +</body></html>
\ No newline at end of file diff --git a/tcl_count b/tcl_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/tcl_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/testcode/conditions.CBL b/testcode/conditions.CBL new file mode 100644 index 0000000..8e12724 --- /dev/null +++ b/testcode/conditions.CBL @@ -0,0 +1,31 @@ + $ SET SOURCEFORMAT"FREE"
+IDENTIFICATION DIVISION.
+PROGRAM-ID. Conditions.
+AUTHOR. Michael Coughlan.
+* An example program demonstrating the use of
+* condition names (level 88's).
+* The EVALUATE and PERFORM verbs are also used.
+
+DATA DIVISION.
+WORKING-STORAGE SECTION.
+01 Char PIC X.
+ 88 Vowel VALUE "a", "e", "i", "o", "u".
+ 88 Consonant VALUE "b", "c", "d", "f", "g", "h"
+ "j" THRU "n", "p" THRU "t", "v" THRU "z".
+ 88 Digit VALUE "0" THRU "9".
+ 88 ValidCharacter VALUE "a" THRU "z", "0" THRU "9".
+
+PROCEDURE DIVISION.
+Begin.
+ DISPLAY "Enter lower case character or digit. No data ends.".
+ ACCEPT Char.
+ PERFORM UNTIL NOT ValidCharacter
+ EVALUATE TRUE
+ WHEN Vowel DISPLAY "The letter " Char " is a vowel."
+ WHEN Consonant DISPLAY "The letter " Char " is a consonant."
+ WHEN Digit DISPLAY Char " is a digit."
+ WHEN OTHER DISPLAY "problems found"
+ END-EVALUATE
+ END-PERFORM
+ STOP RUN.
+
diff --git a/testcode/hello.f b/testcode/hello.f new file mode 100644 index 0000000..f66fe77 --- /dev/null +++ b/testcode/hello.f @@ -0,0 +1,10 @@ +c Hello World +* Hello World +! Hello World + program hello + implicit none + print '("Hello, World!")' + end + ! a fancy comment +!hpf$ not a comment +!omp$ not a comment either diff --git a/testcode/hello.f90 b/testcode/hello.f90 new file mode 100644 index 0000000..6b26a2e --- /dev/null +++ b/testcode/hello.f90 @@ -0,0 +1,7 @@ +! Hello World +program hello + implicit none + print '("Hello, World!")' +end program hello +!hpf$ not a comment +!omp$ not a comment either diff --git a/testcode/hello.pas b/testcode/hello.pas new file mode 100644 index 0000000..40c6005 --- /dev/null +++ b/testcode/hello.pas @@ -0,0 +1,9 @@ +{ Hello World in Pascal, for testing SLOCCount. + This is multi-line, testing curly braces. } +(* This is another multi-line comment. + Here's another line. *) +program Hello; +begin (* Main *) + writeln ('Hello, world.') +end. (* Main *) + diff --git a/testcode/hello1.pas b/testcode/hello1.pas new file mode 100644 index 0000000..c53c0d2 --- /dev/null +++ b/testcode/hello1.pas @@ -0,0 +1,12 @@ +{ Hello World in Pascal, for testing SLOCCount. + This is multi-line, testing curly braces. } +(* This is another multi-line comment. + Here's another line. *) +(* This is { another } test. **) +program Hello; +begin (* Main *) + writeln ('Hello, world.'); + writeln ('It''s a test!'); + writeln ('Show that newlines are detected') +end. (* Main *) + diff --git a/testcode/messages.rb b/testcode/messages.rb new file mode 100644 index 0000000..1521ae6 --- /dev/null +++ b/testcode/messages.rb @@ -0,0 +1,152 @@ +#!/usr/local/bin/ruby +# messages.rb - this is a test for the Ruby SLOC counter. +# You should get 110 SLOC for this file. 
+ +# Guru module: private messages among players +# Copyright (C) 2001, 2002 Josef Spillner, dr_maux@user.sourceforge.net +# This is used as a test case in SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +# Commands: +# guru do i have any messages +# guru tell grubby nice to meet myself :) +# guru alert grubby + +databasedir = ENV['HOME'] + "/.ggz/grubby" + +#################################################################################### + +class GuruMessages + def initialize + @msg = Array.new + @alerts = Array.new + end + def add(fromplayer, player, message) + @entry = Array.new + newmessage = (fromplayer + " said: " + message.join(" ")).split(" ") + @entry << player << newmessage + @msg.push(@entry) + print "OK, I make sure he gets the message." + $stdout.flush + sleep 1 + end + def tell(player) + len = @msg.length + a = 0 + for i in 0..len + unless @msg[len-i] == nil + print @msg[len-i][1][0..@msg[len-i][1].length - 1].join(" ") + "\n" if player == @msg[len-i][0] + if player == @msg[len-i][0] + @msg.delete_at(len-i) + a = 1 + end + end + end + if a == 0 + print "Sorry " + player + ", I guess you're not important enough to get any messages." 
+ end + $stdout.flush + sleep 1 + end + def alert(fromplayer, player) + @entry = Array.new << fromplayer << player + @alerts.push(@entry) + print "OK, I alert " + player + " when I see him." + $stdout.flush + sleep 1 + end + def trigger(player) + len = @alerts.length + a = 0 + for i in 0..len + unless @alerts[len-i] == nil + if player == @alerts[len-i][0] + print player + ": ALERT from " + @alerts[len-i][1] + "\n" + @alerts.delete_at(len-i) + a = 1 + end + end + end + if a == 1 + $stdout.flush + sleep 1 + return 1 + end + return 0 + end +end + +input = $stdin.gets.chomp.split(/\ /) + +mode = 0 +if (input[1] == "do") && (input[2] == "i") && (input[3] == "have") && + (input[4] == "any") && (input[5] == "messages") + mode = 1 + player = ARGV[0] +end +if (input[1] == "tell") + mode = 2 + fromplayer = ARGV[0] + player = input[2] + message = input[3..input.length] +end +if(input[1] == "alert") + mode = 3 + fromplayer = ARGV[0] + player = input[2] +end + +m = nil +begin + File.open(databasedir + "/messages") do |f| + m = Marshal.load(f) + end +rescue + m = GuruMessages.new +end + +if mode == 0 + ret = m.trigger ARGV[0] + if ret == 0 + exit + end +end +if mode == 1 + if player != nil + m.tell player + else + print "If you mind telling me who you are?" + $stdout.flush + sleep 1 + end +end +if mode == 2 + m.add fromplayer, player, message +end +if mode == 3 + m.alert fromplayer, player +end + +File.open(databasedir + "/messages", "w+") do |f| + Marshal.dump(m, f) +end + diff --git a/testcode/temp.c b/testcode/temp.c new file mode 100644 index 0000000..d540f08 --- /dev/null +++ b/testcode/temp.c @@ -0,0 +1,5 @@ + + +main() { + int i; +} diff --git a/testcode/test.hs b/testcode/test.hs new file mode 100644 index 0000000..de874df --- /dev/null +++ b/testcode/test.hs @@ -0,0 +1,19 @@ + +-- This literate program prompts the user for a number +-- and prints the factorial of that number: + +{- This is a comment. 
-} +{- This is a comment, + too -} + +{-# this is a pragma, COUNT IT -} + + main :: IO () + main = do putStr "Enter a number: " + l <- readLine + putStr "n!= " + print (fact (read l)) + fact :: Integer -> Integer + fact 0 = 1 + fact n = n * fact (n-1) + diff --git a/testcode/test1.inc b/testcode/test1.inc new file mode 100644 index 0000000..a56d14e --- /dev/null +++ b/testcode/test1.inc @@ -0,0 +1,23 @@ +<?php + + /** + * Test file for php_count, part of SLOCCount. This is a C-style comment. + * This file is different from .php. + */ + + // This is a C++-style comment. + + # This is a shell-style comment. + + # Here are 9 lines of code: + + function get() + { + $total = 0; + $simplestring = 'hello'; + $simplestring = '\\hello\''; + $funkystring = "hello"; + $funkystring = "$hi\\\""; + return 0; + } +?> diff --git a/testcode/test1.lhs b/testcode/test1.lhs new file mode 100644 index 0000000..3c19a70 --- /dev/null +++ b/testcode/test1.lhs @@ -0,0 +1,15 @@ +\documentstyle{article} + +\begin{document} + +\section{Introduction} + +This is a trivial program that prints the first 20 +factorials. It should have 2 lines of code. + +\begin{code} +main :: IO () +main = print [ (n, product [1..n]) | n <- [1..20]] +\end{code} + +\end{document} diff --git a/testcode/test1.php b/testcode/test1.php new file mode 100644 index 0000000..9fd2510 --- /dev/null +++ b/testcode/test1.php @@ -0,0 +1,27 @@ +<?php + + /** + * Test file for php_count, part of SLOCCount. This is a C-style comment. + */ + + // This is a C++-style comment. + + # This is a shell-style comment. + + # Here are 13 lines of code: + + function get() + { + $total = 0; + $simplestring = 'hello'; + $simplestring = '\\hello\''; + $funkystring = "hello"; + $funkystring = "$hi\\\""; + $heretest <<< wiggle +juggle + wiggle /* This doesn't end the string, so this isn't a C comment. 
+wiggle; + return 0; + } + +?> diff --git a/testcode/test2.lhs b/testcode/test2.lhs new file mode 100644 index 0000000..6e39905 --- /dev/null +++ b/testcode/test2.lhs @@ -0,0 +1,44 @@ + +This is an extract of a larger literate Haskell file for testing +SLOCCount. It should have 21 lines of code. + +This dumps the tree in dot format, which is very handy for visualizing +the trees. + +> dotTree name t = "digraph " ++ filter dotChars name ++ " { " ++ (dotTree' t 0) ++ " }" + +> dotTree' Empty _ = "" +> dotTree' t i | is_leaf t = "n"++(show i)++" [label=\""++(show $ x_span t)++ +> "\",shape=box]; " +> | otherwise = "n"++(show i)++" [label=\""++(show $ x_span t)++"\"]; " ++ +> "n"++(show i)++" -> n"++(show (2*i+1))++"; "++ +> "n"++(show i)++" -> n"++(show (2*i+2))++"; "++ +> dotTree' (left t) (2*i+1) ++ +> dotTree' (right t) (2*i+2) +> where is_leaf Node { left = Empty, right = Empty } = True +> is_leaf _ = False +> {- this is a comment + +foo bar baz + +> that +> spans literate blocks -} + +> dotChars '.' = False +> dotChars '/' = False +> dotChars _ = True + +These functions fill in the monotonically increasing index values for +the lines in the finite map. They also do appropriate things to combine +the world values. + +> idxList [] n = [] +> idxList (x:xs) n = (x {idx=n}):(idxList xs (n+1)) + +> idxFM' fm (x,k) = addToFM (delFromFM fm k) k (y {idx=toInteger x}) +> where y = case lookupFM fm k of +> Just foo -> foo +> Nothing -> error $ "No such key: " ++ show k + +> idxFM fm = foldl idxFM' fm (zip [1..sizeFM fm] $ keysFM fm) + diff --git a/testcode/wokka.cbl b/testcode/wokka.cbl new file mode 100644 index 0000000..d7ccd0c --- /dev/null +++ b/testcode/wokka.cbl @@ -0,0 +1,4 @@ + * Comment.
+ IDENTIFICATION DIVISION.
+ PROGRAM-ID. Conditions.
+
diff --git a/testcode/wokka.cs b/testcode/wokka.cs new file mode 100644 index 0000000..fa95425 --- /dev/null +++ b/testcode/wokka.cs @@ -0,0 +1,8 @@ + +/* comment: This has 5 physical lines of code. */ + +class Test { + static void Main() { + System.Console.WriteLine("Hello, World (in C#)"); + } +} diff --git a/usc_subset.tar b/usc_subset.tar new file mode 100644 index 0000000..67e2d5a --- /dev/null +++ b/usc_subset.tar @@ -0,0 +1 @@ +java_lines.c |