35 files changed, 7700 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 0000000..f86b1d8
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,263 @@
+2.1
+- lots of fixes to get the thing working sufficiently for cvs2git
+- main features
+  - optionally detect and display ancestor branch information for
+    new detected branches
+  - never allow more than one revision of a file to be in a single
+    patchset (this may disturb the patchset numbers for historical
+    data :-(
+  - use file revisions to help sort, so that initial imports
+    won't be listed as patchset 1, with patchset 2 as the actual
+    creation of the files.
+  - reports log times in localtime (use TZ to override)	
+- many downstream patches from Debian, special thanks to Marcus
+  Crafter for maintaining.  From the Debian changelog:
+   - patch from Kim Hansen to fix time calculation bug
+   - documentation patch from J. Bruce Fields
+   - directory name handling patch from David Kilzer
+   - others I'm not sure of, I'd happily update the credit here
+
+2.0rc1
+- resolve -r symbols during global symbol resolution
+- using two -r arguments implicitly sets -b with branch
+  from second -r (if you can speak cvsps arguments, you'll
+  understand this)
+- fix using cvsps against sourceforge where the actual
+  server repository path has many instances of the project
+  name in it and the 'strstr' was finding the wrong one.
+  instead use the equiv. of 'strrstr'
+
+2.0b10
+- fix bug where filenames with 'bad' characters (such as ' ')
+  were being sent unescaped to a shell
+- add --no-cvs-direct and -Z 0 to disable cvs-direct and compression
+- extend the 'fuzz-factor' concept by keeping a min/max date
+  encountered for each patchset and fuzz on this interval,
+  rather than fuzzing on the date of the first encountered
+  member only, helps for handling REALLY slow commits where a
+  HUGE number of individual files were modified at a slow trickle.
+- it turns out that old versions of cvs have two problems
+  1) they don't support 'rlog' (handled)
+  2) they don't support 'version', which caused the workaround
+     for 1) to fail.
+  Now cvsps detects cvs versions too old to support version and/or
+  rlog.  Additionally, if server is too old, cvs-direct is
+  simply disabled.
+- parse the rc file first so that the command line will override
+- add -q to shut-up the warnings
+
+2.0b9
+- remove Mbinary from valid-response list - can't handle it
+- much more (pedantic?) clean-up/close path in cvs_direct
+  to fix all of the lost data and hanging problems at close
+- added rlog capability to cvs_direct, switched main code
+  to use it.
+- added version capability to cvs_direct, now cvsps is
+  completely independent of cvs binary for all operations!
+- added --root and repository command-line arguments,
+  now cvsps is completely independent of having a working
+  directory!
+
+2.0b8
+- fix the cvs_rupdate calls to use the real repository
+  instead of the mangled use_rep_path.  This broke
+  cvs_direct generation of add/remove diffs
+- fix the sed expression to account for the difference
+  between +++ and --- in the first and second lines.
+  This broke remove diffs for direct and external.
+- attempt to fix a hang-at-flush bug in cvs-direct
+  compression handling
+
+2.0b7
+- implement compression in cvs_direct
+- add command line -Z to specify compression level.
+  applies to cvs-direct and external cvs
+- implement 'cvs_diff' in cvs-direct
+- use cvs_diff instead of cvs_rdiff because it allows
+  us to specify all of the lovely diff options
+- change system to my_system so ctrl-c will get
+  through.  indirectly, this is necessary because
+  cvs has loused up the exit codes with the 'cvs diff'
+  command, making it impossible to determine whether
+  the program exited abnormally
+- add parsing of ~/.cvsps/cvspsrc file at startup
+  where command line arguments can be listed, one
+  per line (parameters to arguments on the same line
+  as the argument). --norc prevents this
+
+2.0b6
+- fix bug where if -r was specified along with other
+  filter options, the other filter options were
+  applied first, and the start/end tags were
+  never found.
+- change -f from strstr to regex
+- change regex to EXTENDED regex
+- add the --cvs-direct option
+
+2.0b5
+- add a cvs_check_cap for capability checking. 
+- use cvs_check_cap to check whether rlog is 
+  supported.  it's not supported in 1.11 and  
+  before. warn if non-supported version is 
+  detected
+- add --no-rcmds to manually disable rlog and rdiff
+- add a fix for some bizarre inconsistencies found
+  in some random repositories, such as tagged
+  revisions that don't exist.
+- remove the adaptive crap filter and bk_log_border
+  logic now that Larry has cleaned up the exported
+  BK->CVS trees.
+
+2.0b4
+- add the bk_log_border parsing logic to handle the
+  case where 'cvs log' text was committed into the log
+- add 'adaptive crap filter' logic to handle all
+  of the different corruptions of the cvs log file
+- switch to rdiff instead of diff to not require working 
+  files to be checked out.  Note: when --diff-opts are
+  specified, diff must be used (not rdiff) because
+  rdiff doesn't support options(?!!)
+
+2.0b3
+- add the --diff-opts for explicitly setting the diff 
+  options.  important for setting increased context for 
+  example. 
+- use 'cvs rlog' instead of 'cvs log'.  important if 
+  there are empty directories pruned with cvs update -P
+  for getting consistent patchset numbering
+- clean up patch_set_affects_branch for handling INVALID
+  tags and the -r especially around a branch point
+- add the bkcvs proof-of-concept hack
+
+
+2.0b2
+- change the way INVALID tags are reported - show the exact
+  patchsets and revisions that conflict, and don't print 
+  anything for 'funky' tags/revisions
+- add psid to the patchset structure and pass through tree
+  once just to assign the numbers.  this allows reporting
+  psid when problems are found (above)
+- more code restructuring - move stats to own module
+- commenting cleanups
+- include the very rudimentary merge_utils.sh functions
+  I use to assist merging stuff
+- change the way diffs are generated, handle individual
+  members that are before/after the -r tag restrictions
+- change the semantics of the -s argument.  it now only
+  restricts the patch sets the same as the other filtration
+  arguments.  new argument -g turns on diff generation
+- add manpage section discussing tag handling semantics
+
+2.0b1
+- major restructuring of source code.  single cvsps.c split into
+  a few modules
+- major rework of the data structures in order to support the most
+  frequently requested, and most desired feature: -r.  cvsps
+  can now associate symbolic tags with specific patchsets,
+  branches are now associated with the patchset instead of 
+  the individual revision.  you can view patchsets committed
+  after a given symbolic tag, or between two tags.
+- changed the handling of same author, same log message commits
+  on different branches. this now creates multiple patchsets
+- changed the handling of 'file xyz initially added on branch xyz'
+  log messages, they now create hidden patchsets.
+- these last two items have made the patchset numbering scheme
+  incompatible with the old scheme.  patchsets are going
+  to change numbers.
+
+1.99.1 (not released)
+- redid the data structures a bit to 'normalize' the references to 
+  a file revision.  this allows the program to detect the case
+  when a file is added on a branch, and generate the right output
+  for 'cvsps -s' in this case (frequent bug report)
+- added the '-l' option to restrict patchsets based on log descr
+  content. (patch from Geoff Soutter).  added man page for it.
+- added the --summary-first option suggested by Andi Kleen to
+  have the PatchSet summary information for all patchsets at
+  the head of the output, when multiple patchsets are given
+  to the -s option.
+- added the all: target to Makefile.  (patch from Ben Elliston)
+- use a single centralized cvsps.cache file per root/repository
+  this obviates the need to 'cvsps -u' in multiple checked out
+  trees of the same repository (patch adopted from Baruch Even)
+- change the magic name TRUNK (for -b option) to HEAD to be 
+  consistent with cvs.  (patch from Henrik Nordstrom)
+- when parsing 'cvs log' output, look for the exact strings
+  that separate the log entries and the files, instead of 
+  just the first 8 characters.  this allows a wider variety
+  of stuff to be in the log format (including actual patchsets).
+  Inspired by Andy Isaacson.
+- added the -p option allowing patchset output to go into
+  individual files in a named directory.  Based on a patch
+  by Henrik Nordstrom.  This is a great idea.  Thanks Henrik.
+
+1.3.3 (why isn't this 1.4 again?)
+- added the 'memory reduction' patch from Baruch Even.  This patch
+  greatly reduces the memory footprint of CVSps against a large 
+  repository by using dynamically allocated buffers, and by using
+  a tree to hold "common" strings (to avoid having a thousand copies
+  of the string "1.1").  This patch also adds a new option '-t' which
+  displays some memory usage statistics at the top.
+
+- added the 00-strip-revision.patch from Steven Tweedie which fixes
+  a parsing problem when there are locked files in the repository
+
+- added the 'Tweedie Tweenie' patch from Steven Tweedie (01-stable-tree) 
+  which fixes a bug where interspersed commits could cause unstable 
+  tree behavior.  This would happen especially when multiple users commit 
+  at the same time to different parts of a large tree over a slow link.
+
+- added the --norc option (based on a suggestion by Soren S. Jorvang) 
+  to handle cases where people have bad stuff in their .cvsrc which makes 
+  cvsps fail.  I could have added the '-f' unconditionally to the command
+  line for cvs when run under cvsps, but somehow that seemed dangerous.
+
+1.3.2 (small feature release)
+- added the 'multi-patchset' feature to the -s option, provided in its
+  entirety by Daiki Ueno <ueno@unixuser.org>
+
+1.3.1 (bugfix release mostly)
+- fix bug with updating cache (-u) having to do with matching new and old 
+  revisions
+
+- fix timestamp_fuzz_factor bug where the fuzz was applied to loading
+  from cache by mistake.
+
+- add a spec file (from Jan IVEN <Jan.Iven@cern.ch>)
+
+- improve the parameterization of Makefile, and make things relocatable
+  (from many people, esp. Amitai Schlair <schmonz@schmonz.com>)
+
+- fix strip_path_len calculation (again - I had munged the prior fix)
+  Jeffrey Ebert <ebert@sonicsinc.com>
+
+
+1.3
+- fix case where CVSROOT is initial substring of Repository when
+  creating strip_path variable.
+
+  Thanks to Jean-Michel Rouet <jean-michel.rouet@philips.com> for
+  this fix.
+
+- fix case where a subdirectory has a different CVSROOT or Repository
+  path from the toplevel directory (happens when one project is checked
+  out inside another project's subdirectory).
+
+  Thanks to Philippe M. Chiasson <gozer@cpan.org> for the fix.
+
+
+1.2
+
+- Moved cache file location to CVS/ subdirectory.  This is where CVS keeps
+  its meta-data, and putting the cache there keeps it out of the way of
+  other CVS operations. (thanks to Amitai Schlair for suggestion)
+
+- Added manual page.  (thanks to Amitai Schlair for suggestion)
+
+- Fixed the '-b' option to take a special branch name 'TRUNK' which
+  will restrict the output to patchsets made on the main trunk.
+
+
+1.1 Initial public release.
+-------- end of changelog ---------
+
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..5b6e7c6
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..507c3e9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,35 @@
+MAJOR=2
+MINOR=1
+CC?=gcc
+CFLAGS?=-g -O2 -Wall 
+CFLAGS+=-I. -DVERSION=\"$(MAJOR).$(MINOR)\"
+prefix?=/usr/local
+OBJS=\
+	cbtcommon/debug.o\
+	cbtcommon/hash.o\
+	cbtcommon/text_util.o\
+	cbtcommon/sio.o\
+	cbtcommon/tcpsocket.o\
+	cvsps.o\
+	cache.o\
+	util.o\
+	stats.o\
+	cap.o\
+	cvs_direct.o\
+	list_sort.o
+# Default target: build the cvsps binary.
+all: cvsps
+# Link every object listed in OBJS into cvsps; -lz links against zlib.
+cvsps: $(OBJS)
+	$(CC) -o cvsps $(OBJS) -lz
+# Build first if needed, then copy the binary and man page under $(prefix).
+install: all
+	[ -d $(prefix)/bin ] || mkdir -p $(prefix)/bin
+	[ -d $(prefix)/share/man/man1 ] || mkdir -p $(prefix)/share/man/man1
+	install cvsps $(prefix)/bin
+	install -m 644 cvsps.1 $(prefix)/share/man/man1
+# Remove the binary, all object files and any 'core' file.
+clean:
+	rm -f cvsps *.o cbtcommon/*.o core
+# These targets are commands, not files; always run them when requested.
+.PHONY: all install clean
diff --git a/README b/README
new file mode 100644
index 0000000..a8724fa
--- /dev/null
+++ b/README
@@ -0,0 +1,230 @@
+CVSps (c) 2001,2002,2003 David Mansfield
+------------------------------
+
+Overview.
+--------
+
+CVSps is a program for generating 'patchset' information from a CVS
+repository.  A patchset in this case is defined as a set of changes made
+to a collection of files, and all committed at the same time (using a
+single 'cvs commit' command).  This information is valuable for seeing the
+big picture of the evolution of a cvs project.  While cvs tracks revision
+information, it is often difficult to see what changes were committed
+'atomically' to the repository.
+
+Compiling.
+---------
+
+CVSps is a relatively small program, with only a few modules.
+The Makefile is very simple and should work in most GNU type environments.
+Unfortunately, I've only been able to test on Red Hat Linux, so YMMV.  As
+CVSps matures, I'm sure a more sophisticated build environment will evolve
+with it.  For now, just try 'make' and 'make install'.  If you have any 
+problems let me know.
+
+Running.
+-------
+
+Note: not all options are necessarily discussed here.  Please check the
+output of 'cvsps -h' and/or the manual page for the most up-to-date info.
+
+CVSps operates by parsing the 'cvs log' output.  So to run it, you must
+be in the working directory of a cvs project.  CVSps handles
+subdirectories fine, so run it in the top directory of your project.
+
+a) the ~/.cvsps/cvsps.cache file (or so it's called)
+
+Because you may have a *lot* of revision history in your project, and/or
+your connection to the cvs server may be slow or congested, CVSps uses a
+cache file for most operations.  The first time you run CVSps, just issue:
+
+cvsps
+
+and it will begin reading and parsing the cvs log.  When it is finished it
+will output all of the patchset information to stdout, and it will also
+generate the '~/.cvsps/cvsps.cache' file.  Note: for historical reasons
+this file is still called the cvsps.cache file, but in reality it is named
+based on the CVS/Root and CVS/Repository contents, and thus is shared for
+the same repository checked out in multiple places.
+
+If the cache file exists, it won't ever be automatically updated.  To
+update the cache with cvs activity that has occurred since the ~/.cvsps/cvsps.cache
+was last updated, use:
+
+cvsps -u
+
+If you question the integrity of the ~/.cvsps/cvsps.cache, or for some other reason
+want to force a full cache rebuild, use (you could also 'rm' the cache file):
+
+cvsps -x
+
+b) Reading the output.
+
+CVSps's output is information about patchsets.  A patchset looks like:
+
+---------------------
+PatchSet 999
+Date: 2002/07/11 19:50:46
+Author: alan
+Branch: HEAD
+Tag: (none)
+Log:
+[PATCH] Fix several pdc202xx problems
+
+Misnaming of 20270 as 20268R
+Failure of LBA48 on 20262
+Incorrect speed detection because the old driver used host not drive side
+cable detect
+PDC202xx handling for quirks in udma reporting off some drives
+LBA48 for PIO mode
+
+BKrev: 3d2dd386wJMnehoOAhv3wL991IfXVQ
+
+Members:
+  ChangeSet:1.999->1.1000
+  MAINTAINERS:1.74->1.75
+  drivers/ide/ide-features.c:1.4->1.5
+  drivers/ide/ide-pci.c:1.18->1.19
+  drivers/ide/pdc202xx.c:1.11->1.12
+  include/linux/pci_ids.h:1.44->1.45
+
+---------------------
+
+This patchset is taken from the linux kernel BK->CVS tree.  It shows the date, 
+the author, log message and each file that was modified.  For each file the
+pre-commit and post-commit revisions are given.  You can also see (if 
+applicable, not in this case) if the files are on a branch, as well as the
+tag (see TAGS below).
+
+Patchsets are ordered by commit timestamp, so as long as the clock on your
+cvs server is monotonic, the numbering of patchsets should be invariant
+across cache-rebuilds. (see COMPATIBILITY below).
+
+c) Limiting the patchset output.
+
+The default output of CVSps is to show all patchsets.  This can be
+filtered in one of many ways.  These flags can be combined to really
+limit the output to what you're interested in.
+
+By id.  With the -s <ps range> you can specify individual PatchSets by
+number or by range.  Ranges can be of the form '<number>', '<number>-',
+'-<number>' and of course '<number>-<number>'.  Multiple ranges can be
+specified separated by commas.  E.g.
+
+cvsps -s 999-1020,1025,4956-
+
+By author.  With the -a <author> flag you limit the output to patchsets
+committed by a given author.  The author is usually the UNIX login id.
+
+By file.  With the -f <file regex> flag you limit the output to patchsets 
+that have modified the given file.  Because a regular expression can have
+many pieces 'or'ed together, you can specify many different files here,
+for example (note also the use of the ^ character):
+
+cvsps -f '^net/ipv4|^net/core'
+
+By date.  With one date specification, CVSps shows only patchsets newer
+than the date given, and with two dates, it shows patchsets between the
+two dates.
+
+*NOTE ON DATE FORMAT*.  Because I'm lazy, only one date format is
+currently acceptable. 'YYYY/MM/DD HH:MM:SS'  where time is given as
+localtime, and HH is in 24 hour format.  NOTE ALSO that cvs tends to
+display times as GMT, but parse dates as localtime, so when using the '-D'
+with cvs you need to convert from GMT to localtime in your head.  CVSps is
+not as fancy.  It treats all dates as if in localtime, and therefore you
+give CVSps dates the same way it gives them to you.
+
+By branch.  With the -b <branch> flag you limit the output to patchsets
+that have modified the history of the given branch.  Note, this 
+doesn't necessarily mean the commit itself was made on the branch, since
+the files in question may have existed prior to the branch point, in which
+case changes made to a given file before the branch point affect the file
+as it exists in the head of the branch.  If you want to restrict to the
+main branch, use a branch of 'HEAD'.
+
+By log comment.  With the -l <regex> flag you can limit the output to
+patchsets with the commit message matching the regex.
+
+By tag.  With the -r <tag1> -r <tag2> you can limit the patchsets to
+commits after a given tag1 and, optionally, before tag2.
+
+d) viewing the changes made by a patchset.
+
+To show the 'diff' output for a given patchset, use -g.
+
+It will show you the diff of changes made by the selected commits.
+Some effort was made to ensure that the patches are valid, even in the 
+case of removing or creating files, a case in which 'cvs diff' fails.  
+The patches generated are, generally speaking, applyable in the working
+directory with the '-p1' option to the patch command.
+
+e) what is timestamp fuzz factor (-z option)?
+
+There's another annoying feature of cvs.  When you commit a large change,
+the timestamp on the change is created per file committed.  For example:
+if you commit changes to 60 files on a slow server, taking, say, 60
+seconds, the 'commit time' as given in the log message for the first file
+will differ from that of the last file by 60 seconds.
+
+The fuzz factor attempts to workaround this by saying: commits by the same
+author, using the same log message, within <fuzz> seconds are considered
+part of the same patchset.  The default fuzz is 300 seconds (5 minutes).
+
+TAGS
+----
+
+Please read the manual page.
+
+COMPATIBILITY
+-------------
+
+One of the main goals of cvsps was to make the patchset numbering stable across
+all time, as long as no funny-business is done to the repository files themselves.
+
+Unfortunately, as bugs have been fixed, the numbering has changed.  This is most
+regrettable, but unavoidable.
+
+Additionally, in version 2.0, two changes have been made which will 'renumber'
+the patch sets.  
+
+1) The false 'globbing' of two commits from nearly the exact same time, by the
+same person, with the same log description but to different branches.  Now,
+these will be reported as 2 patchsets instead of one.
+
+2) The creation of a large volume of patchsets for 'file xyz was originally added on
+branch' log messages.  This occurs whenever a file is originally born on a branch,
+and is exacerbated by the fact that even when all of these files are created with
+a single commit, the 'file xyz...' messages, which contains the actual file name,
+are different, causing a proliferation of these unwanted patchsets.  These patchsets
+are now silently eliminated from the output.
+
+Reporting bugs / submitting patches.
+-----------------------------------
+
+Although the current version is perfect and bug free, you can still send
+bug reports, feature requests and patches to me at:
+
+cvsps@dm.cobite.com
+
+I will try to maintain CVSps and make releases regularly.  The most recent
+version of CVSps will always be available at http://www.cobite.com/cvsps
+
+Special thanks to my employer Cobite and Robert Lippman, who've given me
+time to develop this tool.
+
+Known Problems (this will become the FAQ if anyone ever A any Q).
+----------------------------------------------------------------
+
+1) What is the '*** file xyz doesn't match strip_path abc' error?
+
+This error occurs when one of the subdirectories of the directory you
+ran CVSps in is checked out from a different repository.  CVSps tries
+to remove the repository path information from the filenames that it
+gets to give you working-directory local pathnames.  It does this 
+at startup by parsing the CVS/Root and CVS/Repository files.  If 
+the contents of these two files are different for some subdirectory,
+all of the files in that subdirectory will be ignored. 
+
+You can always run CVSps in that subdirectory, and since it IS a 
+separate repository, that does make a little bit of sense.
diff --git a/cache.c b/cache.c
new file mode 100644
index 0000000..4c51cf7
--- /dev/null
+++ b/cache.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <search.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <time.h>
+
+#include <cbtcommon/hash.h>
+#include <cbtcommon/debug.h>
+
+#include "cache.h"
+#include "cvsps_types.h"
+#include "cvsps.h"
+#include "util.h"
+
+#define CACHE_DESCR_BOUNDARY "-=-END CVSPS DESCR-=-\n"
+
+/* change this when making the on-disk cache-format invalid */
+static int cache_version = 1;
+
+/* the tree walk API pretty much requires use of globals :-( */
+static FILE * cache_fp;
+static int ps_counter;
+
+static void write_patch_set_to_cache(PatchSet *);
+static void parse_cache_revision(PatchSetMember *, const char *);
+static void dump_patch_set(FILE *, PatchSet *);
+
+static FILE *cache_open(char const *mode)
+{
+    /* the cache file lives in the directory reported by get_cvsps_dir() */
+    char *cvsps_dir = get_cvsps_dir();
+    char flat_root[PATH_MAX];
+    char flat_repo[PATH_MAX];
+    char cache_path[PATH_MAX];
+    FILE *cache;
+    FILE *legacy;
+
+    if (cvsps_dir == NULL)
+	return NULL;
+
+    /* file name is "<root>#<repository>" after strrep() has been run with '/' and '#' on both parts */
+    strcpy(flat_root, root_path);
+    strrep(flat_root, '/', '#');
+    strcpy(flat_repo, repository_path);
+    strrep(flat_repo, '/', '#');
+    snprintf(cache_path, PATH_MAX, "%s/%s#%s", cvsps_dir, flat_root, flat_repo);
+
+    cache = fopen(cache_path, mode);
+    if (cache != NULL || *mode != 'r')
+	return cache;
+
+    /* failed read: an old-style CVS/cvsps.cache only triggers a warning, it is never used */
+    legacy = fopen("CVS/cvsps.cache", mode);
+    if (legacy != NULL)
+    {
+	fprintf(stderr, "\n");
+	fprintf(stderr, "****WARNING**** Obsolete CVS/cvsps.cache file found.\n");
+	fprintf(stderr, "                New file will be re-written in ~/%s/\n", CVSPS_PREFIX);
+	fprintf(stderr, "                Old file will be ignored.\n");
+	fprintf(stderr, "                Please manually remove the old file.\n");
+	fprintf(stderr, "                Continuing in 5 seconds.\n");
+	sleep(5);
+	fclose(legacy);
+    }
+
+    return NULL;
+}
+
+/* ************ Reading ************ */
+
+enum
+{
+    CACHE_NEED_FILE,
+    CACHE_NEED_BRANCHES,
+    CACHE_NEED_SYMBOLS,
+    CACHE_NEED_REV,
+    CACHE_NEED_PS,
+    CACHE_NEED_PS_DATE,
+    CACHE_NEED_PS_AUTHOR,
+    CACHE_NEED_PS_TAG,
+    CACHE_NEED_PS_TAG_FLAGS,
+    CACHE_NEED_PS_BRANCH,
+    CACHE_NEED_PS_BRANCH_ADD,
+    CACHE_NEED_PS_DESCR,
+    CACHE_NEED_PS_EOD,
+    CACHE_NEED_PS_MEMBERS,
+    CACHE_NEED_PS_EOM
+};
+/* Parse the cache file into file_hash and patchsets (via get_patch_set); returns the date recorded in the cache, or -1 when the file is missing or its header is rejected. */
+time_t read_cache()
+{
+    FILE * fp;
+    char buff[BUFSIZ];
+    int state = CACHE_NEED_FILE; /* which kind of line the parser expects next */
+    CvsFile * f = NULL; /* file record currently being filled in */
+    PatchSet * ps = NULL; /* patchset whose member lines are being read */
+    char datebuff[20] = "";
+    char authbuff[AUTH_STR_MAX] = "";
+    char tagbuff[LOG_STR_MAX] = "";
+    int tag_flags = 0;
+    char branchbuff[LOG_STR_MAX] = "";
+    int branch_add = 0;
+    char logbuff[LOG_STR_MAX] = "";
+    time_t cache_date = -1;
+    int read_version;
+
+    if (!(fp = cache_open("r")))
+	goto out;
+
+    /* first line is cache version  format "cache version: %d\n" */
+    if (!fgets(buff, BUFSIZ, fp) || strncmp(buff, "cache version:", 14))
+    {
+	debug(DEBUG_APPERROR, "bad cvsps.cache file");
+	goto out_close;
+    }
+
+    if ((read_version = atoi(buff + 15)) != cache_version)
+    {
+	debug(DEBUG_APPERROR, "bad cvsps.cache version %d, expecting %d.  ignoring cache",
+	      read_version, cache_version);
+	goto out_close;
+    }
+
+    /* second line is date cache was created, format "cache date: %d\n" */
+    if (!fgets(buff, BUFSIZ, fp) || strncmp(buff, "cache date:", 11))
+    {
+	debug(DEBUG_APPERROR, "bad cvsps.cache file");
+	goto out_close;
+    }
+
+    cache_date = atoi(buff + 12);
+    debug(DEBUG_STATUS, "read cache_date %d", (int)cache_date);
+
+    while (fgets(buff, BUFSIZ, fp))
+    {
+	int len = strlen(buff);
+
+	switch(state)
+	{
+	case CACHE_NEED_FILE:
+	    if (strncmp(buff, "file:", 5) == 0)
+	    {
+		len -= 6;
+		f = create_cvsfile();
+		f->filename = xstrdup(buff + 6);
+		f->filename[strcspn(f->filename, "\n")] = 0; /* Remove the \n at the end of line; strcspn cannot index before the string on an empty name */
+		debug(DEBUG_STATUS, "read cache filename '%s'", f->filename);
+		put_hash_object_ex(file_hash, f->filename, f, HT_NO_KEYCOPY, NULL, NULL);
+		state = CACHE_NEED_BRANCHES;
+	    }
+	    else
+	    {
+		state = CACHE_NEED_PS; /* first non-"file:" line ends the file section */
+	    }
+	    break;
+	case CACHE_NEED_BRANCHES:
+	    if (buff[0] != '\n')
+	    {
+		char * tag;
+
+		tag = strchr(buff, ':');
+		if (tag)
+		{
+		    *tag = 0; /* split "<key>: <value>" at the colon */
+		    tag += 2; /* skip the colon and the following space */
+		    buff[len - 1] = 0;
+		    cvs_file_add_branch(f, buff, tag);
+		}
+	    }
+	    else
+	    {
+		f->have_branches = 1; /* an empty line closes the branch list */
+		state = CACHE_NEED_SYMBOLS;
+	    }
+	    break;
+	case CACHE_NEED_SYMBOLS:
+	    if (buff[0] != '\n')
+	    {
+		char * rev;
+
+		rev = strchr(buff, ':');
+		if (rev)
+		{
+		    *rev = 0;
+		    rev += 2;
+		    buff[len - 1] = 0;
+		    cvs_file_add_symbol(f, rev, buff);
+		}
+	    }
+	    else
+	    {
+		state = CACHE_NEED_REV;
+	    }
+	    break;
+	case CACHE_NEED_REV:
+	    if (isdigit((unsigned char)buff[0])) /* cast: passing a negative char to isdigit() is undefined */
+	    {
+		char * p = strchr(buff, ' ');
+		if (p)
+		{
+		    CvsFileRevision * rev;
+		    *p++ = 0; /* line is "<rev> <branch>"; p now points at the branch */
+		    buff[len-1] = 0;
+		    rev = cvs_file_add_revision(f, buff);
+		    if (strcmp(rev->branch, p) != 0)
+		    {
+			debug(DEBUG_APPERROR, "branch mismatch for %s:%s %s != %s", 
+			      rev->file->filename, rev->rev, rev->branch, p);
+		    }
+		}
+	    }
+	    else
+	    {
+		state = CACHE_NEED_FILE; /* any non-digit line ends this file's revisions */
+	    }
+	    break;
+	case CACHE_NEED_PS:
+	    if (strncmp(buff, "patchset:", 9) == 0)
+		state = CACHE_NEED_PS_DATE;
+	    break;
+	case CACHE_NEED_PS_DATE:
+	    if (strncmp(buff, "date:", 5) == 0)
+	    {
+		/* remove prefix "date: " and LF from len */
+		len -= 6;
+		strzncpy(datebuff, buff + 6, MIN(len, sizeof(datebuff)));
+		state = CACHE_NEED_PS_AUTHOR;
+	    }
+	    break;
+	case CACHE_NEED_PS_AUTHOR:
+	    if (strncmp(buff, "author:", 7) == 0)
+	    {
+		/* remove prefix "author: " and LF from len */
+		len -= 8;
+		strzncpy(authbuff, buff + 8, MIN(len, AUTH_STR_MAX));
+		state = CACHE_NEED_PS_TAG;
+	    }
+	    break;
+	case CACHE_NEED_PS_TAG:
+	    if (strncmp(buff, "tag:", 4) == 0)
+	    {
+		/* remove prefix "tag: " and LF from len */
+		len -= 5;
+		strzncpy(tagbuff, buff + 5, MIN(len, LOG_STR_MAX));
+		state = CACHE_NEED_PS_TAG_FLAGS;
+	    }
+	    break;
+	case CACHE_NEED_PS_TAG_FLAGS:
+	    if (strncmp(buff, "tag_flags:", 10) == 0)
+	    {
+		/* remove prefix "tag_flags: " and LF from len */
+		len -= 11;
+		tag_flags = atoi(buff + 11);
+		state = CACHE_NEED_PS_BRANCH;
+	    }
+	    break;
+	case CACHE_NEED_PS_BRANCH:
+	    if (strncmp(buff, "branch:", 7) == 0)
+	    {
+		/* remove prefix "branch: " and LF from len */
+		len -= 8;
+		strzncpy(branchbuff, buff + 8, MIN(len, LOG_STR_MAX));
+		state = CACHE_NEED_PS_BRANCH_ADD;
+	    }
+	    break;
+	case CACHE_NEED_PS_BRANCH_ADD:
+	    if (strncmp(buff, "branch_add:", 11) == 0)
+	    {
+		/* remove prefix "branch_add: " and LF from len */
+		len -= 12;
+		branch_add = atoi(buff + 12);
+		state = CACHE_NEED_PS_DESCR;
+	    }
+	    break;
+	case CACHE_NEED_PS_DESCR:
+	    if (strncmp(buff, "descr:", 6) == 0)
+		state = CACHE_NEED_PS_EOD;
+	    break;
+	case CACHE_NEED_PS_EOD:
+	    if (strcmp(buff, CACHE_DESCR_BOUNDARY) == 0)
+	    {
+		debug(DEBUG_STATUS, "patch set %s %s %s %s", datebuff, authbuff, logbuff, branchbuff);
+		ps = get_patch_set(datebuff, logbuff, authbuff, branchbuff, NULL);
+		/* the tag and tag_flags will be assigned by the resolve_global_symbols code 
+		 * ps->tag = (strlen(tagbuff)) ? get_string(tagbuff) : NULL;
+		 * ps->tag_flags = tag_flags;
+		 */
+		ps->branch_add = branch_add;
+		state = CACHE_NEED_PS_MEMBERS;
+	    }
+	    else
+	    {
+		/* Make sure we have enough in the buffer; description lines that do not fit are dropped */
+		if (strlen(logbuff)+strlen(buff)<LOG_STR_MAX)
+		    strcat(logbuff, buff);
+	    }
+	    break;
+	case CACHE_NEED_PS_MEMBERS:
+	    if (strncmp(buff, "members:", 8) == 0)
+		state = CACHE_NEED_PS_EOM;
+	    break;
+	case CACHE_NEED_PS_EOM:
+	    if (buff[0] == '\n')
+	    {
+		datebuff[0] = 0; /* empty line ends the member list: reset every per-patchset field */
+		authbuff[0] = 0;
+		tagbuff[0] = 0;
+		tag_flags = 0;
+		branchbuff[0] = 0;
+		branch_add = 0;
+		logbuff[0] = 0;
+		state = CACHE_NEED_PS;
+	    }
+	    else
+	    {
+		PatchSetMember * psm = create_patch_set_member();
+		parse_cache_revision(psm, buff);
+		patch_set_add_member(ps, psm);
+	    }
+	    break;
+	}
+    }
+
+ out_close:
+    fclose(fp);
+ out:
+    return cache_date;
+}
+
+enum
+{
+    CR_FILENAME,
+    CR_PRE_REV,
+    CR_POST_REV,
+    CR_DEAD,
+    CR_BRANCH_POINT
+};
+
+static void parse_cache_revision(PatchSetMember * psm, const char * p_buff)
+{
+    /* Fill psm from one cache member line; fields are taken by position, the names before ':' are ignored:
+     * "file:%s; pre_rev:%s; post_rev:%s; dead:%d; branch_point:%d\n"
+     */
+    char filename[PATH_MAX] = ""; /* initialised so a line with missing fields never leaves these unset */
+    char pre[REV_STR_MAX] = "";
+    char post[REV_STR_MAX] = "";
+    int dead = 0;
+    int bp = 0;
+    char buff[BUFSIZ];
+    int state = CR_FILENAME;
+    const char *s;
+    char * p = buff;
+
+    snprintf(buff, sizeof(buff), "%s", p_buff); /* private copy: strsep() writes NULs into it */
+
+    while ((s = strsep(&p, ";")))
+    {
+	char * c = strchr(s, ':');
+
+	if (!c)
+	{
+	    debug(DEBUG_APPERROR, "invalid cache revision line '%s'|'%s'", p_buff, s);
+	    exit(1);
+	}
+
+	*c++ = 0; /* c now points at the field value */
+
+	switch(state)
+	{
+	case CR_FILENAME:
+	    snprintf(filename, sizeof(filename), "%s", c); /* bounded: a cache line can be longer than the buffer */
+	    break;
+	case CR_PRE_REV:
+	    snprintf(pre, sizeof(pre), "%s", c);
+	    break;
+	case CR_POST_REV:
+	    snprintf(post, sizeof(post), "%s", c);
+	    break;
+	case CR_DEAD:
+	    dead = atoi(c);
+	    break;
+	case CR_BRANCH_POINT:
+	    bp = atoi(c);
+	    break;
+	}
+	state++; /* the next ';'-separated token is the next field */
+    }
+
+    psm->file = (CvsFile*)get_hash_object(file_hash, filename);
+
+    if (!psm->file)
+    {
+	debug(DEBUG_APPERROR, "file '%s' not found in hash", filename);
+	exit(1);
+    }
+
+    psm->pre_rev = file_get_revision(psm->file, pre);
+    psm->post_rev = file_get_revision(psm->file, post); /* NOTE(review): dereferenced below without a NULL check - confirm file_get_revision cannot fail here */
+    psm->post_rev->dead = dead;
+    psm->post_rev->post_psm = psm;
+
+    if (!bp)
+    {
+	if (psm->pre_rev)
+	    psm->pre_rev->pre_psm = psm;
+    }
+    else
+    {
+	list_add(&psm->post_rev->link, &psm->pre_rev->branch_children); /* branch point: hang post_rev off pre_rev's branch_children list */
+    }
+}
+
+/************ Writing ************/
+
+void write_cache(time_t cache_date)
+{
+    struct hash_entry * fentry;
+
+    /* dump_patch_set() numbers patchsets from this counter, so restart it for every write */
+    ps_counter = 0;
+
+    cache_fp = cache_open("w");
+    if (!cache_fp)
+    {
+	debug(DEBUG_SYSERROR, "can't open cvsps.cache for write");
+	return;
+    }
+
+    /* two header lines: format version, then the cache date printed as an int */
+    fprintf(cache_fp, "cache version: %d\n", cache_version);
+    fprintf(cache_fp, "cache date: %d\n", (int)cache_date);
+
+    /* one record per file: name, branches, symbols, revisions; each list is closed by an empty line */
+    for (reset_hash_iterator(file_hash); (fentry = next_hash_entry(file_hash)); )
+    {
+	CvsFile * cvsfile = (CvsFile*)fentry->he_obj;
+	struct hash_entry * e;
+
+	fprintf(cache_fp, "file: %s\n", cvsfile->filename);
+
+	/* branches hash: written as "<key>: <object>" */
+	for (reset_hash_iterator(cvsfile->branches); (e = next_hash_entry(cvsfile->branches)); )
+	    fprintf(cache_fp, "%s: %s\n", (char *)e->he_key, (char *)e->he_obj);
+
+	fprintf(cache_fp, "\n");
+
+	/* symbols hash: "<tag>: <rev>", skipping revisions not marked present */
+	for (reset_hash_iterator(cvsfile->symbols); (e = next_hash_entry(cvsfile->symbols)); )
+	{
+	    CvsFileRevision * tagged = (CvsFileRevision*)e->he_obj;
+
+	    if (!tagged->present)
+		continue;
+	    fprintf(cache_fp, "%s: %s\n", (char *)e->he_key, tagged->rev);
+	}
+
+	fprintf(cache_fp, "\n");
+
+	/* revisions hash: "<rev> <branch>" for every revision marked present */
+	for (reset_hash_iterator(cvsfile->revisions); (e = next_hash_entry(cvsfile->revisions)); )
+	{
+	    CvsFileRevision * r = (CvsFileRevision*)e->he_obj;
+
+	    if (r->present)
+		fprintf(cache_fp, "%s %s\n", r->rev, r->branch);
+	}
+
+	fprintf(cache_fp, "\n");
+    }
+
+    /* separator line, then every patchset through the tree-walk callback */
+    fprintf(cache_fp, "\n");
+    walk_all_patch_sets(write_patch_set_to_cache);
+    fclose(cache_fp);
+    cache_fp = NULL;
+}
+
+static void write_patch_set_to_cache(PatchSet * ps)
+{
+    dump_patch_set(cache_fp, ps); /* callback handed to walk_all_patch_sets(): it only receives the patchset, so the stream comes from the file-scope cache_fp */
+}
+
+static void dump_patch_set(FILE * fp, PatchSet * ps)
+{
+    struct list_head * pos;
+
+    /* header block: running patchset number, then the patchset's own fields */
+    fprintf(fp, "patchset: %d\n", ++ps_counter);
+    fprintf(fp, "date: %d\n", (int)ps->date);
+    fprintf(fp, "author: %s\n", ps->author);
+    fprintf(fp, "tag: %s\n", ps->tag ? ps->tag : "");
+    fprintf(fp, "tag_flags: %d\n", ps->tag_flags);
+    fprintf(fp, "branch: %s\n", ps->branch);
+    fprintf(fp, "branch_add: %d\n", ps->branch_add);
+    fprintf(fp, "descr:\n%s", ps->descr); /* descr is guaranteed to end with LF */
+    fputs(CACHE_DESCR_BOUNDARY, fp);
+    fprintf(fp, "members:\n");
+
+    for (pos = ps->members.next; pos != &ps->members; pos = pos->next)
+    {
+	PatchSetMember * member = list_entry(pos, PatchSetMember, link);
+	int is_branch_point;
+
+	/* an ordinary commit either has no pre-revision, or its pre-revision
+	 * points straight back at this member; anything else is a branch point */
+	is_branch_point = member->pre_rev &&
+	    !(member->pre_rev->pre_psm && member->pre_rev->pre_psm == member);
+
+	fflush(fp);
+	fprintf(fp, "file:%s; pre_rev:%s; post_rev:%s; dead:%d; branch_point:%d\n",
+		member->file->filename,
+		member->pre_rev ? member->pre_rev->rev : "INITIAL",
+		member->post_rev->rev,
+		member->post_rev->dead, is_branch_point);
+    }
+
+    fprintf(fp, "\n");
+}
+
+/* where's arithmetic?... */
diff --git a/cache.h b/cache.h
new file mode 100644
index 0000000..996c4bc
--- /dev/null
+++ b/cache.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef CACHE_H
+#define CACHE_H
+
+extern time_t read_cache(); /* loads the cache file; returns the date stored in it, or -1 when no cache could be used */
+extern void write_cache(time_t); /* writes the cache file, recording the given date in its header */
+
+#endif /* CACHE_H */
diff --git a/cap.c b/cap.c
new file mode 100644
index 0000000..a6186f6
--- /dev/null
+++ b/cap.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cbtcommon/debug.h>
+#include <cbtcommon/text_util.h>
+
+#include "cap.h"
+#include "cvs_direct.h"
+
+extern CvsServerCtx * cvs_direct_ctx;
+
+static char client_version[BUFSIZ];
+static char server_version[BUFSIZ];
+
+static int check_cvs_version(int, int, int);
+static int check_version_string(const char *, int, int, int);
+
+int cvs_check_cap(int cap)
+{
+    int supported;
+
+    /* CAP_HAVE_RLOG is the only capability this function knows how to probe */
+    if (cap != CAP_HAVE_RLOG)
+    {
+	debug(DEBUG_APPERROR, "unknown cvs capability check %d", cap);
+	exit(1);
+    }
+
+    /* rlog requires client and server to report at least 1.11.1 */
+    supported = check_cvs_version(1, 11, 1);
+    if (supported)
+	return supported;
+
+    debug(DEBUG_APPERROR,
+	  "WARNING: Your CVS client version:\n[%s]\n"
+	  "and/or server version:\n[%s]\n"
+	  "are too old to properly support the rlog command. \n"
+	  "This command was introduced in 1.11.1.  Cvsps\n"
+	  "will use log instead, but PatchSet numbering\n"
+	  "may become unstable due to pruned empty\n"
+	  "directories.\n", client_version, server_version);
+
+    return supported;
+}
+
+static void get_version_external()
+{
+    FILE * pipe_fp;
+
+    /* placeholders that remain in place when the command
+     * produces nothing usable */
+    strcpy(client_version, "(UNKNOWN CLIENT)");
+    strcpy(server_version, "(UNKNOWN SERVER)");
+
+    pipe_fp = popen("cvs version 2>/dev/null", "r");
+    if (pipe_fp == NULL)
+    {
+	debug(DEBUG_APPERROR, "cannot popen cvs version. exiting");
+	exit(1);
+    }
+
+    /* the first output line is stored as the client version */
+    if (fgets(client_version, BUFSIZ, pipe_fp) == NULL)
+    {
+	debug(DEBUG_APPMSG1, "WARNING: malformed CVS version: no data");
+    }
+    else
+    {
+	chop(client_version);
+
+	/* a first line starting with "Client" is followed by a server line;
+	 * otherwise the server version is left empty */
+	if (strncmp(client_version, "Client", 6) != 0)
+	    server_version[0] = 0;
+	else if (fgets(server_version, BUFSIZ, pipe_fp) != NULL)
+	    chop(server_version);
+	else
+	    debug(DEBUG_APPMSG1, "WARNING: malformed CVS version: no server data");
+    }
+
+    /* every path that opened the pipe ends up closing it here */
+    pclose(pipe_fp);
+}
+
+int check_cvs_version(int req_major, int req_minor, int req_extra)
+{
+    /* version strings are fetched only while client_version is still empty */
+    if (client_version[0] == 0 && cvs_direct_ctx)
+	cvs_version(cvs_direct_ctx, client_version, server_version);
+    else if (client_version[0] == 0)
+	get_version_external();
+
+    if (!check_version_string(client_version, req_major, req_minor, req_extra))
+	return 0;
+
+    return server_version[0] == 0 || check_version_string(server_version, req_major, req_minor, req_extra);
+}
+
+int check_version_string(const char * str, int req_major, int req_minor, int req_extra)
+{
+    int major, minor, extra;
+    const char * ver;
+
+    /* the numbers follow a "(CVS) " marker, or "(CVSNT)" plus one more character */
+    if ((ver = strstr(str, "(CVS) ")))
+	ver += 6;
+    else if ((ver = strstr(str, "(CVSNT)")))
+	ver += 8;
+
+    if (!ver)
+    {
+	debug(DEBUG_APPMSG1, "WARNING: malformed CVS version str: %s", str);
+	return 0;
+    }
+
+    if (sscanf(ver, "%d.%d.%d", &major, &minor, &extra) != 3)
+    {
+	debug(DEBUG_APPMSG1, "WARNING: malformed CVS version: %s", str);
+	return 0;
+    }
+
+    /* compare (major, minor, extra) against the requirement, most significant part first */
+    if (major != req_major)
+	return major > req_major;
+    if (minor != req_minor)
+	return minor > req_minor;
+    return extra >= req_extra;
+}
+
diff --git a/cap.h b/cap.h
new file mode 100644
index 0000000..7634015
--- /dev/null
+++ b/cap.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef CAP_H
+#define CAP_H
+
+#define CAP_HAVE_RLOG 1 /* capability id: client and server are new enough (1.11.1) for the rlog command */
+
+int cvs_check_cap(int); /* non-zero when the capability is available; exits on an unknown capability id */
+
+#endif /* CAP_H */
diff --git a/cbtcommon/debug.c b/cbtcommon/debug.c
new file mode 100644
index 0000000..3f5ac52
--- /dev/null
+++ b/cbtcommon/debug.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "debug.h"
+#include "rcsid.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+RCSID("$Id: debug.c,v 1.14 2001/11/29 00:00:30 amb Exp $");
+
+unsigned int debuglvl = ~0;
+static FILE *debug_output_channel[DEBUG_NUM_FACILITIES];
+
+#ifdef MACINTOSH
+int ffs( int val )
+{
+    /* returns the 1-based position of the lowest set bit of val, or 0 when val is 0 */
+    unsigned int bits = (unsigned int)val; /* unsigned: shifting 1 into the sign bit of an int is undefined */
+    int i;
+    for( i = 0; bits != 0; i++, bits >>= 1 )
+	if( bits & 1u )
+	    return i+1;
+    return 0;
+}
+#endif
+/* Format fmt/ap and print it on the stream registered for facility dtype (stderr when none is set), if debuglvl enables dtype; errno is preserved. */
+void vdebug(int dtype, const char *fmt, va_list ap)
+{
+  int  keep_errno;   
+  char msgbuff[8192];
+
+  /* errno could be changed by vsprintf or perror: remember the caller's value */
+  keep_errno = errno;
+
+  if (debuglvl & dtype)
+  {
+      FILE * channel = debug_output_channel[ffs(dtype)]; /* slot index is ffs(dtype), the same index debug_set_error_facility() writes */
+
+      if (!channel)
+	  channel = stderr;
+
+#ifdef MACINTOSH
+      vsprintf(msgbuff, fmt, ap);
+#else
+      vsnprintf(msgbuff, sizeof(msgbuff), fmt, ap);
+#endif
+
+      /* DEBUG_ERROR (aka DEBUG_SYSERROR): append the system error text */
+      if (dtype == DEBUG_ERROR)
+      {
+	  const char * errmsg = "";
+
+#ifndef MACINTOSH
+	  errmsg = strerror(keep_errno); /* saved value, not errno: the formatting above may have changed it */
+#endif
+
+	  fprintf(channel, "%s: %s\n", msgbuff, errmsg);
+      }
+      else
+	  fprintf(channel, "%s\n", msgbuff);
+      
+      fflush(channel);
+#ifdef _WIN32
+      if (dtype == DEBUG_SYSERROR || dtype == DEBUG_APPERROR)
+	  MessageBox(NULL, msgbuff, "Application Error", MB_OK);
+#endif
+  }
+  
+  errno = keep_errno; /* hand the caller back the errno it came in with */
+}
+/* Emit one message for every enabled facility bit set in dtype, at most once per distinct output stream. */
+void vmdebug(int dtype, const char * fmt, va_list ap)
+{
+    FILE * chn[DEBUG_NUM_FACILITIES];
+    int i;
+
+    /* private copy of the channel table (NULL slots become stderr) so duplicates can be struck out below */
+    memcpy(chn, debug_output_channel, sizeof(FILE*) * DEBUG_NUM_FACILITIES);
+    for (i = 0; i < DEBUG_NUM_FACILITIES; i++)
+	if (chn[i] == NULL)
+	    chn[i] = stderr;
+
+    for (i = 0; i < DEBUG_NUM_FACILITIES; i++)
+    {
+	unsigned int bit = 1u << i; /* unsigned: 1 << 31 overflows a 32-bit int */
+	va_list aq;
+	int j;
+
+	if (!(dtype & bit) || !chn[i] || !(debuglvl & bit))
+	    continue;
+	/* ap is indeterminate once vdebug() has consumed it, so each call gets its own copy */
+	va_copy(aq, ap);
+	vdebug((int)bit, fmt, aq);
+	va_end(aq);
+	/* silence later facilities that share this stream */
+	for (j = i + 1; j < DEBUG_NUM_FACILITIES; j++)
+	    if (chn[j] == chn[i])
+		chn[j] = NULL;
+    }
+}
+
+/* Log a printf-style banner, then size bytes at ptr as rows of 16 (hex column, printable column) on DEBUG_STATUS. FIXME: use actual debug output core routine vdebug... */
+void hexdump(const char *ptr, int size, const char *fmt, ...) 
+{
+    static char hexbuff[49]; /* 16 * "xx " plus the terminator */
+    static char printbuff[17]; /* 16 characters plus the terminator */
+    int count = 0; /* bytes placed in the current row */
+    va_list ap;
+    
+    if ( !(debuglvl & DEBUG_STATUS) ) /* parenthesised: '!' binds tighter than '&', so the old test never fired */
+	return;
+    
+    va_start(ap, fmt);
+    
+    /* print the heading/banner */
+    vdebug(DEBUG_STATUS, fmt, ap);
+    
+    memset(hexbuff, 0, 49);
+    memset(printbuff, 0, 17);
+    
+    while (size-- > 0) /* '> 0' so a negative size dumps nothing instead of running away */
+    {
+	sprintf(hexbuff + (count*3), "%02x ", (int)*((unsigned char *)ptr));
+	
+	if (isprint((unsigned char)*ptr)) /* cast: a negative char is not a valid isprint() argument */
+	    printbuff[count] = *ptr;
+	else
+	    printbuff[count] = '.';
+	
+	ptr++;
+	
+	if ( count++ == 15 ) 
+	{
+	    count = 0; /* row complete: emit it and start a fresh one */
+	    debug(DEBUG_STATUS, "%s %s", hexbuff, printbuff);
+	    memset(hexbuff, 0, 49);
+	    memset(printbuff, 0, 17);
+	}
+    }
+    
+    if ( count > 0 ) {
+	while ( count % 16 != 0 ) { /* pad a partial last row with "xx" / '.' */
+	    sprintf(hexbuff + (count * 3), "xx ");
+	    printbuff[count++] = '.';
+	}
+	debug(DEBUG_STATUS, "%s %s", hexbuff, printbuff);
+    }
+    
+    va_end(ap);
+}
+
+void
+to_hex( char* dest, const char* src, size_t n )
+{
+    /* each source byte becomes two lowercase hex digits followed by a space; dest needs 3*n+1 bytes */
+    const unsigned char * in = (const unsigned char *)src;
+    size_t k;
+
+    for ( k = 0; k < n; k++ )
+	sprintf( dest + 3 * k, "%02x ", (int)in[k] );
+
+    dest[3 * n] = 0;
+}
+
+void debug_set_error_file(FILE *f)
+{
+    int left = DEBUG_NUM_FACILITIES; /* every facility slot is pointed at the same stream */
+    while (left-- > 0)
+	debug_output_channel[left] = f;
+}
+
+void debug_set_error_facility(int fac, FILE * f)
+{
+    FILE ** slot = debug_output_channel;
+    int left;
+
+    /* unset slots are pinned to stderr first, then slot ffs(fac) gets f. NOTE(review): ffs() is 1-based while vmdebug() indexes from 0 - confirm intended slot */
+    for (left = DEBUG_NUM_FACILITIES; left > 0; left--, slot++)
+	if (*slot == NULL) *slot = stderr;
+    debug_output_channel[ffs(fac)] = f;
+}
diff --git a/cbtcommon/debug.h b/cbtcommon/debug.h
new file mode 100644
index 0000000..2ede381
--- /dev/null
+++ b/cbtcommon/debug.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _DEBUG_H
+#define _DEBUG_H
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifndef MACINTOSH
+#include <sys/types.h>
+#endif
+
+#include "inline.h"
+
+#define DEBUG_NUM_FACILITIES  32 /* should be 64 on 64bit CPU... */
+#define DEBUG_SYSERROR  1  /* same as DEBUG_ERROR, but here for clarity */
+#define DEBUG_ERROR     1
+#define DEBUG_STATUS    2
+#define DEBUG_TCP       4
+#define DEBUG_SIGNALS   8
+#define DEBUG_APPERROR  16
+#define DEBUG_APPMSG1   32
+#define DEBUG_APPMSG2   64
+#define DEBUG_APPMSG3   128
+#define DEBUG_APPMSG4   256
+#define DEBUG_APPMSG5   512
+#define DEBUG_LIBERROR  1024
+#define DEBUG_LIBSTATUS 2048
+
+#ifdef __cplusplus
+extern "C" 
+{
+#endif
+
+extern unsigned int debuglvl;
+
+void hexdump( const char *ptr, int size, const char *fmt, ... );
+void vdebug(int dtype, const char *fmt, va_list);
+void vmdebug(int dtype, const char *fmt, va_list);
+void to_hex( char* dest, const char* src, size_t n );
+void debug_set_error_file(FILE *);
+void debug_set_error_facility(int mask, FILE *);
+
+static INLINE void debug(unsigned int dtype, const char *fmt, ...)
+{
+    /* mask test first, so a disabled facility never sets up a va_list */
+    if (debuglvl & dtype)
+    {
+	va_list ap;
+	va_start(ap, fmt);
+	vdebug(dtype, fmt, ap);
+	va_end(ap);
+    }
+}
+
+static INLINE void mdebug(unsigned int dtype, const char *fmt, ...)
+{
+    /* same gate as debug(), but the message goes through vmdebug() */
+    if (debuglvl & dtype)
+    {
+	va_list ap;
+	va_start(ap, fmt);
+	vmdebug(dtype, fmt, ap);
+	va_end(ap);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _DEBUG_H */
diff --git a/cbtcommon/hash.c b/cbtcommon/hash.c
new file mode 100644
index 0000000..ddc081b
--- /dev/null
+++ b/cbtcommon/hash.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "debug.h"
+#include "hash.h"
+#include "rcsid.h"
+
+RCSID("$Id: hash.c,v 1.6 2003/05/07 15:42:38 david Exp $");
+
+#define HASH_CONST 37
+
+static unsigned int hash_string(const char *);
+static struct hash_entry *scan_list(struct list_head *, const char *); 
+static struct hash_entry *get_hash_entry(struct hash_table *tbl, const char *key);
+
+/* Allocate a table with sz buckets (header and bucket array share one malloc
+ * block); returns NULL on allocation failure.  The iterator starts reset. */
+struct hash_table *create_hash_table(unsigned int sz)
+{
+    struct hash_table *tbl;
+    unsigned int i;
+
+    tbl = (struct hash_table *)malloc(sizeof(*tbl) + sz*sizeof(struct list_head));
+    if (!tbl)
+    {
+	debug(DEBUG_APPERROR, "malloc for hash_table failed");
+	return NULL;
+    }
+
+    tbl->ht_size  = sz;
+    tbl->ht_lists = (struct list_head *)(tbl + 1);
+    tbl->iterator = 0;
+    tbl->iterator_ptr = NULL; /* next_hash_entry() reads this; malloc leaves it indeterminate */
+    for (i = 0; i < sz; i++)
+	INIT_LIST_HEAD(&tbl->ht_lists[i]);
+    return tbl;
+}
+
+/* Free every entry of tbl, then tbl itself.  When delete_obj is non-NULL it
+ * is called on each stored object before the entry is released. */
+void destroy_hash_table(struct hash_table *tbl, void (*delete_obj)(void *))
+{
+    int bucket;
+
+    for (bucket = 0; bucket < tbl->ht_size; bucket++)
+    {
+	struct list_head *head = &tbl->ht_lists[bucket];
+	struct list_head *node = head->next;
+
+	while (node != head)
+	{
+	    struct hash_entry *entry = list_entry(node, struct hash_entry, he_list);
+
+	    /* step forward first: the entry holding node is about to be freed */
+	    node = node->next;
+	    if (delete_obj)
+		delete_obj(entry->he_obj);
+	    free(entry);
+	}
+    }
+
+    free(tbl);
+}
+
+/* Store obj under a private copy of key (HT_KEYCOPY); returns the object
+ * previously stored under key, or NULL.  FIXME: callers cannot tell a put to
+ * a new key from a malloc failure -- both return NULL. */
+void *put_hash_object(struct hash_table *tbl, const char *key, void *obj)
+{
+    void *previous;
+    put_hash_object_ex(tbl, key, obj, HT_KEYCOPY, NULL, &previous);
+    return previous;
+}
+
+/*
+ * Find the entry for key: hash the key to pick a bucket, then search that
+ * bucket's list with scan_list().
+ * Returns NULL when the key is not present.
+ */
+static struct hash_entry *get_hash_entry(struct hash_table *tbl, const char *key)
+{
+    unsigned int bucket = hash_string(key) % tbl->ht_size;
+    struct list_head *chain = &tbl->ht_lists[bucket];
+
+    return scan_list(chain, key);
+}
+
+void *get_hash_object(struct hash_table *tbl, const char *key)
+{
+    struct hash_entry *found = get_hash_entry(tbl, key);
+    return found ? found->he_obj : NULL; /* NULL when the key is absent */
+}
+
+/* Remove key's entry and return its object (not freed), or NULL if absent. */
+void *remove_hash_object(struct hash_table *tbl, const char *key)
+{
+    struct hash_entry *victim = get_hash_entry(tbl, key);
+    void *obj;
+
+    if (!victim)
+	return NULL;
+
+    list_del(&victim->he_list);
+    obj = victim->he_obj;	/* read before the entry is freed */
+    free(victim);
+    return obj;
+}
+
+/* Polynomial string hash: h = h * HASH_CONST + c over each char of key. */
+static unsigned int hash_string(register const char *key)
+{
+    register unsigned int hash = 0;
+
+    for (; *key; key++)
+	hash = hash * HASH_CONST + *key;
+    return hash;
+}
+
+/* Walk the bucket list starting at head and return the first entry whose key
+ * compares equal (strcmp) to key, or NULL if there is none. */
+static struct hash_entry *scan_list(struct list_head *head, const char *key)
+{
+    struct list_head *node;
+
+    for (node = head->next; node != head; node = node->next)
+    {
+	struct hash_entry *candidate = list_entry(node, struct hash_entry, he_list);
+
+	if (strcmp(candidate->he_key, key) == 0)
+	    return candidate;
+    }
+
+    return NULL;
+}
+
+void reset_hash_iterator(struct hash_table *tbl)
+{
+    tbl->iterator_ptr = NULL;	/* NULL: not yet positioned inside a bucket */
+    tbl->iterator = 0;		/* restart from the first bucket */
+}
+
+/*
+ * Return the next entry of the iteration started by reset_hash_iterator(),
+ * or NULL once every bucket has been visited.  The position is kept in
+ * tbl->iterator (bucket index) and tbl->iterator_ptr (node in that bucket,
+ * NULL meaning "not started on this bucket yet").
+ */
+struct hash_entry *next_hash_entry(struct hash_table *tbl)
+{
+    for (; tbl->iterator < tbl->ht_size; tbl->iterator++, tbl->iterator_ptr = NULL)
+    {
+	struct list_head *head = &tbl->ht_lists[tbl->iterator];
+	struct list_head *current;
+
+	if (tbl->iterator_ptr == NULL)
+	    tbl->iterator_ptr = head->next;
+
+	current = tbl->iterator_ptr;
+	if (current == head)
+	    continue;	/* bucket exhausted: move on to the next one */
+
+	tbl->iterator_ptr = current->next;
+	return list_entry(current, struct hash_entry, he_list);
+    }
+    return NULL;
+}
+
+int put_hash_object_ex(struct hash_table *tbl, const char *key, void *obj, int copy, 
+		       char ** oldkey, void ** oldobj)
+{
+    struct list_head *head;
+    struct hash_entry *entry;
+    unsigned int hash;
+    int retval = 0;
+    /* Insert obj under key, or replace an existing entry's object; returns 0, or -1 if malloc fails. */
+    /* FIXME: how can get_hash_entry be changed to be usable here? 
+     * we need the value of head later if the entry is not found...
+     */
+    hash  = hash_string(key) % tbl->ht_size;
+    head  = &tbl->ht_lists[hash];
+    entry = scan_list(head, key);
+
+    if (entry)
+    {
+	if (oldkey)
+	    *oldkey = entry->he_key; /* optional out-param: the key being replaced */
+	if (oldobj)
+	    *oldobj = entry->he_obj; /* optional out-param: the object being replaced */
+
+	/* if 'copy' is set, then we already have an exact
+	 * private copy of the key (by definition of having
+	 * found the match in scan_list) so we do nothing.
+	 * if !copy, then we can simply assign the new
+	 * key
+	 */
+	if (!copy)
+	    entry->he_key = (char*)key; /* discard the const */
+	entry->he_obj = obj;
+    }
+    else
+    {
+	size_t s = sizeof(*entry); /* grows by the key length when the key is copied */
+
+	if (oldkey)
+	    *oldkey = NULL; /* new key: nothing was replaced */
+	if (oldobj)
+	    *oldobj = NULL; /* new key: nothing was replaced */
+
+	if (copy)
+	    s +=  strlen(key) + 1;
+
+	entry = (struct hash_entry *)malloc(s);
+	
+	if (!entry)
+	{
+	    debug(DEBUG_APPERROR,"malloc failed put_hash_object key='%s'",key);
+	    retval = -1;
+	}
+	else
+	{
+	    if (copy)
+	    {
+		entry->he_key = (char *)(entry + 1); /* key copy lives right after the entry, in the same allocation */
+		strcpy(entry->he_key, key);
+	    }
+	    else
+	    {
+		entry->he_key = (char*)key; /* discard the const */
+	    }
+
+	    entry->he_obj = obj;
+
+	    list_add(&entry->he_list, head);
+	}
+    }
+
+    return retval;
+}
+
+/* Like destroy_hash_table(), but delete_entry (when non-NULL) receives the
+ * caller's cookie plus each entry's key and object before it is freed. */
+void destroy_hash_table_ex(struct hash_table *tbl, 
+			   void (*delete_entry)(const void *, char *, void *), 
+			   const void * cookie)
+{
+    int bucket;
+
+    for (bucket = 0; bucket < tbl->ht_size; bucket++)
+    {
+	struct list_head *head = &tbl->ht_lists[bucket];
+	struct list_head *node = head->next;
+
+	while (node != head)
+	{
+	    struct hash_entry *entry = list_entry(node, struct hash_entry, he_list);
+
+	    /* step forward first: the entry holding node is about to be freed */
+	    node = node->next;
+	    if (delete_entry)
+		delete_entry(cookie, entry->he_key, entry->he_obj);
+	    free(entry);
+	}
+    }
+
+    free(tbl);
+}
diff --git a/cbtcommon/hash.h b/cbtcommon/hash.h
new file mode 100644
index 0000000..797e3b3
--- /dev/null
+++ b/cbtcommon/hash.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _COMMON_HASH_H
+#define _COMMON_HASH_H
+
+#include "list.h"
+
+struct hash_entry
+{
+    char              *he_key;   /* key string: a private copy or the caller's pointer (see HT_KEYCOPY) */
+    void              *he_obj;   /* object stored under the key; owned by the caller */
+    struct list_head   he_list;  /* links the entry into its bucket's list */
+};
+
+struct hash_table
+{
+    int                ht_size;       /* number of buckets */
+    struct list_head  *ht_lists;      /* array of ht_size bucket list heads */
+    int                iterator;      /* bucket index used by next_hash_entry() */
+    struct list_head  *iterator_ptr;  /* next node to return in that bucket, or NULL */
+};
+
+enum /* values for the 'copy' argument of put_hash_object_ex() */
+{
+    HT_NO_KEYCOPY, /* entry keeps the caller's key pointer */
+    HT_KEYCOPY     /* entry stores its own copy of the key string */
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_table *create_hash_table(unsigned int sz);
+void destroy_hash_table(struct hash_table *tbl, void (*delete_obj)(void *));
+void *put_hash_object(struct hash_table *tbl, const char *key, void *obj);
+void *get_hash_object(struct hash_table *tbl, const char *key);
+void *remove_hash_object(struct hash_table *tbl, const char *key);
+
+int put_hash_object_ex(struct hash_table *tbl, const char *key, void *obj, int, char **, void **);
+void destroy_hash_table_ex(struct hash_table *tbl, void (*delete_entry)(const void *, char *, void *), const void *);
+
+void reset_hash_iterator(struct hash_table *tbl);
+struct hash_entry *next_hash_entry(struct hash_table *tbl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COMMON_HASH_H */
diff --git a/cbtcommon/inline.h b/cbtcommon/inline.h
new file mode 100644
index 0000000..776ef26
--- /dev/null
+++ b/cbtcommon/inline.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef UTIL_INLINE_H
+#define UTIL_INLINE_H
+
+#ifdef __GNUC__
+#define INLINE __inline__
+#endif
+
+#ifdef WIN32
+#define INLINE __inline
+#endif
+
+/* INLINE of last resort... heh */
+
+#ifndef INLINE
+#define INLINE /* void */
+#endif
+
+#endif
diff --git a/cbtcommon/list.h b/cbtcommon/list.h
new file mode 100644
index 0000000..4ee245d
--- /dev/null
+++ b/cbtcommon/list.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _COMMON_LIST_H
+#define _COMMON_LIST_H
+
+/*
+ * Stolen from linux-2.1.131
+ * All comments from the original source unless otherwise noted
+ * Added: the CLEAR_LIST_NODE macro
+ */
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+#include "inline.h"
+
+struct list_head {
+        struct list_head *next, *prev;
+};
+
+#define LIST_HEAD(name) \
+        struct list_head name = { &name, &name }
+
+#define INIT_LIST_HEAD(ptr)  do { \
+        (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+#define CLEAR_LIST_NODE(ptr) do { \
+        (ptr)->next = NULL;  (ptr)->prev = NULL; \
+} while (0)
+
+/*
+ * Insert a new entry (li) between two known consecutive entries,
+ * so that afterwards the order is prev <-> li <-> next.
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static INLINE void __list_add(struct list_head *li,
+        struct list_head * prev,
+        struct list_head * next)
+{
+        next->prev = li;
+        li->next = next;
+        li->prev = prev;
+        prev->next = li;
+}
+
+/*
+ * Insert a new entry right after the specified head (i.e. at the front).
+ */
+static INLINE void list_add(struct list_head *li, struct list_head *head)
+{
+        __list_add(li, head, head->next);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.  The removed entry itself is not touched.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static INLINE void __list_del(struct list_head * prev,
+                                  struct list_head * next)
+{
+        next->prev = prev;
+        prev->next = next;
+}
+
+static INLINE void list_del(struct list_head *entry)
+{
+        __list_del(entry->prev, entry->next); /* unlink; entry's own pointers are left as they were */
+}
+
+static INLINE int list_empty(struct list_head *head)
+{
+        return head->next == head; /* an empty list's head points back at itself */
+}
+
+/*
+ * Splice "list"'s entries in right after "head"; "list" itself is left as is.
+ */
+static INLINE void list_splice(struct list_head *list, struct list_head *head)
+{
+        struct list_head *first = list->next;
+
+        if (first != list) { /* nothing to do when "list" is empty */
+                struct list_head *last = list->prev;
+                struct list_head *at = head->next; /* old first entry of "head" */
+
+                first->prev = head;
+                head->next = first;
+
+                last->next = at;
+                at->prev = last;
+        }
+}
+
+#define list_entry(ptr, type, member) \
+        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+#endif /* _COMMON_LIST_H */
diff --git a/cbtcommon/rcsid.h b/cbtcommon/rcsid.h
new file mode 100644
index 0000000..b85b6fb
--- /dev/null
+++ b/cbtcommon/rcsid.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _COMMON_RCSID_H
+#define _COMMON_RCSID_H
+
+/* RCS Id macro (compliments of bod@compusol.com.au (Brendan O'Dea)) */
+#ifdef lint
+# define RCSID(i)
+#else /* lint */
+# ifdef __GNUC__
+#  define ATTRIB_UNUSED __attribute__ ((unused))
+# else /* __GNUC__ */
+#  define ATTRIB_UNUSED
+# endif /* __GNUC__ */
+# define RCSID(i) static char const *rcsid ATTRIB_UNUSED = (i)
+#endif /* lint */
+
+#endif /* _COMMON_RCSID_H */
diff --git a/cbtcommon/sio.c b/cbtcommon/sio.c
new file mode 100644
index 0000000..a9faf81
--- /dev/null
+++ b/cbtcommon/sio.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+
+#ifdef WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <errno.h>
+
+#include "sio.h"
+#include "rcsid.h"
+
+RCSID("$Id: sio.c,v 1.5 2001/10/25 18:36:11 adam Exp $");
+
+/*
+ * Read exactly len bytes from fd into buf, retrying after short reads.
+ * Returns the number of bytes stored (less than len only at end of file),
+ * or the negative read() result on error.  EINTR is retried once some data
+ * has been read; with nothing read yet it is returned to the caller.
+ */
+ssize_t readn(int fd, void *buf, size_t len)
+{
+  /* size_t/ssize_t instead of int: int truncated lengths above INT_MAX */
+  size_t nleft = len;
+  ssize_t nread;
+
+  while (nleft > 0)
+  {
+    nread = read(fd,buf,nleft);
+
+    if (nread < 0)
+    {
+      if (errno == EINTR && nleft != len)
+        continue;
+      else
+        return (nread);
+    }
+    else if (nread == 0)
+      break;
+
+    nleft -= (size_t)nread;
+    if (nleft)
+      buf = ((char *)buf) + nread;
+  }
+  return (ssize_t)(len - nleft);
+}
+
+/*
+ * Write exactly len bytes from buf to fd, retrying after short writes.
+ * Returns len on success.  A write() result <= 0 is returned unchanged,
+ * except that EINTR is retried once some bytes have already been written.
+ */
+ssize_t writen(int fd, const void *buf, size_t len)
+{
+  /* size_t/ssize_t instead of int: int truncated lengths above INT_MAX */
+  size_t nleft = len;
+  ssize_t nwritten;
+
+  while (nleft > 0)
+  {
+    nwritten = write(fd,buf,nleft);
+
+    if (nwritten < 0 && errno == EINTR)
+    {
+      if (nleft == len)
+        return nwritten;
+
+      continue;
+    }
+
+    if (nwritten <= 0)
+      return nwritten;
+
+    nleft -= (size_t)nwritten;
+    if (nleft)
+      buf = ((const char *)buf) + nwritten;
+  }
+
+  return (ssize_t)(len - nleft);
+}
+
diff --git a/cbtcommon/sio.h b/cbtcommon/sio.h
new file mode 100644
index 0000000..f6f2978
--- /dev/null
+++ b/cbtcommon/sio.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _SIO_H
+#define _SIO_H
+
+/* include for typedefs */
+#ifdef WIN32
+#include <stdio.h>
+typedef int ssize_t;
+#else
+#include <unistd.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* these are W.R.Stevens' famous io routines to read or write bytes to fd */
+ssize_t readn(int, void *, size_t);
+ssize_t writen(int, const void *, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SIO_H */
diff --git a/cbtcommon/tcpsocket.c b/cbtcommon/tcpsocket.c
new file mode 100644
index 0000000..27cc13a
--- /dev/null
+++ b/cbtcommon/tcpsocket.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifdef SOLARIS
+#include <strings.h>
+#else
+#include <string.h>
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#else /* not windows */
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <errno.h>
+
+#ifdef SOLARIS
+#include <netinet/tcp.h>
+#endif
+
+#endif /* if windows */
+
+#include "tcpsocket.h"
+#include "debug.h"
+#include "rcsid.h"
+#ifdef WIN32
+#include "win32fd.h"
+#endif
+
+RCSID("$Id: tcpsocket.c,v 1.6 1999/12/27 20:35:34 david Exp $");
+
+/* Create an AF_INET stream socket, optionally with SO_REUSEADDR set.
+ * Returns the descriptor, or -1 if socket() fails. */
+int
+tcp_create_socket(int reuse_addr)
+{
+  int retval;
+  int yes = 1;
+
+  if ((retval = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+  {
+    debug(DEBUG_ERROR, "tcp: can't create socket");
+    return -1; /* don't setsockopt() or log success on an invalid descriptor */
+  }
+  if (reuse_addr)
+    setsockopt( retval, SOL_SOCKET, SO_REUSEADDR, (char *)&yes, sizeof(int));
+
+  debug(DEBUG_TCP, "tcp: socket created");
+#ifdef WIN32
+  return get_fd(retval, WIN32_SOCKET);
+#else
+  return retval;
+#endif
+}
+
+/* Bind sockfd to tcp_port on every local interface (INADDR_ANY) and start
+ * listening with a backlog of LISTEN_QUEUE_SIZE.
+ * Returns 0 on success, -1 if bind() or listen() fails. */
+int
+tcp_bind_and_listen(int sockfd, unsigned short tcp_port)
+{
+  struct sockaddr_in local;
+
+#ifdef WIN32
+  sockfd = win32_file_table[sockfd].win32id;
+#endif
+
+  memset((char *) &local, 0, sizeof(struct sockaddr_in));
+  local.sin_family      = AF_INET;
+  local.sin_port        = htons(tcp_port);
+  local.sin_addr.s_addr = htonl(INADDR_ANY);
+
+  if (bind(sockfd, (struct sockaddr *)&local, sizeof(local)) < 0)
+  {
+    debug(DEBUG_ERROR, "tcp: can't bind to socket");
+    return -1;
+  }
+  if (listen(sockfd, LISTEN_QUEUE_SIZE) < 0)
+  {
+    debug(DEBUG_ERROR, "tcp: can't listen on socket");
+    return -1;
+  }
+
+  debug(DEBUG_TCP, "tcp: socket bound and listening");
+  return 0;
+}
+
+int
+tcp_accept_connection(int sockfd)
+{
+  struct sockaddr_in remaddr;
+  int addrlen;
+  int retval;
+  /* Accept one pending connection on sockfd; returns accept()'s descriptor, or -1 on failure. */
+#ifdef WIN32
+  sockfd = win32_file_table[sockfd].win32id;
+#endif
+
+  addrlen = sizeof(struct sockaddr_in);
+
+#ifdef WIN32
+  if ((retval = accept(sockfd, (struct sockaddr *) &remaddr, &addrlen)) == INVALID_SOCKET)
+  {
+	  debug(DEBUG_APPERROR, "tcp: error accepting connection");
+	  return -1;
+  }
+#else
+  if ((retval = accept(sockfd, (struct sockaddr *) &remaddr, &addrlen)) < 0)
+  {
+    if (errno != EINTR )
+      debug(DEBUG_ERROR, "tcp: error accepting connection");
+    /* EINTR is not logged, but is still reported to the caller as -1 */
+    return -1;
+  }
+#endif
+
+  debug(DEBUG_TCP, "tcp: got connection (fd=%d)", retval);
+
+  return retval;
+}
+
+/*
+ * Return the IPv4 address of the peer connected on fd, in host byte order,
+ * or 0 if getpeername() fails.
+ * The length is passed as an int; POSIX declares that argument socklen_t.
+ */
+unsigned int
+tcp_get_client_ip(int fd)
+{
+  struct sockaddr_in peer;
+  int peerlen = sizeof(struct sockaddr_in);
+
+#ifdef WIN32
+  fd = win32_file_table[fd].win32id;
+#endif
+
+  if (getpeername(fd, (struct sockaddr *) &peer, &peerlen) < 0)
+  {
+    debug(DEBUG_ERROR, "tcp: error getting remote's ip address");
+    return 0;
+  }
+
+  /* sin_addr is in network byte order; callers get host order */
+  return ntohl(peer.sin_addr.s_addr);
+}
+
+/*
+ * Connect sockfd to rem_addr (dotted quad or host name, resolved by
+ * convert_address()) on the given port.
+ * Returns 0 once connected, -1 if resolution or connect() fails.
+ */
+int
+tcp_connect(int sockfd, const char *rem_addr, unsigned short port)
+{
+  struct sockaddr_in remote;
+  long ipno;
+
+#ifdef WIN32
+  sockfd = win32_file_table[sockfd].win32id;
+#endif
+
+  if ( convert_address(&ipno , rem_addr) < 0 )
+    return -1;
+
+  memset((char *) &remote, 0, sizeof(struct sockaddr_in));
+  remote.sin_family      = AF_INET;
+  remote.sin_port        = htons(port);
+  remote.sin_addr.s_addr = ipno;
+
+  if (connect(sockfd, (struct sockaddr *)&remote, sizeof(struct sockaddr_in)) < 0)
+  {
+    debug(DEBUG_ERROR, "connect error");
+    return -1;
+  }
+
+  debug(DEBUG_STATUS, "tcp: connection established on port %d", port);
+  return 0;
+}
+
+/*
+ * Resolve addr_str (dotted quad or host name) to an IPv4 address.
+ * On success stores the address, in network byte order, as a numeric value
+ * in *dest and returns 0.  Returns -1, leaving *dest untouched, if the
+ * name cannot be resolved.
+ */
+int
+convert_address(long *dest, const char *addr_str)
+{
+  struct in_addr ip;
+  struct hostent *host;
+  const char *errstr;
+
+  /* first try converting "numbers and dots" notation */
+#ifdef LINUX
+  if ( inet_aton(addr_str, &ip) )
+#else
+  /* compare at the address's own width: a 64-bit long never equals -1 here */
+  if ( (ip.s_addr = inet_addr(addr_str)) != INADDR_NONE )
+#endif
+  {
+    *dest = (long)ip.s_addr;
+    return 0;
+  }
+
+  /* if it fails, do a gethostbyname() */
+  if ((host = gethostbyname(addr_str)) == NULL)
+  {
+    switch(h_errno)
+    {
+    case HOST_NOT_FOUND:
+      errstr = "HOST_NOT_FOUND";
+      break;
+    case NO_ADDRESS:
+      errstr = "NO_ADDRESS";
+      break;
+    case NO_RECOVERY:
+      errstr = "NO_RECOVERY";
+      break;
+    case TRY_AGAIN:
+      errstr = "TRY_AGAIN";
+      break;
+    default:
+      errstr = "unknown error";
+      break;
+    }
+
+    debug(DEBUG_ERROR, "gethostbyname failed for %s: %s", addr_str, errstr);
+    return -1; /* host is NULL: must not fall through to the copy below */
+  }
+
+  /* copy only the bytes of the address itself, never sizeof(long) */
+  memcpy(&ip.s_addr, host->h_addr_list[0], sizeof(ip.s_addr));
+  *dest = (long)ip.s_addr;
+  return 0;
+}
+
+/* Report the local address of sockfd: *ip and *port receive the IPv4 address
+ * and port in host byte order.  Returns 0, or -1 if getsockname() fails. */
+int tcp_get_local_address(int sockfd, unsigned int *ip, unsigned short *port)
+{
+    struct sockaddr_in local;
+    int locallen = sizeof(struct sockaddr_in);
+
+    if (getsockname(sockfd, (struct sockaddr *)&local, &locallen) >= 0)
+    {
+	*port = ntohs( local.sin_port );
+	*ip = ntohl( local.sin_addr.s_addr );
+	return 0;
+    }
+    debug(DEBUG_SYSERROR, "getsockname failed" );  
+    return -1;
+}
diff --git a/cbtcommon/tcpsocket.h b/cbtcommon/tcpsocket.h
new file mode 100644
index 0000000..ac606a5
--- /dev/null
+++ b/cbtcommon/tcpsocket.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef _TCPSOCKET_H
+#define _TCPSOCKET_H
+
+#ifdef __cplusplus
+extern "C" 
+{
+#endif
+
+#ifndef LISTEN_QUEUE_SIZE
+#define LISTEN_QUEUE_SIZE 5
+#endif
+
+#define REUSE_ADDR        1
+#define NO_REUSE_ADDR     0
+
+int tcp_create_socket(int reuse_addr);
+int tcp_bind_and_listen(int sockfd, unsigned short tcpport);
+int tcp_accept_connection(int sockfd);
+unsigned int tcp_get_client_ip(int fd);
+int tcp_connect(int sockfd, const char *rem_addr, unsigned short port);
+int convert_address(long *dest, const char *addr_str);
+int tcp_get_local_address(int sockfd, unsigned int *, unsigned short *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCPSOCKET_H */
diff --git a/cbtcommon/text_util.c b/cbtcommon/text_util.c
new file mode 100644
index 0000000..052a94b
--- /dev/null
+++ b/cbtcommon/text_util.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+/**
+ * Copyright (c) 1998 Cobite, Inc. All Rights Reserved.
+ * @author Karl LaRocca
+ * @created Fri Nov  6 14:33:29 1998
+ * @version $Revision: 1.9 $$Date: 2001/10/25 18:36:11 $
+ */
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "text_util.h"
+#include "rcsid.h"
+
+RCSID("$Id: text_util.c,v 1.9 2001/10/25 18:36:11 adam Exp $");
+
+/*
+ * Strip every trailing '\n' and '\r' from src in place.
+ * Returns src.
+ */
+char* 
+chop( char* src )
+{
+  size_t len = strlen( src );
+
+  /*
+   * Work from the length rather than a pointer to the last char: for an
+   * empty string that pointer would lie before the start of the buffer.
+   */
+  while( len > 0 && ( src[ len - 1 ] == '\n' || src[ len - 1 ] == '\r' ) )
+  {
+    src[ --len ] = 0;
+  }
+
+  return( src );
+}
+
+/* Remove every non-digit character from src in place; returns src. */
+char*
+digits( char* src )
+{
+  char* out = src;
+  const char* in;
+
+  for( in = src; *in; in++ )
+  {
+    /* <ctype.h> needs an unsigned char value: a negative char is undefined */
+    if ( isdigit( (unsigned char)*in ) )
+    {
+      *out++ = *in;
+    }
+  }
+
+  *out = 0;
+
+  return( src );
+}
+
+/* Convert src to lower case in place, one char at a time; returns src. */
+char* 
+lower_case( char* src )
+{
+  char* p;
+
+  /* <ctype.h> functions take an unsigned char value; passing a negative
+   * plain char is undefined behaviour */
+  for( p = src; *p; p++ )
+      *p = (char)tolower( (unsigned char)*p );
+
+  return( src );
+}
+
+/* Reverse the characters of src in place; returns src. */
+char*
+reverse( char* src )
+{
+  int  len = strlen( src );
+  int  lo, hi;
+
+  for( lo = 0, hi = len - 1; lo < hi; lo++, hi-- )
+  {
+    char tmp = src[ lo ];
+    src[ lo ] = src[ hi ];
+    src[ hi ] = tmp;
+  }
+
+  return( src );
+}
+
+/* Strip trailing whitespace (isspace) from src in place; returns src. */
+char* 
+trim( char* src )
+{
+  size_t len = strlen( src );
+  /* index by length: no pointer before src; isspace() needs unsigned char */
+  while( len > 0 && isspace( (unsigned char)src[ len - 1 ] ) )
+      src[ --len ] = '\0';
+  return src;
+}
+
+/* Convert src to upper case in place, one char at a time; returns src. */
+char* 
+upper_case( char* src )
+{
+  char* p;
+
+  /* <ctype.h> functions take an unsigned char value; passing a negative
+   * plain char is undefined behaviour */
+  for( p = src; *p; p++ )
+      *p = (char)toupper( (unsigned char)*p );
+
+  return( src );
+}
+
+/* Compare needle against the tail of haystack: the strcmp() of haystack's
+ * last strlen(needle) chars with needle, or -1 if haystack is shorter. */
+int
+strrcmp( const char* haystack, const char* needle )
+{
+    int hlen = strlen( haystack );
+    int nlen = strlen( needle );
+
+    return( hlen < nlen ? -1 : strcmp( haystack + hlen - nlen, needle ) );
+}
+
+/* 
+ * Convert a money string to a signed number of cents.
+ * Finding a - anywhere in the string makes it money negative. 
+ * all characters other than digits, '-', and '.' are ignored, so:
+ * ab36-.g98 = -36.98
+ * Digits past the second decimal place are dropped, not rounded.
+ * This is fair, I think, if we don't want to reject anything as 
+ * improperly formatted.
+ */
+long 
+money2cents( const char* money )
+{
+    long retval = 0;
+    int decimal_places = -1;
+    int neg = 0;
+
+    for( ; *money; money++ )
+    {
+	/* '-' is checked first and on every char, so a trailing one counts */
+	if ( *money == '-' )
+	    neg = 1;
+	else if ( decimal_places >= 2 )
+	    continue;
+	else if ( isdigit( (unsigned char)*money ) )
+	{
+	    if ( decimal_places >= 0 )
+		decimal_places++;
+
+	    retval = retval * 10 + ( *money - '0' );
+	}
+	else if ( *money == '.' )
+	    decimal_places = 0;
+    }
+
+    if ( decimal_places == 1 )
+	retval *= 10;
+
+    else if ( decimal_places <= 0 )
+	retval *= 100;
+
+    return( neg ? -retval : retval );
+}
+
+/*
+ * Format cents as dollars and cents: two decimals, ',' between thousands
+ * and a leading '-' for negative amounts (123456 -> "1,234.56",
+ * 5 -> "0.05", -150 -> "-1.50").
+ * Returns a pointer into a static buffer that the next call overwrites;
+ * the text starts somewhere inside that buffer, not at its first byte.
+ */
+const char* 
+cents2money( long cents )
+{
+  static char buff[ 64 ];
+  char* d = buff + sizeof( buff ) - 1;
+  int neg = ( cents < 0 );
+  /* magnitude as unsigned long, so negating LONG_MIN cannot overflow */
+  unsigned long left = neg ? 0UL - (unsigned long)cents : (unsigned long)cents;
+  int idx;
+
+  /* build the text backwards from the end of the buffer */
+  *d = 0;
+
+  /* idx < 3 forces at least "0.CC" even for amounts below one dollar */
+  for( idx = 0; idx < 3 || left > 0; idx++ )
+  {
+    if ( idx == 2 )
+    {
+      *--d = '.';
+    }
+
+    else if ( idx > 2 && ( idx - 2 ) % 3 == 0 )
+    {
+      *--d = ',';
+    }
+
+    *--d = (char)( '0' + left % 10 );
+    left /= 10;
+  }
+
+  if ( neg )
+  {
+    *--d = '-';
+  }
+
+  return( d );
+}
+
+/*
+ * Drop trailing zeros after the decimal point of src, in place, and the
+ * point itself when nothing is left behind it: "1.50" -> "1.5", "2.00" -> "2".
+ * Strings without a '.' (including "") are left unchanged; src[0] is never cleared.
+ */
+void trim_zeros_after_decimal( char* src )
+{
+    char * end;
+
+    /* no '.': trailing zeros are significant ("100"), and "" has no last char */
+    if( strchr( src, '.' ) == NULL )
+	return;
+
+    for( end = src + strlen( src ) - 1; end != src && *end == '0'; end-- )
+	*end = 0;
+
+    if( end != src && *end == '.' )
+	*end = 0;
+}
+
+#ifdef linux
+extern void *memfrob(void *, size_t);
+#else
+/* Fallback when 'linux' is not defined: XOR len bytes at mem with 42 in place. */
+static void * memfrob(void * mem, size_t len)
+{
+    char *p = (char *)mem;
+    char *stop = p + len;
+
+    while (p != stop)
+    {
+	*p ^= 42;
+	p++;
+    }
+    return mem;
+}
+#endif
+// simple functions to obfuscate strings in a binary
+char* frobstr( char* src )
+{
+    size_t len = strlen( src ); /* taken once: frobbing a '*' produces a 0 byte */
+    char* retval = (char*)malloc( len * 2 + 1 );
+    if( retval == NULL ) /* out of memory: src has not been touched */
+	return( NULL );
+    memfrob( src, len );
+    str2hex( retval, src, (int)len );
+    memfrob( src, len ); /* restore the caller's string, all len bytes of it */
+    return( retval );
+}
+
+char* unfrobstr( char* src )
+{
+    int slen = strlen( src ) / 2; /* two hex digits per decoded byte */
+    char* retval = (char*)malloc( slen + 1 );
+    if( retval == NULL ) /* out of memory: report it instead of writing through NULL */
+	return( NULL );
+    hex2str( retval, src, 0 );
+    memfrob( retval, slen );
+    return( retval );
+}
+
+/* Write the lowercase hex form of the first slen bytes of src to dest
+ * (strlen(src) bytes when slen is 0); dest needs 2 * slen + 1 bytes. */
+void str2hex( char* dest, const char* src, int slen )
+{
+    int i;
+    char* p = dest;
+
+    if( slen == 0 )
+	slen = strlen( src );
+
+    for ( i = 0; i < slen; i++, p += 2 )
+	/* via unsigned char: a negative char would print as 8 digits, not 2 */
+	sprintf( p, "%02x", (unsigned int)(unsigned char)src[i] );
+
+    *p = 0;
+}
+
+/* Decode pairs of hex digits from src into bytes at dest and NUL-terminate.
+ * slen is the number of hex characters to read (strlen(src) when 0); dest
+ * needs slen / 2 + 1 bytes.  If sscanf cannot convert a pair, the byte is 0. */
+void hex2str( char* dest, const char* src, int slen )
+{
+    const char* p = src;
+    int i;
+
+    if( slen == 0 )
+	slen = strlen( src );
+    slen /= 2;
+
+    for( i = 0; i < slen; i++, p += 2 )
+    {
+	unsigned int v = 0; /* stays 0 if sscanf cannot convert the pair */
+	sscanf( p, "%02x", &v );
+	dest[i] = (char)v;
+    }
+    dest[ slen ] = 0;
+}
+
diff --git a/cbtcommon/text_util.h b/cbtcommon/text_util.h
new file mode 100644
index 0000000..b22477b
--- /dev/null
+++ b/cbtcommon/text_util.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+/**
+ * Copyright (c) 1998 Cobite, Inc. All Rights Reserved.
+ * @author Karl LaRocca
+ * @created Fri Nov  6 14:48:04 1998
+ * @version $Revision: 1.4 $$Date: 2001/10/25 18:36:11 $
+ */
+#ifndef _TEXT_UTIL_H
+#define _TEXT_UTIL_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+char*       chop( char* src );
+char*       digits( char* src );
+char*       lower_case( char* src );
+char*       reverse( char* src );
+char*       trim( char* src );
+void        trim_zeros_after_decimal( char* src );
+char*       upper_case( char* src );
+int         strrcmp( const char* haystack, const char* needle );
+
+const char* cents2money( long cents );
+long        money2cents( const char* money );
+
+// these two allocate returned memory, so be sure to free it...
+char*       frobstr( char* src );
+char*       unfrobstr( char* src );
+
+void        str2hex( char* dest, const char* src, int slen );
+void        hex2str( char* dest, const char* src, int slen );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEXT_UTIL_H */
diff --git a/cvs_direct.c b/cvs_direct.c
new file mode 100644
index 0000000..920487d
--- /dev/null
+++ b/cvs_direct.c
@@ -0,0 +1,925 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <zlib.h>
+#include <sys/socket.h>
+#include <cbtcommon/debug.h>
+#include <cbtcommon/text_util.h>
+#include <cbtcommon/tcpsocket.h>
+#include <cbtcommon/sio.h>
+
+#include "cvs_direct.h"
+#include "util.h"
+
+#define RD_BUFF_SIZE 4096
+
+/*
+ * State for one connection to a cvs server, reached either over a pserver
+ * socket or through the pipes of a forked server command.
+ */
+struct _CvsServerCtx 
+{
+    /* descriptors that responses are read from / requests are written to */
+    int read_fd;
+    int write_fd;
+    /* repository root, sent to the server in the "Root" request */
+    char root[PATH_MAX];
+
+    /* non-zero for a pserver connection, where read_fd is a two-way socket */
+    int is_pserver;
+
+    /* buffered reads from descriptor */
+    char read_buff[RD_BUFF_SIZE];
+    /* unread data in read_buff is the range [head, tail) */
+    char * head;
+    char * tail;
+
+    /* non-zero once a "Gzip-stream" request has been sent */
+    int compressed;
+    /* deflate state for outgoing data and inflate state for incoming data */
+    z_stream zout;
+    z_stream zin;
+
+    /* when reading compressed data, the compressed data buffer */
+    char zread_buff[RD_BUFF_SIZE];
+};
+
+static void get_cvspass(char *, const char *);
+static void send_string(CvsServerCtx *, const char *, ...);
+static int read_response(CvsServerCtx *, const char *);
+static void ctx_to_fp(CvsServerCtx * ctx, FILE * fp);
+static int read_line(CvsServerCtx * ctx, char * p);
+
+static CvsServerCtx * open_ctx_pserver(CvsServerCtx *, const char *);
+static CvsServerCtx * open_ctx_forked(CvsServerCtx *, const char *);
+
+/*
+ * Connect to the cvs server described by p_root and run the opening
+ * handshake of the client/server protocol.
+ *
+ * p_root is a CVSROOT-style string.  A leading ':' introduces an access
+ * method: "pserver" is handled by open_ctx_pserver(), while "local", "ext",
+ * "fork" and "server" are all handled by open_ctx_forked().  A root without
+ * a leading ':' is handed to open_ctx_forked() unchanged.
+ *
+ * compress is the zlib compression level; when non-zero the z_streams are
+ * initialised up front and a "Gzip-stream" request is sent once the
+ * handshake has succeeded.
+ *
+ * Returns the new context, or NULL on any failure.  A returned context is
+ * released with close_cvs_server().
+ */
+CvsServerCtx * open_cvs_server(char * p_root, int compress)
+{
+    CvsServerCtx * ctx;
+    char root[PATH_MAX];
+    char buff[BUFSIZ];
+    char * p = root, *tok;
+
+    /* root is copied into a PATH_MAX buffer below; refuse anything longer */
+    if (!p_root || strlen(p_root) >= sizeof(root))
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: missing or over-long cvs root");
+	return NULL;
+    }
+
+    ctx = (CvsServerCtx*)malloc(sizeof(*ctx));
+    if (!ctx)
+	return NULL;
+
+    ctx->head = ctx->tail = ctx->read_buff;
+    ctx->read_fd = ctx->write_fd = -1;
+    ctx->compressed = 0;
+    ctx->is_pserver = 0;
+
+    if (compress)
+    {
+	memset(&ctx->zout, 0, sizeof(z_stream));
+	memset(&ctx->zin, 0, sizeof(z_stream));
+
+	/*
+	 * to 'prime' the reads, make it look like there was output
+	 * room available (i.e. we have processed all pending compressed
+	 * data)
+	 */
+	ctx->zin.avail_out = 1;
+
+	if (deflateInit(&ctx->zout, compress) != Z_OK)
+	{
+	    free(ctx);
+	    return NULL;
+	}
+
+	if (inflateInit(&ctx->zin) != Z_OK)
+	{
+	    deflateEnd(&ctx->zout);
+	    free(ctx);
+	    return NULL;
+	}
+    }
+
+    strcpy(root, p_root);
+
+    tok = strsep(&p, ":");
+
+    /* if root string looks like :pserver:... then the first token will be empty */
+    if (strlen(tok) == 0)
+    {
+	char * method = strsep(&p, ":");
+
+	/* an empty root, or a method with nothing after it, cannot be opened */
+	if (!method || !p)
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: unsupported cvs access method: %s", method ? method : "");
+	    goto out_free_err;
+	}
+	else if (strcmp(method, "pserver") == 0)
+	{
+	    ctx = open_ctx_pserver(ctx, p);
+	}
+	else if (strcmp(method, "local") == 0 || strcmp(method, "ext") == 0 ||
+		 strcmp(method, "fork") == 0 || strcmp(method, "server") == 0)
+	{
+	    /* handle all of these via fork, even local */
+	    ctx = open_ctx_forked(ctx, p);
+	}
+	else
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: unsupported cvs access method: %s", method);
+	    goto out_free_err;
+	}
+    }
+    else
+    {
+	ctx = open_ctx_forked(ctx, p_root);
+    }
+
+    /* the open_ctx_* helpers have already freed the context on failure */
+    if (!ctx)
+	return NULL;
+
+    send_string(ctx, "Root %s\n", ctx->root);
+
+    /* this is taken from 1.11.1p1 trace - but with Mbinary removed. we can't handle it (yet!) */
+    send_string(ctx, "Valid-responses ok error Valid-requests Checked-in New-entry Checksum Copy-file Updated Created Update-existing Merged Patched Rcs-diff Mode Mod-time Removed Remove-entry Set-static-directory Clear-static-directory Set-sticky Clear-sticky Template Set-checkin-prog Set-update-prog Notified Module-expansion Wrapper-rcsOption M E F\n");
+
+    send_string(ctx, "valid-requests\n");
+
+    /* check for the commands we will issue */
+    if (read_line(ctx, buff) < 0 || strncmp(buff, "Valid-requests", 14) != 0)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: bad response to valid-requests command");
+	goto out_close_err;
+    }
+
+    if (!strstr(buff, " version") ||
+	!strstr(buff, " rlog") ||
+	!strstr(buff, " rdiff") ||
+	!strstr(buff, " diff") ||
+	!strstr(buff, " co"))
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: cvs server too old for cvs_direct");
+	goto out_close_err;
+    }
+
+    if (read_line(ctx, buff) < 0 || strcmp(buff, "ok") != 0)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: bad ok trailer to valid-requests command");
+	goto out_close_err;
+    }
+
+    /* this is mysterious but 'mandatory' */
+    send_string(ctx, "UseUnchanged\n");
+
+    if (compress)
+    {
+	send_string(ctx, "Gzip-stream %d\n", compress);
+	ctx->compressed = 1;
+    }
+
+    debug(DEBUG_APPMSG1, "cvs_direct initialized to CVSROOT %s", ctx->root);
+
+    return ctx;
+
+ out_close_err:
+    /*
+     * ctx->compressed is still 0 on every path that gets here, so
+     * close_cvs_server() will not touch the z_streams; end them first
+     */
+    if (compress)
+    {
+	deflateEnd(&ctx->zout);
+	inflateEnd(&ctx->zin);
+    }
+    close_cvs_server(ctx);
+    return NULL;
+
+ out_free_err:
+    if (compress)
+    {
+	deflateEnd(&ctx->zout);
+	inflateEnd(&ctx->zin);
+    }
+    free(ctx);
+    return NULL;
+}
+
+/*
+ * Open a pserver connection.  p_root is the part of the cvs root after
+ * ":pserver:", i.e. user@server:[port]/path.  The port defaults to 2401
+ * when the path follows the ':' directly.  The password is looked up with
+ * get_cvspass() and the pserver authentication exchange is performed.
+ *
+ * On success ctx->read_fd and ctx->write_fd refer to the socket (write_fd
+ * is a dup of read_fd), ctx->root holds the repository path, and ctx is
+ * returned.  On failure ctx is freed and NULL is returned.
+ */
+static CvsServerCtx * open_ctx_pserver(CvsServerCtx * ctx, const char * p_root)
+{
+    char root[PATH_MAX];
+    char full_root[PATH_MAX];
+    char * p = root, *tok, *tok2;
+    char user[BUFSIZ];
+    char server[BUFSIZ];
+    char pass[BUFSIZ];
+    char port[8];
+
+    if (strlen(p_root) >= sizeof(root))
+    {
+	debug(DEBUG_APPERROR, "parse error: pserver root too long");
+	goto out_free_err;
+    }
+
+    strcpy(root, p_root);
+
+    tok = strsep(&p, ":");
+    if (strlen(tok) == 0 || !p)
+    {
+	debug(DEBUG_APPERROR, "parse error on third token");
+	goto out_free_err;
+    }
+
+    tok2 = strsep(&tok, "@");
+    if (!strlen(tok2) || (!tok || !strlen(tok)))
+    {
+	debug(DEBUG_APPERROR, "parse error on user@server in pserver");
+	goto out_free_err;
+    }
+
+    /* BUFSIZ is not guaranteed to be as large as PATH_MAX */
+    if (strlen(tok2) >= sizeof(user) || strlen(tok) >= sizeof(server))
+    {
+	debug(DEBUG_APPERROR, "parse error: user or server too long in pserver");
+	goto out_free_err;
+    }
+
+    strcpy(user, tok2);
+    strcpy(server, tok);
+
+    if (*p != '/')
+    {
+	tok = strchr(p, '/');
+	if (!tok)
+	{
+	    debug(DEBUG_APPERROR, "parse error: expecting / in root");
+	    goto out_free_err;
+	}
+
+	/* leave room for the terminating NUL in port[] */
+	if ((size_t)(tok - p) >= sizeof(port))
+	{
+	    debug(DEBUG_APPERROR, "parse error: port too long in root");
+	    goto out_free_err;
+	}
+
+	memset(port, 0, sizeof(port));
+	memcpy(port, p, tok - p);
+
+	p = tok;
+    }
+    else
+    {
+	strcpy(port, "2401");
+    }
+
+    /* the line from .cvspass is fully qualified, so rebuild */
+    snprintf(full_root, PATH_MAX, ":pserver:%s@%s:%s%s", user, server, port, p);
+    get_cvspass(pass, full_root);
+
+    /* NOTE(review): this writes the stored password string to the debug log */
+    debug(DEBUG_TCP, "user:%s server:%s port:%s pass:%s full_root:%s", user, server, port, pass, full_root);
+
+    if ((ctx->read_fd = tcp_create_socket(REUSE_ADDR)) < 0)
+	goto out_free_err;
+
+    ctx->write_fd = dup(ctx->read_fd);
+    if (ctx->write_fd < 0)
+    {
+	debug(DEBUG_SYSERROR, "cvs_direct: can't dup pserver socket");
+	goto out_close_err;
+    }
+
+    if (tcp_connect(ctx->read_fd, server, atoi(port)) < 0)
+	goto out_close_err;
+
+    send_string(ctx, "BEGIN AUTH REQUEST\n");
+    send_string(ctx, "%s\n", p);
+    send_string(ctx, "%s\n", user);
+    send_string(ctx, "%s\n", pass);
+    send_string(ctx, "END AUTH REQUEST\n");
+
+    if (!read_response(ctx, "I LOVE YOU"))
+	goto out_close_err;
+
+    strcpy(ctx->root, p);
+    ctx->is_pserver = 1;
+
+    return ctx;
+
+ out_close_err:
+    /* write_fd is a second descriptor for the same socket; close both */
+    if (ctx->write_fd >= 0)
+	close(ctx->write_fd);
+    close(ctx->read_fd);
+ out_free_err:
+    free(ctx);
+    return NULL;
+}
+
+/*
+ * Start a cvs server as a child process and talk to it over two pipes.
+ *
+ * p_root is either "[user@]host:path", in which case the server is started
+ * through $CVS_RSH (default "rsh"), or a plain path, in which case it is
+ * started locally.  The server program is $CVS_SERVER (default "cvs").
+ *
+ * On success ctx->read_fd / ctx->write_fd are the parent's pipe ends,
+ * ctx->root holds the repository path, and ctx is returned.  On failure
+ * ctx is freed and NULL is returned.
+ *
+ * NOTE(review): the command line is assembled from p_root and the
+ * environment and run through "sh -c" without any quoting, so a root that
+ * contains shell metacharacters will be interpreted by the shell.
+ */
+static CvsServerCtx * open_ctx_forked(CvsServerCtx * ctx, const char * p_root)
+{
+    char root[PATH_MAX];
+    char * p = root, *tok, *tok2, *rep;
+    char execcmd[PATH_MAX];
+    int to_cvs[2];
+    int from_cvs[2];
+    int cmdlen;
+    pid_t pid;
+    const char * cvs_server = getenv("CVS_SERVER");
+
+    if (!cvs_server)
+	cvs_server = "cvs";
+
+    if (strlen(p_root) >= sizeof(root))
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: cvs root too long");
+	goto out_free_err;
+    }
+
+    strcpy(root, p_root);
+
+    /* if there's a ':', it's remote */
+    tok = strsep(&p, ":");
+
+    if (p)
+    {
+	const char * cvs_rsh = getenv("CVS_RSH");
+
+	if (!cvs_rsh)
+	    cvs_rsh = "rsh";
+
+	tok2 = strsep(&tok, "@");
+
+	if (tok)
+	    cmdlen = snprintf(execcmd, PATH_MAX, "%s -l %s %s %s server", cvs_rsh, tok2, tok, cvs_server);
+	else
+	    cmdlen = snprintf(execcmd, PATH_MAX, "%s %s %s server", cvs_rsh, tok2, cvs_server);
+
+	rep = p;
+    }
+    else
+    {
+	cmdlen = snprintf(execcmd, PATH_MAX, "%s server", cvs_server);
+	rep = tok;
+    }
+
+    /* never run a silently truncated command line */
+    if (cmdlen < 0 || cmdlen >= PATH_MAX)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: server command line too long");
+	goto out_free_err;
+    }
+
+    if (pipe(to_cvs) < 0)
+    {
+	debug(DEBUG_SYSERROR, "cvs_direct: failed to create pipe to_cvs");
+	goto out_free_err;
+    }
+
+    if (pipe(from_cvs) < 0)
+    {
+	debug(DEBUG_SYSERROR, "cvs_direct: failed to create pipe from_cvs");
+	goto out_close_err;
+    }
+
+    debug(DEBUG_TCP, "forked cmdline: %s", execcmd);
+
+    if ((pid = fork()) < 0)
+    {
+	debug(DEBUG_SYSERROR, "cvs_direct: can't fork");
+	goto out_close2_err;
+    }
+    else if (pid == 0) /* child */
+    {
+	char * argp[4];
+	argp[0] = "sh";
+	argp[1] = "-c";
+	argp[2] = execcmd;
+	argp[3] = NULL;
+
+	close(to_cvs[1]);
+	close(from_cvs[0]);
+
+	/*
+	 * dup2 names the target descriptor explicitly instead of relying
+	 * on close() + dup() picking the lowest free slot, and reports
+	 * failure
+	 */
+	if (dup2(to_cvs[0], 0) < 0 || dup2(from_cvs[1], 1) < 0)
+	    _exit(1);
+
+	execv("/bin/sh",argp);
+
+	debug(DEBUG_APPERROR, "cvs_direct: fatal: shouldn't be reached");
+	/* _exit: don't run the parent's atexit handlers or flush its stdio buffers */
+	_exit(1);
+    }
+
+    close(to_cvs[0]);
+    close(from_cvs[1]);
+    ctx->read_fd = from_cvs[0];
+    ctx->write_fd = to_cvs[1];
+
+    strcpy(ctx->root, rep);
+
+    return ctx;
+
+ out_close2_err:
+    close(from_cvs[0]);
+    close(from_cvs[1]);
+ out_close_err:
+    close(to_cvs[0]);
+    close(to_cvs[1]);
+ out_free_err:
+    free(ctx);
+    return NULL;
+}
+
+/*
+ * Shut down the connection and free ctx.
+ *
+ * For a compressed connection the deflate stream is finished and written
+ * out first.  The write descriptor is then closed (and, for pserver, the
+ * write side of the socket is shut down) so that the server sees EOF.  Any
+ * remaining compressed input is read and inflated up to end of stream
+ * before the read descriptor is closed.
+ */
+void close_cvs_server(CvsServerCtx * ctx)
+{
+    /* FIXME: some sort of flushing should be done for non-compressed case */
+
+    if (ctx->compressed)
+    {
+	int ret, len;
+	char buff[BUFSIZ];
+
+	/*
+	 * there shouldn't be anything left, but we do want
+	 * to send an 'end of stream' marker, (if such a thing
+	 * actually exists..)
+	 */
+	do
+	{
+	    ctx->zout.next_out = (Bytef *)buff;
+	    ctx->zout.avail_out = BUFSIZ;
+	    ret = deflate(&ctx->zout, Z_FINISH);
+
+	    if ((ret == Z_OK || ret == Z_STREAM_END) && ctx->zout.avail_out != BUFSIZ)
+	    {
+		len = BUFSIZ - ctx->zout.avail_out;
+		if (writen(ctx->write_fd, buff, len) != len)
+		    debug(DEBUG_APPERROR, "cvs_direct: zout: error writing final state");
+	    }
+	} while (ret == Z_OK);
+
+	/* msg is not always set by zlib, so never hand a NULL to %s */
+	if ((ret = deflateEnd(&ctx->zout)) != Z_OK)
+	    debug(DEBUG_APPERROR, "cvs_direct: zout: deflateEnd error: %s: %s",
+		  (ret == Z_STREAM_ERROR) ? "Z_STREAM_ERROR":"Z_DATA_ERROR", ctx->zout.msg ? ctx->zout.msg : "");
+    }
+
+    /* we're done writing now */
+    debug(DEBUG_TCP, "cvs_direct: closing cvs server write connection %d", ctx->write_fd);
+    close(ctx->write_fd);
+
+    /*
+     * if this is pserver, then read_fd is a bi-directional socket.
+     * we want to shutdown the write side, just to make sure the
+     * server gets eof
+     */
+    if (ctx->is_pserver)
+    {
+	debug(DEBUG_TCP, "cvs_direct: shutdown on read socket");
+	if (shutdown(ctx->read_fd, SHUT_WR) < 0)
+	    debug(DEBUG_SYSERROR, "cvs_direct: error with shutdown on pserver socket");
+    }
+
+    if (ctx->compressed)
+    {
+	int ret = Z_OK, len, eof = 0;
+	char buff[BUFSIZ];
+
+	/* read to the 'eof'/'eos' marker.  there are two states we
+	 * track, looking for Z_STREAM_END (application level EOS)
+	 * and EOF on socket.  Both should happen at the same time,
+	 * but we need to do the read first, the first time through
+	 * the loop, but we want to do one read after getting Z_STREAM_END
+	 * too.  so this loop has really ugly exit conditions.
+	 */
+	for(;;)
+	{
+	    /*
+	     * if there's nothing in the avail_in, and we
+	     * inflated everything last pass (avail_out != 0)
+	     * then slurp some more from the descriptor,
+	     * if we get EOF, exit the loop
+	     */
+	    if (ctx->zin.avail_in == 0 && ctx->zin.avail_out != 0)
+	    {
+		debug(DEBUG_TCP, "cvs_direct: doing final slurp");
+		len = read(ctx->read_fd, ctx->zread_buff, RD_BUFF_SIZE);
+		debug(DEBUG_TCP, "cvs_direct: did final slurp: %d", len);
+
+		if (len <= 0)
+		{
+		    eof = 1;
+		    break;
+		}
+
+		/* put the data into the inflate input stream */
+		ctx->zin.next_in = (Bytef *)ctx->zread_buff;
+		ctx->zin.avail_in = len;
+	    }
+
+	    /*
+	     * if the last time through we got Z_STREAM_END, and we
+	     * get back here, it means we should've gotten EOF but
+	     * didn't
+	     */
+	    if (ret == Z_STREAM_END)
+		break;
+
+	    ctx->zin.next_out = (Bytef *)buff;
+	    ctx->zin.avail_out = BUFSIZ;
+
+	    ret = inflate(&ctx->zin, Z_SYNC_FLUSH);
+	    len = BUFSIZ - ctx->zin.avail_out;
+
+	    if (ret == Z_BUF_ERROR)
+		debug(DEBUG_APPERROR, "Z_BUF_ERROR");
+
+	    if (ret == Z_OK && len == 0)
+		debug(DEBUG_TCP, "cvs_direct: no data out of inflate");
+
+	    if (ret == Z_STREAM_END)
+		debug(DEBUG_TCP, "cvs_direct: got Z_STREAM_END");
+
+	    if ((ret == Z_OK || ret == Z_STREAM_END) && len > 0)
+		hexdump(buff, len, "cvs_direct: zin: unread data at close");
+	}
+
+	if (ret != Z_STREAM_END)
+	    debug(DEBUG_APPERROR, "cvs_direct: zin: Z_STREAM_END not encountered (premature EOF?)");
+
+	if (eof == 0)
+	    debug(DEBUG_APPERROR, "cvs_direct: zin: EOF not encountered (premature Z_STREAM_END?)");
+
+	if ((ret = inflateEnd(&ctx->zin)) != Z_OK)
+	    debug(DEBUG_APPERROR, "cvs_direct: zin: inflateEnd error: %s: %s",
+		  (ret == Z_STREAM_ERROR) ? "Z_STREAM_ERROR":"Z_DATA_ERROR", ctx->zin.msg ? ctx->zin.msg : "");
+    }
+
+    debug(DEBUG_TCP, "cvs_direct: closing cvs server read connection %d", ctx->read_fd);
+    close(ctx->read_fd);
+
+    free(ctx);
+}
+
+/*
+ * Look up the stored password for root in $HOME/.cvspass and copy it to
+ * pass.  Only lines of the form "/1 <root> <password>" are considered.  If
+ * no entry is found (or the file cannot be opened) pass is set to "A".
+ *
+ * pass must have room for at least BUFSIZ bytes.  Exits if HOME is unset
+ * or the path to .cvspass does not fit in PATH_MAX.
+ */
+static void get_cvspass(char * pass, const char * root)
+{
+    char cvspass[PATH_MAX];
+    const char * home;
+    FILE * fp;
+
+    pass[0] = 0;
+
+    if (!(home = getenv("HOME")))
+    {
+	debug(DEBUG_APPERROR, "HOME environment variable not set");
+	exit(1);
+    }
+
+    if (snprintf(cvspass, PATH_MAX, "%s/.cvspass", home) >= PATH_MAX)
+    {
+	debug(DEBUG_APPERROR, "prefix buffer overflow");
+	exit(1);
+    }
+
+    if ((fp = fopen(cvspass, "r")))
+    {
+	char buff[BUFSIZ];
+	size_t len = strlen(root);
+
+	while (fgets(buff, BUFSIZ, fp))
+	{
+	    /* FIXME: what does /1 mean? */
+	    if (strncmp(buff, "/1 ", 3) != 0)
+		continue;
+
+	    /*
+	     * require the separating space after the root: without it a
+	     * root that is merely a prefix of the stored one would match,
+	     * and a line that ends right after the root would make the
+	     * copy below start past the end of the string
+	     */
+	    if (strncmp(buff + 3, root, len) == 0 && buff[3 + len] == ' ')
+	    {
+		strcpy(pass, buff + 3 + len + 1);
+		chop(pass);
+		break;
+	    }
+	}
+	fclose(fp);
+    }
+
+    /* write the terminator too: only pass[0] is known to be initialised here */
+    if (!pass[0])
+	strcpy(pass, "A");
+}
+
+/*
+ * Format str with the trailing arguments (printf style) and send the result
+ * to the server, deflating it first when the connection is compressed.
+ * The formatted text must fit in BUFSIZ bytes.  Exits on formatting
+ * overflow or on a write failure.
+ */
+static void send_string(CvsServerCtx * ctx, const char * str, ...)
+{
+    int len;
+    char buff[BUFSIZ];
+    va_list ap;
+
+    va_start(ap, str);
+    len = vsnprintf(buff, BUFSIZ, str, ap);
+    va_end(ap);
+
+    /* vsnprintf returns a negative value on an output error */
+    if (len < 0 || len >= BUFSIZ)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: command send string overflow");
+	exit(1);
+    }
+
+    if (ctx->compressed)
+    {
+	char zbuff[BUFSIZ];
+
+	if  (ctx->zout.avail_in != 0)
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: zout: last output command not flushed");
+	    exit(1);
+	}
+
+	ctx->zout.next_in = (Bytef *)buff;
+	ctx->zout.avail_in = len;
+	/* forces at least one pass through the loop below */
+	ctx->zout.avail_out = 0;
+
+	/* keep going while there is input left or the last pass filled zbuff */
+	while (ctx->zout.avail_in > 0 || ctx->zout.avail_out == 0)
+	{
+	    int ret;
+
+	    ctx->zout.next_out = (Bytef *)zbuff;
+	    ctx->zout.avail_out = BUFSIZ;
+
+	    /* FIXME: for the arguments before a command, flushing is counterproductive */
+	    ret = deflate(&ctx->zout, Z_SYNC_FLUSH);
+
+	    if (ret == Z_OK)
+	    {
+		len = BUFSIZ - ctx->zout.avail_out;
+
+		if (writen(ctx->write_fd, zbuff, len) != len)
+		{
+		    debug(DEBUG_SYSERROR, "cvs_direct: zout: can't write");
+		    exit(1);
+		}
+	    }
+	    else
+	    {
+		debug(DEBUG_APPERROR, "cvs_direct: zout: error %d %s", ret, ctx->zout.msg ? ctx->zout.msg : "");
+	    }
+	}
+    }
+    else
+    {
+	if (writen(ctx->write_fd, buff, len)  != len)
+	{
+	    debug(DEBUG_SYSERROR, "cvs_direct: can't send command");
+	    exit(1);
+	}
+    }
+
+    debug(DEBUG_TCP, "string: '%s' sent", buff);
+}
+
+/*
+ * Refill ctx->read_buff from the server; must only be called when the
+ * buffer is empty (head == tail), otherwise the program exits.
+ *
+ * On a compressed connection, raw data is read into zread_buff and inflated
+ * into read_buff.  Returns a positive value when data is available in
+ * [head, tail), or a value <= 0 on EOF or read error, in which case the
+ * buffer is left empty.  Exits on an inflate error.
+ */
+static int refill_buffer(CvsServerCtx * ctx)
+{
+    int len;
+
+    if (ctx->head != ctx->tail)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: refill_buffer called on non-empty buffer");
+	exit(1);
+    }
+
+    ctx->head = ctx->read_buff;
+    len = RD_BUFF_SIZE;
+
+    if (ctx->compressed)
+    {
+	int zlen, ret;
+
+	/* if there was leftover buffer room, it's time to slurp more data */
+	do
+	{
+	    if (ctx->zin.avail_out > 0)
+	    {
+		if (ctx->zin.avail_in != 0)
+		{
+		    debug(DEBUG_APPERROR, "cvs_direct: zin: expect 0 avail_in");
+		    exit(1);
+		}
+		zlen = read(ctx->read_fd, ctx->zread_buff, RD_BUFF_SIZE);
+
+		/*
+		 * EOF or error: report it to the caller.  Storing a
+		 * negative count in the unsigned avail_in, or spinning
+		 * on inflate with no input, must be avoided.
+		 */
+		if (zlen <= 0)
+		{
+		    ctx->tail = ctx->head;
+		    return zlen;
+		}
+
+		ctx->zin.next_in = (Bytef *)ctx->zread_buff;
+		ctx->zin.avail_in = zlen;
+	    }
+
+	    ctx->zin.next_out = (Bytef *)ctx->head;
+	    ctx->zin.avail_out = len;
+
+	    /* FIXME: we don't always need Z_SYNC_FLUSH, do we? */
+	    ret = inflate(&ctx->zin, Z_SYNC_FLUSH);
+	}
+	while (ctx->zin.avail_out == len);	/* nothing produced yet: feed more input */
+
+	if (ret == Z_OK)
+	{
+	    ctx->tail = ctx->head + (len - ctx->zin.avail_out);
+	}
+	else
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: zin: error %d %s", ret, ctx->zin.msg ? ctx->zin.msg : "");
+	    exit(1);
+	}
+    }
+    else
+    {
+	len = read(ctx->read_fd, ctx->head, len);
+	ctx->tail = (len <= 0) ? ctx->head : ctx->head + len;
+    }
+
+    return len;
+}
+
+/*
+ * Read one '\n'-terminated line from the server into p, replacing the
+ * newline with a NUL.  Returns the line length (excluding the NUL), or -1
+ * if the connection ends before a newline is seen; in that case p holds
+ * the NUL-terminated partial line.
+ *
+ * NOTE: there is no length parameter, so p must be large enough for any
+ * line the server may send.
+ */
+static int read_line(CvsServerCtx * ctx, char * p)
+{
+    int len = 0;
+    while (1)
+    {
+	if (ctx->head == ctx->tail)
+	    if (refill_buffer(ctx) <= 0)
+	    {
+		/* leave a valid string behind for callers that go on to use it */
+		*p = 0;
+		return -1;
+	    }
+
+	*p = *ctx->head++;
+
+	if (*p == '\n')
+	{
+	    *p = 0;
+	    break;
+	}
+	p++;
+	len++;
+    }
+
+    return len;
+}
+
+/*
+ * Read one line from the server and compare it with str.  Returns 1 when
+ * the line equals str exactly, and 0 when it differs or no line could be
+ * read.
+ */
+static int read_response(CvsServerCtx * ctx, const char * str)
+{
+    /* FIXME: more than 1 char at a time */
+    char line[BUFSIZ];
+    int matched = 0;
+
+    if (read_line(ctx, line) >= 0)
+    {
+	debug(DEBUG_TCP, "response '%s' read", line);
+	matched = (strcmp(line, str) == 0);
+    }
+
+    return matched;
+}
+
+/*
+ * Consume the server's response to a command, up to and including the
+ * terminating "ok" or "error" line.  The text of each "M " line is written
+ * to fp followed by a newline (fp may be NULL to discard it), and the text
+ * of each "E " line is passed to debug().  Other lines are ignored.
+ */
+static void ctx_to_fp(CvsServerCtx * ctx, FILE * fp)
+{
+    char line[BUFSIZ];
+
+    while (1)
+    {
+	/* without this, EOF from the server would loop forever on a stale line */
+	if (read_line(ctx, line) < 0)
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: unexpected end of server response");
+	    break;
+	}
+
+	debug(DEBUG_TCP, "ctx_to_fp: %s", line);
+
+	/* strncmp, unlike memcmp, never looks past the end of a short line */
+	if (strncmp(line, "M ", 2) == 0)
+	{
+	    if (fp)
+		fprintf(fp, "%s\n", line + 2);
+	}
+	else if (strncmp(line, "E ", 2) == 0)
+	{
+	    debug(DEBUG_APPMSG1, "%s", line + 2);
+	}
+	else if (strncmp(line, "ok", 2) == 0 || strncmp(line, "error", 5) == 0)
+	{
+	    break;
+	}
+    }
+
+    if (fp)
+	fflush(fp);
+}
+
+/*
+ * Issue an "rdiff" request for rep + file between rev1 and rev2 and copy
+ * the server's output to stdout.  The diff is always unified ('-u'); no
+ * other options are passed.
+ */
+void cvs_rdiff(CvsServerCtx * ctx,
+	       const char * rep, const char * file,
+	       const char * rev1, const char * rev2)
+{
+    const char * revisions[2];
+    int idx;
+
+    revisions[0] = rev1;
+    revisions[1] = rev2;
+
+    send_string(ctx, "Argument -u\n");
+
+    /* one "-r <rev>" argument pair per revision, in order */
+    for (idx = 0; idx < 2; idx++)
+    {
+	send_string(ctx, "Argument -r\n");
+	send_string(ctx, "Argument %s\n", revisions[idx]);
+    }
+
+    send_string(ctx, "Argument %s%s\n", rep, file);
+    send_string(ctx, "rdiff\n");
+
+    ctx_to_fp(ctx, stdout);
+}
+
+/*
+ * Produce a diff for a file that is created or removed in revision rev.
+ * The revision is checked out from the server ("co -p") and piped into
+ * "diff <opts>" against /dev/null; sed then rewrites the '-' file name in
+ * the diff header to rep/file.  When create is non-zero /dev/null is the
+ * old side of the diff, otherwise it is the new side.
+ *
+ * NOTE(review): opts, rep and file are pasted into a shell command line
+ * (and a sed expression using '|' as delimiter) without any quoting.
+ * Names containing shell or sed metacharacters will be misinterpreted;
+ * callers must not pass untrusted values.
+ */
+void cvs_rupdate(CvsServerCtx * ctx, const char * rep, const char * file, const char * rev, int create, const char * opts)
+{
+    FILE * fp;
+    char cmdbuff[BUFSIZ];
+    int cmdlen;
+
+    cmdlen = snprintf(cmdbuff, BUFSIZ, "diff %s %s /dev/null %s | sed -e '%s s|^\\([+-][+-][+-]\\) -|\\1 %s/%s|g'",
+		      opts, create?"":"-", create?"-":"", create?"2":"1", rep, file);
+
+    /* a truncated command must never reach the shell */
+    if (cmdlen < 0 || cmdlen >= BUFSIZ)
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: diff command line too long");
+	exit(1);
+    }
+
+    debug(DEBUG_TCP, "cmdbuff: %s", cmdbuff);
+
+    if (!(fp = popen(cmdbuff, "w")))
+    {
+	debug(DEBUG_APPERROR, "cvs_direct: popen for diff failed: %s", cmdbuff);
+	exit(1);
+    }
+
+    send_string(ctx, "Argument -p\n");
+    send_string(ctx, "Argument -r\n");
+    send_string(ctx, "Argument %s\n", rev);
+    send_string(ctx, "Argument %s/%s\n", rep, file);
+    send_string(ctx, "co\n");
+
+    ctx_to_fp(ctx, fp);
+
+    pclose(fp);
+}
+
+/*
+ * Pull the next diff option out of the space-separated string *str and
+ * store it in arg, which must have room for 32 bytes.  *str is advanced
+ * past the consumed text (and becomes NULL at the end of the string).
+ *
+ * Only short options are accepted.  If the option is one of those listed
+ * below as taking a value, and the value is given as a separate word, the
+ * two are joined without the space (e.g. "-D FOO" becomes "-DFOO").
+ *
+ * Returns 1 if an option was stored in arg, 0 at the end of the string or
+ * on a parse error.
+ */
+static int parse_patch_arg(char * arg, char ** str)
+{
+    char *tok, *tok2 = "";
+
+    /* skip empty tokens produced by an empty string or repeated spaces */
+    do
+    {
+	tok = strsep(str, " ");
+	if (!tok)
+	    return 0;
+    } while (*tok == 0);
+
+    /* compare the character itself; "!*tok == '-'" could never be true */
+    if (*tok != '-')
+    {
+	debug(DEBUG_APPERROR, "diff_opts parse error: no '-' starting argument: %s", tok);
+	return 0;
+    }
+
+    /* if it's not 'long format' argument, we can process it efficiently */
+    if (tok[1] == '-')
+    {
+	debug(DEBUG_APPERROR, "diff_opts parse_error: long format args not supported");
+	return 0;
+    }
+
+    /*
+     * see if command wants two args and they're separated by ' '.
+     * tok[1] is checked first so that a bare "-" neither reads past its
+     * terminator nor matches the NUL at the end of the option list.
+     */
+    if (tok[1] != 0 && tok[2] == 0 && strchr("BdDFgiorVxYz", tok[1]))
+    {
+	tok2 = strsep(str, " ");
+	if (!tok2)
+	{
+	    debug(DEBUG_APPERROR, "diff_opts parse_error: argument %s requires two arguments", tok);
+	    return 0;
+	}
+    }
+
+    snprintf(arg, 32, "%s%s", tok, tok2);
+    return 1;
+}
+
+/*
+ * Issue a "diff" request for rep/file between rev1 and rev2, passing the
+ * options in opts (a space-separated list of short diff options) to the
+ * server, and copy the server's output to stdout.
+ */
+void cvs_diff(CvsServerCtx * ctx,
+	       const char * rep, const char * file,
+	       const char * rev1, const char * rev2, const char * opts)
+{
+    char argstr[BUFSIZ], *p = argstr;
+    char arg[32];
+    char file_buff[PATH_MAX], *last_slash;
+
+    strzncpy(argstr, opts, BUFSIZ);
+    while (parse_patch_arg(arg, &p))
+	send_string(ctx, "Argument %s\n", arg);
+
+    send_string(ctx, "Argument -r\n");
+    send_string(ctx, "Argument %s\n", rev1);
+    send_string(ctx, "Argument -r\n");
+    send_string(ctx, "Argument %s\n", rev2);
+
+    /*
+     * we need to separate the 'basename' of file in order to
+     * generate the Directory directive(s)
+     */
+    strzncpy(file_buff, file, PATH_MAX);
+    if ((last_slash = strrchr(file_buff, '/')))
+    {
+	/* cut file_buff down to the directory part of file */
+	*last_slash = 0;
+	send_string(ctx, "Directory %s/%s\n", rep, file_buff);
+	send_string(ctx, "%s/%s/%s\n", ctx->root, rep, file_buff);
+    }
+    else
+    {
+	send_string(ctx, "Directory %s\n", rep);
+	send_string(ctx, "%s/%s\n", ctx->root, rep);
+    }
+
+    send_string(ctx, "Directory .\n");
+    send_string(ctx, "%s\n", ctx->root);
+    send_string(ctx, "Argument %s/%s\n", rep, file);
+    send_string(ctx, "diff\n");
+
+    ctx_to_fp(ctx, stdout);
+}
+
+/*
+ * FIXME: the design of this sucks.  It was originally designed to fork a subprocess
+ * which read the cvs response and sent it back through a pipe to the main process,
+ * which fdopen(3)ed the other end, and just used regular fgets.  This however
+ * didn't work because the reads of compressed data in the child process altered
+ * the compression state, and there was no way to resynchronize that state with
+ * the parent process.  We could use threads...
+ */
+/*
+ * Send an "rlog" request for rep.  When date_str is non-empty, two "-d"
+ * arguments are sent first: the range "<date_str><1 Jan 2038 05:00:00 -0000"
+ * and date_str on its own.
+ *
+ * The return value is NOT a real stdio stream: it is ctx cast to FILE *,
+ * and may only be handed back to cvs_rlog_fgets() / cvs_rlog_close() after
+ * casting it back to a CvsServerCtx *.
+ */
+FILE * cvs_rlog_open(CvsServerCtx * ctx, const char * rep, const char * date_str)
+{
+    /* note: use of the date_str is handled in a non-standard, cvsps specific way */
+    if (date_str && date_str[0])
+    {
+	send_string(ctx, "Argument -d\n");
+	send_string(ctx, "Argument %s<1 Jan 2038 05:00:00 -0000\n", date_str);
+	send_string(ctx, "Argument -d\n");
+	send_string(ctx, "Argument %s\n", date_str);
+    }
+
+    send_string(ctx, "Argument %s\n", rep);
+    send_string(ctx, "rlog\n");
+
+    /*
+     * FIXME: is it possible to create a 'fake' FILE * whose 'refill'
+     * function is below?
+     */
+    return (FILE*)ctx;
+}
+
+/*
+ * fgets()-like reader for the response to an rlog request.
+ *
+ * Stores the text of the next "M " line in buff, followed by '\n' and a
+ * NUL, truncating it if necessary to fit in buflen bytes, and returns buff.
+ * "E " lines are passed to debug() and skipped, as are unrecognised lines.
+ * Returns NULL when the server completes the command ("ok" or "error"),
+ * when the connection ends, or when buflen is too small to hold anything.
+ */
+char * cvs_rlog_fgets(char * buff, int buflen, CvsServerCtx * ctx)
+{
+    char lbuff[BUFSIZ];
+    int len;
+
+    /* need room for at least the '\n' and the terminating NUL */
+    if (buflen < 2)
+	return NULL;
+
+    for (;;)
+    {
+	len = read_line(ctx, lbuff);
+	if (len < 0)
+	{
+	    debug(DEBUG_APPERROR, "cvs_direct: rlog: unexpected end of server response");
+	    return NULL;
+	}
+
+	debug(DEBUG_TCP, "cvs_direct: rlog: read %s", lbuff);
+
+	if (strncmp(lbuff, "M ", 2) == 0)
+	{
+	    int n = len - 2;
+
+	    if (n > buflen - 2)
+		n = buflen - 2;
+
+	    memcpy(buff, lbuff + 2, n);
+	    buff[n] = '\n';
+	    buff[n + 1] = 0;
+	    return buff;
+	}
+	else if (strncmp(lbuff, "E ", 2) == 0)
+	{
+	    /* go on to the next line rather than returning stale buff contents */
+	    debug(DEBUG_APPMSG1, "%s", lbuff + 2);
+	}
+	else if (strcmp(lbuff, "ok") == 0 ||strcmp(lbuff, "error") == 0)
+	{
+	    debug(DEBUG_TCP, "cvs_direct: rlog: got command completion");
+	    return NULL;
+	}
+    }
+}
+
+/*
+ * Counterpart of cvs_rlog_open().  Currently does nothing: the connection
+ * stays open and ctx is not modified or freed.
+ */
+void cvs_rlog_close(CvsServerCtx * ctx)
+{
+}
+
+/*
+ * Fill in the client and server version strings.  client_version receives
+ * a fixed string describing cvs-direct.  server_version receives
+ * "Server: " followed by the text of the server's reply to a "version"
+ * request, or an empty string when no usable reply was read.
+ *
+ * Both buffers are written without a length check; server_version must be
+ * able to hold a full response line (up to BUFSIZ bytes) plus the prefix.
+ */
+void cvs_version(CvsServerCtx * ctx, char * client_version, char * server_version)
+{
+    char lbuff[BUFSIZ];
+
+    strcpy(client_version, "Client: Concurrent Versions System (CVS) 99.99.99 (client/server) cvs-direct");
+    send_string(ctx, "version\n");
+
+    /* zero-fill so lbuff is a valid string even if nothing could be read */
+    memset(lbuff, 0, sizeof(lbuff));
+
+    if (read_line(ctx, lbuff) >= 0 && strncmp(lbuff, "M ", 2) == 0)
+    {
+	sprintf(server_version, "Server: %s", lbuff + 2);
+    }
+    else
+    {
+	/* server_version is printed below, so it must not stay uninitialised */
+	server_version[0] = 0;
+	debug(DEBUG_APPERROR, "cvs_direct: didn't read version: %s", lbuff);
+    }
+
+    if (read_line(ctx, lbuff) < 0 || strcmp(lbuff, "ok") != 0)
+	debug(DEBUG_APPERROR, "cvs_direct: protocol error reading version");
+
+    debug(DEBUG_TCP, "cvs_direct: client version %s", client_version);
+    debug(DEBUG_TCP, "cvs_direct: server version %s", server_version);
+}
diff --git a/cvs_direct.h b/cvs_direct.h
new file mode 100644
index 0000000..52a81a3
--- /dev/null
+++ b/cvs_direct.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef CVS_DIRECT_H
+#define CVS_DIRECT_H
+
+/* FILE is used in the cvs_rlog_open() prototype below */
+#include <stdio.h>
+
+#ifndef HAVE_CVSSERVERCTX_DEF
+#define HAVE_CVSSERVERCTX_DEF
+/* opaque handle for one connection to a cvs server */
+typedef struct _CvsServerCtx CvsServerCtx;
+#endif
+
+/*
+ * Connect to the server for the given cvs root; the int is the zlib
+ * compression level (0 disables compression).  Returns NULL on failure.
+ */
+CvsServerCtx * open_cvs_server(char * root, int);
+/* Shut the connection down and free the context. */
+void close_cvs_server(CvsServerCtx*);
+/* Arguments: ctx, rep, file, rev1, rev2.  Output goes to stdout. */
+void cvs_rdiff(CvsServerCtx *, const char *, const char *, const char *, const char *);
+/* Arguments: ctx, rep, file, rev, create, opts. */
+void cvs_rupdate(CvsServerCtx *, const char *, const char *, const char *, int, const char *);
+/* Arguments: ctx, rep, file, rev1, rev2, opts.  Output goes to stdout. */
+void cvs_diff(CvsServerCtx *, const char *, const char *, const char *, const char *, const char *);
+/*
+ * Arguments: ctx, rep, date_str.  The returned pointer is the context cast
+ * to FILE *, not a real stdio stream; use it only with cvs_rlog_fgets()
+ * and cvs_rlog_close().
+ */
+FILE * cvs_rlog_open(CvsServerCtx *, const char *, const char *);
+/* Arguments: buff, buflen, ctx.  Returns NULL at the end of the rlog output. */
+char * cvs_rlog_fgets(char *, int, CvsServerCtx *);
+void cvs_rlog_close(CvsServerCtx *);
+/* Arguments: ctx, client_version (out), server_version (out). */
+void cvs_version(CvsServerCtx *, char *, char *);
+
+#endif /* CVS_DIRECT_H */
diff --git a/cvsps.1 b/cvsps.1
new file mode 100644
index 0000000..cea0faf
--- /dev/null
+++ b/cvsps.1
@@ -0,0 +1,205 @@
+.TH "cvsps" 1
+.SH NAME
+CVSps \- create patchset information from CVS
+.SH SYNOPSIS
+.B cvsps
+[\-h] [\-x] [\-u] [\-z <fuzz>] [\-g] [\-s <patchset>] [\-a <author>] [\-f <file>] [\-d <date1> [\-d <date2>]] [\-l <text>] [\-b <branch>] [\-r <tag> [\-r <tag>]] [\-p <directory>] [\-v] [\-t] [\-\-norc] [\-\-summary\-first] [\-\-test\-log <filename>] [\-\-bkcvs] [\-\-no\-rlog] [\-\-diff\-opts <option string>] [\-\-cvs\-direct] [\-\-debuglvl <bitmask>] [\-Z <compression>] [\-\-root <cvsroot>] [\-q] [\-A] [<repository>] 
+.SH DESCRIPTION
+CVSps is a program for generating 'patchset' information from a CVS
+repository.  A patchset in this case is defined as a set of changes made
+to a collection of files, and all committed at the same time (using a
+single 'cvs commit' command).  This information is valuable to seeing the
+big picture of the evolution of a cvs project.  While cvs tracks revision
+information, it is often difficult to see what changes were committed
+'atomically' to the repository.
+.SH OPTIONS
+.TP
+.B \-h
+display usage summary
+.TP
+.B \-x
+ignore (and rebuild) ~/.cvsps/cvsps.cache file
+.TP
+.B \-u
+update ~/.cvsps/cvsps.cache file
+.TP
+.B \-z <fuzz>
+set the timestamp fuzz factor for identifying patch sets
+.TP
+.B \-g
+generate diffs of the selected patch sets
+.TP
+.B \-s <patchset>[\-[<patchset>]][,<patchset>...]
+generate diffs for the given patchsets and patchset ranges
+.TP
+.B \-a <author>
+restrict output to patchsets created by author
+.TP
+.B \-f <file>
+restrict output to patchsets involving file
+.TP
+.B \-d <date1> \-d <date2>
+if just one date specified, show
+revisions newer than date1.  If two dates specified,
+show revisions between two dates.
+.TP
+.B \-l <regex>
+restrict output to patchsets matching regex in log message
+.TP
+.B \-b <branch>
+restrict output to patchsets affecting history of branch.
+If you want to restrict to the main branch, use a branch of 'HEAD'.
+.TP
+.B \-r <tag1> \-r <tag2>
+if just one tag specified, show
+revisions since tag1. If two tags specified, show
+revisions between the two tags.
+.TP
+.B \-p <dir>
+output individual patchsets as files in <dir> as <dir>/<patchset>.patch
+.TP
+.B \-v
+show very verbose parsing messages
+.TP
+.B \-t
+show some brief memory usage statistics
+.TP
+.B \-\-norc
+when invoking cvs, ignore the .cvsrc file
+.TP
+.B \-\-summary\-first
+when multiple patchset diffs are being generated, put the patchset
+summary for all patchsets at the beginning of the output.
+.TP
+.B \-\-test\-log <captured cvs log file>
+for testing changes, you can capture cvs log output, then test against
+this captured file instead of hammering some poor CVS server
+.TP
+.B \-\-bkcvs
+(see note below) for use in parsing the BK\->CVS tree log formats only.  This enables
+some hacks which are not generally applicable.
+.TP
+.B \-\-no\-rlog
+disable the use of rlog internally.  Note: rlog is
+required for stable PatchSet numbering.  Use with care.
+.TP
+.B \-\-diff\-opts <option string>
+send a custom set of options to diff, for example to increase
+the number of context lines, or change the diff format.
+.TP
+.B \-\-cvs\-direct (\-\-no\-cvs\-direct)
+enable (disable) built\-in cvs client code. This enables the 'pipelining' of multiple
+requests over a single client, reducing the overhead of handshaking and
+authentication to one per PatchSet instead of one per file.
+.TP
+.B \-\-debuglvl <bitmask>
+enable various debug output channels.
+.TP
+.B \-Z <compression>
+A value 1\-9 which specifies amount of compression.  A value of 0 disables compression.
+.TP
+.B \-\-root <cvsroot>
+Override the setting of CVSROOT (overrides working dir. and environment).  For \-\-cvs\-direct only.
+.TP
+.B \-q
+Be quiet about warnings.
+.TP
+.B \-A
+Show ancestor branch when a new branch is found.
+.TP
+.B <repository>
+Operate on the specified repository (overrides working dir.)
+.SH "NOTE ON TAG HANDLING"
+Tags are fundamentally 'file at a time' in cvs, but like everything else,
+it would be nice to imagine that they are 'repository at a time.'  The
+approach cvsps takes is that a tag is assigned to a patchset.  The meaning
+of this is that after this patchset, every revision of every file is after
+the tag (and conversely, before this patchset, at least one file is still
+before the tag).  However, there are two kinds of inconsistent (or 'funky')
+tags that can be created, even when following best practices for cvs.  
+.PP
+The first
+is what is called a FUNKY tag.  A funky tag is one where there are patchsets
+which are chronologically (and thus by patchset id) earlier than the tag, but
+are tagwise after.  These tags will be marked as '**FUNKY**' in the Tag: section
+of the cvsps output.  When a funky tag is specified as one of the '\-r' arguments,
+there are some number of patchsets which need to be considered out of sequence.  
+In this case, the patchsets themselves will be labeled FUNKY and will be processed
+correctly.
+.PP
+The second is called an INVALID tag.  An invalid tag is a tag where there are
+patchsets which are chronologically (and thus by patchset id) earlier than the tag,
+but which have members which are tagwise both before, and after the tag, in the
+same patchset.  If an INVALID tag is specified as one of the '\-r' arguments,
+cvsps will flag each member of the affected patchsets as before or after the tag
+and the patchset summary will indicate which members are which, and diffs will 
+be generated accordingly.
+.SH "NOTE ON CVS VERSIONS"
+Among the different cvs subcommands used by cvsps is the 'rlog' command.  The
+rlog command is used to get revision history of a module, and it disregards
+the current working directory.  The important difference between 'rlog' and 'log'
+(from cvsps' perspective) is that 'rlog' will include log data for files not in
+the current working directory.  The impact of this is mainly when there are 
+directories which at one time had files, but are now empty, and have been pruned
+from the working directory with the '\-P' option.  If 'rlog' is not used, these
+files' logs will not be parsed, and the PatchSet numbering will be unstable.
+.PP
+The main problem with 'rlog' is that, until cvs version 1.11.1, 'rlog' was an
+alias for the 'log' command.  This means, for old versions of cvs, 'rlog' has
+different semantics and usage.  cvsps will attempt to work around this problem
+by detecting capable versions of cvs.  If an old version is detected, 'log' will
+be used instead of 'rlog', and YMMV.
+.SH "NOTE ON GENERATED DIFFS"
+Another important note is that cvsps will attempt, whenever possible, to use the
+r\-commands (rlog, rdiff  and co) instead of the local commands (log, diff, and update).
+This is to allow cvsps to function without a completely checked out tree.  Because
+these r\-commands are used, the generated diffs will include the module directory in 
+them, and it is recommended to apply them in the working directory with the \-p1 option
+to the patch command.  However, if the \-\-diff\-opts option is specified (to change, for 
+example, the lines of context), then rdiff cannot be used, because it doesn't support
+arbitrary options.  In this case, the patches will be generated without the module
+directory in the path, and \-p0 will be required when applying the patch.  When 
+diffs are generated in cvs\-direct mode (see below), however, they will always
+be \-p1 style patches.
+.SH "NOTE ON BKCVS"
+The \-\-bkcvs option is a special operating mode that should only be used when parsing
+the log files from the BK \-> CVS exported linux kernel trees.  cvsps uses special
+semantics for recreating the BK ChangeSet metadata that has been embedded in the log
+files for those trees.  The \-\-bkcvs option should only be specified when the cache
+file is being created or updated (i.e. initial run of cvsps, or when \-u and \-x options
+are used).
+.SH "NOTE ON CVS\-DIRECT"
+As of version 2.0b6 cvsps has a partial implementation of the cvs client code built 
+in.  This reduces the RTT and/or handshaking overhead from one per patchset member
+to one per patchset.  This dramatically increases the speed of generating diffs
+over a slow link, and improves the consistency of operation.  Currently the \-\-cvs\-direct
+option turns on the use of this code, but it very well may be default by the time
+2.0 comes out.  The built\-in cvs code attempts to be compatible with cvs, but may
+have problems, which should be reported.  It honors the CVS_RSH and CVS_SERVER 
+environment variables, but does not parse the ~/.cvsrc file.
+.SH "NOTE ON CVSPS RC FILE"
+CVSps parses an rc file at startup.  This file should be located in ~/.cvsps/cvspsrc.
+The file should contain arguments, in the exact syntax as the command line, one per line.
+If an argument takes a parameter, the parameter should be on the same line as the argument.
+.SH "NOTE ON DATE FORMATS"
+All dates are reported in localtime.  This can be overridden (as usual) using the TZ
+environment variable.  Dates as arguments must be in the format 'yyyy/mm/dd hh:mm:ss'; for example,
+.IP "" 4
+$ cvsps \-d '2004/05/01 00:00:00' \-d '2004/07/07 12:00:00'
+.SH "SEE ALSO"
+.BR cvs ( 1 ),
+.BR ci ( 1 ),
+.BR co ( 1 ),
+.BR cvs ( 5 ),
+.BR cvsbug ( 8 ),
+.BR diff ( 1 ),
+.BR grep ( 1 ),
+.BR patch ( 1 ),
+.BR rcs ( 1 ),
+.BR rcsdiff ( 1 ),
+.BR rcsmerge ( 1 ),
+.BR rlog ( 1 ).
+.SH "REPORTING BUGS"
+Report bugs to "David Mansfield <cvsps@dm.cobite.com>"
+.SH BUGS
+No known bugs.
+
diff --git a/cvsps.c b/cvsps.c
new file mode 100644
index 0000000..1e64e3c
--- /dev/null
+++ b/cvsps.c
@@ -0,0 +1,2619 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <search.h>
+#include <time.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <regex.h>
+#include <sys/wait.h> /* for WEXITSTATUS - see system(3) */
+
+#include <cbtcommon/hash.h>
+#include <cbtcommon/list.h>
+#include <cbtcommon/text_util.h>
+#include <cbtcommon/debug.h>
+#include <cbtcommon/rcsid.h>
+
+#include "cache.h"
+#include "cvsps_types.h"
+#include "cvsps.h"
+#include "util.h"
+#include "stats.h"
+#include "cap.h"
+#include "cvs_direct.h"
+#include "list_sort.h"
+
+RCSID("$Id: cvsps.c,v 4.106 2005/05/26 03:39:29 david Exp $");
+
+#define CVS_LOG_BOUNDARY "----------------------------\n"
+#define CVS_FILE_BOUNDARY "=============================================================================\n"
+
+enum /* states of the log parsing loop in load_from_cvs() */
+{
+    NEED_FILE,              /* waiting for an "RCS file" line that parse_file() accepts */
+    NEED_SYMS,              /* waiting for the "symbolic names:" line */
+    NEED_EOS,               /* reading symbol lines until a line not starting with whitespace */
+    NEED_START_LOG,         /* waiting for the first CVS_LOG_BOUNDARY of the file */
+    NEED_REVISION,          /* waiting for a "revision" line */
+    NEED_DATE_AUTHOR_STATE, /* waiting for the "date:" line (also carries author and state) */
+    NEED_EOM                /* collecting the log message until a log or file boundary */
+};
+
+/* true globals */
+struct hash_table * file_hash;      /* CvsFile objects keyed by stripped filename (filled in parse_file) */
+CvsServerCtx * cvs_direct_ctx;      /* result of open_cvs_server() in main(); NULL when not opened */
+char root_path[PATH_MAX];           /* CVSROOT: --root, else CVS/Root, else $CVSROOT (see init_paths) */
+char repository_path[PATH_MAX];     /* repository: command line, else CVS/Repository (see init_paths) */
+
+const char * tag_flag_descr[] = {   /* display strings; presumably indexed by a tag flag value -- confirm at use site */
+    "",
+    "**FUNKY**",
+    "**INVALID**",
+    "**INVALID**"
+};
+
+const char * fnk_descr[] = {        /* display strings; presumably indexed by a 'funk' code -- confirm at use site */
+    "",
+    "FNK_SHOW_SOME",
+    "FNK_SHOW_ALL",
+    "FNK_HIDE_ALL",
+    "FNK_HIDE_SOME"
+};
+
+/* static globals */
+static int ps_counter;                      /* reset to 0 in main() before patch set ids are assigned */
+static void * ps_tree;                      /* tsearch(3) tree root of all PatchSets (see get_patch_set) */
+static struct hash_table * global_symbols;  /* created in main() */
+static char strip_path[PATH_MAX];           /* prefix stripped from 'RCS file' paths (see init_paths, parse_file) */
+static int strip_path_len;                  /* length of strip_path */
+static time_t cache_date;                   /* read_cache() result; reset to time(NULL) in load_from_cvs() */
+static int update_cache;                    /* set by -u and -x, or when read_cache() returns < 0 */
+static int ignore_cache;                    /* set by -x */
+static int do_write_cache;                  /* set in main() after load_from_cvs() has run */
+static int statistics;                      /* set by -t */
+static const char * test_log_file;          /* argument of --test-log */
+static struct hash_table * branch_heads;    /* created in main() */
+static struct list_head all_patch_sets;     /* every PatchSet, linked through all_link (see get_patch_set) */
+static struct list_head collisions;         /* initialized in main() */
+
+/* settable via options */
+static int timestamp_fuzz_factor = 300;     /* -z */
+static int do_diff;                         /* -g */
+static const char * restrict_author;        /* -a */
+static int have_restrict_log;               /* set when -l compiled a regex into restrict_log */
+static regex_t restrict_log;                /* -l */
+static int have_restrict_file;              /* set when -f compiled a regex into restrict_file */
+static regex_t restrict_file;               /* -f */
+static time_t restrict_date_start;          /* first -d */
+static time_t restrict_date_end;            /* subsequent -d (used once restrict_date_start is non-zero) */
+static const char * restrict_branch;        /* -b */
+static struct list_head show_patch_set_ranges; /* PatchSetRange entries from -s */
+static int summary_first;                   /* --summary-first; incremented in main() between passes */
+static const char * norc = "";              /* "-f" after --norc, passed on the cvs command line */
+static const char * patch_set_dir;          /* -p */
+static const char * restrict_tag_start;     /* first -r */
+static const char * restrict_tag_end;       /* second -r */
+static int restrict_tag_ps_start;           /* main() treats 0 as "first -r symbol not resolved" */
+static int restrict_tag_ps_end = INT_MAX;   /* main() treats INT_MAX as "second -r symbol not resolved" */
+static const char * diff_opts;              /* --diff-opts; NULL when given as an empty string */
+static int bkcvs;                           /* --bkcvs */
+static int no_rlog;                         /* --no-rlog */
+static int cvs_direct;                      /* --cvs-direct / --no-cvs-direct */
+static int compress;                        /* -Z level, 0-9 */
+static char compress_arg[8];                /* "-z<level>" for the cvs command line, or "" */
+static int track_branch_ancestry;           /* -A */
+
+static void check_norc(int, char *[]);
+static int parse_args(int, char *[]);
+static int parse_rc();
+static void load_from_cvs();
+static void init_paths();
+static CvsFile * parse_file(const char *);
+static CvsFileRevision * parse_revision(CvsFile * file, char * rev_str);
+static void assign_pre_revision(PatchSetMember *, CvsFileRevision * rev);
+static void check_print_patch_set(PatchSet *);
+static void print_patch_set(PatchSet *);
+static void assign_patchset_id(PatchSet *);
+static int compare_rev_strings(const char *, const char *);
+static int compare_patch_sets_by_members(const PatchSet * ps1, const PatchSet * ps2);
+static int compare_patch_sets_bk(const void *, const void *);
+static int compare_patch_sets(const void *, const void *);
+static int compare_patch_sets_bytime_list(struct list_head *, struct list_head *);
+static int compare_patch_sets_bytime(const PatchSet *, const PatchSet *);
+static int is_revision_metadata(const char *);
+static int patch_set_member_regex(PatchSet * ps, regex_t * reg);
+static int patch_set_affects_branch(PatchSet *, const char *);
+static void do_cvs_diff(PatchSet *);
+static PatchSet * create_patch_set();
+static PatchSetRange * create_patch_set_range();
+static void parse_sym(CvsFile *, char *);
+static void resolve_global_symbols();
+static int revision_affects_branch(CvsFileRevision *, const char *);
+static int is_vendor_branch(const char *);
+static void set_psm_initial(PatchSetMember * psm);
+static int check_rev_funk(PatchSet *, CvsFileRevision *);
+static CvsFileRevision * rev_follow_branch(CvsFileRevision *, const char *);
+static int before_tag(CvsFileRevision * rev, const char * tag);
+static void determine_branch_ancestor(PatchSet * ps, PatchSet * head_ps);
+static void handle_collisions();
+
+/*
+ * Program entry point.
+ *
+ * Order of operations: rc file (unless --norc), command line, path
+ * discovery, cache load, optional load from cvs, sort and number the
+ * patch sets, collision handling, global symbol resolution, optional
+ * cache write and statistics, then one (or, with --summary-first, two)
+ * walks handing every patch set to check_print_patch_set().
+ *
+ * Never returns; every path ends in exit().
+ */
+int main(int argc, char *argv[])
+{
+    debuglvl = DEBUG_APPERROR|DEBUG_SYSERROR|DEBUG_APPMSG1;
+
+    INIT_LIST_HEAD(&show_patch_set_ranges);
+
+    /*
+     * we want to parse the rc first, so command line can override it
+     * but also, --norc should stop the rc from being processed, so
+     * we look for --norc explicitly first.  Note: --norc in the rc
+     * file itself will prevent the cvs rc file from being used.
+     */
+    check_norc(argc, argv);
+
+    /* norc is still "" here unless check_norc() changed it */
+    if (strlen(norc) == 0 && parse_rc() < 0)
+	exit(1);
+
+    if (parse_args(argc, argv) < 0)
+	exit(1);
+
+    if (diff_opts && !cvs_direct && do_diff)
+    {
+	debug(DEBUG_APPMSG1, "\nWARNING: diff options are not supported by 'cvs rdiff'");
+	debug(DEBUG_APPMSG1, "         which is usually used to create diffs.  'cvs diff'");
+	debug(DEBUG_APPMSG1, "         will be used instead, but the resulting patches ");
+	debug(DEBUG_APPMSG1, "         will need to be applied using the '-p0' option");
+	debug(DEBUG_APPMSG1, "         to patch(1) (in the working directory), ");
+	debug(DEBUG_APPMSG1, "         instead of '-p1'\n");
+    }
+
+    file_hash = create_hash_table(1023);
+    global_symbols = create_hash_table(111);
+    branch_heads = create_hash_table(1023);
+    INIT_LIST_HEAD(&all_patch_sets);
+    INIT_LIST_HEAD(&collisions);
+
+    /* this parses some of the CVS/ files, and initializes
+     * the repository_path and other variables
+     */
+    init_paths();
+
+    if (!ignore_cache)
+    {
+	int save_fuzz_factor = timestamp_fuzz_factor;
+
+	/* the timestamp fuzz should only be in effect when loading from
+	 * CVS, not re-fuzzed when loading from cache.  This is a hack
+	 * working around bad use of global variables
+	 */
+
+	timestamp_fuzz_factor = 0;
+
+	/* a negative result from read_cache() forces a load from cvs */
+	if ((cache_date = read_cache()) < 0)
+	    update_cache = 1;
+
+	timestamp_fuzz_factor = save_fuzz_factor;
+    }
+
+    /* the server connection is needed for diffs, and for fetching the
+     * log unless the log comes from a --test-log file
+     */
+    if (cvs_direct && (do_diff || (update_cache && !test_log_file)))
+	cvs_direct_ctx = open_cvs_server(root_path, compress);
+
+    if (update_cache)
+    {
+	load_from_cvs();
+	do_write_cache = 1;
+    }
+
+    //XXX
+    //handle_collisions();
+
+    list_sort(&all_patch_sets, compare_patch_sets_bytime_list);
+
+    /* ids are handed out in the sorted order established just above */
+    ps_counter = 0;
+    walk_all_patch_sets(assign_patchset_id);
+
+    handle_collisions();
+
+    resolve_global_symbols();
+
+    if (do_write_cache)
+	write_cache(cache_date);
+
+    if (statistics)
+	print_statistics(ps_tree);
+
+    /* check that the '-r' symbols (if specified) were resolved */
+    if (restrict_tag_start && restrict_tag_ps_start == 0 && 
+	strcmp(restrict_tag_start, "#CVSPS_EPOCH") != 0)
+    {
+	debug(DEBUG_APPERROR, "symbol given with -r: %s: not found", restrict_tag_start);
+	exit(1);
+    }
+
+    if (restrict_tag_end && restrict_tag_ps_end == INT_MAX)
+    {
+	debug(DEBUG_APPERROR, "symbol given with second -r: %s: not found", restrict_tag_end);
+	exit(1);
+    }
+
+    walk_all_patch_sets(check_print_patch_set);
+
+    /* with --summary-first a second walk is made; summary_first goes
+     * from 1 to 2 here, presumably so check_print_patch_set() can tell
+     * the two passes apart (its body is not in view -- confirm there)
+     */
+    if (summary_first++)
+	walk_all_patch_sets(check_print_patch_set);
+
+    if (cvs_direct_ctx)
+	close_cvs_server(cvs_direct_ctx);
+
+    exit(0);
+}
+
+/*
+ * Read a cvs log and feed every revision found in it into the patch
+ * set structures.
+ *
+ * The log comes from one of three sources, in this order of precedence:
+ *   1) the file named by --test-log
+ *   2) the built-in cvs client, when cvs_direct_ctx is open
+ *   3) a 'cvs log' / 'cvs rlog' child process opened with popen()
+ * The same precedence is used for opening, reading and closing.
+ *
+ * When cache_date > 0 only revisions since that date (plus one
+ * overlapping revision per file) are requested.  cache_date is then
+ * reset to the current time.
+ *
+ * Parsing is a line oriented state machine (NEED_FILE ... NEED_EOM).
+ * Exits the process on any open or parse failure.
+ */
+static void load_from_cvs()
+{
+    FILE * cvsfp;
+    char buff[BUFSIZ];
+    int state = NEED_FILE;
+    CvsFile * file = NULL;
+    PatchSetMember * psm = NULL;
+    char datebuff[20];
+    char authbuff[AUTH_STR_MAX];
+    char logbuff[LOG_STR_MAX + 1];
+    int loglen = 0;
+    int have_log = 0;
+    char cmd[BUFSIZ];
+    char date_str[64];
+    char use_rep_buff[PATH_MAX];
+    char * ltype;
+    int use_direct;
+
+    /* a revision with no log lines at all must yield an empty log,
+     * not whatever happened to be on the stack
+     */
+    logbuff[0] = 0;
+    datebuff[0] = 0;
+    authbuff[0] = 0;
+
+    if (!no_rlog && !test_log_file && cvs_check_cap(CAP_HAVE_RLOG))
+    {
+	ltype = "rlog";
+	snprintf(use_rep_buff, PATH_MAX, "%s", repository_path);
+    }
+    else
+    {
+	ltype = "log";
+	use_rep_buff[0] = 0;
+    }
+
+    if (cache_date > 0)
+    {
+	struct tm * tm = gmtime(&cache_date);
+	strftime(date_str, 64, "%d %b %Y %H:%M:%S %z", tm);
+
+	/* this command asks for logs using two different date
+	 * arguments, separated by ';' (see man rlog).  The first
+	 * gets all revisions more recent than date, the second 
+	 * gets a single revision no later than date, which combined
+	 * get us all revisions that have occurred since last update
+	 * and overlaps what we had before by exactly one revision,
+	 * which is necessary to fill in the pre_rev stuff for a 
+	 * PatchSetMember
+	 */
+	snprintf(cmd, BUFSIZ, "cvs %s %s %s -d '%s<;%s' %s", compress_arg, norc, ltype, date_str, date_str, use_rep_buff);
+    }
+    else
+    {
+	date_str[0] = 0;
+	snprintf(cmd, BUFSIZ, "cvs %s %s %s %s", compress_arg, norc, ltype, use_rep_buff);
+    }
+    
+    debug(DEBUG_STATUS, "******* USING CMD %s", cmd);
+
+    cache_date = time(NULL);
+
+    /* the direct connection may be open only for the sake of diffs
+     * (see main); a test log file always wins as the log source
+     */
+    use_direct = (cvs_direct_ctx && !test_log_file);
+
+    /* FIXME: this is ugly, need to virtualize the accesses away from here */
+    if (test_log_file)
+	cvsfp = fopen(test_log_file, "r");
+    else if (use_direct)
+	cvsfp = cvs_rlog_open(cvs_direct_ctx, repository_path, date_str);
+    else
+	cvsfp = popen(cmd, "r");
+
+    if (!cvsfp)
+    {
+	debug(DEBUG_SYSERROR, "can't open cvs pipe using command %s", cmd);
+	exit(1);
+    }
+
+    for (;;)
+    {
+	char * tst;
+	if (use_direct)
+	    tst = cvs_rlog_fgets(buff, BUFSIZ, cvs_direct_ctx);
+	else
+	    tst = fgets(buff, BUFSIZ, cvsfp);
+
+	if (!tst)
+	    break;
+
+	debug(DEBUG_STATUS, "state: %d read line:%s", state, buff);
+
+	switch(state)
+	{
+	case NEED_FILE:
+	    if (strncmp(buff, "RCS file", 8) == 0 && (file = parse_file(buff)))
+		state = NEED_SYMS;
+	    break;
+	case NEED_SYMS:
+	    if (strncmp(buff, "symbolic names:", 15) == 0)
+		state = NEED_EOS;
+	    break;
+	case NEED_EOS:
+	    /* symbol lines are indented; the first unindented line ends them */
+	    if (!isspace((unsigned char)buff[0]))
+	    {
+		/* see cvsps_types.h for commentary on have_branches */
+		file->have_branches = 1;
+		state = NEED_START_LOG;
+	    }
+	    else
+		parse_sym(file, buff);
+	    break;
+	case NEED_START_LOG:
+	    if (strcmp(buff, CVS_LOG_BOUNDARY) == 0)
+		state = NEED_REVISION;
+	    break;
+	case NEED_REVISION:
+	    if (strncmp(buff, "revision", 8) == 0)
+	    {
+		char new_rev[REV_STR_MAX];
+		CvsFileRevision * rev;
+		const char * rev_text;
+
+		/* skip "revision" and the separator after it, without
+		 * stepping past the terminator of a short line, and
+		 * without overflowing new_rev
+		 */
+		rev_text = buff + 8;
+		if (*rev_text)
+		    rev_text++;
+		snprintf(new_rev, REV_STR_MAX, "%s", rev_text);
+		chop(new_rev);
+
+		/* 
+		 * rev may already exist (think cvsps -u), in which
+		 * case parse_revision is a hash lookup
+		 */
+		rev = parse_revision(file, new_rev);
+
+		/* 
+		 * in the simple case, we are copying rev to psm->pre_rev
+		 * (psm refers to last patch set processed at this point)
+		 * since generally speaking the log is reverse chronological.
+		 * This breaks down slightly when branches are introduced 
+		 */
+
+		assign_pre_revision(psm, rev);
+
+		/*
+		 * if this is a new revision, it will have no post_psm associated.
+		 * otherwise we are (probably?) hitting the overlap in cvsps -u 
+		 */
+		if (!rev->post_psm)
+		{
+		    psm = rev->post_psm = create_patch_set_member();
+		    psm->post_rev = rev;
+		    psm->file = file;
+		    state = NEED_DATE_AUTHOR_STATE;
+		}
+		else
+		{
+		    /* we hit this in cvsps -u mode, we are now up-to-date
+		     * w.r.t this particular file. skip all of the rest 
+		     * of the info (revs and logs) until we hit the next file
+		     */
+		    psm = NULL;
+		    state = NEED_EOM;
+		}
+	    }
+	    break;
+	case NEED_DATE_AUTHOR_STATE:
+	    if (strncmp(buff, "date:", 5) == 0)
+	    {
+		char * p;
+
+		/* "date: " is 6 characters, the timestamp is the next 19 */
+		snprintf(datebuff, sizeof(datebuff), "%.19s", strlen(buff) > 6 ? buff + 6 : "");
+
+		strcpy(authbuff, "unknown");
+		p = strstr(buff, "author: ");
+		if (p)
+		{
+		    char * op;
+		    p += 8;
+		    op = strchr(p, ';');
+		    if (op)
+		    {
+			/* the size argument counts the terminator (as the
+			 * original 'op - p + 1' implies); never let it
+			 * exceed the destination buffer
+			 */
+			int alen = op - p + 1;
+			if (alen > AUTH_STR_MAX)
+			    alen = AUTH_STR_MAX;
+			strzncpy(authbuff, p, alen);
+		    }
+		}
+		
+		/* read the 'state' tag to see if this is a dead revision */
+		p = strstr(buff, "state: ");
+		if (p)
+		{
+		    char * op;
+		    p += 7;
+		    op = strchr(p, ';');
+		    /* require exactly "dead": an empty or shorter state
+		     * field must not be taken for a dead revision
+		     */
+		    if (op && op - p == 4 && strncmp(p, "dead", 4) == 0)
+			psm->post_rev->dead = 1;
+		}
+
+		state = NEED_EOM;
+	    }
+	    break;
+	case NEED_EOM:
+	    if (strcmp(buff, CVS_LOG_BOUNDARY) == 0)
+	    {
+		/* end of one revision's log; another revision follows */
+		if (psm)
+		{
+		    PatchSet * ps = get_patch_set(datebuff, logbuff, authbuff, psm->post_rev->branch, psm);
+		    patch_set_add_member(ps, psm);
+		}
+
+		logbuff[0] = 0;
+		loglen = 0;
+		have_log = 0;
+		state = NEED_REVISION;
+	    }
+	    else if (strcmp(buff, CVS_FILE_BOUNDARY) == 0)
+	    {
+		/* end of the last revision of this file */
+		if (psm)
+		{
+		    PatchSet * ps = get_patch_set(datebuff, logbuff, authbuff, psm->post_rev->branch, psm);
+		    patch_set_add_member(ps, psm);
+		    assign_pre_revision(psm, NULL);
+		}
+
+		logbuff[0] = 0;
+		loglen = 0;
+		have_log = 0;
+		psm = NULL;
+		file = NULL;
+		state = NEED_FILE;
+	    }
+	    else
+	    {
+		/* other "blahblah: information;" messages can 
+		 * follow the stuff we pay attention to
+		 */
+		if (have_log || !is_revision_metadata(buff))
+		{
+		    /* if the log buffer is full, that's it.  
+		     * 
+		     * Also, read lines (fgets) always have \n in them
+		     * which we count on.  So if truncation happens,
+		     * be careful to put a \n on.
+		     * 
+		     * Buffer has LOG_STR_MAX + 1 for room for \0 if
+		     * necessary
+		     */
+		    if (loglen < LOG_STR_MAX)
+		    {
+			int len = strlen(buff);
+			
+			if (len >= LOG_STR_MAX - loglen)
+			{
+			    debug(DEBUG_APPMSG1, "WARNING: maximum log length exceeded, truncating log");
+			    len = LOG_STR_MAX - loglen;
+			    buff[len - 1] = '\n';
+			}
+
+			debug(DEBUG_STATUS, "appending %s to log", buff);
+			memcpy(logbuff + loglen, buff, len);
+			loglen += len;
+			logbuff[loglen] = 0;
+			have_log = 1;
+		    }
+		}
+		else 
+		{
+		    debug(DEBUG_STATUS, "ignoring unhandled info %s", buff);
+		}
+	    }
+
+	    break;
+	}
+    }
+
+    if (state == NEED_SYMS)
+    {
+	debug(DEBUG_APPERROR, "Error: 'symbolic names' not found in log output.");
+	debug(DEBUG_APPERROR, "       Perhaps you should try running with --norc");
+	exit(1);
+    }
+
+    /* a complete log always ends on a file boundary */
+    if (state != NEED_FILE)
+    {
+	debug(DEBUG_APPERROR, "Error: Log file parsing error. (%d)  Use -v to debug", state);
+	exit(1);
+    }
+    
+    if (test_log_file)
+    {
+	fclose(cvsfp);
+    }
+    else if (use_direct)
+    {
+	cvs_rlog_close(cvs_direct_ctx);
+    }
+    else
+    {
+	if (pclose(cvsfp) < 0)
+	{
+	    debug(DEBUG_APPERROR, "cvs rlog command exited with error. aborting");
+	    exit(1);
+	}
+    }
+}
+
+/*
+ * Emit the help text on the DEBUG_APPERROR channel.  When str1 is not
+ * NULL a "bad usage" line built from str1 and str2 is emitted first.
+ * Always returns -1, so callers can write 'return usage(...)'.
+ */
+static int usage(const char * str1, const char * str2)
+{
+    /* one entry per emitted line, in output order */
+    static const char * const help_lines[] = {
+	"Usage: cvsps [-h] [-x] [-u] [-z <fuzz>] [-g] [-s <range>[,<range>]]  ",
+	"             [-a <author>] [-f <file>] [-d <date1> [-d <date2>]] ",
+	"             [-b <branch>]  [-l <regex>] [-r <tag> [-r <tag>]] ",
+	"             [-p <directory>] [-v] [-t] [--norc] [--summary-first]",
+	"             [--test-log <captured cvs log file>] [--bkcvs]",
+	"             [--no-rlog] [--diff-opts <option string>] [--cvs-direct]",
+	"             [--debuglvl <bitmask>] [-Z <compression>] [--root <cvsroot>]",
+	"             [-q] [-A] [<repository>]",
+	"",
+	"Where:",
+	"  -h display this informative message",
+	"  -x ignore (and rebuild) cvsps.cache file",
+	"  -u update cvsps.cache file",
+	"  -z <fuzz> set the timestamp fuzz factor for identifying patch sets",
+	"  -g generate diffs of the selected patch sets",
+	"  -s <patch set>[-[<patch set>]][,<patch set>...] restrict patch sets by id",
+	"  -a <author> restrict output to patch sets created by author",
+	"  -f <file> restrict output to patch sets involving file",
+	"  -d <date1> -d <date2> if just one date specified, show",
+	"     revisions newer than date1.  If two dates specified,",
+	"     show revisions between two dates.",
+	"  -b <branch> restrict output to patch sets affecting history of branch",
+	"  -l <regex> restrict output to patch sets matching <regex> in log message",
+	"  -r <tag1> -r <tag2> if just one tag specified, show",
+	"     revisions since tag1. If two tags specified, show",
+	"     revisions between the two tags.",
+	"  -p <directory> output patch sets to individual files in <directory>",
+	"  -v show very verbose parsing messages",
+	"  -t show some brief memory usage statistics",
+	"  --norc when invoking cvs, ignore the .cvsrc file",
+	"  --summary-first when multiple patch sets are shown, put all summaries first",
+	"  --test-log <captured cvs log> supply a captured cvs log for testing",
+	"  --diff-opts <option string> supply special set of options to diff",
+	"  --bkcvs special hack for parsing the BK -> CVS log format",
+	"  --no-rlog disable rlog (it's faulty in some setups)",
+	"  --cvs-direct (--no-cvs-direct) enable (disable) built-in cvs client code",
+	"  --debuglvl <bitmask> enable various debug channels.",
+	"  -Z <compression> A value 1-9 which specifies amount of compression",
+	"  --root <cvsroot> specify cvsroot.  overrides env. and working directory (cvs-direct only)",
+	"  -q be quiet about warnings",
+	"  -A track and report branch ancestry",
+	"  <repository> apply cvsps to repository.  overrides working directory"
+    };
+    size_t idx;
+
+    if (str1 != NULL)
+	debug(DEBUG_APPERROR, "\nbad usage: %s %s\n", str1, str2);
+
+    for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
+	debug(DEBUG_APPERROR, "%s", help_lines[idx]);
+
+    debug(DEBUG_APPERROR, "\ncvsps version %s\n", VERSION);
+
+    return -1;
+}
+
+/*
+ * Parse an argument vector (the real command line, or a one-option
+ * vector built by parse_rc) into the option globals.  argv[0] is
+ * skipped.  The first argument not starting with '-' that is not
+ * consumed by an option is taken as the repository path.
+ *
+ * String options keep pointers into argv, so the caller must keep the
+ * argv strings alive; -s additionally modifies its argument in place.
+ *
+ * Returns 0 on success, -1 (after printing usage) on any error, and
+ * also -1 for -h.
+ */
+static int parse_args(int argc, char *argv[])
+{
+    int i = 1;
+    while (i < argc)
+    {
+	if (strcmp(argv[i], "-z") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -z missing", "");
+
+	    timestamp_fuzz_factor = atoi(argv[i++]);
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-g") == 0)
+	{
+	    do_diff = 1;
+	    i++;
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-s") == 0)
+	{
+	    PatchSetRange * range;
+	    char * min_str, * max_str;
+
+	    if (++i >= argc)
+		return usage("argument to -s missing", "");
+
+	    /* strtok yields NULL when the argument holds no range at
+	     * all (empty, or only commas)
+	     */
+	    min_str = strtok(argv[i++], ",");
+	    if (!min_str)
+		return usage("argument to -s missing", "");
+
+	    do
+	    {
+		range = create_patch_set_range();
+
+		/* "N" is a single id, "N-M" a closed range, "N-" is open ended */
+		max_str = strrchr(min_str, '-');
+		if (max_str)
+		    *max_str++ = '\0';
+		else
+		    max_str = min_str;
+
+		range->min_counter = atoi(min_str);
+
+		if (*max_str)
+		    range->max_counter = atoi(max_str);
+		else
+		    range->max_counter = INT_MAX;
+
+		/* append, keeping the ranges in command line order */
+		list_add(&range->link, show_patch_set_ranges.prev);
+	    }
+	    while ((min_str = strtok(NULL, ",")));
+
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-a") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -a missing", "");
+
+	    restrict_author = argv[i++];
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-l") == 0)
+	{
+	    int err;
+
+	    if (++i >= argc)
+		return usage("argument to -l missing", "");
+
+	    if ((err = regcomp(&restrict_log, argv[i++], REG_EXTENDED|REG_NOSUB)) != 0)
+	    {
+		char errbuf[256];
+		regerror(err, &restrict_log, errbuf, 256);
+		return usage("bad regex to -l", errbuf);
+	    }
+
+	    have_restrict_log = 1;
+
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-f") == 0)
+	{
+	    int err;
+
+	    if (++i >= argc)
+		return usage("argument to -f missing", "");
+
+	    if ((err = regcomp(&restrict_file, argv[i++], REG_EXTENDED|REG_NOSUB)) != 0)
+	    {
+		char errbuf[256];
+		regerror(err, &restrict_file, errbuf, 256);
+		return usage("bad regex to -f", errbuf);
+	    }
+
+	    have_restrict_file = 1;
+
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-d") == 0)
+	{
+	    time_t *pt;
+
+	    if (++i >= argc)
+		return usage("argument to -d missing", "");
+
+	    /* the first -d sets the start date, any later one the end date */
+	    pt = (restrict_date_start == 0) ? &restrict_date_start : &restrict_date_end;
+	    convert_date(pt, argv[i++]);
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-r") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -r missing", "");
+
+	    /* the first -r sets the start tag, any later one the end tag */
+	    if (restrict_tag_start)
+		restrict_tag_end = argv[i];
+	    else
+		restrict_tag_start = argv[i];
+
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-u") == 0)
+	{
+	    update_cache = 1;
+	    i++;
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-x") == 0)
+	{
+	    ignore_cache = 1;
+	    update_cache = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-b") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -b missing", "");
+
+	    restrict_branch = argv[i++];
+	    /* Warn if the user tries to use TRUNK. Should eventually
+	     * go away as TRUNK may be a valid branch within CVS
+	     */
+	    if (strcmp(restrict_branch, "TRUNK") == 0)
+		debug(DEBUG_APPMSG1, "WARNING: The HEAD branch of CVS is called HEAD, not TRUNK");
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-p") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -p missing", "");
+	    
+	    patch_set_dir = argv[i++];
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-v") == 0)
+	{
+	    debuglvl = ~0;
+	    i++;
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "-t") == 0)
+	{
+	    statistics = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--summary-first") == 0)
+	{
+	    summary_first = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-h") == 0)
+	    return usage(NULL, NULL);
+
+	/* see special handling of --norc in main */
+	if (strcmp(argv[i], "--norc") == 0)
+	{
+	    norc = "-f";
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--test-log") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to --test-log missing", "");
+
+	    test_log_file = argv[i++];
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--diff-opts") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to --diff-opts missing", "");
+
+	    /* allow diff_opts to be turned off by making empty string
+	     * into NULL
+	     */
+	    if (!strlen(argv[i]))
+		diff_opts = NULL;
+	    else
+		diff_opts = argv[i];
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--bkcvs") == 0)
+	{
+	    bkcvs = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--no-rlog") == 0)
+	{
+	    no_rlog = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--cvs-direct") == 0)
+	{
+	    cvs_direct = 1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--no-cvs-direct") == 0)
+	{
+	    cvs_direct = 0;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "--debuglvl") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to --debuglvl missing", "");
+
+	    debuglvl = atoi(argv[i++]);
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-Z") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to -Z missing", "");
+
+	    compress = atoi(argv[i++]);
+
+	    if (compress < 0 || compress > 9)
+		return usage("-Z level must be between 1 and 9 inclusive (0 disables compression)", argv[i-1]);
+
+	    if (compress == 0)
+		compress_arg[0] = 0;
+	    else
+		snprintf(compress_arg, 8, "-z%d", compress);
+	    continue;
+	}
+	
+	if (strcmp(argv[i], "--root") == 0)
+	{
+	    if (++i >= argc)
+		return usage("argument to --root missing", "");
+
+	    /* root_path is a fixed PATH_MAX buffer */
+	    if (strlen(argv[i]) >= PATH_MAX)
+		return usage("argument to --root too long", "");
+
+	    strcpy(root_path, argv[i++]);
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-q") == 0)
+	{
+	    debuglvl &= ~DEBUG_APPMSG1;
+	    i++;
+	    continue;
+	}
+
+	if (strcmp(argv[i], "-A") == 0)
+	{
+	    track_branch_ancestry = 1;
+	    i++;
+	    continue;
+	}
+
+	if (argv[i][0] == '-')
+	    return usage("invalid argument", argv[i]);
+	
+	/* anything else is the repository; repository_path is a fixed
+	 * PATH_MAX buffer
+	 */
+	if (strlen(argv[i]) >= PATH_MAX)
+	    return usage("repository path too long", "");
+
+	strcpy(repository_path, argv[i++]);
+    }
+
+    return 0;
+}
+
+/*
+ * Read the file 'cvspsrc' in the directory returned by get_cvsps_dir()
+ * and hand each line to parse_args() as if it were a command line of
+ * its own.  A line holds one option; everything after the first space
+ * is passed as that option's single parameter.
+ *
+ * A missing or unreadable rc file is not an error.
+ *
+ * Returns 0 on success, -1 as soon as parse_args() rejects a line.
+ */
+static int parse_rc()
+{
+    char rcfile[PATH_MAX];
+    FILE * fp;
+    int retval = 0;
+
+    snprintf(rcfile, PATH_MAX, "%s/cvspsrc", get_cvsps_dir());
+    if ((fp = fopen(rcfile, "r")))
+    {
+	char buff[BUFSIZ];
+	while (fgets(buff, BUFSIZ, fp))
+	{
+	    char * argv[3], *p;
+	    int argc = 2;
+
+	    chop(buff);
+
+	    argv[0] = "garbage";
+	    argv[2] = NULL;
+
+	    p = strchr(buff, ' ');
+	    if (p)
+	    {
+		*p++ = '\0';
+		argv[2] = xstrdup(p);
+		argc = 3;
+	    }
+
+	    /* the copies are deliberately never freed: parse_args
+	     * stores pointers to its arguments in the option globals
+	     */
+	    argv[1] = xstrdup(buff);
+
+	    if (parse_args(argc, argv) < 0)
+	    {
+		retval = -1;
+		break;
+	    }
+	}
+
+	/* close on the error path too */
+	fclose(fp);
+    }
+
+    return retval;
+}
+
+/*
+ * Establish root_path, repository_path and strip_path.
+ *
+ * root_path (the CVSROOT) is taken from, in order: the command line,
+ * the file CVS/Root, the environment variable CVSROOT.
+ * repository_path is taken from the command line or the file
+ * CVS/Repository; a leading copy of the CVSROOT directory is removed
+ * from it.  strip_path is "<root directory>/<repository>/".
+ *
+ * Exits the process when a path cannot be determined or is too long.
+ */
+static void init_paths()
+{
+    FILE * fp;
+    char * p;
+    int len;
+
+    /* determine the CVSROOT. precedence:
+     * 1) command line
+     * 2) working directory (if present)
+     * 3) environment variable CVSROOT
+     */
+    if (!root_path[0])
+    {
+	if (!(fp = fopen("CVS/Root", "r")))
+	{
+	    const char * e;
+
+	    debug(DEBUG_STATUS, "Can't open CVS/Root");
+	    e = getenv("CVSROOT");
+
+	    if (!e)
+	    {
+		debug(DEBUG_APPERROR, "cannot determine CVSROOT");
+		exit(1);
+	    }
+
+	    /* the environment is not under our control; root_path is
+	     * a fixed PATH_MAX buffer
+	     */
+	    if (strlen(e) >= PATH_MAX)
+	    {
+		debug(DEBUG_APPERROR, "CVSROOT too long");
+		exit(1);
+	    }
+	    
+	    strcpy(root_path, e);
+	}
+	else
+	{
+	    if (!fgets(root_path, PATH_MAX, fp))
+	    {
+		debug(DEBUG_APPERROR, "Error reading CVSROOT");
+		exit(1);
+	    }
+	    
+	    fclose(fp);
+	    
+	    /* chop the lf (only if there is one) and optional trailing
+	     * '/', never indexing before the start of the buffer
+	     */
+	    len = strlen(root_path);
+	    if (len > 0 && root_path[len - 1] == '\n')
+		root_path[--len] = 0;
+	    if (len > 0 && root_path[len - 1] == '/')
+		root_path[--len] = 0;
+	}
+    }
+
+    /* Determine the repository path, precedence:
+     * 1) command line
+     * 2) working directory
+     */
+      
+    if (!repository_path[0])
+    {
+	if (!(fp = fopen("CVS/Repository", "r")))
+	{
+	    debug(DEBUG_SYSERROR, "Can't open CVS/Repository");
+	    exit(1);
+	}
+	
+	if (!fgets(repository_path, PATH_MAX, fp))
+	{
+	    debug(DEBUG_APPERROR, "Error reading repository path");
+	    exit(1);
+	}
+	
+	chop(repository_path);
+	fclose(fp);
+    }
+
+    /* get the path portion of the root */
+    p = strrchr(root_path, ':');
+
+    if (!p)
+	p = root_path;
+    else 
+	p++;
+
+    /* some CVS have the CVSROOT string as part of the repository
+     * string (initial substring).  remove it, but only when it is a
+     * whole leading directory: the character after it must be '/'
+     * (or the end of the string, leaving an empty repository).
+     */
+    len = strlen(p);
+
+    if (len > 0 && strncmp(p, repository_path, len) == 0)
+    {
+	if (repository_path[len] == '/')
+	{
+	    int rlen = strlen(repository_path + len + 1);
+	    memmove(repository_path, repository_path + len + 1, rlen + 1);
+	}
+	else if (repository_path[len] == 0)
+	{
+	    repository_path[0] = 0;
+	}
+    }
+
+    /* the 'strip_path' will be used whenever the CVS server gives us a
+     * path to an 'rcs file'.  the strip_path portion of these paths is
+     * stripped off, leaving us with the working file.
+     *
+     * NOTE: because of some bizarre 'feature' in cvs, when 'rlog' is used
+     * (instead of log) it gives the 'real' RCS file path, which can be different
+     * from the 'nominal' repository path because of symlinks in the server and 
+     * the like.  See also the 'parse_file' routine
+     *
+     * An empty repository contributes no path component, so that no
+     * "//" ends up in strip_path.
+     */
+    if (repository_path[0])
+	strip_path_len = snprintf(strip_path, PATH_MAX, "%s/%s/", p, repository_path);
+    else
+	strip_path_len = snprintf(strip_path, PATH_MAX, "%s/", p);
+
+    if (strip_path_len < 0 || strip_path_len >= PATH_MAX)
+    {
+	debug(DEBUG_APPERROR, "strip_path overflow");
+	exit(1);
+    }
+
+    debug(DEBUG_STATUS, "strip_path: %s", strip_path);
+}
+
+/*
+ * Turn an "RCS file: <path>,v" log line into the CvsFile object for the
+ * corresponding working file, creating and registering the object in
+ * file_hash on first sight.
+ *
+ * The working file name is the RCS path minus the trailing ",v", minus
+ * the leading strip_path, minus an "Attic" directory directly above
+ * the file.  If the first file seen does not start with strip_path, an
+ * alternate strip_path is derived from the last occurrence of
+ * repository_path in the file name.
+ *
+ * Returns NULL (after a warning) when the line is malformed or too
+ * long, or when the path does not match strip_path.
+ */
+static CvsFile * parse_file(const char * buff)
+{
+    CvsFile * retval;
+    char fn[PATH_MAX];
+    int len = strlen(buff);
+    char * p;
+
+    /* once a single file has been parsed ok we set this */
+    static int path_ok;
+    
+    /* the path follows the 10 characters of "RCS file: "; chop the
+     * ",v" string and the "LF" as well.  At least one character of
+     * path must remain and it must fit into fn.
+     */
+    len -= 10 + 3;
+    if (len <= 0 || len >= PATH_MAX)
+    {
+	debug(DEBUG_APPMSG1, "WARNING: malformed or overlong RCS file line. ignoring");
+	return NULL;
+    }
+
+    memcpy(fn, buff + 10, len);
+    fn[len] = 0;
+    
+    if (strncmp(fn, strip_path, strip_path_len) != 0)
+    {
+	/* if the very first file fails the strip path,
+	 * then maybe we need to try for an alternate.
+	 * this will happen if symlinks are being used
+	 * on the server.  our best guess is to look
+	 * for the final occurance of the repository
+	 * path in the filename and use that.  it should work
+	 * except in the case where:
+	 * 1) the project has no files in the top-level directory
+	 * 2) the project has a directory with the same name as the project
+	 * 3) that directory sorts alphabetically before any other directory
+	 * in which case, you are scr**ed
+	 *
+	 * (an empty repository_path matches everywhere, so there is
+	 * nothing to search for in that case)
+	 */
+	if (!path_ok && repository_path[0])
+	{
+	    char * scan = fn, *lastp = NULL;
+
+	    while ((scan = strstr(scan, repository_path)))
+		lastp = scan++;
+      
+	    if (lastp)
+	    {
+		/* prefix up to and including the character that
+		 * follows the repository path
+		 */
+		int alt_len = (lastp - fn) + strlen(repository_path) + 1;
+
+		/* something must be left of the name after stripping */
+		if (alt_len < len)
+		{
+		    memcpy(strip_path, fn, alt_len);
+		    strip_path_len = alt_len;
+		    strip_path[strip_path_len] = 0;
+		    debug(DEBUG_APPMSG1, "NOTICE: used alternate strip path %s", strip_path);
+		    goto ok;
+		}
+	    }
+	}
+
+	/* FIXME: a subdirectory may have a different Repository path
+	 * than it's parent.  we'll fail the above test since strip_path
+	 * is global for the entire checked out tree (recursively).
+	 *
+	 * For now just ignore such files
+	 */
+	debug(DEBUG_APPMSG1, "WARNING: file %s doesn't match strip_path %s. ignoring", 
+	      fn, strip_path);
+	return NULL;
+    }
+
+ ok:
+    path_ok = 1;
+
+    /* remove from beginning the 'strip_path' string */
+    len -= strip_path_len;
+    memmove(fn, fn + strip_path_len, len);
+    fn[len] = 0;
+
+    /* check if file is in the 'Attic/' and remove it.  The directory
+     * must be named exactly "Attic", not merely end in those letters.
+     */
+    if ((p = strrchr(fn, '/')) &&
+	p - fn >= 5 && strncmp(p - 5, "Attic", 5) == 0 &&
+	(p - fn == 5 || p[-6] == '/'))
+    {
+	memmove(p - 5, p + 1, len - (p - fn + 1));
+	len -= 6;
+	fn[len] = 0;
+    }
+
+    debug(DEBUG_STATUS, "stripped filename %s", fn);
+
+    retval = (CvsFile*)get_hash_object(file_hash, fn);
+
+    if (!retval)
+    {
+	if ((retval = create_cvsfile()))
+	{
+	    /* the hash keeps a pointer to this copy (HT_NO_KEYCOPY) */
+	    retval->filename = xstrdup(fn);
+	    put_hash_object_ex(file_hash, retval->filename, retval, HT_NO_KEYCOPY, NULL, NULL);
+	}
+	else
+	{
+	    debug(DEBUG_SYSERROR, "malloc failed");
+	    exit(1);
+	}
+
+	debug(DEBUG_STATUS, "new file: %s", retval->filename);
+    }
+    else
+    {
+	debug(DEBUG_STATUS, "existing file: %s", retval->filename);
+    }
+
+    return retval;
+}
+
+/*
+ * Find the PatchSet matching the given date, log message, author and
+ * branch in ps_tree, or insert a new one.
+ *
+ * psm, when not NULL, is temporarily linked into the candidate so the
+ * comparison function can look at the member; it is unlinked again
+ * before returning and is NOT added to the result.
+ *
+ * When an existing patch set is found its date becomes the minimum of
+ * the two and its min_date/max_date window is widened by the fuzz
+ * factor.  A new patch set is also put on the all_patch_sets list.
+ *
+ * Returns the patch set, or NULL when memory runs out.
+ */
+PatchSet * get_patch_set(const char * dte, const char * log, const char * author, const char * branch, PatchSetMember * psm)
+{
+    PatchSet * retval = NULL, **find = NULL;
+    int (*cmp1)(const void *,const void*) = (bkcvs) ? compare_patch_sets_bk : compare_patch_sets;
+
+    if (!(retval = create_patch_set()))
+    {
+	debug(DEBUG_SYSERROR, "malloc failed for PatchSet");
+	return NULL;
+    }
+
+    convert_date(&retval->date, dte);
+    retval->author = get_string(author);
+    retval->descr = xstrdup(log);
+    retval->branch = get_string(branch);
+    
+    /* we are looking for a patchset suitable for holding this member.
+     * this means two things:
+     * 1) a patchset already containing an entry for the file is no good
+     * 2) for two patchsets with same exact date/time, if they reference 
+     *    the same file, we can properly order them.  this primarily solves
+     *    the 'cvs import' problem and may not have general usefulness
+     *    because it would only work if the first member we consider is
+     *    present in the existing ps.
+     */
+    if (psm)
+	list_add(&psm->link, retval->members.prev);
+
+    find = (PatchSet**)tsearch(retval, &ps_tree, cmp1);
+
+    if (psm)
+	list_del(&psm->link);
+
+    /* tsearch returns NULL when it cannot allocate a tree node */
+    if (!find)
+    {
+	debug(DEBUG_SYSERROR, "malloc failed for PatchSet");
+	free(retval->descr);
+	free(retval);
+	return NULL;
+    }
+
+    if (*find != retval)
+    {
+	debug(DEBUG_STATUS, "found existing patch set");
+
+	/* in bkcvs mode the log carrying the BKrev: line wins */
+	if (bkcvs && strstr(retval->descr, "BKrev:"))
+	{
+	    free((*find)->descr);
+	    (*find)->descr = retval->descr;
+	}
+	else
+	{
+	    free(retval->descr);
+	}
+
+	/* keep the minimum date of any member as the 'actual' date */
+	if (retval->date < (*find)->date)
+	    (*find)->date = retval->date;
+
+	/* expand the min_date/max_date window to help finding other members .
+	 * open the window by an extra margin determined by the fuzz factor 
+	 */
+	if (retval->date - timestamp_fuzz_factor < (*find)->min_date)
+	{
+	    (*find)->min_date = retval->date - timestamp_fuzz_factor;
+	    //debug(DEBUG_APPMSG1, "WARNING: non-increasing dates in encountered patchset members");
+	}
+	else if (retval->date + timestamp_fuzz_factor > (*find)->max_date)
+	    (*find)->max_date = retval->date + timestamp_fuzz_factor;
+
+	/* the candidate was only a search key */
+	free(retval);
+	retval = *find;
+    }
+    else
+    {
+	debug(DEBUG_STATUS, "new patch set!");
+	debug(DEBUG_STATUS, "%s %s %s", retval->author, retval->descr, dte);
+
+	retval->min_date = retval->date - timestamp_fuzz_factor;
+	retval->max_date = retval->date + timestamp_fuzz_factor;
+
+	list_add(&retval->all_link, &all_patch_sets);
+    }
+
+
+    return retval;
+}
+
+/*
+ * Copy rev into buff and cut the copy at its last '.', so buff holds
+ * everything before the final component.  When leaf is non-NULL it receives
+ * the numeric value (atoi) of the text after that last '.'.
+ *
+ * Returns 1 if a '.' was found, 0 otherwise (buff then holds an unmodified
+ * copy of rev and *leaf is left untouched).  buff may be the same pointer
+ * as rev.  The caller must supply a buff large enough for rev.
+ */
+static int get_branch_ext(char * buff, const char * rev, int * leaf)
+{
+    char * last_dot;
+    size_t rev_len = strlen(rev);
+
+    /* memmove (terminator included) keeps get_branch_ext(buff, buff, ...) intact */
+    memmove(buff, rev, rev_len + 1);
+
+    last_dot = strrchr(buff, '.');
+    if (last_dot == NULL)
+	return 0;
+
+    *last_dot = 0;
+
+    if (leaf != NULL)
+	*leaf = atoi(last_dot + 1);
+
+    return 1;
+}
+
+/* Form of get_branch_ext() for callers that do not need the leaf number. */
+static int get_branch(char * buff, const char * rev)
+{
+    int * no_leaf = NULL;
+
+    return get_branch_ext(buff, rev, no_leaf);
+}
+
+/*
+ * The goal of this function is to determine what revision to assign to
+ * the psm->pre_rev field.  Usually the log file is strictly reverse
+ * chronological, so rev is the direct ancestor of psm.
+ *
+ * This all breaks down at branch points, however.  When rev is NULL, or rev
+ * is on a different branch than psm->post_rev, psm is the oldest revision
+ * on its branch (or the oldest revision overall).  In the former case the
+ * predecessor is derived by chopping two components off post_rev with
+ * get_branch(); in the latter the member is marked INITIAL.
+ *
+ * FIXME:
+ * There's also a weird case.  It's possible to just re-number
+ * a revision to any future revision, i.e. rev 1.9 becomes 2.0.
+ * It's not widely used.  In those cases of discontinuity,
+ * we end up stamping the predecessor as 'INITIAL' incorrectly.
+ */
+static void assign_pre_revision(PatchSetMember * psm, CvsFileRevision * rev)
+{
+    char cand_branch[REV_STR_MAX], own_branch[REV_STR_MAX];
+    int have_own_branch;
+
+    if (!psm)
+	return;
+
+    if (rev)
+    {
+	/*
+	 * is this candidate for 'pre' on the same branch as our 'post'?
+	 * this is the normal case
+	 */
+	if (!get_branch(cand_branch, rev->rev))
+	{
+	    debug(DEBUG_APPERROR, "get_branch malformed input (1)");
+	    return;
+	}
+
+	if (!get_branch(own_branch, psm->post_rev->rev))
+	{
+	    debug(DEBUG_APPERROR, "get_branch malformed input (2)");
+	    return;
+	}
+
+	if (strcmp(cand_branch, own_branch) == 0)
+	{
+	    psm->pre_rev = rev;
+	    rev->pre_psm = psm;
+	    return;
+	}
+
+	/* branches don't match: fall through and derive the branch point */
+	have_own_branch = 1;
+    }
+    else
+    {
+	/* psm was the last rev. seen for the file */
+	have_own_branch = get_branch(own_branch, psm->post_rev->rev);
+    }
+
+    /* chop another component off: what remains should be the branch point */
+    if (have_own_branch && get_branch(cand_branch, own_branch))
+    {
+	psm->pre_rev = file_get_revision(psm->file, cand_branch);
+	list_add(&psm->post_rev->link, &psm->pre_rev->branch_children);
+    }
+    else
+    {
+	set_psm_initial(psm);
+    }
+}
+
+/*
+ * Apply every output restriction (tag range, date range, author, log regex,
+ * file regex, branch, explicit -s ranges) to ps and, if it passes them all,
+ * print it and/or diff it.  When patch_set_dir is set, stdout is first
+ * re-pointed at "<patch_set_dir>/<psid>.patch".
+ */
+static void check_print_patch_set(PatchSet * ps)
+{
+    int funk_forces_show;
+
+    if (ps->psid < 0)
+	return;
+
+    /* the funk_factor overrides the restrict_tag_start and end */
+    funk_forces_show = (ps->funk_factor == FNK_SHOW_SOME || ps->funk_factor == FNK_SHOW_ALL);
+
+    if (!funk_forces_show)
+    {
+	if (ps->funk_factor == FNK_HIDE_ALL)
+	    return;
+
+	if (ps->psid == restrict_tag_ps_start)
+	    debug(DEBUG_STATUS, "PatchSet %d matches tag %s.", ps->psid, restrict_tag_start);
+
+	if (ps->psid <= restrict_tag_ps_start)
+	    return;
+
+	if (ps->psid > restrict_tag_ps_end)
+	    return;
+    }
+
+    /* the date window only applies when a start date was given */
+    if (restrict_date_start > 0)
+    {
+	if (ps->date < restrict_date_start)
+	    return;
+
+	if (restrict_date_end > 0 && ps->date > restrict_date_end)
+	    return;
+    }
+
+    if (restrict_author && strcmp(restrict_author, ps->author) != 0)
+	return;
+
+    if (have_restrict_log && regexec(&restrict_log, ps->descr, 0, NULL, 0) != 0)
+	return;
+
+    if (have_restrict_file && !patch_set_member_regex(ps, &restrict_file))
+	return;
+
+    if (restrict_branch && !patch_set_affects_branch(ps, restrict_branch))
+	return;
+
+    /* a non-empty range list means psid must fall inside one of the ranges */
+    if (!list_empty(&show_patch_set_ranges))
+    {
+	struct list_head * pos;
+	int in_range = 0;
+
+	for (pos = show_patch_set_ranges.next; pos != &show_patch_set_ranges; pos = pos->next)
+	{
+	    PatchSetRange * range = list_entry(pos, PatchSetRange, link);
+
+	    if (ps->psid >= range->min_counter && ps->psid <= range->max_counter)
+	    {
+		in_range = 1;
+		break;
+	    }
+	}
+
+	if (!in_range)
+	    return;
+    }
+
+    if (patch_set_dir)
+    {
+	char path[PATH_MAX];
+
+	snprintf(path, PATH_MAX, "%s/%d.patch", patch_set_dir, ps->psid);
+
+	/* swap fd 1 for the patch file: close it, then open() re-creates it */
+	fflush(stdout);
+	close(1);
+	if (open(path, O_WRONLY|O_TRUNC|O_CREAT, 0666) < 0)
+	{
+	    debug(DEBUG_SYSERROR, "can't open patch file %s", path);
+	    exit(1);
+	}
+
+	fprintf(stderr, "Directing PatchSet %d to file %s\n", ps->psid, path);
+    }
+
+    /*
+     * If the summary_first option is in effect, there will be
+     * two passes through the tree: the first with summary_first == 1,
+     * the second with summary_first == 2.  If the option is not
+     * in effect, there will be one pass with summary_first == 0.
+     *
+     * When the -s option is in effect, the show_patch_set_ranges
+     * list will be non-empty.
+     */
+    if (summary_first <= 1)
+	print_patch_set(ps);
+    if (do_diff && summary_first != 1)
+	do_cvs_diff(ps);
+
+    fflush(stdout);
+}
+
+/*
+ * Write the human-readable summary of ps to stdout: header fields
+ * (id, local-time date, author, branch, optional ancestor branch, tag, log)
+ * followed by one line per member of the form file:pre->post.
+ */
+static void print_patch_set(PatchSet * ps)
+{
+    struct tm * when = localtime(&ps->date);
+    const char * tag_name = ps->tag ? ps->tag : "(none)";
+    struct list_head * pos;
+
+    /* this '---...' is different from the 28 hyphens that separate cvs log output */
+    printf("---------------------\n");
+    printf("PatchSet %d %s\n", ps->psid, fnk_descr[ps->funk_factor]);
+    printf("Date: %d/%02d/%02d %02d:%02d:%02d\n", 
+	   1900 + when->tm_year, when->tm_mon + 1, when->tm_mday, 
+	   when->tm_hour, when->tm_min, when->tm_sec);
+    printf("Author: %s\n", ps->author);
+    printf("Branch: %s\n", ps->branch);
+    if (ps->ancestor_branch)
+	printf("Ancestor branch: %s\n", ps->ancestor_branch);
+    printf("Tag: %s %s\n", tag_name, tag_flag_descr[ps->tag_flags]);
+    printf("Log:\n%s\n", ps->descr);
+    printf("Members: \n");
+
+    for (pos = ps->members.next; pos != &ps->members; pos = pos->next)
+    {
+	PatchSetMember * psm = list_entry(pos, PatchSetMember, link);
+	const char * note = "";
+
+	/* in a split patchset, flag the members that fall outside the -r tags */
+	if (ps->funk_factor == FNK_SHOW_SOME && psm->bad_funk)
+	    note = "(BEFORE START TAG)";
+	else if (ps->funk_factor == FNK_HIDE_SOME && !psm->bad_funk)
+	    note = "(AFTER END TAG)";
+
+	printf("\t%s:%s->%s%s %s\n", 
+	       psm->file->filename, 
+	       psm->pre_rev ? psm->pre_rev->rev : "INITIAL", 
+	       psm->post_rev->rev, 
+	       psm->post_rev->dead ? "(DEAD)": "",
+	       note);
+    }
+
+    printf("\n");
+}
+
+/* Called while walking all the patchsets: assigns the next monotonic psid
+ * to ps and, when branch ancestry tracking is enabled, establishes the
+ * ancestry of non-HEAD branches.
+ */
+static void assign_patchset_id(PatchSet * ps)
+{
+    PatchSet * head_ps;
+
+    /* 'BRANCH ADD' patchsets are ignored: they never get a real id */
+    if (ps->branch_add)
+    {
+	ps->psid = -1;
+	return;
+    }
+
+    ps->psid = ++ps_counter;
+
+    if (!track_branch_ancestry || strcmp(ps->branch, "HEAD") == 0)
+	return;
+
+    /* the first patchset seen for a branch is recorded as that branch's head */
+    head_ps = (PatchSet*)get_hash_object(branch_heads, ps->branch);
+    if (!head_ps)
+    {
+	head_ps = ps;
+	put_hash_object(branch_heads, ps->branch, head_ps);
+    }
+
+    determine_branch_ancestor(ps, head_ps);
+}
+
+/*
+ * Compare two dotted revision strings component by component, numerically
+ * (so "1.10" sorts after "1.2").  If all shared components are equal, the
+ * revision with fewer components sorts first.
+ *
+ * Returns -1, 0 or 1.
+ *
+ * The strings are walked in place: atoi() stops at the first non-digit, so
+ * each '.' already terminates a component and no scratch copy (with its
+ * fixed-size buffer) is needed.
+ */
+static int compare_rev_strings(const char * cr1, const char * cr2)
+{
+    const char * s1 = cr1;
+    const char * s2 = cr2;
+
+    for (;;)
+    {
+	const char * dot1 = strchr(s1, '.');
+	const char * dot2 = strchr(s2, '.');
+	int n1 = atoi(s1);
+	int n2 = atoi(s2);
+
+	if (n1 != n2)
+	    return (n1 < n2) ? -1 : 1;
+
+	/* equal so far; whichever string ran out of components is smaller */
+	if (!dot1 || !dot2)
+	    return (dot1 ? 1 : 0) - (dot2 ? 1 : 0);
+
+	s1 = dot1 + 1;
+	s2 = dot2 + 1;
+    }
+}
+
+/*
+ * Order two patchsets by the first file they have in common: the result is
+ * compare_rev_strings() of that file's post_rev in ps1 versus ps2.
+ * Returns 0 when the patchsets share no file.
+ */
+static int compare_patch_sets_by_members(const PatchSet * ps1, const PatchSet * ps2)
+{
+    struct list_head * outer;
+    struct list_head * inner;
+
+    for (outer = ps1->members.next; outer != &ps1->members; outer = outer->next)
+    {
+	PatchSetMember * lhs = list_entry(outer, PatchSetMember, link);
+
+	for (inner = ps2->members.next; inner != &ps2->members; inner = inner->next)
+	{
+	    PatchSetMember * rhs = list_entry(inner, PatchSetMember, link);
+
+	    if (lhs->file != rhs->file)
+		continue;
+
+	    /* first shared file decides, even if its revisions compare equal */
+	    return compare_rev_strings(lhs->post_rev->rev, rhs->post_rev->rev);
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * tsearch()-style comparison used in bkcvs mode: patchsets are ordered (and
+ * considered equal) purely by date.  Returns -1, 0 or 1.
+ *
+ * The dates are compared directly rather than through a subtraction stored
+ * in a long, which could truncate or overflow where time_t is wider than
+ * long.
+ */
+static int compare_patch_sets_bk(const void * v_ps1, const void * v_ps2)
+{
+    const PatchSet * ps1 = (const PatchSet *)v_ps1;
+    const PatchSet * ps2 = (const PatchSet *)v_ps2;
+
+    if (ps1->date < ps2->date)
+	return -1;
+    if (ps1->date > ps2->date)
+	return 1;
+
+    return 0;
+}
+
+/*
+ * tsearch()-style comparison used when looking up / inserting a patchset.
+ *
+ * We order by (author, descr, branch, members, date), but because of the
+ * fuzz factor we treat times within a certain distance as equal IFF the
+ * author, descr and branch match and the member comparison is 0.
+ *
+ * Exactly one of the two arguments is expected to be a fresh candidate
+ * (min_date still 0); the other carries the min_date/max_date window.
+ * Exits if neither argument is a fresh candidate.
+ */
+static int compare_patch_sets(const void * v_ps1, const void * v_ps2)
+{
+    const PatchSet * ps1 = (const PatchSet *)v_ps1;
+    const PatchSet * ps2 = (const PatchSet *)v_ps2;
+    int ret;
+    time_t d, min, max;
+
+    ret = strcmp(ps1->author, ps2->author);
+    if (ret)
+	return ret;
+
+    ret = strcmp(ps1->descr, ps2->descr);
+    if (ret)
+	return ret;
+
+    ret = strcmp(ps1->branch, ps2->branch);
+    if (ret)
+	return ret;
+
+    ret = compare_patch_sets_by_members(ps1, ps2);
+    if (ret)
+	return ret;
+
+    /* 
+     * one of ps1 or ps2 is new.  the other should have the min_date
+     * and max_date set to a window opened by the fuzz_factor
+     */
+    if (ps1->min_date == 0)
+    {
+	d = ps1->date;
+	min = ps2->min_date;
+	max = ps2->max_date;
+    } 
+    else if (ps2->min_date == 0)
+    {
+	d = ps2->date;
+	min = ps1->min_date;
+	max = ps1->max_date;
+    }
+    else
+    {
+	debug(DEBUG_APPERROR, "how can we have both patchsets pre-existing?");
+	exit(1);
+    }
+
+    /* strictly inside the window counts as the same patchset */
+    if (min < d && d < max)
+	return 0;
+
+    /* outside the window: order by date, compared directly so a wide time_t
+     * is never squeezed through a long.  Equal dates sort as 'greater',
+     * as they always have.
+     */
+    return (ps1->date < ps2->date) ? -1 : 1;
+}
+
+/* Adapter: compare two all_link list nodes by their owning PatchSets. */
+static int compare_patch_sets_bytime_list(struct list_head * l1, struct list_head * l2)
+{
+    const PatchSet * lhs = list_entry(l1, PatchSet, all_link);
+    const PatchSet * rhs = list_entry(l2, PatchSet, all_link);
+    int order = compare_patch_sets_bytime(lhs, rhs);
+
+    return order;
+}
+
+/*
+ * Total ordering of patchsets by (date, members, author, descr, branch).
+ *
+ * When doing a time-ordering of patchsets, we don't need to
+ * fuzzy-match the time.  We've already done fuzzy-matching so we
+ * know that insertions are unique at this point.
+ *
+ * Dates are compared directly rather than through a subtraction stored in
+ * a long, which could truncate or overflow where time_t is wider than long.
+ */
+static int compare_patch_sets_bytime(const PatchSet * ps1, const PatchSet * ps2)
+{
+    int ret;
+
+    if (ps1->date != ps2->date)
+	return (ps1->date < ps2->date) ? -1 : 1;
+
+    ret = compare_patch_sets_by_members(ps1, ps2);
+    if (ret)
+	return ret;
+
+    ret = strcmp(ps1->author, ps2->author);
+    if (ret)
+	return ret;
+
+    ret = strcmp(ps1->descr, ps2->descr);
+    if (ret)
+	return ret;
+
+    ret = strcmp(ps1->branch, ps2->branch);
+    return ret;
+}
+
+
+/*
+ * Heuristic test for a "key: value; ...;" line: the line must contain a ':'
+ * with no space before it, and its second-to-last character must be ';'
+ * (lines are expected to end in LF).  Returns 1 on match, 0 otherwise.
+ */
+static int is_revision_metadata(const char * buff)
+{
+    const char * colon = strchr(buff, ':');
+    const char * space;
+    size_t len;
+
+    if (colon == NULL)
+	return 0;
+
+    /* a space ahead of the first ':' means the line does not start with a key */
+    space = strchr(buff, ' ');
+    if (space != NULL && space < colon)
+	return 0;
+
+    /* lines have LF at end, so the ';' sits one before the last character */
+    len = strlen(buff);
+
+    return (len >= 2 && buff[len - 2] == ';') ? 1 : 0;
+}
+
+/*
+ * Return 1 if the filename of any member of ps matches the compiled
+ * regular expression reg, 0 otherwise.
+ *
+ * The match uses the reg argument; earlier code ignored it and always
+ * consulted the global restrict_file.
+ */
+static int patch_set_member_regex(PatchSet * ps, regex_t * reg)
+{
+    struct list_head * next = ps->members.next;
+
+    while (next != &ps->members)
+    {
+	PatchSetMember * psm = list_entry(next, PatchSetMember, link);
+	
+	if (regexec(reg, psm->file->filename, 0, NULL, 0) == 0)
+	    return 1;
+
+	next = next->next;
+    }
+
+    return 0;
+}
+
+/*
+ * Return 1 if any relevant member of ps has a post_rev that
+ * revision_affects_branch() reports for the named branch, 0 otherwise.
+ */
+static int patch_set_affects_branch(PatchSet * ps, const char * branch)
+{
+    struct list_head * pos = ps->members.next;
+
+    /*
+     * slight hack. if -r is specified, and this patchset
+     * is 'before' the tag, but is FNK_SHOW_SOME, only
+     * check if the 'after tag' revisions affect
+     * the branch.  this is especially important when
+     * the tag is a branch point.
+     */
+    const int skip_bad_funk = (ps->funk_factor == FNK_SHOW_SOME);
+
+    while (pos != &ps->members)
+    {
+	PatchSetMember * psm = list_entry(pos, PatchSetMember, link);
+
+	pos = pos->next;
+
+	if (skip_bad_funk && psm->bad_funk)
+	    continue;
+
+	if (revision_affects_branch(psm->post_rev, branch))
+	    return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Emit a diff on stdout for every member of ps.
+ *
+ * With a cvs_direct_ctx the diff is requested through cvs_rupdate() /
+ * cvs_diff(); otherwise a shell command line is built and run through
+ * my_system().  Members excluded by the patchset's funk_factor get a short
+ * placeholder instead of a diff.
+ *
+ * Exits if a command line does not fit its buffer or if the command
+ * reports failure.
+ */
+static void do_cvs_diff(PatchSet * ps)
+{
+    struct list_head * next;
+    const char * dtype;
+    const char * dopts;
+    const char * utype;
+    char use_rep_path[PATH_MAX];
+    char esc_use_rep_path[PATH_MAX];
+
+    fflush(stdout);
+    fflush(stderr);
+
+    /* 
+     * if cvs_direct is not in effect, and diff options are specified,
+     * then we have to use diff instead of rdiff and we'll get a -p0 
+     * diff (instead of -p1) [in a manner of speaking].  So to make sure
+     * that the add/remove diffs get generated likewise, we need to use
+     * 'update' instead of 'co' 
+     *
+     * cvs_direct will always use diff (not rdiff), but will also always
+     * generate -p1 diffs.
+     */
+    if (diff_opts == NULL) 
+    {
+	dopts = "-u";
+	dtype = "rdiff";
+	utype = "co";
+	/* bounded: repository_path is not guaranteed to fit in PATH_MAX */
+	snprintf(use_rep_path, sizeof(use_rep_path), "%s/", repository_path);
+	/* the rep_path may contain characters that the shell will barf on */
+	escape_filename(esc_use_rep_path, PATH_MAX, use_rep_path);
+    }
+    else
+    {
+	dopts = diff_opts;
+	dtype = "diff";
+	utype = "update";
+	use_rep_path[0] = 0;
+	esc_use_rep_path[0] = 0;
+    }
+
+    for (next = ps->members.next; next != &ps->members; next = next->next)
+    {
+	PatchSetMember * psm = list_entry(next, PatchSetMember, link);
+	char cmdbuff[PATH_MAX * 2+1];
+	char esc_file[PATH_MAX];
+	int ret, check_ret = 0;
+	int cmdlen = 0;
+
+	cmdbuff[0] = 0;
+	cmdbuff[PATH_MAX*2] = 0;
+
+	/* the filename may contain characters that the shell will barf on */
+	escape_filename(esc_file, PATH_MAX, psm->file->filename);
+
+	/*
+	 * Check the patchset funk. we may not want to diff this particular file 
+	 */
+	if (ps->funk_factor == FNK_SHOW_SOME && psm->bad_funk)
+	{
+	    printf("Index: %s\n", psm->file->filename);
+	    printf("===================================================================\n");
+	    printf("*** Member not diffed, before start tag\n");
+	    continue;
+	}
+	else if (ps->funk_factor == FNK_HIDE_SOME && !psm->bad_funk)
+	{
+	    printf("Index: %s\n", psm->file->filename);
+	    printf("===================================================================\n");
+	    printf("*** Member not diffed, after end tag\n");
+	    continue;
+	}
+
+	/* 
+	 * When creating diffs for INITIAL or DEAD revisions, we have to use 'cvs co'
+	 * or 'cvs update' to get the file, because cvs won't generate these diffs.
+	 * The problem is that this must be piped to diff, and so the resulting
+	 * diff doesn't contain the filename anywhere! (diff between - and /dev/null).
+	 * sed is used to replace the '-' with the filename. 
+	 *
+	 * It's possible for pre_rev to be a 'dead' revision. This happens when a file 
+	 * is added on a branch. post_rev will be dead for a remove.
+	 */
+	if (!psm->pre_rev || psm->pre_rev->dead || psm->post_rev->dead)
+	{
+	    int cr;
+	    const char * rev;
+
+	    /* cr == 1: file is being created; cr == 0: file is being removed */
+	    if (!psm->pre_rev || psm->pre_rev->dead)
+	    {
+		cr = 1;
+		rev = psm->post_rev->rev;
+	    }
+	    else
+	    {
+		cr = 0;
+		rev = psm->pre_rev->rev;
+	    }
+
+	    if (cvs_direct_ctx)
+	    {
+		/* cvs_rupdate does the pipe through diff thing internally */
+		cvs_rupdate(cvs_direct_ctx, repository_path, psm->file->filename, rev, cr, dopts);
+	    }
+	    else
+	    {
+		/*
+		 * NOTE(review): use_rep_path and the filename go into the
+		 * single-quoted sed script unescaped; a name containing
+		 * ' or | would break the script.
+		 */
+		cmdlen = snprintf(cmdbuff, PATH_MAX * 2, "cvs %s %s %s -p -r %s %s%s | diff %s %s /dev/null %s | sed -e '%s s|^\\([+-][+-][+-]\\) -|\\1 %s%s|g'",
+			 compress_arg, norc, utype, rev, esc_use_rep_path, esc_file, dopts,
+			 cr?"":"-",cr?"-":"", cr?"2":"1",
+			 use_rep_path, psm->file->filename);
+	    }
+	}
+	else
+	{
+	    /* a regular diff */
+	    if (cvs_direct_ctx)
+	    {
+		cvs_diff(cvs_direct_ctx, repository_path, psm->file->filename, psm->pre_rev->rev, psm->post_rev->rev, dopts);
+	    }
+	    else
+	    {
+		/* 'cvs diff' exit status '1' is ok, just means files are different */
+		if (strcmp(dtype, "diff") == 0)
+		    check_ret = 1;
+
+		cmdlen = snprintf(cmdbuff, PATH_MAX * 2, "cvs %s %s %s %s -r %s -r %s %s%s",
+			 compress_arg, norc, dtype, dopts, psm->pre_rev->rev, psm->post_rev->rev, 
+			 esc_use_rep_path, esc_file);
+	    }
+	}
+
+	/* never hand a truncated command line to the shell */
+	if (cmdlen < 0 || cmdlen >= PATH_MAX * 2)
+	{
+	    debug(DEBUG_APPERROR, "diff command for file %s is too long: aborting", psm->file->filename);
+	    exit(1);
+	}
+
+	/*
+	 * my_system doesn't block signals the way system does.
+	 * if ctrl-c is pressed while in there, we probably exit
+	 * immediately and hope the shell has sent the signal
+	 * to all of the process group members
+	 */
+	if (cmdbuff[0] && (ret = my_system(cmdbuff)))
+	{
+	    int stat = WEXITSTATUS(ret);
+	    
+	    /* 
+	     * cvs diff returns 1 in exit status for 'files are different'
+	     * so use a better method to check for failure
+	     */
+	    if (stat < 0 || stat > check_ret || WIFSIGNALED(ret))
+	    {
+		debug(DEBUG_APPERROR, "system command returned non-zero exit status: %d: aborting", stat);
+		exit(1);
+	    }
+	}
+    }
+}
+
+/*
+ * Register the revision named at the start of rev_str with file.
+ *
+ * The "revision" log line can include extra information, including who is
+ * locking the file; rev_str is truncated in place after the leading run of
+ * digits and dots so only the revision number remains.
+ *
+ * Returns the result of cvs_file_add_revision().
+ */
+static CvsFileRevision * parse_revision(CvsFile * file, char * rev_str)
+{
+    char * p;
+
+    p = rev_str;
+    /* <ctype.h> functions need an unsigned char value, not a plain char */
+    while (isdigit((unsigned char)*p) || *p == '.')
+	    p++;
+    *p = 0;
+
+    return cvs_file_add_revision(file, rev_str);
+}
+
+/*
+ * Look up revision rev_str in file->revisions, creating and registering a
+ * new CvsFileRevision if it is not there yet.
+ *
+ * Once file->have_branches is set, the first call for a revision also marks
+ * it present and resolves rev->branch: the branch name registered in
+ * file->branches, "HEAD" for trunk revisions, or "#CVSPS_NO_BRANCH" for a
+ * revision on a branch with no name.
+ *
+ * Returns the revision.  Exits on allocation failure or on a revision
+ * string without a '.'.
+ */
+CvsFileRevision * cvs_file_add_revision(CvsFile * file, const char * rev_str)
+{
+    CvsFileRevision * rev;
+
+    if (!(rev = (CvsFileRevision*)get_hash_object(file->revisions, rev_str)))
+    {
+	rev = (CvsFileRevision*)calloc(1, sizeof(*rev));
+	if (!rev)
+	{
+	    debug(DEBUG_SYSERROR, "malloc failed for CvsFileRevision");
+	    exit(1);
+	}
+
+	rev->rev = get_string(rev_str);
+	rev->file = file;
+	rev->branch = NULL;
+	rev->present = 0;
+	rev->pre_psm = NULL;
+	rev->post_psm = NULL;
+	INIT_LIST_HEAD(&rev->branch_children);
+	INIT_LIST_HEAD(&rev->tags);
+	
+	put_hash_object_ex(file->revisions, rev->rev, rev, HT_NO_KEYCOPY, NULL, NULL);
+
+	debug(DEBUG_STATUS, "added revision %s to file %s", rev_str, file->filename);
+    }
+    else
+    {
+	debug(DEBUG_STATUS, "found revision %s to file %s", rev_str, file->filename);
+    }
+
+    /* 
+     * note: we are guaranteed to get here at least once with 'have_branches' == 1.
+     * we may pass through once before this, because of symbolic tags, then once
+     * always when processing the actual revision logs
+     *
+     * rev->branch will always be set to something, maybe "HEAD"
+     */
+    if (!rev->branch && file->have_branches)
+    {
+	char branch_str[REV_STR_MAX];
+
+	/* in the cvs cvs repository (ccvs) there are tagged versions
+	 * that don't exist.  let's mark every 'known to exist' 
+	 * version
+	 */
+	rev->present = 1;
+
+	/* determine the branch this revision was committed on */
+	if (!get_branch(branch_str, rev->rev))
+	{
+	    debug(DEBUG_APPERROR, "invalid rev format %s", rev->rev);
+	    exit(1);
+	}
+	
+	rev->branch = (char*)get_hash_object(file->branches, branch_str);
+	
+	/* if there's no branch and it's not on the trunk, blab */
+	if (!rev->branch)
+	{
+	    /* a branch number that still contains a '.' is not the trunk */
+	    if (get_branch(branch_str, branch_str))
+	    {
+		debug(DEBUG_APPMSG1, "WARNING: revision %s of file %s on unnamed branch", rev->rev, rev->file->filename);
+		rev->branch = "#CVSPS_NO_BRANCH";
+	    }
+	    else
+	    {
+		rev->branch = "HEAD";
+	    }
+	}
+
+	debug(DEBUG_STATUS, "revision %s of file %s on branch %s", rev->rev, rev->file->filename, rev->branch);
+    }
+
+    return rev;
+}
+
+/*
+ * Allocate a zeroed CvsFile together with its four hash tables
+ * (revisions, branches, branches_sym, symbols).
+ *
+ * Returns the new object, or NULL if any allocation fails; in that case
+ * every table that was created is destroyed again before returning.
+ */
+CvsFile * create_cvsfile()
+{
+    CvsFile * f = (CvsFile*)calloc(1, sizeof(*f));
+    if (!f)
+	return NULL;
+
+    f->revisions = create_hash_table(53);
+    f->branches = create_hash_table(13);
+    f->branches_sym = create_hash_table(13);
+    f->symbols = create_hash_table(253);
+    f->have_branches = 0;
+
+    if (!f->revisions || !f->branches || !f->branches_sym || !f->symbols)
+    {
+	if (f->symbols)
+	    destroy_hash_table(f->symbols, NULL);
+	if (f->branches_sym)
+	    destroy_hash_table(f->branches_sym, NULL);
+	if (f->branches)
+	    destroy_hash_table(f->branches, NULL);
+	if (f->revisions)
+	    destroy_hash_table(f->revisions, NULL);
+	free(f);
+	return NULL;
+    }
+   
+    return f;
+}
+
+/*
+ * Allocate a PatchSet with an empty member list, psid -1 and all other
+ * fields cleared.  Returns NULL if the allocation fails.
+ */
+static PatchSet * create_patch_set()
+{
+    PatchSet * fresh = (PatchSet*)calloc(1, sizeof(*fresh));
+
+    if (!fresh)
+	return NULL;
+
+    INIT_LIST_HEAD(&fresh->members);
+
+    /* -1 means "no id assigned yet" (see the psid < 0 checks elsewhere) */
+    fresh->psid = -1;
+
+    fresh->date = 0;
+    fresh->min_date = 0;
+    fresh->max_date = 0;
+
+    fresh->descr = NULL;
+    fresh->author = NULL;
+    fresh->tag = NULL;
+    fresh->ancestor_branch = NULL;
+
+    fresh->tag_flags = 0;
+    fresh->branch_add = 0;
+    fresh->funk_factor = 0;
+
+    CLEAR_LIST_NODE(&fresh->collision_link);
+
+    return fresh;
+}
+
+/*
+ * Allocate a PatchSetMember with every field cleared.
+ * Returns NULL if the allocation fails (instead of writing through a
+ * NULL pointer).
+ */
+PatchSetMember * create_patch_set_member()
+{
+    PatchSetMember * psm = (PatchSetMember*)calloc(1, sizeof(*psm));
+
+    if (!psm)
+	return NULL;
+
+    psm->pre_rev = NULL;
+    psm->post_rev = NULL;
+    psm->ps = NULL;
+    psm->file = NULL;
+    psm->bad_funk = 0;
+    return psm;
+}
+
+/* Allocate a zero-filled PatchSetRange; returns NULL if calloc fails. */
+static PatchSetRange * create_patch_set_range()
+{
+    return (PatchSetRange*)calloc(1, sizeof(PatchSetRange));
+}
+
+/*
+ * Look up revision r of file.  The pseudo-revision "INITIAL" yields NULL;
+ * any other revision must already exist in file->revisions, otherwise an
+ * error is logged and the program exits.
+ */
+CvsFileRevision * file_get_revision(CvsFile * file, const char * r)
+{
+    CvsFileRevision * found;
+
+    if (!strcmp(r, "INITIAL"))
+	return NULL;
+
+    found = (CvsFileRevision*)get_hash_object(file->revisions, r);
+
+    if (found == NULL)
+    {
+	debug(DEBUG_APPERROR, "request for non-existent rev %s in file %s", r, file->filename);
+	exit(1);
+    }
+
+    return found;
+}
+
+/*
+ * Parse lines in the format:
+ * 
+ * <white space>tag_name: <rev>;
+ *
+ * Handles both regular tags (these go into the symbols hash)
+ * and magic-branch-tags (second to last node of revision is 0)
+ * which go into branches and branches_sym hashes.  Magic-branch
+ * format is hidden in CVS everywhere except the 'cvs log' output.
+ *
+ * sym is modified in place (the ':' is overwritten with a terminator).
+ * Lines with no tag, no ':' or nothing after the ':' are ignored.
+ * Exits on a revision that has no '.' or does not fit REV_STR_MAX.
+ */
+
+static void parse_sym(CvsFile * file, char * sym)
+{
+    char * tag = sym, *eot;
+    int leaf, final_branch = -1;
+    char rev[REV_STR_MAX];
+    char rev2[REV_STR_MAX];
+    
+    /* <ctype.h> functions need an unsigned char value, not a plain char */
+    while (*tag && isspace((unsigned char)*tag))
+	tag++;
+
+    if (!*tag)
+	return;
+
+    eot = strchr(tag, ':');
+    
+    /* the ':' must be followed by a separator, else eot + 2 overruns the line */
+    if (!eot || !eot[1])
+	return;
+
+    *eot = 0;
+    eot += 2;
+    
+    /* rev and rev2 are fixed-size: refuse revision text that cannot fit */
+    if (strlen(eot) >= REV_STR_MAX || !get_branch_ext(rev, eot, &leaf))
+    {
+	debug(DEBUG_APPERROR, "malformed revision");
+	exit(1);
+    }
+
+    /* 
+     * get_branch_ext will leave final_branch alone
+     * if there aren't enough '.' in string 
+     */
+    get_branch_ext(rev2, rev, &final_branch);
+
+    if (final_branch == 0)
+    {
+	/* magic branch tag x.y.0.n: the branch itself is numbered x.y.n */
+	snprintf(rev, REV_STR_MAX, "%s.%d", rev2, leaf);
+	debug(DEBUG_STATUS, "got sym: %s for %s", tag, rev);
+	
+	cvs_file_add_branch(file, rev, tag);
+    }
+    else
+    {
+	strcpy(rev, eot);
+	chop(rev);
+
+	/* see cvs manual: what is this vendor tag? */
+	if (is_vendor_branch(rev))
+	    cvs_file_add_branch(file, rev, tag);
+	else
+	    cvs_file_add_symbol(file, rev, tag);
+    }
+}
+
+/*
+ * Record that tag p_tag_str names revision rev_str of file.
+ *
+ * The revision is created if necessary, the tag is entered in
+ * file->symbols, a GlobalSymbol for the tag is created on first use, and a
+ * Tag node is linked onto both the symbol's and the revision's tag lists.
+ *
+ * Exits on allocation failure.
+ */
+void cvs_file_add_symbol(CvsFile * file, const char * rev_str, const char * p_tag_str)
+{
+    CvsFileRevision * rev;
+    GlobalSymbol * sym;
+    Tag * tag;
+
+    /* get a permanent storage string */
+    char * tag_str = get_string(p_tag_str);
+
+    debug(DEBUG_STATUS, "adding symbol to file: %s %s->%s", file->filename, tag_str, rev_str);
+    rev = cvs_file_add_revision(file, rev_str);
+    put_hash_object_ex(file->symbols, tag_str, rev, HT_NO_KEYCOPY, NULL, NULL);
+    
+    /*
+     * check the global_symbols
+     */
+    sym = (GlobalSymbol*)get_hash_object(global_symbols, tag_str);
+    if (!sym)
+    {
+	sym = (GlobalSymbol*)malloc(sizeof(*sym));
+	if (!sym)
+	{
+	    debug(DEBUG_SYSERROR, "malloc failed for GlobalSymbol");
+	    exit(1);
+	}
+
+	sym->tag = tag_str;
+	sym->ps = NULL;
+	INIT_LIST_HEAD(&sym->tags);
+
+	put_hash_object_ex(global_symbols, sym->tag, sym, HT_NO_KEYCOPY, NULL, NULL);
+    }
+
+    tag = (Tag*)malloc(sizeof(*tag));
+    if (!tag)
+    {
+	debug(DEBUG_SYSERROR, "malloc failed for Tag");
+	exit(1);
+    }
+
+    tag->tag = tag_str;
+    tag->rev = rev;
+    tag->sym = sym;
+    list_add(&tag->global_link, &sym->tags);
+    list_add(&tag->rev_link, &rev->tags);
+}
+
+/*
+ * Register branch number rev under the name tag for file, in both
+ * directions (branches: rev -> tag, branches_sym: tag -> rev).
+ *
+ * Returns the stored tag string, or NULL if a branch with that number is
+ * already registered (nothing is changed in that case).
+ */
+char * cvs_file_add_branch(CvsFile * file, const char * rev, const char * tag)
+{
+    char * perm_tag;
+    char * perm_rev;
+
+    if (get_hash_object(file->branches, rev) != NULL)
+    {
+	debug(DEBUG_STATUS, "attempt to add existing branch %s:%s to %s", 
+	      rev, tag, file->filename);
+	return NULL;
+    }
+
+    /* both tables are used with HT_NO_KEYCOPY, so take permanent strings */
+    perm_tag = get_string(tag);
+    perm_rev = get_string(rev);
+
+    put_hash_object_ex(file->branches, perm_rev, perm_tag, HT_NO_KEYCOPY, NULL, NULL);
+    put_hash_object_ex(file->branches_sym, perm_tag, perm_rev, HT_NO_KEYCOPY, NULL, NULL);
+
+    return perm_tag;
+}
+
+/*
+ * Resolve each global symbol to a PatchSet.  This is
+ * not necessarily doable, because tagging isn't 
+ * necessarily done to the project as a whole, and
+ * it's possible that no tag is valid for all files 
+ * at a single point in time.  We check for that
+ * case though.
+ *
+ * Implementation: the most recent PatchSet containing
+ * a revision (post_rev) tagged by the symbol is considered
+ * the 'tagged' PatchSet.
+ *
+ * A symbol that cannot be resolved to any PatchSet is reported and
+ * skipped; the remaining symbols are still processed.
+ */
+
+static void resolve_global_symbols()
+{
+    struct hash_entry * he_sym;
+    reset_hash_iterator(global_symbols);
+    while ((he_sym = next_hash_entry(global_symbols)))
+    {
+	GlobalSymbol * sym = (GlobalSymbol*)he_sym->he_obj;
+	PatchSet * ps;
+	struct list_head * next;
+
+	debug(DEBUG_STATUS, "resolving global symbol %s", sym->tag);
+
+	/*
+	 * First pass, determine the most recent PatchSet with a 
+	 * revision tagged with the symbolic tag.  This is 'the'
+	 * patchset with the tag
+	 */
+
+	for (next = sym->tags.next; next != &sym->tags; next = next->next)
+	{
+	    Tag * tag = list_entry(next, Tag, global_link);
+	    CvsFileRevision * rev = tag->rev;
+
+	    /* FIXME:test for rev->post_psm from DEBIAN. not sure how this could happen */
+	    if (!rev->present || !rev->post_psm)
+	    {
+		/* step back first so the loop's next->next lands on the
+		 * element after the one being unlinked
+		 */
+		struct list_head *tmp = next->prev;
+		debug(DEBUG_APPERROR, "revision %s of file %s is tagged but not present",
+		      rev->rev, rev->file->filename);
+		/* FIXME: memleak */
+		list_del(next);
+		next = tmp;
+		continue;
+	    }
+
+	    ps = rev->post_psm->ps;
+
+	    if (!sym->ps || ps->date > sym->ps->date)
+		sym->ps = ps;
+	}
+	
+	/* convenience variable */
+	ps = sym->ps;
+
+	if (!ps)
+	{
+	    debug(DEBUG_APPERROR, "no patchset for tag %s", sym->tag);
+	    /* only this symbol is unresolvable; keep going with the others */
+	    continue;
+	}
+
+	ps->tag = sym->tag;
+
+	/* check if this ps is one of the '-r' patchsets */
+	if (restrict_tag_start && strcmp(restrict_tag_start, ps->tag) == 0)
+	    restrict_tag_ps_start = ps->psid;
+
+	/* the second -r implies -b */
+	if (restrict_tag_end && strcmp(restrict_tag_end, ps->tag) == 0)
+	{
+	    restrict_tag_ps_end = ps->psid;
+
+	    if (restrict_branch)
+	    {
+		if (strcmp(ps->branch, restrict_branch) != 0)
+		{
+		    debug(DEBUG_APPMSG1, 
+			  "WARNING: -b option and second -r have conflicting branches: %s %s", 
+			  restrict_branch, ps->branch);
+		}
+	    }
+	    else
+	    {
+		debug(DEBUG_APPMSG1, "NOTICE: implicit branch restriction set to %s", ps->branch);
+		restrict_branch = ps->branch;
+	    }
+	}
+
+	/* 
+	 * Second pass. 
+	 * check if this is an invalid patchset, 
+	 * check which members are invalid.  determine
+	 * the funk factor etc.
+	 */
+	for (next = sym->tags.next; next != &sym->tags; next = next->next)
+	{
+	    Tag * tag = list_entry(next, Tag, global_link);
+	    CvsFileRevision * rev = tag->rev;
+	    CvsFileRevision * next_rev = rev_follow_branch(rev, ps->branch);
+	    
+	    if (!next_rev)
+		continue;
+		
+	    /*
+	     * we want the 'tagged revision' to be valid until after
+	     * the date of the 'tagged patchset' or else there's something
+	     * funky going on
+	     */
+	    if (next_rev->post_psm->ps->date < ps->date)
+	    {
+		int flag = check_rev_funk(ps, next_rev);
+		debug(DEBUG_STATUS, "file %s revision %s tag %s: TAG VIOLATION %s",
+		      rev->file->filename, rev->rev, sym->tag, tag_flag_descr[flag]);
+		ps->tag_flags |= flag;
+	    }
+	}
+    }
+}
+
+/*
+ * Decide whether rev lies on the named branch or on one of that branch's
+ * ancestor branches at or before the point where the branch leaves it.
+ *
+ * "HEAD" is special-cased: a revision is on HEAD when its number contains
+ * exactly one '.'.  Any other branch is looked up in the file's
+ * branches_sym table; an unknown branch yields 0.
+ */
+static int revision_affects_branch(CvsFileRevision * rev, const char * branch)
+{
+    char rev_branch[REV_STR_MAX];
+    char ancestor[REV_STR_MAX];
+    const char * branch_rev;
+    int file_leaf, branch_leaf;
+
+    /* special case the branch called 'HEAD' */
+    if (strcmp(branch, "HEAD") == 0)
+    {
+	/* look for only one '.' in rev */
+	const char * first_dot = strchr(rev->rev, '.');
+
+	return (first_dot && !strchr(first_dot + 1, '.')) ? 1 : 0;
+    }
+
+    branch_rev = (char*)get_hash_object(rev->file->branches_sym, branch);
+    if (!branch_rev)
+	return 0;
+
+    strcpy(ancestor, branch_rev);
+
+    /* first get the branch the file rev is on */
+    if (!get_branch_ext(rev_branch, rev->rev, &file_leaf))
+	return 0;
+
+    branch_leaf = file_leaf;
+
+    /* check against branch and all branch ancestor branches */
+    do
+    {
+	debug(DEBUG_STATUS, "check %s against %s for %s", ancestor, rev_branch, rev->file->filename);
+	if (strcmp(ancestor, rev_branch) == 0)
+	    return (file_leaf <= branch_leaf);
+    }
+    while (get_branch_ext(ancestor, ancestor, &branch_leaf));
+
+    return 0;
+}
+
+/* Return the number of '.' characters in the NUL-terminated string p. */
+static int count_dots(const char * p)
+{
+    const char * c;
+    int dots = 0;
+
+    for (c = p; *c != '\0'; c++)
+    {
+	if (*c == '.')
+	    dots += 1;
+    }
+
+    return dots;
+}
+
+/*
+ * When importing vendor sources (apparently people do this),
+ * the code is added on a 'vendor' branch which, for some reason,
+ * doesn't use the magic-branch-tag format.  Try to detect that here:
+ * a revision string with an even number of dots is taken to be one.
+ */
+static int is_vendor_branch(const char * rev)
+{
+    return (count_dots(rev) % 2) == 0;
+}
+
+/*
+ * Append psm to the end of ps's member list and point psm->ps at ps.
+ *
+ * If ps already holds a member for the same file, ps is put on the
+ * global collisions list (once; a non-NULL collision_link.next is taken
+ * to mean it is already queued).
+ */
+void patch_set_add_member(PatchSet * ps, PatchSetMember * psm)
+{
+    struct list_head * pos;
+
+    for (pos = ps->members.next; pos != &ps->members; pos = pos->next)
+    {
+	PatchSetMember * existing = list_entry(pos, PatchSetMember, link);
+
+	if (existing->file != psm->file)
+	    continue;
+
+	if (ps->collision_link.next != NULL)
+	    continue;
+
+	list_add(&ps->collision_link, &collisions);
+    }
+
+    psm->ps = ps;
+    list_add(&psm->link, ps->members.prev);
+}
+
+/*
+ * Mark psm as having no predecessor revision.  If its post_rev is dead,
+ * the owning patchset is additionally flagged as a 'branch add'.
+ */
+static void set_psm_initial(PatchSetMember * psm)
+{
+    PatchSet * owner;
+
+    psm->pre_rev = NULL;
+
+    if (!psm->post_rev->dead)
+	return;
+
+    /* 
+     * we expect a 'file xyz initially added on branch abc' here
+     * but there can only be one such member in a given patchset
+     */
+    owner = psm->ps;
+
+    if (owner->branch_add)
+	debug(DEBUG_APPMSG1, "WARNING: branch_add already set!");
+
+    owner->branch_add = 1;
+}
+
+/* 
+ * look at all revisions starting at rev and going forward (along
+ * ps->branch) until ps->date and see whether they are invalid or just funky.
+ *
+ * Returns TAG_FUNKY, or TAG_INVALID if any member of a visited patchset is
+ * before the tag.  As a side effect, sets funk_factor on the visited
+ * patchsets and bad_funk on their offending members when ps->tag is one of
+ * the '-r' tags.
+ */
+static int check_rev_funk(PatchSet * ps, CvsFileRevision * rev)
+{
+    int retval = TAG_FUNKY;
+
+    while (rev)
+    {
+	PatchSet * next_ps = rev->post_psm->ps;
+	struct list_head * next;
+
+	if (next_ps->date > ps->date)
+	    break;
+
+	/* dates are time_t: cast so the arguments match the format */
+	debug(DEBUG_STATUS, "ps->date %ld next_ps->date %ld rev->rev %s rev->branch %s", 
+	      (long)ps->date, (long)next_ps->date, rev->rev, rev->branch);
+
+	/*
+	 * If the ps->tag is one of the two possible '-r' tags
+	 * then the funkyness is even more important.
+	 *
+	 * In the restrict_tag_start case, this next_ps is chronologically
+	 * before ps, but tagwise after, so set the funk_factor so it will
+	 * be included.
+	 *
+	 * The restrict_tag_end case is similar, but backwards.
+	 *
+	 * Start assuming the HIDE/SHOW_ALL case, we will determine
+	 * below if we have a split ps case 
+	 */
+	if (restrict_tag_start && strcmp(ps->tag, restrict_tag_start) == 0)
+	    next_ps->funk_factor = FNK_SHOW_ALL;
+	if (restrict_tag_end && strcmp(ps->tag, restrict_tag_end) == 0)
+	    next_ps->funk_factor = FNK_HIDE_ALL;
+
+	/*
+	 * if all of the other members of this patchset are also 'after' the tag
+	 * then this is a 'funky' patchset w.r.t. the tag.  however, if some are
+	 * before then the patchset is 'invalid' w.r.t. the tag, and we mark
+	 * the members individually with 'bad_funk', if this tag is the
+	 * '-r' tag.  Then we can actually split the diff on this patchset
+	 */
+	for (next = next_ps->members.next; next != &next_ps->members; next = next->next)
+	{
+	    PatchSetMember * psm = list_entry(next, PatchSetMember, link);
+	    if (before_tag(psm->post_rev, ps->tag))
+	    {
+		retval = TAG_INVALID;
+		/* only set bad_funk for one of the -r tags */
+		if (next_ps->funk_factor)
+		{
+		    psm->bad_funk = 1;
+		    next_ps->funk_factor = 
+			(next_ps->funk_factor == FNK_SHOW_ALL) ? FNK_SHOW_SOME : FNK_HIDE_SOME;
+		}
+		debug(DEBUG_APPMSG1, 
+		      "WARNING: Invalid PatchSet %d, Tag %s:\n"
+		      "    %s:%s=after, %s:%s=before. Treated as 'before'", 
+		      next_ps->psid, ps->tag, 
+		      rev->file->filename, rev->rev, 
+		      psm->post_rev->file->filename, psm->post_rev->rev);
+	    }
+	}
+
+	rev = rev_follow_branch(rev, ps->branch);
+    }
+
+    return retval;
+}
+
+/*
+ * Return 1 when rev is 'before' tag, 0 otherwise.  That takes a revision of
+ * rev's file carrying the tag, rev affecting that revision's branch, and
+ * rev's patchset date being no later than the tagged revision's.
+ */
+static int before_tag(CvsFileRevision * rev, const char * tag)
+{
+    CvsFileRevision * tagged_rev = (CvsFileRevision*)get_hash_object(rev->file->symbols, tag);
+    const char * tagged_name = tagged_rev ? tagged_rev->rev : "N/A";
+    int retval = tagged_rev != NULL
+	&& revision_affects_branch(rev, tagged_rev->branch)
+	&& rev->post_psm->ps->date <= tagged_rev->post_psm->ps->date;
+
+    debug(DEBUG_STATUS, "before_tag: %s %s %s %s %d", rev->file->filename, tag, rev->rev, tagged_name, retval);
+    return retval;
+}
+
+/*
+ * Get the next revision from rev, following branch if possible (NULL if none).
+ * FIXME: not sure if this needs to follow branches leading up to branches?
+ */
+static CvsFileRevision * rev_follow_branch(CvsFileRevision * rev, const char * branch)
+{
+    struct list_head * const head = &rev->branch_children;
+    struct list_head * pos;
+
+    /* rev already on the wanted branch: stay on the 'main line of inheritance' */
+    if (!strcmp(rev->branch, branch))
+	return rev->pre_psm ? rev->pre_psm->post_rev : NULL;
+
+    /* otherwise scan the branches sprouting from rev for one on 'branch' */
+    for (pos = head->next; pos != head; pos = pos->next) {
+	CvsFileRevision * child = list_entry(pos, CvsFileRevision, link);
+	if (!strcmp(child->branch, branch))
+	    return child;
+    }
+    return NULL;
+}
+
+/* Scan argv[1..argc-1] for "--norc"; on the first hit set norc to "-f" and stop. */
+static void check_norc(int argc, char * argv[])
+{
+    int i;
+
+    for (i = 1; i < argc; i++)
+    {
+	if (strcmp(argv[i], "--norc") != 0)
+	    continue;
+	norc = "-f";
+	return;
+    }
+}
+
+/*
+ * Fold the members of ps into head_ps->ancestor_branch, keeping the 'deepest'
+ * ancestor seen so far.  No-op for PatchSet 1 and for patchsets on HEAD.
+ */
+static void determine_branch_ancestor(PatchSet * ps, PatchSet * head_ps)
+{
+    struct list_head * next;
+    CvsFileRevision * rev;
+
+    /* PatchSet 1 has no ancestor */
+    if (ps->psid == 1)
+	return;
+
+    /* HEAD branch patchsets have no ancestry, but callers should know that */
+    if (strcmp(ps->branch, "HEAD") == 0) {
+	debug(DEBUG_APPMSG1, "WARNING: no branch ancestry for HEAD");
+	return;
+    }
+
+    for (next = ps->members.next; next != &ps->members; next = next->next) {
+	PatchSetMember * psm = list_entry(next, PatchSetMember, link);
+	int d1, d2;
+
+	/* pre_rev is NULL for a file's initial revision: nothing to learn from it */
+	rev = psm->pre_rev;
+	if (!rev)
+	    continue;
+
+	/* the reason this is at all complicated has to do with a branch off
+	 * of a branch.  it is possible (and indeed likely) that some file
+	 * would not have been modified from the initial branch point to the
+	 * branch-off-branch point, and therefore the branch-off-branch point
+	 * is really branch-off-HEAD for that specific member (file).  in that
+	 * case, rev->branch will say HEAD but we want to know the symbolic
+	 * name of the first branch so we continue to look member after member
+	 * until we find the 'deepest' branching.  deepest can actually be
+	 * determined by considering the revision currently indicated by
+	 * head_ps->ancestor_branch (by symbolic lookup) and rev->rev. the one
+	 * with more dots wins
+	 *
+	 * also, the first commit in which a branch-off-branch is mentioned may
+	 * ONLY modify files never committed since original branch-off-HEAD was
+	 * created, so we have to keep checking, ps after ps to be sure to get
+	 * the deepest ancestor
+	 *
+	 * note: rev is the pre-commit revision, not the post-commit
+	 */
+	if (!head_ps->ancestor_branch)
+	    d1 = 0;
+	else if (strcmp(ps->branch, rev->branch) == 0)
+	    continue;
+	else if (strcmp(head_ps->ancestor_branch, "HEAD") == 0)
+	    d1 = 1;
+	else {
+	    /* branch_rev may not exist if the file was added on this branch for example */
+	    const char * branch_rev = (char *)get_hash_object(rev->file->branches_sym, head_ps->ancestor_branch);
+	    d1 = branch_rev ? count_dots(branch_rev) : 1;
+	}
+
+	/* HACK: we sometimes pretend to derive from the import branch.
+	 * just don't do that.  this is the easiest way to prevent... */
+	d2 = (strcmp(rev->rev, "1.1.1.1") == 0) ? 0 : count_dots(rev->rev);
+	if (d2 > d1)
+	    head_ps->ancestor_branch = rev->branch;
+    }
+}
+
+/* Print a notice on stdout for every PatchSet sitting on the collisions list. */
+static void handle_collisions()
+{
+    struct list_head * pos = collisions.next;
+    for (; pos != &collisions; pos = pos->next) {
+	const PatchSet * hit = list_entry(pos, PatchSet, collision_link);
+	printf("PatchSet %d has collisions\n", hit->psid);
+    }
+}
+
+/* Invoke action once on each PatchSet linked into all_patch_sets, in list order. */
+void walk_all_patch_sets(void (*action)(PatchSet *))
+{
+    struct list_head * pos;
+
+    for (pos = all_patch_sets.next; pos != &all_patch_sets; pos = pos->next)
+	action(list_entry(pos, PatchSet, all_link));
+}
diff --git a/cvsps.h b/cvsps.h
new file mode 100644
index 0000000..280a253
--- /dev/null
+++ b/cvsps.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef CVSPS_H
+#define CVSPS_H
+
+#ifndef HAVE_CVSSERVERCTX_DEF
+#define HAVE_CVSSERVERCTX_DEF /* marks the typedef below as done; presumably another header repeats this guard */
+typedef struct _CvsServerCtx CvsServerCtx; /* incomplete type here: only pointers to it are declared */
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096 /* fallback for when no earlier header defined PATH_MAX */
+#endif
+
+extern struct hash_table * file_hash; /* stats.c walks it: he_key is the file name, he_obj a CvsFile* */
+extern const char * tag_flag_descr[]; /* NOTE(review): presumably indexed by the TAG_* flag value - confirm */
+extern CvsServerCtx * cvs_direct_ctx;
+extern char root_path[];
+extern char repository_path[];
+
+CvsFile * create_cvsfile();
+CvsFileRevision * cvs_file_add_revision(CvsFile *, const char *);
+void cvs_file_add_symbol(CvsFile * file, const char * rev, const char * tag);
+char * cvs_file_add_branch(CvsFile *, const char *, const char *);
+PatchSet * get_patch_set(const char *, const char *, const char *, const char *, PatchSetMember *);
+PatchSetMember * create_patch_set_member();
+CvsFileRevision * file_get_revision(CvsFile *, const char *);
+void patch_set_add_member(PatchSet * ps, PatchSetMember * psm); /* links psm into ps->members and sets psm->ps */
+void walk_all_patch_sets(void (*action)(PatchSet *)); /* calls action on each entry of all_patch_sets */
+
+#endif /* CVSPS_H */
diff --git a/cvsps.spec b/cvsps.spec
new file mode 100644
index 0000000..f8fa473
--- /dev/null
+++ b/cvsps.spec
@@ -0,0 +1,48 @@
+Version: 2.1
+Summary: CVSps is a program for generating 'patchset' information from a CVS repository
+Name: cvsps
+Release: 1
+URL: http://www.cobite.com/cvsps/
+Source0: %{name}-%{version}.tar.gz
+License: GPL
+Group: Development/Tools
+BuildRoot: %{_tmppath}/%{name}-root
+prefix: /usr
+
+%description 
+CVSps is a program for generating 'patchset' information from a CVS
+repository. A patchset in this case is defined as a set of changes
+made to a collection of files, and all committed at the same time
+(using a single 'cvs commit' command). This information is valuable for
+seeing the big picture of the evolution of a cvs project. While cvs
+tracks revision information, it is often difficult to see what changes
+were committed 'atomically' to the repository.
+
+%prep
+%setup -q
+
+%build
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+%makeinstall
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%doc README CHANGELOG COPYING
+%{prefix}/bin/cvsps
+%{prefix}/man/man*/*
+
+%changelog
+* Tue Apr  1 2002 David Mansfield <cvsps@dm.cobite.com>
+- (no really - not April fools joke)
+- revise spec file from Jan
+- merge Makefile changes
+* Tue Mar  5 2002 Jan IVEN <jan.iven@cern.ch>
+- Initial build.
+
+
diff --git a/cvsps_types.h b/cvsps_types.h
new file mode 100644
index 0000000..b41e2a9
--- /dev/null
+++ b/cvsps_types.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef CVSPS_TYPES_H
+#define CVSPS_TYPES_H
+
+#include <time.h>
+
+#define LOG_STR_MAX 32768
+#define AUTH_STR_MAX 64
+#define REV_STR_MAX 64
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+typedef struct _CvsFile CvsFile;
+typedef struct _PatchSet PatchSet;
+typedef struct _PatchSetMember PatchSetMember;
+typedef struct _PatchSetRange PatchSetRange;
+typedef struct _CvsFileRevision CvsFileRevision;
+typedef struct _GlobalSymbol GlobalSymbol;
+typedef struct _Tag Tag;
+
+struct _CvsFileRevision
+{
+    char * rev; /* revision number string (dotted, e.g. "1.1.1.1") */
+    int dead; /* non-zero for a dead revision; see the branch_add handling */
+    CvsFile * file; /* file this revision belongs to */
+    char * branch; /* name of the branch this revision is on (compared with PatchSet.branch) */
+    /*
+     * In the cvs repository of cvs itself (ccvs project) there are
+     * tagged revisions that don't exist. track 'confirmed' revisions
+     * so as to not let them screw us up.
+     */
+    int present;
+
+    /*
+     * A revision can be part of many PatchSets because it may
+     * be the branch point of many branches (as a pre_rev).  
+     * It should, however, be the 'post_rev' of only one 
+     * PatchSetMember.  The 'main line of inheritance' is
+     * kept in pre_psm, and all 'branch revisions' are kept
+     * in a list.
+     */
+    PatchSetMember * pre_psm; /* member whose post_rev follows this revision on its own branch; may be NULL */
+    PatchSetMember * post_psm; /* the one member that has this revision as its post_rev */
+    struct list_head branch_children; /* first revisions of the branches rooted here, chained via 'link' */
+    
+    /* 
+     * for linking this 'first branch rev' into the parent branch_children
+     */
+    struct list_head link;
+
+    /*
+     * A list of all Tag structures tagging this revision
+     */
+    struct list_head tags;
+};
+
+struct _CvsFile
+{
+    char *filename; /* file name, as printed in diagnostics */
+    struct hash_table * revisions;    /* rev_str to revision [CvsFileRevision*] */
+    struct hash_table * branches;     /* branch to branch_sym [char*]           */
+    struct hash_table * branches_sym; /* branch_sym to branch [char*]           */
+    struct hash_table * symbols;      /* tag to revision [CvsFileRevision*]     */
+    /* 
+     * this is a hack. when we initially create entries in the symbol hash
+     * we don't have the branch info, so the CvsFileRevisions get created 
+     * with the branch attribute NULL.  Later we need to resolve these.
+     */
+    int have_branches; /* NOTE(review): presumably non-zero once the branch info is known - confirm */
+};
+
+struct _PatchSetMember
+{
+    CvsFileRevision * pre_rev; /* pre-commit revision; NULL for an initial revision (set_psm_initial) */
+    CvsFileRevision * post_rev; /* post-commit revision */
+    PatchSet * ps; /* owning patchset, set by patch_set_add_member() */
+    CvsFile * file; /* compared by patch_set_add_member() to spot two members for one file */
+    /*
+     * bad_funk is only set w.r.t the -r tags
+     */
+    int bad_funk; /* set to 1 by check_rev_funk() on members found 'before' the tag */
+    struct list_head link; /* entry in PatchSet.members */
+};
+
+/* 
+ * these are bit flags for tag flags 
+ * they apply to any patchset that
+ * has an associated tag
+ */
+#define TAG_FUNKY   0x1 /* check_rev_funk(): no member of a visited patchset was 'before' the tag */
+#define TAG_INVALID 0x2 /* check_rev_funk(): some member of a visited patchset was 'before' the tag */
+
+/* values for funk_factor. they apply
+ * only to the -r tags, to patchsets
+ * that have an odd relationship to the
+ * tag
+ */
+#define FNK_SHOW_SOME  1 /* was FNK_SHOW_ALL, but some members got marked bad_funk */
+#define FNK_SHOW_ALL   2 /* set when ps->tag equals restrict_tag_start */
+#define FNK_HIDE_ALL   3 /* set when ps->tag equals restrict_tag_end */
+#define FNK_HIDE_SOME  4 /* was FNK_HIDE_ALL, but some members got marked bad_funk */
+
+struct _PatchSet
+{
+    int psid; /* patchset number, as printed in messages */
+    time_t date; /* compared between patchsets to order them in time */
+    time_t min_date; /* NOTE(review): presumably the earliest member date seen - confirm */
+    time_t max_date; /* NOTE(review): presumably the latest member date seen - confirm */
+    char *descr; /* log message (its length feeds the stats.c log statistics) */
+    char *author; /* author name */
+    char *tag; /* tag associated with this patchset, if any */
+    int tag_flags; /* TAG_* bit flags */
+    char *branch; /* branch name; "HEAD" for the main line */
+    char *ancestor_branch; /* NULL until determine_branch_ancestor() finds one */
+    struct list_head members; /* list of PatchSetMember, chained via PatchSetMember.link */
+    /*
+     * A 'branch add' patch set is a bogus patch set created automatically
+     * when a 'file xyz was initially added on branch abc'
+     * we want to ignore these.  fortunately, there's a way to detect them
+     * without resorting to looking at the log message.
+     */
+    int branch_add;
+    /*
+     * If the '-r' option specifies a funky tag, we will need to detect the
+     * PatchSets that come chronologically before the tag, but are logically
+     * after, and vice-versa if a second -r option was specified
+     */
+    int funk_factor; /* 0, or one of the FNK_* values */
+
+    /* for putting onto a list */
+    struct list_head all_link; /* entry in all_patch_sets */
+    struct list_head collision_link; /* entry in collisions; next is NULL while not queued */
+};
+
+struct _PatchSetRange
+{
+    int min_counter; /* NOTE(review): presumably the lower patchset-number bound of the range - confirm */
+    int max_counter; /* NOTE(review): presumably the matching upper bound - confirm */
+    struct list_head link; /* list linkage; the list it is put on is not visible in this header */
+};
+
+struct _GlobalSymbol
+{
+    char * tag; /* tag name */
+    PatchSet * ps; /* NOTE(review): presumably the patchset the tag resolves to - confirm */
+    struct list_head tags; /* NOTE(review): presumably heads the Tag.global_link chain - confirm */
+};
+
+struct _Tag
+{
+    GlobalSymbol * sym; /* NOTE(review): presumably the global symbol this per-file tag belongs to - confirm */
+    CvsFileRevision * rev; /* the tagged revision */
+    char * tag; /* tag name */
+    struct list_head global_link; /* NOTE(review): presumably the entry in GlobalSymbol.tags - confirm */
+    struct list_head rev_link; /* NOTE(review): presumably the entry in CvsFileRevision.tags - confirm */
+};
+
+#endif /* CVSPS_TYPES_H */
diff --git a/list_sort.c b/list_sort.c
new file mode 100644
index 0000000..6c6f54c
--- /dev/null
+++ b/list_sort.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "list_sort.h"
+
+void list_sort(struct list_head * list, int (*node_compare)(struct list_head *, struct list_head *))
+{ /* sorts list in place by repeated merge passes; node_compare(a, b) <= 0 keeps a ahead of b */
+    struct list_head *p, *q, *t;
+    struct list_head tmp; /* output list built up during one pass */
+    int merges = 0; /* number of run pairs merged in the current pass */
+    int k = 1; /* run length of the current pass; doubled after every pass */
+    int psize, qsize; 
+
+    if (list_empty(list))
+	return;
+
+    do
+    {
+	INIT_LIST_HEAD(&tmp);
+	p = list->next;
+	merges = 0;
+	psize = qsize = 0;
+
+	while (p != list)
+	{
+	    merges++;
+	    q = p;
+
+	    while (q != list && psize < k) /* step q up to k nodes past p; psize counts the nodes in p's run */
+	    {
+		q = q->next;
+		psize++;
+	    }
+		
+	    qsize = k;
+
+	    while (psize || (qsize && q != list)) /* merge the p run and the q run onto the tail of tmp */
+	    {
+		if (psize && (qsize == 0 || q == list || node_compare(p, q) <= 0))
+		{
+		    t = p;
+		    p = p->next;
+		    psize--;
+		}
+		else if (qsize == 0) /* sanity check: the loop condition should rule this out */
+		{
+		    printf("whoaa. qsize is zero\n");
+		    exit (1);
+		}
+		else
+		{
+		    t = q;
+		    q = q->next;
+		    qsize--;
+		}
+		
+		list_del(t);
+		
+		list_add(t, tmp.prev);
+	    }
+
+	    p = q; /* the next pair of runs starts where the q run ended */
+	}
+
+	if (!list_empty(list)) /* sanity check: every node should have moved to tmp by now */
+	{
+	    printf("whoaa. initial list not empty\n");
+	    exit (1);
+	}
+	    
+	list_splice(&tmp, list);
+	k *= 2;
+
+	//printf("done w sort pass %d %d\n", k, merges);
+    }
+    while (merges > 1); /* a pass that needed at most one merge leaves the list sorted */
+}
+
diff --git a/list_sort.h b/list_sort.h
new file mode 100644
index 0000000..5733472
--- /dev/null
+++ b/list_sort.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef LIST_SORT_H
+#define LIST_SORT_H
+
+#include <cbtcommon/list.h>
+
+void list_sort(struct list_head *, int (*)(struct list_head *, struct list_head *)); /* in-place sort; the callback returning <= 0 keeps its first argument first */
+
+#endif /* LIST_SORT_H */
diff --git a/merge_utils.sh b/merge_utils.sh
new file mode 100644
index 0000000..137cde6
--- /dev/null
+++ b/merge_utils.sh
@@ -0,0 +1,50 @@
+##################################################
+# utility bash functions to help with merging    #
+##################################################
+# copyright 2003 David Mansfield                 #
+##################################################
+# usage:  . merge_utils.sh                       #
+##################################################
+
+#
+# show patchset $1 (the file $PATCHSETDIR/$1.patch) in the pager
+#
+function sps() { 
+less "$PATCHSETDIR/$1.patch"
+}
+
+#
+# test apply patchset $1 (dry run only, nothing is modified)
+#
+function tps() { 
+patch -p1 --dry-run < "$PATCHSETDIR/$1.patch"
+}
+
+#
+# apply patchset $1 to the current directory
+#
+function aps() { 
+patch -p1 < "$PATCHSETDIR/$1.patch"
+}
+
+#
+# commit changes as merge of patchset $1 (pass -n as $2 to skip the commit).
+#
+function cps() {
+LOGMSG=`cat "$PATCHSETDIR/$1.patch" | perl -e '$line = 0; while(<>) {
+    if ($line == 1) { if (/PatchSet ([[:digit:]]*)/) { $ps = $1; }}
+    if ($line == 2) { if (/Date: (.*)/) { $dt = $1; }}
+    if ($line == 4) { if (/Branch: (.*)/) { $br = $1; }}
+    if ($line == 7) { $lg = $_; chop($lg) }
+    $line++;
+}
+print "Merge ps:$ps date:$dt branch:$br log:$lg\n";
+'`
+echo Committing with log message "'$LOGMSG'"
+if [ "$2" != "-n" ]
+then 
+    cvs commit -m"$LOGMSG"
+fi
+}
+
+echo "Don't forget to set \$PATCHSETDIR to the directory where your patchset diffs are"
diff --git a/stats.c b/stats.c
new file mode 100644
index 0000000..0276a50
--- /dev/null
+++ b/stats.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <search.h>
+#include <cbtcommon/hash.h>
+
+#include "cvsps_types.h"
+#include "cvsps.h"
+
+static unsigned int num_patch_sets = 0;
+static unsigned int num_ps_member = 0, max_ps_member_in_ps = 0;
+static unsigned int num_authors = 0, max_author_len = 0, total_author_len = 0;
+static unsigned int max_descr_len = 0, total_descr_len = 0;
+struct hash_table *author_hash;
+
+/* Count the entries of hash; add the count to *total and raise *max_val to it if larger. */
+static void count_hash(struct hash_table *hash, unsigned int *total, 
+	unsigned int *max_val)
+{
+    int entries;
+
+    reset_hash_iterator(hash);
+    for (entries = 0; next_hash_entry(hash) != NULL; entries++)
+	;
+
+    *total += entries;
+    *max_val = MAX(*max_val, entries);
+}
+
+/*
+ * twalk() visitor that accumulates the patchset statistics.
+ *
+ * nodep - address of the tree node; the node holds a PatchSet pointer
+ * which - kind of visit; only postorder and leaf are acted upon, which
+ *         is how each node gets counted once (twalk reports an internal
+ *         node several times, a leaf only once)
+ * depth - not used
+ *
+ * Updates the file-level counters for patchsets, authors, log messages and
+ * members.  The author hash is created on first use.
+ */
+static void stat_ps_tree_node(const void * nodep, const VISIT which, const int depth)
+{
+    PatchSet * ps;
+    struct list_head * pos;
+    void * prev_author = NULL;
+    int members = 0;
+    int descr_len;
+
+    /* Make sure we have it if we do statistics */
+    if (!author_hash)
+	author_hash = create_hash_table(1023);
+
+    /* every other kind of visit is ignored */
+    if (which != postorder && which != leaf)
+	return;
+
+    ps = *(PatchSet**)nodep;
+    num_patch_sets++;
+
+    /* Author statistics: count an author only when no earlier entry existed */
+    if (put_hash_object_ex(author_hash, ps->author, ps->author, HT_NO_KEYCOPY, NULL, &prev_author) >= 0 && !prev_author)
+    {
+	int author_len = strlen(ps->author);
+	num_authors++;
+	max_author_len = MAX(max_author_len, author_len);
+	total_author_len += author_len;
+    }
+
+    /* Log message statistics */
+    descr_len = strlen(ps->descr);
+    max_descr_len = MAX(max_descr_len, descr_len);
+    total_descr_len += descr_len;
+
+    /* PatchSet member statistics */
+    for (pos = ps->members.next; pos != &ps->members; pos = pos->next)
+	members++;
+
+    num_ps_member += members;
+    max_ps_member_in_ps = MAX(max_ps_member_in_ps, members);
+}
+
+/* total/count as a float, or 0 when count is 0 (so empty input never prints nan) */
+static float average(unsigned int total, unsigned int count)
+{
+    return count ? (float)total / count : 0.0f;
+}
+
+/*
+ * Print file statistics (from the global file_hash) and patchset statistics
+ * (ps_tree is walked with twalk()) on stdout.
+ */
+void print_statistics(void * ps_tree)
+{
+    unsigned int num_files = 0, max_file_len = 0, total_file_len = 0;
+    unsigned int total_revisions = 0, max_revisions_for_file = 0;
+    unsigned int total_branches = 0, max_branches_for_file = 0;
+    unsigned int total_branches_sym = 0, max_branches_sym_for_file = 0;
+    struct hash_entry *he;
+
+    printf("Statistics:\n");
+    fflush(stdout);
+    /* Gather file statistics */
+    reset_hash_iterator(file_hash);
+    while ((he=next_hash_entry(file_hash)))
+    {
+	int len = strlen(he->he_key);
+	CvsFile *file = (CvsFile *)he->he_obj;
+	num_files++;
+	max_file_len = MAX(max_file_len, len);
+	total_file_len += len;
+	count_hash(file->revisions, &total_revisions, &max_revisions_for_file);
+	count_hash(file->branches, &total_branches, &max_branches_for_file);
+	count_hash(file->branches_sym, &total_branches_sym, &max_branches_sym_for_file);
+    }
+
+    /* Print file statistics; the counters are unsigned, hence %u */
+    printf("Num files: %u\nMax filename len: %u, Average filename len: %.2f\n",
+	    num_files, max_file_len, average(total_file_len, num_files));
+    printf("Max revisions for file: %u, Average revisions for file: %.2f\n",
+	  max_revisions_for_file, average(total_revisions, num_files));
+    printf("Max branches for file: %u, Average branches for file: %.2f\n",
+	  max_branches_for_file, average(total_branches, num_files));
+    printf("Max branches_sym for file: %u, Average branches_sym for file: %.2f\n",
+	  max_branches_sym_for_file, average(total_branches_sym, num_files));
+
+    /* Gather, then print, patchset statistics */
+    twalk(ps_tree, stat_ps_tree_node);
+    printf("Num patchsets: %u\n", num_patch_sets);
+    printf("Max PS members in PS: %u\nAverage PS members in PS: %.2f\n",
+	    max_ps_member_in_ps, average(num_ps_member, num_patch_sets));
+    printf("Num authors: %u, Max author len: %u, Avg. author len: %.2f\n", 
+	    num_authors, max_author_len, average(total_author_len, num_authors));
+    printf("Max desc len: %u, Avg. desc len: %.2f\n",
+	    max_descr_len, average(total_descr_len, num_patch_sets));
+}
+
diff --git a/stats.h b/stats.h
new file mode 100644
index 0000000..77ce5c0
--- /dev/null
+++ b/stats.h
@@ -0,0 +1,11 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef STATS_H
+#define STATS_H
+
+void print_statistics(void * ps_tree); /* prints file and patchset statistics on stdout; ps_tree is walked with twalk() */
+
+#endif /* STATS_H */
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..e4b9d14
--- /dev/null
+++ b/util.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>
+#include <assert.h>
+#include <search.h>
+#include <time.h>
+#include <errno.h>
+#include <signal.h>
+#include <regex.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <cbtcommon/debug.h>
+
+#include "util.h"
+
+typedef int (*compare_func)(const void *, const void *);
+
+static void * string_tree;
+/* Read the first line of filename into buf (size bytes, NUL included) and strip
+ * its trailing newline.  Returns buf, or NULL if the file can't be opened or
+ * nothing could be read. */
+char *readfile(char const *filename, char *buf, size_t size)
+{
+    FILE *fp = fopen(filename, "r");
+    char *ptr;
+    size_t len;
+
+    if (!fp)
+	return NULL;
+    ptr = fgets(buf, size, fp);
+    fclose(fp);
+    if (!ptr)
+	return NULL;
+
+    /* len is 0 if the line begins with a NUL byte: don't index buf[len-1] then */
+    len = strlen(buf);
+    if (len > 0 && buf[len-1] == '\n')
+	buf[len-1] = '\0';
+    return buf;
+}
+
+/* Replace every occurrence of find in the string s by replace, in place; returns s. */
+char *strrep(char *s, char find, char replace)
+{
+    char * cur;
+
+    for (cur = s; *cur != '\0'; cur++)
+    {
+	if (*cur == find)
+	    *cur = replace;
+    }
+    return s;
+}
+
+/* Return the path of the cvsps directory, "$HOME/" CVSPS_PREFIX, creating it
+ * (mode 0777) when stat() fails on it.  The path lives in a static buffer and
+ * is computed only once.  Exits if HOME is unset, the path does not fit or
+ * mkdir() fails; an existing non-directory is only reported.
+ */
+char *get_cvsps_dir()
+{
+    static char prefix[PATH_MAX];
+    struct stat sbuf;
+    const char * home;
+
+    if (prefix[0] != '\0')	/* computed by an earlier call */
+	return prefix;
+
+    home = getenv("HOME");
+    if (home == NULL)
+    {
+	debug(DEBUG_APPERROR, "HOME environment variable not set");
+	exit(1);
+    }
+    if (snprintf(prefix, PATH_MAX, "%s/%s", home, CVSPS_PREFIX) >= PATH_MAX)
+    {
+	debug(DEBUG_APPERROR, "prefix buffer overflow");
+	exit(1);
+    }
+
+    /* Make sure the prefix directory exists */
+    if (stat(prefix, &sbuf) < 0)
+    {
+	if (mkdir(prefix, 0777) < 0)
+	{
+	    debug(DEBUG_SYSERROR, "Cannot create the cvsps directory '%s'", CVSPS_PREFIX);
+	    exit(1);
+	}
+    }
+    else if (!(S_ISDIR(sbuf.st_mode)))
+	debug(DEBUG_APPERROR, "cvsps directory '%s' is not a directory!", CVSPS_PREFIX);
+
+    return prefix;
+}
+
+/* strdup() that never returns NULL: asserts str is non-NULL, exits(1) if the copy fails. */
+char *xstrdup(char const *str)
+{
+    char *copy;
+
+    assert(str);
+    if ((copy = strdup(str)) == NULL)
+    {
+	debug(DEBUG_ERROR, "strdup failed");
+	exit(1);
+    }
+    return copy;
+}
+
+/* Copy src into dst (capacity n bytes), always NUL-terminating; no-op if n <= 0. */
+void strzncpy(char * dst, const char * src, int n)
+{
+    if (n > 0)
+	strncpy(dst, src, n - 1), dst[n - 1] = 0;
+}
+
+/* Intern str in string_tree and return the shared copy (NULL for NULL).
+ * Exits, like xstrdup, when memory for a new entry cannot be allocated. */
+char *get_string(char const *str)
+{
+    char ** res;
+
+    if (!str)
+	return NULL;
+
+    res = (char **)tfind(str, &string_tree, (compare_func)strcmp);
+    if (!res && !(res = (char **)tsearch(xstrdup(str), &string_tree, (compare_func)strcmp)))
+    {
+	debug(DEBUG_ERROR, "tsearch failed");	/* tsearch returns NULL when out of memory */
+	exit(1);
+    }
+    return *res;
+}
+
+/* atoi() of the regex submatch *p of str; at most sizeof(buff)-1 chars are used */
+static int get_int_substr(const char * str, const regmatch_t * p)
+{
+    char buff[256] = "";
+    size_t len = (size_t)(p->rm_eo - p->rm_so);
+    return atoi(memcpy(buff, str + p->rm_so, len < sizeof(buff) ? len : sizeof(buff) - 1));
+}
+
+/* mktime() with *tm interpreted as UTC, by temporarily switching TZ.  The old
+ * TZ is copied first: setenv() may invalidate the string getenv() returned.
+ * (If that copy can't be allocated, TZ ends up unset.) */
+static time_t mktime_utc(struct tm * tm)
+{
+    const char * cur_tz = getenv("TZ");
+    char * saved_tz = cur_tz ? strdup(cur_tz) : NULL;
+    time_t ret;
+
+    setenv("TZ", "UTC", 1);
+    tzset();
+    ret = mktime(tm);
+    if (saved_tz)
+	setenv("TZ", saved_tz, 1);
+    else 
+	unsetenv("TZ");
+    free(saved_tz);
+    tzset();
+    return ret;
+}
+
+/*
+ * Convert the date string dte to a time_t stored in *t.
+ *
+ * If dte contains a "YYYY-MM-DD hh:mm:ss" stamp ('-' or '/' between the
+ * date fields), the stamp is interpreted as UTC.  Anything else is handed
+ * to atoi() and the resulting number is stored as-is.
+ *
+ * The regex is compiled on the first call; failing to compile it is fatal.
+ */
+void convert_date(time_t * t, const char * dte)
+{
+    static regex_t date_re;
+    static int init_re;
+
+#define MAX_MATCH 16
+    regmatch_t match[MAX_MATCH];
+    struct tm tm = {0};
+
+    if (!init_re) 
+    {
+	if (regcomp(&date_re, "([0-9]{4})[-/]([0-9]{2})[-/]([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})", REG_EXTENDED)) 
+	{
+	    fprintf(stderr, "FATAL: date regex compilation error\n");
+	    exit(1);
+	}
+	init_re = 1;
+    }
+
+    if (regexec(&date_re, dte, MAX_MATCH, match, 0) != 0)
+    {
+	*t = atoi(dte);
+	return;
+    }
+
+    /* match[0] is the whole stamp; the six fields are match[1]..match[6] */
+    tm.tm_year = get_int_substr(dte, &match[1]) - 1900;
+    tm.tm_mon  = get_int_substr(dte, &match[2]) - 1;
+    tm.tm_mday = get_int_substr(dte, &match[3]);
+    tm.tm_hour = get_int_substr(dte, &match[4]);
+    tm.tm_min  = get_int_substr(dte, &match[5]);
+    tm.tm_sec  = get_int_substr(dte, &match[6]);
+
+    *t = mktime_utc(&tm);
+}
+
+static struct timeval start_time;	/* stamp taken by timing_start() */
+
+void timing_start()
+{
+    gettimeofday(&start_time, NULL);	/* begin the timed section */
+}
+
+/* Print "Elapsed time for <msg>: <sec>.<usec>" for the time since timing_start(). */
+void timing_stop(const char * msg)
+{
+    struct timeval now;
+    int borrow;
+
+    gettimeofday(&now, NULL);
+    borrow = now.tv_usec < start_time.tv_usec;	/* the usec difference would be negative */
+    printf("Elapsed time for %s: %d.%06d\n", msg, (int)(now.tv_sec - start_time.tv_sec - borrow),
+	   (int)(now.tv_usec - start_time.tv_usec + (borrow ? 1000000 : 0)));
+}
+
+extern char ** environ;
+
+/*
+ * Run command through "/bin/sh -c" and wait for it to finish (modelled on the
+ * system(3) manual page).  Returns 1 if command is NULL, -1 if fork() or
+ * waitpid() fails, otherwise the wait status reported for the shell.
+ */
+int my_system (const char *command) 
+{
+    pid_t pid;
+    int status;
+
+    if (command == 0)
+	return 1;
+    pid = fork();
+    if (pid == -1)
+	return -1;
+    if (pid == 0) {
+	char *argv[4] = { "sh", "-c", (char*)command /* discard const */, 0 };
+	execve("/bin/sh", argv, environ);
+	/* exec failed: _exit() so the child skips atexit handlers and stdio flushing */
+	_exit(127);
+    }
+    while (waitpid(pid, &status, 0) == -1) {
+	if (errno != EINTR)	/* only retry when interrupted by a signal */
+	    return -1;
+    }
+    return status;
+}
+
+/* Copy src into dst (capacity len), putting a backslash before each character
+ * found in naughty_chars; dst is NUL-terminated whenever len > 0.  Returns 0
+ * if all of src was consumed, -1 if the output had to be cut short. */
+int escape_filename(char * dst, int len, const char * src)
+{
+    static char * naughty_chars = " \\\"'@<>=;|&()#$`?*[!:{";
+
+    if (len <= 0)
+	return (*src == 0) ? 0 : -1;
+
+    for (; len > 1 && *src; len--)
+    {
+	if (strchr(naughty_chars, *src))
+	{
+	    /* need room for the backslash, the character itself and the NUL */
+	    if (len == 2)
+		break;
+	    *dst++ = '\\';
+	    len--;
+	}
+	*dst++ = *src++;
+    }
+    *dst = 0;
+    return (*src == 0) ? 0 : -1;
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..4d85cc8
--- /dev/null
+++ b/util.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2001, 2002, 2003 David Mansfield and Cobite, Inc.
+ * See COPYING file for license information 
+ */
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#define CVSPS_PREFIX ".cvsps"
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+char *xstrdup(char const *); /* strdup() that exits instead of returning NULL */
+void strzncpy(char * dst, const char * src, int n); /* strncpy() that always puts a NUL in dst[n-1] */
+char *readfile(char const *filename, char *buf, size_t size); /* first line of the file, newline stripped; NULL on failure */
+char *strrep(char *s, char find, char replace); /* replaces find by replace in place, returns s */
+char *get_cvsps_dir(); /* "$HOME/" CVSPS_PREFIX, created if missing; returns a static buffer */
+char *get_string(char const *str); /* shared (interned) copy of str; NULL for NULL */
+void convert_date(time_t *, const char *); /* "YYYY-MM-DD hh:mm:ss" is taken as UTC, anything else goes to atoi() */
+void timing_start(); /* records the start time */
+void timing_stop(const char *); /* prints the time elapsed since timing_start() */
+int my_system(const char *); /* runs the command via /bin/sh -c, returns its wait status */
+int escape_filename(char *, int, const char *); /* backslash-escapes shell-special characters; -1 if truncated */
+
+#endif /* UTIL_H */