From 10ff0e06801af15050848c701f606ac5de3ebc06 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 1 Nov 2008 17:35:50 +0000 Subject: Moved from private repository. --- COPYING | 340 ++++ Makefile.in | 75 + aapl/COPYING | 502 ++++++ aapl/README | 6 + aapl/astring.h | 808 +++++++++ aapl/avlbasic.h | 65 + aapl/avlcommon.h | 1630 ++++++++++++++++++ aapl/avlibasic.h | 67 + aapl/avlikeyless.h | 64 + aapl/avlimap.h | 77 + aapl/avlimel.h | 79 + aapl/avlimelkey.h | 76 + aapl/avliset.h | 75 + aapl/avlitree.h | 78 + aapl/avlkeyless.h | 58 + aapl/avlmap.h | 74 + aapl/avlmel.h | 74 + aapl/avlmelkey.h | 71 + aapl/avlset.h | 70 + aapl/avltree.h | 73 + aapl/bstcommon.h | 814 +++++++++ aapl/bstmap.h | 113 ++ aapl/bstset.h | 86 + aapl/bsttable.h | 84 + aapl/bubblesort.h | 94 ++ aapl/compare.h | 273 +++ aapl/dlcommon.h | 790 +++++++++ aapl/dlist.h | 64 + aapl/dlistmel.h | 71 + aapl/dlistval.h | 71 + aapl/insertsort.h | 94 ++ aapl/mergesort.h | 140 ++ aapl/quicksort.h | 185 ++ aapl/resize.h | 344 ++++ aapl/sbstmap.h | 121 ++ aapl/sbstset.h | 94 ++ aapl/sbsttable.h | 93 + aapl/svector.h | 1350 +++++++++++++++ aapl/table.h | 252 +++ aapl/vector.h | 1189 +++++++++++++ colm.vim | 89 + colm/Makefile.in | 123 ++ colm/bytecode.cpp | 4092 ++++++++++++++++++++++++++++++++++++++++++++ colm/bytecode.h | 615 +++++++ colm/closure.cpp | 450 +++++ colm/colm.h | 92 + colm/compile.cpp | 2492 +++++++++++++++++++++++++++ colm/dotgen.cpp | 369 ++++ colm/dotgen.h | 51 + colm/fsmap.cpp | 854 ++++++++++ colm/fsmattach.cpp | 425 +++++ colm/fsmbase.cpp | 602 +++++++ colm/fsmcodegen.cpp | 1089 ++++++++++++ colm/fsmcodegen.h | 214 +++ colm/fsmexec.cpp | 209 +++ colm/fsmgraph.cpp | 1399 +++++++++++++++ colm/fsmgraph.h | 1392 +++++++++++++++ colm/fsmmin.cpp | 732 ++++++++ colm/fsmrun.cpp | 890 ++++++++++ colm/fsmrun.h | 132 ++ colm/fsmstate.cpp | 467 +++++ colm/input.cpp | 144 ++ colm/input.h | 145 ++ colm/list.cpp | 105 ++ colm/lmparse.kh | 106 ++ colm/lmparse.kl | 2013 ++++++++++++++++++++++ colm/lmscan.h 
| 131 ++ colm/lmscan.rl | 545 ++++++ colm/main.cpp | 357 ++++ colm/map.cpp | 806 +++++++++ colm/parsedata.cpp | 1830 ++++++++++++++++++++ colm/parsedata.h | 904 ++++++++++ colm/parsetree.cpp | 2172 ++++++++++++++++++++++++ colm/parsetree.h | 1605 ++++++++++++++++++ colm/pdabuild.cpp | 1698 +++++++++++++++++++ colm/pdacodegen.cpp | 536 ++++++ colm/pdacodegen.h | 105 ++ colm/pdagraph.cpp | 518 ++++++ colm/pdagraph.h | 504 ++++++ colm/pdarun.cpp | 646 +++++++ colm/pdarun.h | 511 ++++++ colm/redbuild.cpp | 645 +++++++ colm/redbuild.h | 161 ++ colm/redfsm.cpp | 1112 ++++++++++++ colm/redfsm.h | 531 ++++++ colm/string.cpp | 177 ++ colm/tree.cpp | 160 ++ common/Makefile.in | 67 + common/buffer.h | 55 + common/common.cpp | 257 +++ common/common.h | 283 ++++ common/config.h.in | 50 + common/pcheck.h | 48 + configure | 4222 ++++++++++++++++++++++++++++++++++++++++++++++ configure.in | 79 + test/Makefile | 46 + test/backtrack1.lm | 24 + test/backtrack2.lm | 24 + test/backtrack3.lm | 27 + test/btscan.in | 2 + test/btscan.lm | 34 + test/constructex.in | 3 + test/constructex.lm | 37 + test/counting1.in | 1 + test/counting1.lm | 91 + test/counting2.lm | 82 + test/counting3.lm | 92 + test/counting4.lm | 89 + test/cxx/Makefile | 34 + test/cxx/cxx.lm | 2163 ++++++++++++++++++++++++ test/cxx/input01.cpp | 17 + test/cxx/input02.cpp | 16 + test/cxx/input03.cpp | 19 + test/cxx/input04.cpp | 17 + test/cxx/input05.cpp | 8 + test/cxx/input06.cpp | 7 + test/cxx/input07.cpp | 18 + test/cxx/input08.cpp | 13 + test/cxx/input09.cpp | 7 + test/cxx/input10.cpp | 11 + test/cxx/input11.cpp | 2 + test/cxx/input12.cpp | 8 + test/cxx/input13.cpp | 14 + test/cxx/preproc | 4 + test/diff/Makefile | 34 + test/diff/diff.lm | 84 + test/diff/input1.diff | 86 + test/dns/Makefile | 20 + test/dns/dns.lm | 488 ++++++ test/dns/dumpdns | 11 + test/dns/extract.c | 48 + test/heredoc.in | 3 + test/heredoc.lm | 45 + test/html/Makefile | 34 + test/html/html-lextag.lm | 324 ++++ test/html/html.lm | 307 ++++ 
test/html/input01.html | 8 + test/http/Makefile | 34 + test/http/http.lm | 68 + test/http/input1 | 2 + test/http/input2 | 13 + test/http/input3 | 8 + test/http/xinetd.conf | 10 + test/island.in | 19 + test/island.lm | 57 + test/liftattrs.in | 3 + test/liftattrs.lm | 74 + test/mailbox.in | 29 + test/mailbox.lm | 44 + test/matchex.in | 3 + test/matchex.lm | 34 + test/maxlen.lm | 44 + test/nestedcomm.in | 1 + test/nestedcomm.lm | 41 + test/python/Makefile | 18 + test/python/input1.py | 18 + test/python/input2.py | 20 + test/python/input3.py | 1 + test/python/input4.py | 10 + test/python/python.lm | 726 ++++++++ test/ragelambig.in | 1 + test/ragelambig1.lm | 65 + test/ragelambig2.lm | 65 + test/ragelambig3.lm | 64 + test/ragelambig4.lm | 69 + test/rediv.in | 1 + test/rediv.lm | 92 + test/ruby/Makefile | 34 + test/ruby/ruby.lm | 627 +++++++ test/rubyhere.in | 8 + test/rubyhere.lm | 89 + test/string.in | 2 + test/string.lm | 54 + test/superid.in | 1 + test/superid.lm | 59 + test/tags.in | 1 + test/tags.lm | 82 + test/til.in | 14 + test/til.lm | 124 ++ test/travs1.in | 1 + test/travs1.lm | 144 ++ test/travs2.in | 1 + test/travs2.lm | 93 + test/xml/Makefile | 34 + test/xml/xml.in | 3962 +++++++++++++++++++++++++++++++++++++++++++ test/xml/xml.lm | 167 ++ version.mk | 2 + 187 files changed, 62225 insertions(+) create mode 100644 COPYING create mode 100644 Makefile.in create mode 100644 aapl/COPYING create mode 100644 aapl/README create mode 100644 aapl/astring.h create mode 100644 aapl/avlbasic.h create mode 100644 aapl/avlcommon.h create mode 100644 aapl/avlibasic.h create mode 100644 aapl/avlikeyless.h create mode 100644 aapl/avlimap.h create mode 100644 aapl/avlimel.h create mode 100644 aapl/avlimelkey.h create mode 100644 aapl/avliset.h create mode 100644 aapl/avlitree.h create mode 100644 aapl/avlkeyless.h create mode 100644 aapl/avlmap.h create mode 100644 aapl/avlmel.h create mode 100644 aapl/avlmelkey.h create mode 100644 aapl/avlset.h create mode 100644 
aapl/avltree.h create mode 100644 aapl/bstcommon.h create mode 100644 aapl/bstmap.h create mode 100644 aapl/bstset.h create mode 100644 aapl/bsttable.h create mode 100644 aapl/bubblesort.h create mode 100644 aapl/compare.h create mode 100644 aapl/dlcommon.h create mode 100644 aapl/dlist.h create mode 100644 aapl/dlistmel.h create mode 100644 aapl/dlistval.h create mode 100644 aapl/insertsort.h create mode 100644 aapl/mergesort.h create mode 100644 aapl/quicksort.h create mode 100644 aapl/resize.h create mode 100644 aapl/sbstmap.h create mode 100644 aapl/sbstset.h create mode 100644 aapl/sbsttable.h create mode 100644 aapl/svector.h create mode 100644 aapl/table.h create mode 100644 aapl/vector.h create mode 100644 colm.vim create mode 100644 colm/Makefile.in create mode 100644 colm/bytecode.cpp create mode 100644 colm/bytecode.h create mode 100644 colm/closure.cpp create mode 100644 colm/colm.h create mode 100644 colm/compile.cpp create mode 100644 colm/dotgen.cpp create mode 100644 colm/dotgen.h create mode 100644 colm/fsmap.cpp create mode 100644 colm/fsmattach.cpp create mode 100644 colm/fsmbase.cpp create mode 100644 colm/fsmcodegen.cpp create mode 100644 colm/fsmcodegen.h create mode 100644 colm/fsmexec.cpp create mode 100644 colm/fsmgraph.cpp create mode 100644 colm/fsmgraph.h create mode 100644 colm/fsmmin.cpp create mode 100644 colm/fsmrun.cpp create mode 100644 colm/fsmrun.h create mode 100644 colm/fsmstate.cpp create mode 100644 colm/input.cpp create mode 100644 colm/input.h create mode 100644 colm/list.cpp create mode 100644 colm/lmparse.kh create mode 100644 colm/lmparse.kl create mode 100644 colm/lmscan.h create mode 100644 colm/lmscan.rl create mode 100644 colm/main.cpp create mode 100644 colm/map.cpp create mode 100644 colm/parsedata.cpp create mode 100644 colm/parsedata.h create mode 100644 colm/parsetree.cpp create mode 100644 colm/parsetree.h create mode 100644 colm/pdabuild.cpp create mode 100644 colm/pdacodegen.cpp create mode 100644 
colm/pdacodegen.h create mode 100644 colm/pdagraph.cpp create mode 100644 colm/pdagraph.h create mode 100644 colm/pdarun.cpp create mode 100644 colm/pdarun.h create mode 100644 colm/redbuild.cpp create mode 100644 colm/redbuild.h create mode 100644 colm/redfsm.cpp create mode 100644 colm/redfsm.h create mode 100644 colm/string.cpp create mode 100644 colm/tree.cpp create mode 100644 common/Makefile.in create mode 100644 common/buffer.h create mode 100644 common/common.cpp create mode 100644 common/common.h create mode 100644 common/config.h.in create mode 100644 common/pcheck.h create mode 100755 configure create mode 100644 configure.in create mode 100644 test/Makefile create mode 100644 test/backtrack1.lm create mode 100644 test/backtrack2.lm create mode 100644 test/backtrack3.lm create mode 100644 test/btscan.in create mode 100644 test/btscan.lm create mode 100644 test/constructex.in create mode 100644 test/constructex.lm create mode 100644 test/counting1.in create mode 100644 test/counting1.lm create mode 100644 test/counting2.lm create mode 100644 test/counting3.lm create mode 100644 test/counting4.lm create mode 100644 test/cxx/Makefile create mode 100644 test/cxx/cxx.lm create mode 100644 test/cxx/input01.cpp create mode 100644 test/cxx/input02.cpp create mode 100644 test/cxx/input03.cpp create mode 100644 test/cxx/input04.cpp create mode 100644 test/cxx/input05.cpp create mode 100644 test/cxx/input06.cpp create mode 100644 test/cxx/input07.cpp create mode 100644 test/cxx/input08.cpp create mode 100644 test/cxx/input09.cpp create mode 100644 test/cxx/input10.cpp create mode 100644 test/cxx/input11.cpp create mode 100644 test/cxx/input12.cpp create mode 100644 test/cxx/input13.cpp create mode 100755 test/cxx/preproc create mode 100644 test/diff/Makefile create mode 100644 test/diff/diff.lm create mode 100644 test/diff/input1.diff create mode 100644 test/dns/Makefile create mode 100644 test/dns/dns.lm create mode 100644 test/dns/dumpdns create mode 100644 
test/dns/extract.c create mode 100644 test/heredoc.in create mode 100644 test/heredoc.lm create mode 100644 test/html/Makefile create mode 100644 test/html/html-lextag.lm create mode 100644 test/html/html.lm create mode 100644 test/html/input01.html create mode 100644 test/http/Makefile create mode 100644 test/http/http.lm create mode 100644 test/http/input1 create mode 100644 test/http/input2 create mode 100644 test/http/input3 create mode 100644 test/http/xinetd.conf create mode 100644 test/island.in create mode 100644 test/island.lm create mode 100644 test/liftattrs.in create mode 100644 test/liftattrs.lm create mode 100644 test/mailbox.in create mode 100644 test/mailbox.lm create mode 100644 test/matchex.in create mode 100644 test/matchex.lm create mode 100644 test/maxlen.lm create mode 100644 test/nestedcomm.in create mode 100644 test/nestedcomm.lm create mode 100644 test/python/Makefile create mode 100644 test/python/input1.py create mode 100644 test/python/input2.py create mode 100644 test/python/input3.py create mode 100644 test/python/input4.py create mode 100644 test/python/python.lm create mode 100644 test/ragelambig.in create mode 100644 test/ragelambig1.lm create mode 100644 test/ragelambig2.lm create mode 100644 test/ragelambig3.lm create mode 100644 test/ragelambig4.lm create mode 100644 test/rediv.in create mode 100644 test/rediv.lm create mode 100644 test/ruby/Makefile create mode 100644 test/ruby/ruby.lm create mode 100644 test/rubyhere.in create mode 100644 test/rubyhere.lm create mode 100644 test/string.in create mode 100644 test/string.lm create mode 100644 test/superid.in create mode 100644 test/superid.lm create mode 100644 test/tags.in create mode 100644 test/tags.lm create mode 100644 test/til.in create mode 100644 test/til.lm create mode 100644 test/travs1.in create mode 100644 test/travs1.lm create mode 100644 test/travs2.in create mode 100644 test/travs2.lm create mode 100644 test/xml/Makefile create mode 100644 test/xml/xml.in create 
mode 100644 test/xml/xml.lm create mode 100644 version.mk diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..ec0507be --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 00000000..26e4b879 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,75 @@ +# +# Copyright 2001-2007 Adrian Thurston +# + +# This file is part of Colm. +# +# Colm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Colm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Colm; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +BUILD_SUBDIRS = common colm +ALL_SUBDIRS = $(BUILD_SUBDIRS) test + +#************************************* + +# Programs +CXX = @CXX@ + +# Get the version info. +include version.mk + +# build targets +all: $(BUILD_SUBDIRS) + +.PHONY: $(BUILD_SUBDIRS) + +$(BUILD_SUBDIRS): + @cd $@ && $(MAKE) + +# clean targets. 
+ +CLEAN_SUBDIRS = $(ALL_SUBDIRS:%=%-clean) + +.PHONY: $(CLEAN_SUBDIRS) + +$(CLEAN_SUBDIRS): + @cd $(@:%-clean=%) && $(MAKE) clean + +clean: $(CLEAN_SUBDIRS) + rm -f tags + +# distclean targets + +DISTCLEAN_SUBDIRS = $(ALL_SUBDIRS:%=%-distclean) + +.PHONY: $(DISTCLEAN_SUBDIRS) + +$(DISTCLEAN_SUBDIRS): + @cd $(@:%-distclean=%) && $(MAKE) clean + +distclean: $(DISTCLEAN_SUBDIRS) + rm -f Makefile config.cache config.status config.log + +#install targets + +INSTALL_SUBDIRS = $(BUILD_SUBDIRS:%=%-install) + +.PHONY: $(INSTALL_SUBDIRS) + +$(INSTALL_SUBDIRS): + @cd $(@:%-install=%) && $(MAKE) install + +install: $(INSTALL_SUBDIRS) + diff --git a/aapl/COPYING b/aapl/COPYING new file mode 100644 index 00000000..c6ed510b --- /dev/null +++ b/aapl/COPYING @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. 
+ + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. 
We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/aapl/README b/aapl/README new file mode 100644 index 00000000..a2fa5e65 --- /dev/null +++ b/aapl/README @@ -0,0 +1,6 @@ +This directory contains the Aapl source distribution. For the +documentation, build scripts, test programs, ChangeLog, etc. get the +aapldev package.
+ +AaplDev and other information about Aapl is available from +http://www.elude.ca/aapl/ diff --git a/aapl/astring.h b/aapl/astring.h new file mode 100644 index 00000000..37cc0cc4 --- /dev/null +++ b/aapl/astring.h @@ -0,0 +1,808 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_ASTRING_H +#define _AAPL_ASTRING_H + +#include +#include +#include +#include +#include +#include + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +#ifdef AAPL_DOCUMENTATION + +/** + * \defgroup astring String + * \brief Implicitly shared copy-on-write string. + * + * @{ + */ + +/** + * \class String + * \brief Implicitly shared copy-on-write string. + */ + +/*@}*/ + +class String +{ +public: + /** + * \brief Create a null string. Data points to NULL. + */ + String(); + + /** + * \brief Construct a string from a c-style string. + * + * A new buffer is allocated for the c string. Initially, this string will + * be the only String class referencing the data. + */ + String( const char *s ); + + /** + * \brief Construct a string from a c-style string of specific length. + * + * A new buffer is allocated for the c string. Initially, this string will + * be the only String class referencing the data. 
+ */ + String( const char *s, long len ); + + /** + * \brief Construct a string from another String. + * + * A reference to the buffer allocated for s is taken. A new buffer is + * not allocated. + */ + String( const String &s ); + + /** + * \brief Construct a string using snprintf. + * + * Requires a maximum length for the resulting string. If the formatting + * (not including trailing null) requires more space than maxLen, the + * result will be truncated to maxLen long. Only the length actually + * written will be used by the new string. This string will be the only + * String class referencing the data. + */ + String( long maxLen, const char *format, ... ) + + /** + * \brief Clean up the string. + * + * If the string is not null, the referenced data is detached. If no other + * string references the detached data, it is deleted. + */ + ~String(); + + /** + * \brief Set the string from a c-style string. + * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A new buffer is allocated (or possibly the old buffer + * reused) for the string. Initially, this string will be the only String + * class referencing the data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + String &operator=( const char *s ); + + /** + * \brief Set the string from a c-style string of specific length. + * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A new buffer is allocated (or possibly the old buffer + * reused) for the string. Initially, this string will be the only String + * class referencing the data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + void setAs( const char *s, long len ); + + /** + * \brief Set the string from a single char. + * + * The current buffer is dereferenced and possibly deleted. A new buffer + * is allocated (or possibly the old buffer reused) for the string.
+ * Initially, this string will be the only String class referencing the + * data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + String &operator=( const char c ); + + + /** + * \brief Set the string from another String. + * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A reference to the buffer allocated for s is taken. + * A new buffer is not allocated. + * + * If s is null, then this string becomes a null ptr. + * + * \returns a reference to this. + */ + String &operator=( const String &s ); + + /** + * \brief Append a c string to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and s is appended to the + * end. + * + * If s is null nothing happens. + * + * \returns a reference to this. + */ + String &operator+=( const char *s ); + + /** + * \brief Append a c string of specific length to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and s is appended to the + * end. + * + * If s is null nothing happens. + * + * \returns a reference to this. + */ + void append( const char *s, long len ); + + /** + * \brief Append a single char to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and c is appended to the + * end. + * + * \returns a reference to this. + */ + String &operator+=( const char c ); + + /** + * \brief Append a String to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and the data of s is + * appended to the end. + * + * If s is null nothing happens. + * + * \returns a reference to this.
+ */ + String &operator+=( const String &s ); + + /** + * \brief Cast to a char star. + * + * \returns the string data. A null string returns 0. + */ + operator char*() const; + + /** + * \brief Get a pointer to the data. + * + * \returns the string Data + */ + char *get() const; + + /** + * \brief Get the length of the string + * + * If the string is null, then undefined behaviour results. + * + * \returns the length of the string. + */ + long length() const; + + /** + * \brief Pointer to the data. + * + * Publicly accessible pointer to the data. Immediately in front of the + * string data block is the string header which stores the refcount and + * length. Consequently, care should be taken if modifying this pointer. + */ + char *data; +}; + +/** + * \relates String + * \brief Concatenate a String and a c-style string. + * + * \returns The concatenation of the two strings in a String. + */ +String operator+( const String &s1, const char *s2 ); + +/** + * \relates String + * \brief Concatenate a c-style string and a String. + * + * \returns The concatenation of the two strings in a String. + */ +String operator+( const char *s1, const String &s2 ); + +/** + * \relates String + * \brief Concatenate two String classes. + * + * \returns The concatenation of the two strings in a String. + */ +String operator+( const String &s1, const String &s2 ); + +#endif + +template class StrTmpl +{ +public: + class Fresh {}; + + /* Header located just before string data. Keeps the length and a refcount on + * the data. */ + struct Head + { + long refCount; + long length; + }; + + /** + * \brief Create a null string. + */ + StrTmpl() : data(0) { } + + /* Clean up the string. */ + ~StrTmpl(); + + /* Construct a string from a c-style string. */ + StrTmpl( const char *s ); + + /* Construct a string from a c-style string of specific len. */ + StrTmpl( const char *s, long len ); + + /* Allocate len spaces.
*/ + StrTmpl( const Fresh &, long len ); + + /* Construct a string from another StrTmpl. */ + StrTmpl( const StrTmpl &s ); + + /* Construct a string from with, sprintf. */ + StrTmpl( long lenGuess, const char *format, ... ); + + /* Set the string from a c-style string. */ + StrTmpl &operator=( const char *s ); + + /* Set the string from a c-style string of specific len. */ + void setAs( const char *s, long len ); + + /* Allocate len spaces. */ + void setAs( const Fresh &, long len ); + + void chop( long len ); + + /* Construct a string from with, sprintf. */ + void setAs( long lenGuess, const char *format, ... ); + + /* Set the string from a single char. */ + StrTmpl &operator=( const char c ); + + /* Set the string from another StrTmpl. */ + StrTmpl &operator=( const StrTmpl &s ); + + /* Append a c string to the end of this string. */ + StrTmpl &operator+=( const char *s ); + + /* Append a c string to the end of this string of specifi len. */ + void append( const char *s, long len ); + + /* Append a single char to the end of this string. */ + StrTmpl &operator+=( const char c ); + + /* Append an StrTmpl to the end of this string. */ + StrTmpl &operator+=( const StrTmpl &s ); + + /* Cast to a char star. */ + operator char*() const { return data; } + + /* Get a pointer to the data. */ + char *get() const { return data; } + + /* Return the length of the string. Must check for null data pointer. */ + long length() const { return data ? (((Head*)data)-1)->length : 0; } + + /** + * \brief Pointer to the data. + */ + char *data; + +protected: + /* Make space for a string of length len to be appended. 
*/ + char *appendSpace( long len ); + void initSpace( long length ); + void setSpace( long length ); + + template friend StrTmpl operator+( + const StrTmpl &s1, const char *s2 ); + template friend StrTmpl operator+( + const char *s1, const StrTmpl &s2 ); + template friend StrTmpl operator+( + const StrTmpl &s1, const StrTmpl &s2 ); + +private: + /* A dummy struct solely to make a constructor that will never be + * ambiguous with the public constructors. */ + struct DisAmbig { }; + StrTmpl( char *data, const DisAmbig & ) : data(data) { } +}; + +/* Free all mem used by the string. */ +template StrTmpl::~StrTmpl() +{ + if ( data != 0 ) { + /* If we are the only ones referencing the string, then delete it. */ + Head *head = ((Head*) data) - 1; + head->refCount -= 1; + if ( head->refCount == 0 ) + free( head ); + } +} + +/* Create from a c-style string. */ +template StrTmpl::StrTmpl( const char *s ) +{ + if ( s == 0 ) + data = 0; + else { + /* Find the length and allocate the space for the shared string. */ + long length = strlen( s ); + + /* Init space for the data. */ + initSpace( length ); + + /* Copy in the data. */ + memcpy( data, s, length+1 ); + } +} + +/* Create from a c-style string. */ +template StrTmpl::StrTmpl( const char *s, long length ) +{ + if ( s == 0 ) + data = 0; + else { + /* Init space for the data. */ + initSpace( length ); + + /* Copy in the data. */ + memcpy( data, s, length ); + data[length] = 0; + } +} + +/* Create from a c-style string. */ +template StrTmpl::StrTmpl( const Fresh &, long length ) +{ + /* Init space for the data. */ + initSpace( length ); + data[length] = 0; +} + +/* Create from another string class. */ +template StrTmpl::StrTmpl( const StrTmpl &s ) +{ + if ( s.data == 0 ) + data = 0; + else { + /* Take a reference to the string. */ + Head *strHead = ((Head*)s.data) - 1; + strHead->refCount += 1; + data = (char*) (strHead+1); + } +} + +/* Construct a string from with, sprintf. 
*/
+/* lenGuess sizes the first formatting attempt. When the formatted text is
+ * longer, the buffer is resized to the exact length and formatted again. */
+template StrTmpl::StrTmpl( long lenGuess, const char *format, ... )
+{
+	/* Start with room for lenGuess chars plus the terminator. */
+	initSpace( lenGuess );
+
+	va_list args;
+
+	/* First attempt: format straight into the guessed space. */
+	va_start( args, format );
+	long written = vsnprintf( data, lenGuess+1, format, args );
+	va_end( args );
+
+	if ( written > lenGuess ) {
+		/* The guess was too small. A va_list that has been handed to
+		 * vsnprintf must not be used again, so the argument list is
+		 * restarted before the second pass. */
+		setSpace( written );
+		va_start( args, format );
+		written = vsnprintf( data, written+1, format, args );
+		va_end( args );
+	}
+
+	/* vsnprintf signals an output error with a negative value. Fall back to
+	 * the empty string instead of chopping at a negative offset. */
+	if ( written < 0 )
+		written = 0;
+
+	/* Record the real length, which may be shorter than the space made. */
+	chop( written );
+}
+
+/* Set the string from a printf-style format and arguments. The old string is
+ * discarded. lenGuess sizes the first attempt; a longer result causes one
+ * resize to the exact length followed by a second formatting pass. */
+template void StrTmpl::setAs( long lenGuess, const char *format, ... )
+{
+	/* Detach from the old contents and get room for lenGuess chars. */
+	setSpace( lenGuess );
+
+	va_list args;
+
+	/* First attempt: format straight into the guessed space. */
+	va_start( args, format );
+	long written = vsnprintf( data, lenGuess+1, format, args );
+	va_end( args );
+
+	if ( written > lenGuess ) {
+		/* Too small. Restart the argument list before formatting again, a
+		 * consumed va_list must not be reused. */
+		setSpace( written );
+		va_start( args, format );
+		written = vsnprintf( data, written+1, format, args );
+		va_end( args );
+	}
+
+	/* A negative result is an output error, leave the string empty. */
+	if ( written < 0 )
+		written = 0;
+
+	/* Record the real length, which may be shorter than the space made. */
+	chop( written );
+}
+
+/* Allocate a new unshared buffer with room for length chars plus the
+ * terminator and point data at it. Whatever data referenced before is not
+ * released here. Throws std::bad_alloc when the allocation fails. */
+template void StrTmpl::initSpace( long length )
+{
+	/* One allocation holds the header followed by the characters. */
+	Head *head = (Head*) malloc( sizeof(Head) + length+1 );
+	if ( head == 0 )
+		throw std::bad_alloc();
+
+	/* Init the header. */
+	head->refCount = 1;
+	head->length = length;
+
+	/* Save the pointer to the data. */
+	data = (char*) (head+1);
+}
+
+
+/* Set this string to be the c string exactly. The old string is discarded.
+ * Returns a reference to this. */
+template StrTmpl &StrTmpl::operator=( const char *s )
+{
+	if ( s == 0 ) {
+		/* Just free the data, we are being set to null. */
+		if ( data != 0 ) {
+			Head *head = ((Head*)data) - 1;
+			head->refCount -= 1;
+			if ( head->refCount == 0 )
+				free(head);
+			data = 0;
+		}
+	}
+	else {
+		/* Find the length of the string we are setting. */
+		long length = strlen( s );
+
+		/* Make room for it. This happens before the copy, so s is read
+		 * only after the old buffer has been given up. */
+		setSpace( length );
+
+		/* Copy in the data, including the terminator. */
+		memcpy( data, s, length+1 );
+	}
+	return *this;
+}
+
+/* Set this string to be the c string exactly. The old string is discarded.
+ * A null s makes this string null. Otherwise exactly length bytes are copied
+ * from s and a terminator is added. */
+template void StrTmpl::setAs( const char *s, long length )
+{
+	if ( s == 0 ) {
+		/* Just free the data, we are being set to null. */
+		if ( data != 0 ) {
+			Head *head = ((Head*)data) - 1;
+			head->refCount -= 1;
+			if ( head->refCount == 0 )
+				free(head);
+			data = 0;
+		}
+	}
+	else {
+		/* Set the string for len. */
+		setSpace( length );
+
+		/* Copy in the data. The source need not be terminated. */
+		memcpy( data, s, length );
+		data[length] = 0;
+	}
+}
+
+/* Shorten the string to length chars and re-terminate it. The buffer must be
+ * unshared and at least length long; both conditions are asserted. */
+template void StrTmpl::chop( long length )
+{
+	Head *head = ((Head*)data) - 1;
+	assert( head->refCount == 1 );
+	assert( length <= head->length );
+	head->length = length;
+	data[length] = 0;
+}
+
+/* Discard the old string and make room for length chars. Only the terminator
+ * is written, the chars themselves are left for the caller to fill in. */
+template void StrTmpl::setAs( const Fresh &, long length )
+{
+	setSpace( length );
+	data[length] = 0;
+}
+
+/* Set this string to be the single char exactly. The old string is discarded.
+ * Returns a reference to this. */
+template StrTmpl &StrTmpl::operator=( const char c )
+{
+	/* Set to length 1. */
+	setSpace( 1 );
+
+	/* Copy in the data. */
+	data[0] = c;
+	data[1] = 0;
+
+	/* Return ourselves. */
+	return *this;
+}
+
+/* Set this string to be the StrTmpl s exactly. The old string is
+ * discarded. No chars are copied, the buffer of s is shared. Returns a
+ * reference to this. */
+template StrTmpl &StrTmpl::operator=( const StrTmpl &s )
+{
+	/* Take the new reference before dropping the old one. When s is this
+	 * very string and it is the sole owner, releasing first would free the
+	 * buffer that is about to be referenced. */
+	char *newData = 0;
+	if ( s.data != 0 ) {
+		Head *strHead = ((Head*)s.data) - 1;
+		strHead->refCount += 1;
+		newData = (char*)(strHead+1);
+	}
+
+	/* Detach from the existing string. */
+	if ( data != 0 ) {
+		Head *head = ((Head*)data) - 1;
+		head->refCount -= 1;
+		if ( head->refCount == 0 )
+			free( head );
+	}
+
+	/* Setting from a null string leaves a null pointer here. */
+	data = newData;
+	return *this;
+}
+
+/* Prepare the string to be set to something else of the given length. On
+ * return data points at an unshared buffer with room for length chars plus
+ * the terminator. The contents are unspecified. Throws std::bad_alloc when
+ * memory cannot be obtained, in which case this string is left as it was. */
+template void StrTmpl::setSpace( long length )
+{
+	/* Only look for a header when there is a buffer to look in front of. */
+	Head *old = data != 0 ? ((Head*)data) - 1 : 0;
+	bool soleOwner = old != 0 && old->refCount == 1;
+
+	Head *head;
+	if ( soleOwner ) {
+		/* Reuse the space. */
+		head = (Head*) realloc( old, sizeof(Head) + length+1 );
+	}
+	else {
+		/* Need to make new space, there is no usable old space. */
+		head = (Head*) malloc( sizeof(Head) + length+1 );
+	}
+	if ( head == 0 )
+		throw std::bad_alloc();
+
+	/* The allocation succeeded, so it is now safe to give up our reference
+	 * on a buffer that other strings still use. */
+	if ( old != 0 && !soleOwner )
+		old->refCount -= 1;
+
+	/* Init the header. */
+	head->refCount = 1;
+	head->length = length;
+
+	/* Save the pointer to the data. */
+	data = (char*) (head+1);
+}
+
+
+/* Append a c-style string to the end of this string. A null s is ignored.
+ * Returns a reference to this. */
+template StrTmpl &StrTmpl::operator+=( const char *s )
+{
+	if ( s != 0 ) {
+		/* Get the string length and make space on the end. */
+		long addedLen = strlen( s );
+		char *dest = appendSpace( addedLen );
+
+		/* Copy the data in. Plus one for the null. */
+		memcpy( dest, s, addedLen+1 );
+	}
+	return *this;
+}
+
+/* Append a c-style string of specific length to the end of this string. A
+ * null s is ignored. The source need not be terminated. */
+template void StrTmpl::append( const char *s, long length )
+{
+	if ( s != 0 ) {
+		/* Make space on the end. */
+		char *dest = appendSpace( length );
+
+		/* Copy the data in and terminate. */
+		memcpy( dest, s, length );
+		dest[length] = 0;
+	}
+}
+
+/* Append a single char to the end of this string. Returns a reference to
+ * this. */
+template StrTmpl &StrTmpl::operator+=( const char c )
+{
+	/* Grow on the end. */
+	char *dst = appendSpace( 1 );
+
+	/* Append a single character. */
+	dst[0] = c;
+	dst[1] = 0;
+	return *this;
+}
+
+
+/* Append an StrTmpl string to the end of this string. A null s is ignored.
+ * Appending a string to itself is supported. Returns a reference to this. */
+template StrTmpl &StrTmpl::operator+=( const StrTmpl &s )
+{
+	if ( s.data != 0 ) {
+		/* Read the length first. When s is this string, growing the
+		 * buffer below changes the stored length. */
+		long addedLen = (((Head*)s.data) - 1)->length;
+
+		/* Make space on the end to put the string. */
+		char *dest = appendSpace( addedLen );
+
+		/* Copy only the chars and terminate separately. In a self-append
+		 * the source terminator sits exactly where dest begins, so copying
+		 * addedLen+1 bytes would overlap and lose the terminator. */
+		memcpy( dest, s.data, addedLen );
+		dest[addedLen] = 0;
+	}
+	return *this;
+}
+
+/* Make space for a string of length len to be appended. Returns the position
+ * at which the caller writes the len new chars and the terminator. A null
+ * string is treated as empty. A shared buffer is copied first so that other
+ * strings are not affected. Throws std::bad_alloc when memory cannot be
+ * obtained, in which case this string is left as it was. */
+template char *StrTmpl::appendSpace( long len )
+{
+	/* A null string has no header to read, start a fresh buffer. */
+	if ( data == 0 ) {
+		initSpace( len );
+		return data;
+	}
+
+	/* Find the length of this and the string appended. */
+	Head *head = (((Head*)data) - 1);
+	long thisLen = head->length;
+
+	if ( head->refCount == 1 ) {
+		/* No other string is using the space, grow this space. */
+		head = (Head*) realloc( head,
+				sizeof(Head) + thisLen + len + 1 );
+		if ( head == 0 )
+			throw std::bad_alloc();
+		data = (char*) (head+1);
+
+		/* Adjust the length. */
+		head->length += len;
+	}
+	else {
+		/* Another string is using this space, make new space. Allocate
+		 * before giving up our reference so that a failed allocation
+		 * leaves the shared refcount correct. */
+		Head *newHead = (Head*) malloc(
+				sizeof(Head) + thisLen + len + 1 );
+		if ( newHead == 0 )
+			throw std::bad_alloc();
+		head->refCount -= 1;
+		data = (char*) (newHead+1);
+
+		/* Set the new header and data from this. */
+		newHead->refCount = 1;
+		newHead->length = thisLen + len;
+		memcpy( data, head+1, thisLen );
+	}
+
+	/* Return writing position. */
+	return data + thisLen;
+}
+
+/* Concatenate a String and a c-style string. Neither operand may be null:
+ * the header of s1 is read and strlen is applied to s2 unconditionally. */
+template StrTmpl operator+( const StrTmpl &s1, const char *s2 )
+{
+	/* Find both lengths and alloc the space for the result. */
+	long str1Len = (((typename StrTmpl::Head*)(s1.data)) - 1)->length;
+	long str2Len = strlen( s2 );
+
+	typename StrTmpl::Head *head = (typename StrTmpl::Head*)
+			malloc( sizeof(typename StrTmpl::Head) + str1Len + str2Len + 1 );
+	if ( head == 0 )
+		throw std::bad_alloc();
+
+	/* Set up the header. */
+	head->refCount = 1;
+	head->length = str1Len + str2Len;
+
+	/* Save the pointer to data and copy the data in. The terminator comes
+	 * along with s2. */
+	char *data = (char*) (head+1);
+	memcpy( data, s1.data, str1Len );
+	memcpy( data + str1Len, s2, str2Len + 1 );
+	return StrTmpl( data, typename StrTmpl::DisAmbig() );
+}
+
+/* Concatenate a c-style string and a String. Neither operand may be null:
+ * strlen is applied to s1 and the header of s2 is read unconditionally. */
+template StrTmpl operator+( const char *s1, const StrTmpl &s2 )
+{
+	/* Find both lengths and alloc the space for the result. */
+	long str1Len = strlen( s1 );
+	long str2Len = (((typename StrTmpl::Head*)(s2.data)) - 1)->length;
+
+	typename StrTmpl::Head *head = (typename StrTmpl::Head*)
+			malloc( sizeof(typename StrTmpl::Head) + str1Len + str2Len + 1 );
+	if ( head == 0 )
+		throw std::bad_alloc();
+
+	/* Set up the header. */
+	head->refCount = 1;
+	head->length = str1Len + str2Len;
+
+	/* Save the pointer to data and copy the data in. The terminator comes
+	 * along with s2. */
+	char *data = (char*) (head+1);
+	memcpy( data, s1, str1Len );
+	memcpy( data + str1Len, s2.data, str2Len + 1 );
+	return StrTmpl( data, typename StrTmpl::DisAmbig() );
+}
+
+/* Add two StrTmpl strings. Neither operand may be null: both headers are
+ * read unconditionally. */
+template StrTmpl operator+( const StrTmpl &s1, const StrTmpl &s2 )
+{
+	/* Find both lengths and alloc the space for the result. */
+	long str1Len = (((typename StrTmpl::Head*)(s1.data)) - 1)->length;
+	long str2Len = (((typename StrTmpl::Head*)(s2.data)) - 1)->length;
+	typename StrTmpl::Head *head = (typename StrTmpl::Head*)
+			malloc( sizeof(typename StrTmpl::Head) + str1Len + str2Len + 1 );
+	if ( head == 0 )
+		throw std::bad_alloc();
+
+	/* Set up the header. */
+	head->refCount = 1;
+	head->length = str1Len + str2Len;
+
+	/* Save the pointer to data and copy the data in. The terminator comes
+	 * along with s2. */
+	char *data = (char*) (head+1);
+	memcpy( data, s1.data, str1Len );
+	memcpy( data + str1Len, s2.data, str2Len + 1 );
+	return StrTmpl( data, typename StrTmpl::DisAmbig() );
+}
+
+/* Operator used in case the compiler does not support the conversion.
*/ +template inline std::ostream &operator<<( std::ostream &o, const StrTmpl &s ) +{ + /* Write exactly length() bytes instead of relying on the terminator. + * length() is zero for a null string, so nothing is written then. */ + return o.write( s.data, s.length() ); +} + +typedef StrTmpl String; + + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_ASTRING_H */ diff --git a/aapl/avlbasic.h b/aapl/avlbasic.h new file mode 100644 index 00000000..780ef07a --- /dev/null +++ b/aapl/avlbasic.h @@ -0,0 +1,65 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLBASIC_H +#define _AAPL_AVLBASIC_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlBasic + * \brief AVL Tree in which the entire element structure is the key. + * + * AvlBasic is an AVL tree that does not distinguish between the element that + * it contains and the key. The entire element structure is the key that is + * used to compare the relative ordering of elements. This is similar to the + * BstSet structure. + * + * AvlBasic does not assume ownership of elements in the tree. Items must be + * explicitly de-allocated.
+ */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Compare +#define AVLMEL_TEMPDEF class Element, class Compare +#define AVLMEL_TEMPUSE Element, Compare +#define AvlTree AvlBasic +#define AVL_BASIC + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_BASIC + +#endif /* _AAPL_AVLBASIC_H */ diff --git a/aapl/avlcommon.h b/aapl/avlcommon.h new file mode 100644 index 00000000..fca4ea4f --- /dev/null +++ b/aapl/avlcommon.h @@ -0,0 +1,1630 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndef because it is not intended to + * be included by the user. */ + +#include + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +#ifdef WALKABLE +/* This is used by AvlTree, AvlMel and AvlMelKey so it + * must be protected by global ifdefs. */ +#ifndef __AAPL_AVLI_EL__ +#define __AAPL_AVLI_EL__ + +/** + * \brief Tree element properties for linked AVL trees. + * + * AvliTreeEl needs to be inherited by classes that intend to be elements in an + * AvliTree.
+ */ +template struct AvliTreeEl +{ + /** + * \brief Tree pointers connecting element in a tree. + */ + SubClassEl *left, *right, *parent; + + /** + * \brief Linked list pointers. + */ + SubClassEl *prev, *next; + + /** + * \brief Height of the tree rooted at this element. + * + * Height is required by the AVL balancing algorithm. + */ + long height; +}; +#endif /* __AAPL_AVLI_EL__ */ + +#else /* not WALKABLE */ + +/* This is used by All the non walkable trees so it must be + * protected by a global ifdef. */ +#ifndef __AAPL_AVL_EL__ +#define __AAPL_AVL_EL__ +/** + * \brief Tree element properties for linked AVL trees. + * + * AvlTreeEl needs to be inherited by classes that intend to be element in an + * AvlTree. + */ +template struct AvlTreeEl +{ + /** + * \brief Tree pointers connecting element in a tree. + */ + SubClassEl *left, *right, *parent; + + /** + * \brief Height of the tree rooted at this element. + * + * Height is required by the AVL balancing algorithm. + */ + long height; +}; +#endif /* __AAPL_AVL_EL__ */ +#endif /* def WALKABLE */ + + +#if defined( AVLTREE_MAP ) + +#ifdef WALKABLE + +/** + * \brief Tree element for AvliMap + * + * Stores the key and value pair. + */ +template struct AvliMapEl : + public AvliTreeEl< AvliMapEl > +{ + AvliMapEl(const Key &key) + : key(key) { } + AvliMapEl(const Key &key, const Value &value) + : key(key), value(value) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; + + /** \brief The value. */ + Value value; +}; +#else /* not WALKABLE */ + +/** + * \brief Tree element for AvlMap + * + * Stores the key and value pair. + */ +template struct AvlMapEl : + public AvlTreeEl< AvlMapEl > +{ + AvlMapEl(const Key &key) + : key(key) { } + AvlMapEl(const Key &key, const Value &value) + : key(key), value(value) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; + + /** \brief The value. 
*/ + Value value; +}; +#endif /* def WALKABLE */ + +#elif defined( AVLTREE_SET ) + +#ifdef WALKABLE +/** + * \brief Tree element for AvliSet + * + * Stores the key. + */ +template struct AvliSetEl : + public AvliTreeEl< AvliSetEl > +{ + AvliSetEl(const Key &key) : key(key) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; +}; +#else /* not WALKABLE */ +/** + * \brief Tree element for AvlSet + * + * Stores the key. + */ +template struct AvlSetEl : + public AvlTreeEl< AvlSetEl > +{ + AvlSetEl(const Key &key) : key(key) { } + + const Key &getKey() const { return key; } + + /** \brief The key. */ + Key key; +}; +#endif /* def WALKABLE */ + +#endif /* AVLTREE_SET */ + +/* Common AvlTree Class */ +template < AVLMEL_CLASSDEF > class AvlTree +#if !defined( AVL_KEYLESS ) && defined ( WALKABLE ) + : public Compare, public BASELIST +#elif !defined( AVL_KEYLESS ) + : public Compare +#elif defined( WALKABLE ) + : public BASELIST +#endif +{ +public: + /** + * \brief Create an empty tree. + */ +#ifdef WALKABLE + AvlTree() : root(0), treeSize(0) { } +#else + AvlTree() : root(0), head(0), tail(0), treeSize(0) { } +#endif + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new elements. + */ + AvlTree(const AvlTree &other); + +#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET ) + /** + * \brief Clear the contents of the tree. + * + * All element are deleted. + */ + ~AvlTree() { empty(); } + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new element. If this tree contains items, they are first + * deleted. + * + * \returns A reference to this. + */ + AvlTree &operator=( const AvlTree &tree ); + + /** + * \brief Transfer the elements of another tree into this. + * + * First deletes all elements in this tree. 
+ */ + void transfer( AvlTree &tree ); +#else + /** + * \brief Abandon all elements in the tree. + * + * Tree elements are not deleted. + */ + ~AvlTree() {} + + /** + * \brief Perform a deep copy of the tree. + * + * Each element is duplicated for the new tree. Copy constructors are used + * to create the new element. If this tree contains items, they are + * abandoned. + * + * \returns A reference to this. + */ + AvlTree &operator=( const AvlTree &tree ); + + /** + * \brief Transfer the elements of another tree into this. + * + * All elements in this tree are abandoned first. + */ + void transfer( AvlTree &tree ); +#endif + +#ifndef AVL_KEYLESS + /* Insert a element into the tree. */ + Element *insert( Element *element, Element **lastFound = 0 ); + +#ifdef AVL_BASIC + /* Find a element in the tree. Returns the element if + * element exists, false otherwise. */ + Element *find( const Element *element ) const; + +#else + Element *insert( const Key &key, Element **lastFound = 0 ); + +#ifdef AVLTREE_MAP + Element *insert( const Key &key, const Value &val, + Element **lastFound = 0 ); +#endif + + /* Find a element in the tree. Returns the element if + * key exists, false otherwise. */ + Element *find( const Key &key ) const; + + /* Detach a element from the tree. */ + Element *detach( const Key &key ); + + /* Detach and delete a element from the tree. */ + bool remove( const Key &key ); +#endif /* AVL_BASIC */ +#endif /* AVL_KEYLESS */ + + /* Detach a element from the tree. */ + Element *detach( Element *element ); + + /* Detach and delete a element from the tree. */ + void remove( Element *element ); + + /* Free all memory used by tree. */ + void empty(); + + /* Abandon all element in the tree. Does not delete element. */ + void abandon(); + + /** Root element of the tree. */ + Element *root; + +#ifndef WALKABLE + Element *head, *tail; +#endif + + /** The number of element in the tree. */ + long treeSize; + + /** \brief Return the number of elements in the tree. 
*/ + long length() const { return treeSize; } + + /** \brief Return the number of elements in the tree. */ + long size() const { return treeSize; } + + /* Various classes for setting the iterator */ + struct Iter; + struct IterFirst { IterFirst( const AvlTree &t ) : t(t) { } const AvlTree &t; }; + struct IterLast { IterLast( const AvlTree &t ) : t(t) { } const AvlTree &t; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + +#ifdef WALKABLE + /** + * \brief Avl Tree Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct from an avl tree and iterator-setting classes. */ + Iter( const AvlTree &t ) : ptr(t.head) { } + Iter( const IterFirst &af ) : ptr(af.t.head) { } + Iter( const IterLast &al ) : ptr(al.t.tail) { } + Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)) { } + Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)) { } + + /* Assign from a tree and iterator-setting classes. */ + Iter &operator=( const AvlTree &tree ) { ptr = tree.head; return *this; } + Iter &operator=( const IterFirst &af ) { ptr = af.t.head; return *this; } + Iter &operator=( const IterLast &al ) { ptr = al.t.tail; return *this; } + Iter &operator=( const IterNext &an ) { ptr = findNext(an.i.ptr); return *this; } + Iter &operator=( const IterPrev &ap ) { ptr = findPrev(ap.i.ptr); return *this; } + + /** \brief Less than end? */ + bool lte() const { return ptr != 0; } + + /** \brief At end? */ + bool end() const { return ptr == 0; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != 0; } + + /** \brief At beginning? */ + bool beg() const { return ptr == 0; } + + /** \brief At first element? */ + bool first() const { return ptr && ptr->BASE_EL(prev) == 0; } + + /** \brief At last element? */ + bool last() const { return ptr && ptr->BASE_EL(next) == 0; } + + /** \brief Implicit cast to Element*. 
*/ + operator Element*() const { return ptr; } + + /** \brief Dereference operator returns Element&. */ + Element &operator *() const { return *ptr; } + + /** \brief Arrow operator returns Element*. */ + Element *operator->() const { return ptr; } + + /** \brief Move to next item. */ + inline Element *operator++(); + + /** \brief Move to next item. */ + inline Element *operator++(int); + + /** \brief Move to next item. */ + inline Element *increment(); + + /** \brief Move to previous item. */ + inline Element *operator--(); + + /** \brief Move to previous item. */ + inline Element *operator--(int); + + /** \brief Move to previous item. */ + inline Element *decrement(); + + /** \brief Return the next item. Does not modify this. */ + IterNext next() const { return IterNext( *this ); } + + /** \brief Return the previous item. Does not modify this. */ + IterPrev prev() const { return IterPrev( *this ); } + + private: + static Element *findPrev( Element *element ) { return element->BASE_EL(prev); } + static Element *findNext( Element *element ) { return element->BASE_EL(next); } + + public: + + /** \brief The iterator is simply a pointer. */ + Element *ptr; + }; + +#else + + /** + * \brief Avl Tree Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct. */ + Iter() : ptr(0), tree(0) { } + + /* Construct from a tree and iterator-setting classes. */ + Iter( const AvlTree &t ) : ptr(t.head), tree(&t) { } + Iter( const IterFirst &af ) : ptr(af.t.head), tree(&af.t) { } + Iter( const IterLast &al ) : ptr(al.t.tail), tree(&al.t) { } + Iter( const IterNext &an ) : ptr(findNext(an.i.ptr)), tree(an.i.tree) { } + Iter( const IterPrev &ap ) : ptr(findPrev(ap.i.ptr)), tree(ap.i.tree) { } + + /* Assign from a tree and iterator-setting classes. 
*/ + Iter &operator=( const AvlTree &t ) + { ptr = t.head; tree = &t; return *this; } + Iter &operator=( const IterFirst &af ) + { ptr = af.t.head; tree = &af.t; return *this; } + Iter &operator=( const IterLast &al ) + { ptr = al.t.tail; tree = &al.t; return *this; } + Iter &operator=( const IterNext &an ) + { ptr = findNext(an.i.ptr); tree = an.i.tree; return *this; } + Iter &operator=( const IterPrev &ap ) + { ptr = findPrev(ap.i.ptr); tree = ap.i.tree; return *this; } + + /** \brief Less than end? */ + bool lte() const { return ptr != 0; } + + /** \brief At end? */ + bool end() const { return ptr == 0; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != 0; } + + /** \brief At beginning? */ + bool beg() const { return ptr == 0; } + + /** \brief At first element? */ + bool first() const { return ptr && ptr == tree->head; } + + /** \brief At last element? */ + bool last() const { return ptr && ptr == tree->tail; } + + /** \brief Implicit cast to Element*. */ + operator Element*() const { return ptr; } + + /** \brief Dereference operator returns Element&. */ + Element &operator *() const { return *ptr; } + + /** \brief Arrow operator returns Element*. */ + Element *operator->() const { return ptr; } + + /** \brief Move to next item. */ + inline Element *operator++(); + + /** \brief Move to next item. */ + inline Element *operator++(int); + + /** \brief Move to next item. */ + inline Element *increment(); + + /** \brief Move to previous item. */ + inline Element *operator--(); + + /** \brief Move to previous item. */ + inline Element *operator--(int); + + /** \brief Move to previous item. */ + inline Element *decrement(); + + /** \brief Return the next item. Does not modify this. */ + IterNext next() const { return IterNext( *this ); } + + /** \brief Return the previous item. Does not modify this. 
*/ + IterPrev prev() const { return IterPrev( *this ); } + + private: + static Element *findPrev( Element *element ); + static Element *findNext( Element *element ); + + public: + /** \brief The iterator is simply a pointer. */ + Element *ptr; + + /* The list is not walkable so we need to keep a pointerto the tree + * so we can test against head and tail in O(1) time. */ + const AvlTree *tree; + }; +#endif + + /** \brief Return first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. */ + IterLast last() { return IterLast( *this ); } + +protected: + /* Recursive worker for the copy constructor. */ + Element *copyBranch( Element *element ); + + /* Recursively delete element in the tree. */ + void deleteChildrenOf(Element *n); + + /* rebalance the tree beginning at the leaf whose + * grandparent is unbalanced. */ + Element *rebalance(Element *start); + + /* Move up the tree from a given element, recalculating the heights. */ + void recalcHeights(Element *start); + + /* Move up the tree and find the first element whose + * grand-parent is unbalanced. */ + Element *findFirstUnbalGP(Element *start); + + /* Move up the tree and find the first element which is unbalanced. */ + Element *findFirstUnbalEl(Element *start); + + /* Replace a element in the tree with another element not in the tree. */ + void replaceEl(Element *element, Element *replacement); + + /* Remove a element from the tree and put another (normally a child of element) + * in its place. */ + void removeEl(Element *element, Element *filler); + + /* Once an insertion point is found at a leaf then do the insert. */ + void attachRebal( Element *element, Element *parentEl, Element *lastLess ); +}; + +/* Copy constructor. New up each item. */ +template AvlTree:: + AvlTree(const AvlTree &other) +#if !defined( AVL_KEYLESS ) && defined ( WALKABLE ) + /* BASELIST should be made empty. The copyBranch function + * will fill in the details for us. 
*/ + : Compare( other ), BASELIST() +#elif !defined( AVL_KEYLESS ) + : Compare( other ) +#elif defined( WALKABLE ) + : BASELIST( ) +#endif +{ + treeSize = other.treeSize; + root = other.root; + +#ifndef WALKABLE + head = 0; + tail = 0; +#endif + + /* If there is a root, copy the tree. */ + if ( other.root != 0 ) + root = copyBranch( other.root ); +} + +#if defined( AVLTREE_MAP ) || defined( AVLTREE_SET ) + +/* Assignment does deep copy. */ +template AvlTree &AvlTree:: + operator=( const AvlTree &other ) +{ + /* Clear the tree first. */ + empty(); + + /* Reset the list pointers, the tree copy will fill in the list for us. */ +#ifdef WALKABLE + BASELIST::abandon(); +#else + head = 0; + tail = 0; +#endif + + /* Copy the entire tree. */ + treeSize = other.treeSize; + root = other.root; + if ( other.root != 0 ) + root = copyBranch( other.root ); + return *this; +} + +template void AvlTree:: + transfer(AvlTree &other) +{ + /* Clear the tree first. */ + empty(); + + treeSize = other.treeSize; + root = other.root; + +#ifdef WALKABLE + BASELIST::head = other.BASELIST::head; + BASELIST::tail = other.BASELIST::tail; + BASELIST::listLen = other.BASELIST::listLen; +#else + head = other.head; + tail = other.tail; +#endif + + other.abandon(); +} + +#else /* ! AVLTREE_MAP && ! AVLTREE_SET */ + +/* Assignment does deep copy. This version does not clear the tree first. */ +template AvlTree &AvlTree:: + operator=( const AvlTree &other ) +{ + /* Reset the list pointers, the tree copy will fill in the list for us. */ +#ifdef WALKABLE + BASELIST::abandon(); +#else + head = 0; + tail = 0; +#endif + + /* Copy the entire tree. 
*/ + treeSize = other.treeSize; + root = other.root; + if ( other.root != 0 ) + root = copyBranch( other.root ); + return *this; +} + +template void AvlTree:: + transfer(AvlTree &other) +{ + treeSize = other.treeSize; + root = other.root; + +#ifdef WALKABLE + BASELIST::head = other.BASELIST::head; + BASELIST::tail = other.BASELIST::tail; + BASELIST::listLen = other.BASELIST::listLen; +#else + head = other.head; + tail = other.tail; +#endif + + other.abandon(); +} + +#endif + +/* + * Iterator operators. + */ + +/* Prefix ++ */ +template Element *AvlTree::Iter:: + operator++() +{ + return ptr = findNext( ptr ); +} + +/* Postfix ++ */ +template Element *AvlTree::Iter:: + operator++(int) +{ + Element *rtn = ptr; + ptr = findNext( ptr ); + return rtn; +} + +/* increment */ +template Element *AvlTree::Iter:: + increment() +{ + return ptr = findNext( ptr ); +} + +/* Prefix -- */ +template Element *AvlTree::Iter:: + operator--() +{ + return ptr = findPrev( ptr ); +} + +/* Postfix -- */ +template Element *AvlTree::Iter:: + operator--(int) +{ + Element *rtn = ptr; + ptr = findPrev( ptr ); + return rtn; +} + +/* decrement */ +template Element *AvlTree::Iter:: + decrement() +{ + return ptr = findPrev( ptr ); +} + +#ifndef WALKABLE + +/* Move ahead one. */ +template Element *AvlTree::Iter:: + findNext( Element *element ) +{ + /* Try to go right once then infinite left. */ + if ( element->BASE_EL(right) != 0 ) { + element = element->BASE_EL(right); + while ( element->BASE_EL(left) != 0 ) + element = element->BASE_EL(left); + } + else { + /* Go up to parent until we were just a left child. */ + while ( true ) { + Element *last = element; + element = element->BASE_EL(parent); + if ( element == 0 || element->BASE_EL(left) == last ) + break; + } + } + return element; +} + +/* Move back one. */ +template Element *AvlTree::Iter:: + findPrev( Element *element ) +{ + /* Try to go left once then infinite right. 
*/ + if ( element->BASE_EL(left) != 0 ) { + element = element->BASE_EL(left); + while ( element->BASE_EL(right) != 0 ) + element = element->BASE_EL(right); + } + else { + /* Go up to parent until we were just a right child. */ + while ( true ) { + Element *last = element; + element = element->BASE_EL(parent); + if ( element == 0 || element->BASE_EL(right) == last ) + break; + } + } + return element; +} + +#endif + + +/* Recursive worker for tree copying. Copies left subtree, node, then right subtree and returns the new subtree root. */ +template Element *AvlTree:: + copyBranch( Element *element ) +{ + /* Duplicate element. Either the base element's copy constructor or default + * constructor will get called. Both will suffice for initting the + * pointers to null when they need to be. */ + Element *retVal = new Element(*element); + + /* If the left tree is there, copy it. */ + if ( retVal->BASE_EL(left) ) { + retVal->BASE_EL(left) = copyBranch(retVal->BASE_EL(left)); + retVal->BASE_EL(left)->BASE_EL(parent) = retVal; + } + +#ifdef WALKABLE + BASELIST::addAfter( BASELIST::tail, retVal ); +#else + if ( head == 0 ) + head = retVal; + tail = retVal; +#endif + + /* If the right tree is there, copy it. */ + if ( retVal->BASE_EL(right) ) { + retVal->BASE_EL(right) = copyBranch(retVal->BASE_EL(right)); + retVal->BASE_EL(right)->BASE_EL(parent) = retVal; + } + return retVal; +} + +/* Once an insertion position is found, attach an element to the tree. */ +template void AvlTree:: + attachRebal( Element *element, Element *parentEl, Element *lastLess ) +{ + /* Increment the number of elements in the tree. */ + treeSize += 1; + + /* Set element's parent. */ + element->BASE_EL(parent) = parentEl; + + /* New element always starts as a leaf with height 1. */ + element->BASE_EL(left) = 0; + element->BASE_EL(right) = 0; + element->BASE_EL(height) = 1; + + /* Are we inserting in the tree somewhere? */ + if ( parentEl != 0 ) { + /* We have a parent so we are somewhere in the tree.
If the parent + * equals lastLess, then the last traversal in the insertion went + * left, otherwise it went right. */ + if ( lastLess == parentEl ) { + parentEl->BASE_EL(left) = element; +#ifdef WALKABLE + BASELIST::addBefore( parentEl, element ); +#endif + } + else { + parentEl->BASE_EL(right) = element; +#ifdef WALKABLE + BASELIST::addAfter( parentEl, element ); +#endif + } + +#ifndef WALKABLE + /* Maintain the first and last pointers. */ + if ( head->BASE_EL(left) == element ) + head = element; + + /* Maintain the first and last pointers. */ + if ( tail->BASE_EL(right) == element ) + tail = element; +#endif + } + else { + /* No parent element so we are inserting the root. */ + root = element; +#ifdef WALKABLE + BASELIST::addAfter( BASELIST::tail, element ); +#else + head = tail = element; +#endif + } + + + /* Recalculate the heights. */ + recalcHeights(parentEl); + + /* Find the first unbalance. */ + Element *ub = findFirstUnbalGP(element); + + /* rebalance. */ + if ( ub != 0 ) + { + /* We assert that after this single rotation the + * tree is now properly balanced. */ + rebalance(ub); + } +} + +#ifndef AVL_KEYLESS + +/** + * \brief Insert an existing element into the tree. + * + * If the insert succeeds and lastFound is given then it is set to the element + * inserted. If the insert fails then lastFound is set to the existing element in + * the tree that has the same key as element. If the element's avl pointers are + * already in use then undefined behaviour results. + * + * \returns The element inserted upon success, null upon failure. + */ +template Element *AvlTree:: + insert( Element *element, Element **lastFound ) +{ + long keyRelation; + Element *curEl = root, *parentEl = 0; + Element *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Attach underneath the leaf and rebalance. 
*/ + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + +#ifdef AVL_BASIC + keyRelation = compare( *element, *curEl ); +#else + keyRelation = compare( element->BASEKEY(getKey()), + curEl->BASEKEY(getKey()) ); +#endif + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->BASE_EL(left); + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->BASE_EL(right); + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +#ifdef AVL_BASIC + +/** + * \brief Find a element in the tree with the given key. + * + * \returns The element if key exists, null if the key does not exist. + */ +template Element *AvlTree:: + find( const Element *element ) const +{ + Element *curEl = root; + long keyRelation; + + while (curEl) { + keyRelation = compare( *element, *curEl ); + + /* Do we go left? */ + if ( keyRelation < 0 ) + curEl = curEl->BASE_EL(left); + /* Do we go right? */ + else if ( keyRelation > 0 ) + curEl = curEl->BASE_EL(right); + /* We have hit the target. */ + else { + return curEl; + } + } + return 0; +} + +#else + +/** + * \brief Insert a new element into the tree with given key. + * + * If the key is not already in the tree then a new element is made using the + * Element(const Key &key) constructor and the insert succeeds. If lastFound is + * given then it is set to the element inserted. If the insert fails then + * lastFound is set to the existing element in the tree that has the same key as + * element. + * + * \returns The new element upon success, null upon failure. + */ +template Element *AvlTree:: + insert( const Key &key, Element **lastFound ) +{ + long keyRelation; + Element *curEl = root, *parentEl = 0; + Element *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. 
Create the new element, attach it underneath the leaf + * and rebalance. */ + Element *element = new Element( key ); + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = compare( key, curEl->BASEKEY(getKey()) ); + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->BASE_EL(left); + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->BASE_EL(right); + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +#ifdef AVLTREE_MAP +/** + * \brief Insert a new element into the tree with key and value. + * + * If the key is not already in the tree then a new element is constructed and + * the insert succeeds. If lastFound is given then it is set to the element + * inserted. If the insert fails then lastFound is set to the existing element in + * the tree that has the same key as element. This insert routine is only + * available in AvlMap because it is the only class that knows about a Value + * type. + * + * \returns The new element upon success, null upon failure. + */ +template Element *AvlTree:: + insert( const Key &key, const Value &val, Element **lastFound ) +{ + long keyRelation; + Element *curEl = root, *parentEl = 0; + Element *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Create the new element, attach it underneath the leaf + * and rebalance. */ + Element *element = new Element( key, val ); + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = compare(key, curEl->getKey()); + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->BASE_EL(left); + } + /* Do we go right? 
*/ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->BASE_EL(right); + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} +#endif /* AVLTREE_MAP */ + + +/** + * \brief Find a element in the tree with the given key. + * + * \returns The element if key exists, null if the key does not exist. + */ +template Element *AvlTree:: + find( const Key &key ) const +{ + Element *curEl = root; + long keyRelation; + + while (curEl) { + keyRelation = compare( key, curEl->BASEKEY(getKey()) ); + + /* Do we go left? */ + if ( keyRelation < 0 ) + curEl = curEl->BASE_EL(left); + /* Do we go right? */ + else if ( keyRelation > 0 ) + curEl = curEl->BASE_EL(right); + /* We have hit the target. */ + else { + return curEl; + } + } + return 0; +} + + +/** + * \brief Find a element, then detach it from the tree. + * + * The element is not deleted. + * + * \returns The element detached if the key is found, othewise returns null. + */ +template Element *AvlTree:: + detach(const Key &key) +{ + Element *element = find( key ); + if ( element ) { + detach(element); + } + + return element; +} + +/** + * \brief Find, detach and delete a element from the tree. + * + * \returns True if the element was found and deleted, false otherwise. + */ +template bool AvlTree:: + remove(const Key &key) +{ + /* Assume not found. */ + bool retVal = false; + + /* Look for the key. */ + Element *element = find( key ); + if ( element != 0 ) { + /* If found, detach the element and delete. */ + detach( element ); + delete element; + retVal = true; + } + + return retVal; +} + +#endif /* AVL_BASIC */ +#endif /* AVL_KEYLESS */ + + +/** + * \brief Detach and delete a element from the tree. + * + * If the element is not in the tree then undefined behaviour results. + */ +template void AvlTree:: + remove(Element *element) +{ + /* Detach and delete. */ + detach(element); + delete element; +} + +/** + * \brief Detach a element from the tree. 
+ * + * If the element is not in the tree then undefined behaviour results. + * + * \returns The element given. + */ +template Element *AvlTree:: + detach(Element *element) +{ + Element *replacement, *fixfrom; + long lheight, rheight; + +#ifdef WALKABLE + /* Remove the element from the ordered list. */ + BASELIST::detach( element ); +#endif + + /* Update treeSize. */ + treeSize--; + + /* Find a replacement element. */ + if (element->BASE_EL(right)) + { + /* Find the leftmost element of the right subtree. */ + replacement = element->BASE_EL(right); + while (replacement->BASE_EL(left)) + replacement = replacement->BASE_EL(left); + + /* If replacing the element the with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. */ + if (replacement->BASE_EL(parent) == element) + fixfrom = replacement; + else + fixfrom = replacement->BASE_EL(parent); + +#ifndef WALKABLE + if ( element == head ) + head = replacement; +#endif + + removeEl(replacement, replacement->BASE_EL(right)); + replaceEl(element, replacement); + } + else if (element->BASE_EL(left)) + { + /* Find the rightmost element of the left subtree. */ + replacement = element->BASE_EL(left); + while (replacement->BASE_EL(right)) + replacement = replacement->BASE_EL(right); + + /* If replacing the element the with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. */ + if (replacement->BASE_EL(parent) == element) + fixfrom = replacement; + else + fixfrom = replacement->BASE_EL(parent); + +#ifndef WALKABLE + if ( element == tail ) + tail = replacement; +#endif + + removeEl(replacement, replacement->BASE_EL(left)); + replaceEl(element, replacement); + } + else + { + /* We need to start fixing at the parent of the element. 
*/ + fixfrom = element->BASE_EL(parent); + +#ifndef WALKABLE + if ( element == head ) + head = element->BASE_EL(parent); + if ( element == tail ) + tail = element->BASE_EL(parent); +#endif + + /* The element we are deleting is a leaf element. */ + removeEl(element, 0); + } + + /* If fixfrom is null it means we just deleted + * the root of the tree. */ + if ( fixfrom == 0 ) + return element; + + /* Fix the heights after the deletion. */ + recalcHeights(fixfrom); + + /* Fix every unbalanced element going up in the tree. */ + Element *ub = findFirstUnbalEl(fixfrom); + while ( ub ) + { + /* Find the element to rebalance by moving down from the first unbalanced + * element 2 levels in the direction of the greatest heights. On the + * second move down, the heights may be equal ( but not on the first ). + * In which case go in the direction of the first move. */ + lheight = ub->BASE_EL(left) ? ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? ub->BASE_EL(right)->BASE_EL(height) : 0; + assert( lheight != rheight ); + if (rheight > lheight) + { + ub = ub->BASE_EL(right); + lheight = ub->BASE_EL(left) ? + ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? + ub->BASE_EL(right)->BASE_EL(height) : 0; + if (rheight > lheight) + ub = ub->BASE_EL(right); + else if (rheight < lheight) + ub = ub->BASE_EL(left); + else + ub = ub->BASE_EL(right); + } + else + { + ub = ub->BASE_EL(left); + lheight = ub->BASE_EL(left) ? + ub->BASE_EL(left)->BASE_EL(height) : 0; + rheight = ub->BASE_EL(right) ? + ub->BASE_EL(right)->BASE_EL(height) : 0; + if (rheight > lheight) + ub = ub->BASE_EL(right); + else if (rheight < lheight) + ub = ub->BASE_EL(left); + else + ub = ub->BASE_EL(left); + } + + + /* rebalance returns the grandparant of the subtree formed + * by the element that were rebalanced. + * We must continue upward from there rebalancing. */ + fixfrom = rebalance(ub); + + /* Find the next unbalaced element. 
*/ + ub = findFirstUnbalEl(fixfrom); + } + + return element; +} + + +/** + * \brief Empty the tree and delete all the elements. + * + * Resets the tree to its initial state. + */ +template void AvlTree::empty() +{ + if ( root ) { + /* Recursively delete from the tree structure. */ + deleteChildrenOf(root); + delete root; + root = 0; + treeSize = 0; + +#ifdef WALKABLE + BASELIST::abandon(); +#endif + } +} + +/** + * \brief Forget all elements in the tree. + * + * Does not delete elements. Resets the tree to its initial state. + */ +template void AvlTree::abandon() +{ + root = 0; + treeSize = 0; + +#ifdef WALKABLE + BASELIST::abandon(); +#endif +} + +/* Recursively delete all the children of an element. The element itself is not deleted. */ +template void AvlTree:: + deleteChildrenOf( Element *element ) +{ + /* Recurse left. */ + if (element->BASE_EL(left)) { + deleteChildrenOf(element->BASE_EL(left)); + + /* Delete left element and clear the left link. */ + delete element->BASE_EL(left); + element->BASE_EL(left) = 0; + } + + /* Recurse right. */ + if (element->BASE_EL(right)) { + deleteChildrenOf(element->BASE_EL(right)); + + /* Delete right element and clear the right link so it does not dangle. */ + delete element->BASE_EL(right); + element->BASE_EL(right) = 0; + } +} + +/* rebalance from an element whose grandparent is unbalanced. Only + * call on an element that has a grandparent. */ +template Element *AvlTree:: + rebalance(Element *n) +{ + long lheight, rheight; + Element *a, *b, *c; + Element *t1, *t2, *t3, *t4; + + Element *p = n->BASE_EL(parent); /* Parent (Non-NULL). */ + Element *gp = p->BASE_EL(parent); /* Grand-parent (Non-NULL). */ + Element *ggp = gp->BASE_EL(parent); /* Great grand-parent (may be NULL).
*/ + + if (gp->BASE_EL(right) == p) + { + /* gp + * \ + * p + */ + if (p->BASE_EL(right) == n) + { + /* gp + * \ + * p + * \ + * n + */ + a = gp; + b = p; + c = n; + t1 = gp->BASE_EL(left); + t2 = p->BASE_EL(left); + t3 = n->BASE_EL(left); + t4 = n->BASE_EL(right); + } + else + { + /* gp + * \ + * p + * / + * n + */ + a = gp; + b = n; + c = p; + t1 = gp->BASE_EL(left); + t2 = n->BASE_EL(left); + t3 = n->BASE_EL(right); + t4 = p->BASE_EL(right); + } + } + else + { + /* gp + * / + * p + */ + if (p->BASE_EL(right) == n) + { + /* gp + * / + * p + * \ + * n + */ + a = p; + b = n; + c = gp; + t1 = p->BASE_EL(left); + t2 = n->BASE_EL(left); + t3 = n->BASE_EL(right); + t4 = gp->BASE_EL(right); + } + else + { + /* gp + * / + * p + * / + * n + */ + a = n; + b = p; + c = gp; + t1 = n->BASE_EL(left); + t2 = n->BASE_EL(right); + t3 = p->BASE_EL(right); + t4 = gp->BASE_EL(right); + } + } + + /* Perform rotation. + */ + + /* Tie b to the great grandparent. */ + if ( ggp == 0 ) + root = b; + else if ( ggp->BASE_EL(left) == gp ) + ggp->BASE_EL(left) = b; + else + ggp->BASE_EL(right) = b; + b->BASE_EL(parent) = ggp; + + /* Tie a as a leftchild of b. */ + b->BASE_EL(left) = a; + a->BASE_EL(parent) = b; + + /* Tie c as a rightchild of b. */ + b->BASE_EL(right) = c; + c->BASE_EL(parent) = b; + + /* Tie t1 as a leftchild of a. */ + a->BASE_EL(left) = t1; + if ( t1 != 0 ) t1->BASE_EL(parent) = a; + + /* Tie t2 as a rightchild of a. */ + a->BASE_EL(right) = t2; + if ( t2 != 0 ) t2->BASE_EL(parent) = a; + + /* Tie t3 as a leftchild of c. */ + c->BASE_EL(left) = t3; + if ( t3 != 0 ) t3->BASE_EL(parent) = c; + + /* Tie t4 as a rightchild of c. */ + c->BASE_EL(right) = t4; + if ( t4 != 0 ) t4->BASE_EL(parent) = c; + + /* The heights are all recalculated manualy and the great + * grand-parent is passed to recalcHeights() to ensure + * the heights are correct up the tree. + * + * Note that recalcHeights() cuts out when it comes across + * a height that hasn't changed. 
+ */ + + /* Fix height of a. */ + lheight = a->BASE_EL(left) ? a->BASE_EL(left)->BASE_EL(height) : 0; + rheight = a->BASE_EL(right) ? a->BASE_EL(right)->BASE_EL(height) : 0; + a->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of c. */ + lheight = c->BASE_EL(left) ? c->BASE_EL(left)->BASE_EL(height) : 0; + rheight = c->BASE_EL(right) ? c->BASE_EL(right)->BASE_EL(height) : 0; + c->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b. */ + lheight = a->BASE_EL(height); + rheight = c->BASE_EL(height); + b->BASE_EL(height) = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b's parents. */ + recalcHeights(ggp); + return ggp; +} + +/* Recalculates the heights of element and then of its ancestors, stopping at the first unchanged height. */ +template void AvlTree:: + recalcHeights(Element *element) +{ + long lheight, rheight, new_height; + while ( element != 0 ) + { + lheight = element->BASE_EL(left) ? element->BASE_EL(left)->BASE_EL(height) : 0; + rheight = element->BASE_EL(right) ? element->BASE_EL(right)->BASE_EL(height) : 0; + + new_height = (lheight > rheight ? lheight : rheight) + 1; + + /* If there is no change in the height, then there will be no + * change in any of the ancestors' heights. We can stop going up. + * If there was a change, continue upward. */ + if (new_height == element->BASE_EL(height)) + return; + else + element->BASE_EL(height) = new_height; + + element = element->BASE_EL(parent); + } +} + +/* Finds the first element whose grandparent is unbalanced. Returns null if there is none. */ +template Element *AvlTree:: + findFirstUnbalGP(Element *element) +{ + long lheight, rheight, balanceProp; + Element *gp; + + if ( element == 0 || element->BASE_EL(parent) == 0 || + element->BASE_EL(parent)->BASE_EL(parent) == 0 ) + return 0; + + /* The guard above returned if there is no grandparent; start checking at the grandparent. */ + gp = element->BASE_EL(parent)->BASE_EL(parent); + while ( gp != 0 ) + { + lheight = gp->BASE_EL(left) ?
gp->BASE_EL(left)->BASE_EL(height) : 0; + rheight = gp->BASE_EL(right) ? gp->BASE_EL(right)->BASE_EL(height) : 0; + balanceProp = lheight - rheight; + + if ( balanceProp < -1 || balanceProp > 1 ) + return element; + + element = element->BASE_EL(parent); + gp = gp->BASE_EL(parent); + } + return 0; +} + + +/* Finds the first element that is unbalanced. */ +template Element *AvlTree:: + findFirstUnbalEl(Element *element) +{ + if ( element == 0 ) + return 0; + + while ( element != 0 ) + { + long lheight = element->BASE_EL(left) ? + element->BASE_EL(left)->BASE_EL(height) : 0; + long rheight = element->BASE_EL(right) ? + element->BASE_EL(right)->BASE_EL(height) : 0; + long balanceProp = lheight - rheight; + + if ( balanceProp < -1 || balanceProp > 1 ) + return element; + + element = element->BASE_EL(parent); + } + return 0; +} + +/* Replace a element in the tree with another element not in the tree. */ +template void AvlTree:: + replaceEl(Element *element, Element *replacement) +{ + Element *parent = element->BASE_EL(parent), + *left = element->BASE_EL(left), + *right = element->BASE_EL(right); + + replacement->BASE_EL(left) = left; + if (left) + left->BASE_EL(parent) = replacement; + replacement->BASE_EL(right) = right; + if (right) + right->BASE_EL(parent) = replacement; + + replacement->BASE_EL(parent) = parent; + if (parent) + { + if (parent->BASE_EL(left) == element) + parent->BASE_EL(left) = replacement; + else + parent->BASE_EL(right) = replacement; + } + else + root = replacement; + + replacement->BASE_EL(height) = element->BASE_EL(height); +} + +/* Removes a element from a tree and puts filler in it's place. + * Filler should be null or a child of element. 
*/ +template void AvlTree:: + removeEl(Element *element, Element *filler) +{ + Element *parent = element->BASE_EL(parent); + + if (parent) + { + if (parent->BASE_EL(left) == element) + parent->BASE_EL(left) = filler; + else + parent->BASE_EL(right) = filler; + } + else + root = filler; + + if (filler) + filler->BASE_EL(parent) = parent; + + return; +} + +#ifdef AAPL_NAMESPACE +} +#endif diff --git a/aapl/avlibasic.h b/aapl/avlibasic.h new file mode 100644 index 00000000..a48faaa8 --- /dev/null +++ b/aapl/avlibasic.h @@ -0,0 +1,67 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIBASIC_H +#define _AAPL_AVLIBASIC_H + +#include "compare.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliBasic + * \brief Linked AVL Tree in which the entire element structure is the key. + * + * AvliBasic is a linked AVL tree that does not distinguish between the + * element that it contains and the key. The entire element structure is the + * key that is used to compare the relative ordering of elements. This is + * similar to the BstSet structure. + * + * AvliBasic does not assume ownership of elements in the tree. Items must be + * explicitly de-allocated. 
+ */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Compare +#define AVLMEL_TEMPDEF class Element, class Compare +#define AVLMEL_TEMPUSE Element, Compare +#define AvlTree AvliBasic +#define AVL_BASIC +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_BASIC +#undef WALKABLE + +#endif /* _AAPL_AVLIBASIC_H */ diff --git a/aapl/avlikeyless.h b/aapl/avlikeyless.h new file mode 100644 index 00000000..559b75af --- /dev/null +++ b/aapl/avlikeyless.h @@ -0,0 +1,64 @@ +/* + * Copyright 2002, 2003 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIKEYLESS_H +#define _AAPL_AVLIKEYLESS_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliKeyless + * \brief Linked AVL tree that has no insert/find/remove functions that take a + * key. + * + * AvliKeyless is an implementation of the AVL tree rebalancing functionality + * only. It provides the common code for the tiny AVL tree implementations. 
+ */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASELIST DListMel< Element, AvliTreeEl > +#define AVLMEL_CLASSDEF class Element +#define AVLMEL_TEMPDEF class Element +#define AVLMEL_TEMPUSE Element +#define AvlTree AvliKeyless +#define WALKABLE +#define AVL_KEYLESS + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE +#undef AVL_KEYLESS + +#endif /* _AAPL_AVLIKEYLESS_H */ diff --git a/aapl/avlimap.h b/aapl/avlimap.h new file mode 100644 index 00000000..38bfff75 --- /dev/null +++ b/aapl/avlimap.h @@ -0,0 +1,77 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMAP_H +#define _AAPL_AVLIMAP_H + +#include "compare.h" +#include "dlist.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMap + * \brief Linked key and value oriented AVL tree. + * + * AvliMap stores key and value pairs in elements that are managed by the tree. It + * is intended to be similar to the map template found in the STL. AvliMap + * requires that a Key type, a Value type, and a class containing a compare() + * routine for Key be given. Items can be inserted with just a key or with a + * key and value pair.
+ * + * AvliMap assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avlimap.cpp + */ + +/*@}*/ + +#define AVLTREE_MAP +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DList< AvliMapEl > +#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Key, class Value, class Compare +#define AVLMEL_TEMPUSE Key, Value, Compare +#define AvlTree AvliMap +#define Element AvliMapEl +#define WALKABLE + +#include "avlcommon.h" + +#undef AVLTREE_MAP +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element +#undef WALKABLE + +#endif /* _AAPL_AVLIMAP_H */ diff --git a/aapl/avlimel.h b/aapl/avlimel.h new file mode 100644 index 00000000..9442a997 --- /dev/null +++ b/aapl/avlimel.h @@ -0,0 +1,79 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMEL_H +#define _AAPL_AVLIMEL_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMel + * \brief Linked AVL tree for elements appearing in multiple trees. + * + * AvliMel allows for an element to simultaneously be in multiple trees without + * the trees interfering with one another. For each tree that the element is + * to appear in, there must be a distinct set of AVL Tree management data that + * can be unambiguously referenced with some base class name. This name + * is passed to the tree as a template parameter and is used in the tree + * algorithms. + * + * The element must use the same key type and value in each tree that it + * appears in. If distinct keys are required, the AvliMelKey structure is + * available. + * + * AvliMel does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available.
+ * + * \include ex_avlimel.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) name +#define BASELIST DListMel< Element, BaseEl > +#define AVLMEL_CLASSDEF class Element, class Key, \ + class BaseEl, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, \ + class BaseEl, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare +#define AvlTree AvliMel +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLIMEL_H */ diff --git a/aapl/avlimelkey.h b/aapl/avlimelkey.h new file mode 100644 index 00000000..faa56e83 --- /dev/null +++ b/aapl/avlimelkey.h @@ -0,0 +1,76 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLIMELKEY_H +#define _AAPL_AVLIMELKEY_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliMelKey + * \brief Linked AVL tree for element appearing in multiple trees with different keys. 
+ * + * AvliMelKey is similar to AvliMel, except that an additional template + * parameter, BaseKey, is provided for resolving ambiguous references to + * getKey(). This means that if an element is stored in multiple trees, each + * tree can use a different key for ordering the elements in it. Using + * AvliMelKey an array of data structures can be indexed with an O(log(n)) + * search on two or more of the values contained within it and without + * allocating any additional data. + * + * AvliMelKey does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. + * + * \include ex_avlimelkey.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) BaseKey::name +#define BASELIST DListMel< Element, BaseEl > +#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare +#define AvlTree AvliMelKey +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLIMELKEY_H */ diff --git a/aapl/avliset.h b/aapl/avliset.h new file mode 100644 index 00000000..cf5be365 --- /dev/null +++ b/aapl/avliset.h @@ -0,0 +1,75 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLISET_H +#define _AAPL_AVLISET_H + +#include "compare.h" +#include "dlist.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliSet + * \brief Linked Key-only oriented tree. + * + * AvliSet stores only keys in elements that are managed by the tree. AvliSet + * requires that a Key type and a class containing a compare() routine + * for Key be given. Items are inserted with just a key value. + * + * AvliSet assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. 
+ * + * \include ex_avliset.cpp + */ + +/*@}*/ + +#define AVLTREE_SET +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DList< AvliSetEl > +#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Key, class Compare +#define AVLMEL_TEMPUSE Key, Compare +#define AvlTree AvliSet +#define Element AvliSetEl +#define WALKABLE + +#include "avlcommon.h" + +#undef AVLTREE_SET +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element +#undef WALKABLE + +#endif /* _AAPL_AVLISET_H */ diff --git a/aapl/avlitree.h b/aapl/avlitree.h new file mode 100644 index 00000000..b053c96f --- /dev/null +++ b/aapl/avlitree.h @@ -0,0 +1,78 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLITREE_H +#define _AAPL_AVLITREE_H + +#include "compare.h" +#include "dlistmel.h" + +/** + * \addtogroup avlitree + * @{ + */ + +/** + * \class AvliTree + * \brief Linked AVL tree. + * + * AvliTree is the standard linked by-structure AVL tree. To use this + * structure the user must define an element type and give it the necessary + * properties. 
At the very least it must have a getKey() function that will be + * used to compare the relative ordering of elements and tree management data + * necessary for the AVL algorithm. An element type can acquire the management + * data by inheriting the AvliTreeEl class. + * + * AvliTree does not presume to manage the allocation of elements in the tree. + * The destructor will not delete the items in the tree, instead the elements + * must be explicitly de-allocated by the user if necessary and when it is + * safe to do so. The empty() routine will traverse the tree and delete all + * items. + * + * Since the tree does not manage the elements, it can contain elements that + * are allocated statically or that are part of another data structure. + * + * \include ex_avlitree.cpp + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define BASELIST DListMel< Element, AvliTreeEl > +#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, class Compare +#define AVLMEL_TEMPUSE Element, Key, Compare +#define AvlTree AvliTree +#define WALKABLE + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef BASELIST +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef WALKABLE + +#endif /* _AAPL_AVLITREE_H */ diff --git a/aapl/avlkeyless.h b/aapl/avlkeyless.h new file mode 100644 index 00000000..30805136 --- /dev/null +++ b/aapl/avlkeyless.h @@ -0,0 +1,58 @@ +/* + * Copyright 2002, 2003 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLKEYLESS_H +#define _AAPL_AVLKEYLESS_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlKeyless + * \brief AVL tree that has no insert/find/remove functions that take a key. + * + * AvlKeyless is an implementation of the AVL tree rebalancing functionality + * only. It provides the common code for the tiny AVL tree implementations. + */ + +/*@}*/ + +#define BASE_EL(name) name +#define AVLMEL_CLASSDEF class Element +#define AVLMEL_TEMPDEF class Element +#define AVLMEL_TEMPUSE Element +#define AvlTree AvlKeyless +#define AVL_KEYLESS + +#include "avlcommon.h" + +#undef BASE_EL +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef AVL_KEYLESS + +#endif /* _AAPL_AVLKEYLESS_H */ diff --git a/aapl/avlmap.h b/aapl/avlmap.h new file mode 100644 index 00000000..e4e15662 --- /dev/null +++ b/aapl/avlmap.h @@ -0,0 +1,74 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMAP_H +#define _AAPL_AVLMAP_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMap + * \brief Key and value oriented AVL tree. + * + * AvlMap stores key and value pairs in elements that are managed by the tree. It + * is intended to be similar to the map template found in the STL. AvlMap + * requires that a Key type, a Value type, and a class containing a compare() + * routine for Key be given. Items can be inserted with just a key or with a + * key and value pair. + * + * AvlMap assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avlmap.cpp + */ + +/*@}*/ + +#define AVLTREE_MAP +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Key, class Value, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Key, class Value, class Compare +#define AVLMEL_TEMPUSE Key, Value, Compare +#define AvlTree AvlMap +#define Element AvlMapEl + +#include "avlcommon.h" + +#undef AVLTREE_MAP +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element + + + +#endif /* _AAPL_AVLMAP_H */ diff --git a/aapl/avlmel.h b/aapl/avlmel.h new file mode 100644 index 00000000..7bfad3b7 --- /dev/null +++ b/aapl/avlmel.h @@ -0,0 +1,74 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl.
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMEL_H +#define _AAPL_AVLMEL_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMel + * \brief AVL tree for elements appearing in multiple trees. + * + * AvlMel allows for an element to simultaneously be in multiple trees without + * the trees interfering with one another. For each tree that the element is + * to appear in, there must be a distinct set of AVL Tree management data that + * can be unambiguously referenced with some base class name. This name + * is passed to the tree as a template parameter and is used in the tree + * algorithms. + * + * The element must use the same key type and value in each tree that it + * appears in. If distinct keys are required, the AvlMelKey structure is + * available. + * + * AvlMel does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available.
+ * + * \include ex_avlmel.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Key, \ + class BaseEl, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, \ + class BaseEl, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, Compare +#define AvlTree AvlMel + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLMEL_H */ diff --git a/aapl/avlmelkey.h b/aapl/avlmelkey.h new file mode 100644 index 00000000..9261cc83 --- /dev/null +++ b/aapl/avlmelkey.h @@ -0,0 +1,71 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLMELKEY_H +#define _AAPL_AVLMELKEY_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlMelKey + * \brief AVL tree for elements appearing in multiple trees with different keys. + * + * AvlMelKey is similar to AvlMel, except that an additional template + * parameter, BaseKey, is provided for resolving ambiguous references to + * getKey(). 
This means that if an element is stored in multiple trees, each + * tree can use a different key for ordering the elements in it. Using + * AvlMelKey an array of data structures can be indexed with an O(log(n)) + * search on two or more of the values contained within it and without + * allocating any additional data. + * + * AvlMelKey does not assume ownership of elements in the tree. The destructor + * will not delete the elements. If the user wishes to explicitly deallocate + * all the items in the tree the empty() routine is available. + * + * \include ex_avlmelkey.cpp + */ + +/*@}*/ + +#define BASE_EL(name) BaseEl::name +#define BASEKEY(name) BaseKey::name +#define AVLMEL_CLASSDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, class BaseEl, \ + class BaseKey, class Compare +#define AVLMEL_TEMPUSE Element, Key, BaseEl, BaseKey, Compare +#define AvlTree AvlMelKey + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLMELKEY_H */ diff --git a/aapl/avlset.h b/aapl/avlset.h new file mode 100644 index 00000000..224ee59f --- /dev/null +++ b/aapl/avlset.h @@ -0,0 +1,70 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLSET_H +#define _AAPL_AVLSET_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlSet + * \brief Key-only oriented tree. + * + * AvlSet stores only keys in elements that are managed by the tree. AvlSet + * requires that a Key type and a class containing a compare() routine + * for Key be given. Items are inserted with just a key value. + * + * AvlSet assumes all elements in the tree are allocated on the heap and are + * to be managed by the tree. This means that the class destructor will delete + * the contents of the tree. A deep copy will cause existing elements to be + * deleted first. + * + * \include ex_avlset.cpp + */ + +/*@}*/ + +#define AVLTREE_SET +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Key, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Key, class Compare +#define AVLMEL_TEMPUSE Key, Compare +#define AvlTree AvlSet +#define Element AvlSetEl + +#include "avlcommon.h" + +#undef AVLTREE_SET +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree +#undef Element + +#endif /* _AAPL_AVLSET_H */ diff --git a/aapl/avltree.h b/aapl/avltree.h new file mode 100644 index 00000000..cf153595 --- /dev/null +++ b/aapl/avltree.h @@ -0,0 +1,73 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. 
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_AVLTREE_H +#define _AAPL_AVLTREE_H + +#include "compare.h" + +/** + * \addtogroup avltree + * @{ + */ + +/** + * \class AvlTree + * \brief Basic AVL tree. + * + * AvlTree is the standard by-structure AVL tree. To use this structure the + * user must define an element type and give it the necessary properties. At + * the very least it must have a getKey() function that will be used to + * compare the relative ordering of elements and tree management data + * necessary for the AVL algorithm. An element type can acquire the management + * data by inheriting the AvlTreeEl class. + * + * AvlTree does not presume to manage the allocation of elements in the tree. + * The destructor will not delete the items in the tree, instead the elements + * must be explicitly de-allocated by the user if necessary and when it is + * safe to do so. The empty() routine will traverse the tree and delete all + * items. + * + * Since the tree does not manage the elements, it can contain elements that + * are allocated statically or that are part of another data structure. 
+ * + * \include ex_avltree.cpp + */ + +/*@}*/ + +#define BASE_EL(name) name +#define BASEKEY(name) name +#define AVLMEL_CLASSDEF class Element, class Key, class Compare = CmpOrd +#define AVLMEL_TEMPDEF class Element, class Key, class Compare +#define AVLMEL_TEMPUSE Element, Key, Compare +#define AvlTree AvlTree + +#include "avlcommon.h" + +#undef BASE_EL +#undef BASEKEY +#undef AVLMEL_CLASSDEF +#undef AVLMEL_TEMPDEF +#undef AVLMEL_TEMPUSE +#undef AvlTree + +#endif /* _AAPL_AVLTREE_H */ diff --git a/aapl/bstcommon.h b/aapl/bstcommon.h new file mode 100644 index 00000000..bd390cdc --- /dev/null +++ b/aapl/bstcommon.h @@ -0,0 +1,814 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndefs because it is + * not intended to be included by users directly. */ + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/* Binary Search Table */ +template < BST_TEMPL_DECLARE > class BstTable : + public Compare, + public Vector< Element, Resize > +{ + typedef Vector BaseVector; + typedef Table BaseTable; + +public: + /** + * \brief Default constructor. + * + * Create an empty binary search table. + */ + BstTable() { } + + /** + * \brief Construct with initial value. 
+ * + * Constructs a binary search table with an initial item. Uses the default + * constructor for initializing Value. + */ + BstTable(const Key &key) + { insert(key); } + +#if defined( BSTMAP ) + /** + * \brief Construct with initial value. + * + * Constructs a binary search table with an initial key/value pair. + */ + BstTable(const Key &key, const Value &val) + { insert(key, val); } +#endif + +#if ! defined( BSTSET ) + /** + * \brief Construct with initial value. + * + * Constructs a binary search table with an initial Element. + */ + BstTable(const Element &el) + { insert(el); } +#endif + + Element *insert(const Key &key, Element **lastFound = 0); + Element *insertMulti(const Key &key); + + bool insert(const BstTable &other); + void insertMulti(const BstTable &other); + +#if defined( BSTMAP ) + Element *insert(const Key &key, const Value &val, + Element **lastFound = 0); + Element *insertMulti(const Key &key, const Value &val ); +#endif + +#if ! defined( BSTSET ) + Element *insert(const Element &el, Element **lastFound = 0); + Element *insertMulti(const Element &el); +#endif + + Element *find(const Key &key, Element **lastFound = 0) const; + bool findMulti( const Key &key, Element *&lower, + Element *&upper ) const; + + bool remove(const Key &key); + bool remove(Element *item); + long removeMulti(const Key &key); + long removeMulti(Element *lower, Element *upper); + + /* The following provide access to the underlying insert and remove + * functions that may be hidden by the BST insert and remove. The insertDup + * and insertNew functions will never be hidden. They are provided for + * consistency. The difference between the non-shared and the shared + * tables is the documentation reference to the invoked function. */ + +#if !defined( SHARED_BST ) + /*@{*/ + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden.
Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const T &val ). + */ + void vinsert(long pos, const Element &val) + { Vector< Element, Resize >::insert( pos, &val, 1 ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const T *val, long len ). + */ + void vinsert(long pos, const Element *val, long len) + { Vector< Element, Resize >::insert( pos, val, len ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes Vector::insert( long pos, const Vector &v ). + */ + void vinsert(long pos, const BstTable &v) + { Vector< Element, Resize >::insert( pos, v.data, v.tabLen ); } + + /*@}*/ + + /*@{*/ + + /** \brief Call the remove of the underlying vector. + * + * Provides access to the vector remove, which may become hidden. + * Invokes Vector::remove( long pos ). + */ + void vremove(long pos) + { Vector< Element, Resize >::remove( pos, 1 ); } + + /** \brief Call the remove of the underlying vector. + * + * Provides access to the vector remove, which may become hidden. + * Invokes Vector::remove( long pos, long len ). + */ + void vremove(long pos, long len) + { Vector< Element, Resize >::remove( pos, len ); } + + /*@}*/ +#else /* SHARED_BST */ + /*@{*/ + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const T &val ).
+ */ + void vinsert(long pos, const Element &val) + { Vector< Element, Resize >::insert( pos, &val, 1 ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const T *val, long len ). + */ + void vinsert(long pos, const Element *val, long len) + { Vector< Element, Resize >::insert( pos, val, len ); } + + /** \brief Call the insert of the underlying vector. + * + * Provides access to the vector insert, which may become hidden. Care + * should be taken to ensure that after the insert the ordering of + * elements is preserved. + * Invokes SVector::insert( long pos, const SVector &v ). + */ + void vinsert(long pos, const BstTable &v) + { Vector< Element, Resize >::insert( pos, v.data, v.length() ); } + + /*@}*/ + + /*@{*/ + + /** \brief Call the remove of the underlying vector. + * + * Provides access to the vector remove, which may become hidden. + * Invokes SVector::remove( long pos ). + */ + void vremove(long pos) + { Vector< Element, Resize >::remove( pos, 1 ); } + + /** \brief Call the remove of the underlying vector. + * + * Provides access to the vector remove, which may become hidden. + * Invokes SVector::remove( long pos, long len ). + */ + void vremove(long pos, long len) + { Vector< Element, Resize >::remove( pos, len ); } + + /*@}*/ + +#endif /* SHARED_BST */ +}; + + +#if 0 +#if defined( SHARED_BST ) +/** + * \brief Construct a binary search table with an initial amount of + * allocation. + * + * The table is initialized to have room for allocLen elements. The + * table starts empty. + */ +template BstTable:: + BstTable( long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + if ( allocLen > 0 ) { + /* Allocate the data needed.
*/ + STabHead *head = (STabHead*) + malloc( sizeof(STabHead) + sizeof(Element) * allocLen ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header and save the data pointer. */ + head->refCount = 1; + head->allocLen = allocLen; + head->tabLen = 0; + BaseTable::data = (Element*) (head + 1); + } +} +#else +/** + * \brief Construct a binary search table with an initial amount of + * allocation. + * + * The table is initialized to have room for allocLength elements. The + * table starts empty. + */ +template BstTable:: + BstTable( long allocLen ) +{ + /* Allocate the space if we are given a positive allocLen. */ + BaseTable::allocLen = allocLen; + if ( BaseTable::allocLen > 0 ) { + BaseTable::data = (Element*) malloc(sizeof(Element) * BaseTable::allocLen); + if ( BaseTable::data == NULL ) + throw std::bad_alloc(); + } +} + +#endif +#endif + +/** + * \brief Find the element with the given key and remove it. + * + * If multiple elements with the given key exist, then it is unspecified which + * element will be removed. + * + * \returns True if an element is found and consequently removed, false + * otherwise. + */ +template bool BstTable:: + remove(const Key &key) +{ + Element *el = find(key); + if ( el != 0 ) { + Vector< Element >::remove(el - BaseTable::data); + return true; + } + return false; +} + +/** + * \brief Remove the element pointed to by item. + * + * If item does not point to an element in the tree, then undefined behaviour + * results. If item is null, then remove has no effect. + * + * \returns True if item is not null, false otherwise. + */ +template bool BstTable:: + remove( Element *item ) +{ + if ( item != 0 ) { + Vector< Element >::remove(item - BaseTable::data); + return true; + } + return false; +} + +/** + * \brief Find and remove the entire range of elements with the given key. + * + * \returns The number of elements removed. 
+ */ +template long BstTable:: + removeMulti(const Key &key) +{ + Element *low, *high; + if ( findMulti(key, low, high) ) { + /* Get the length of the range. */ + long num = high - low + 1; + Vector< Element, Resize >::remove(low - BaseTable::data, num); + return num; + } + + return 0; +} + +/** + * \brief Remove the range of elements from lower to upper, inclusive. + * + * \returns The number of elements removed. + */ +template long BstTable:: + removeMulti(Element *lower, Element *upper) +{ + /* Get the length of the range. */ + long num = upper - lower + 1; + Vector< Element, Resize >::remove(lower - BaseTable::data, num); + return num; +} + + +/** + * \brief Find a range of elements with the given key. + * + * If any elements with the given key exist then lower and upper are set to + * the low and high ends of the continuous range of elements with the key. + * Lower and upper will point to the first and last elements with the key. + * + * \returns True if any elements are found, false otherwise. + */ +template bool BstTable:: + findMulti(const Key &key, Element *&low, Element *&high ) const +{ + const Element *lower, *mid, *upper; + long keyRelation; + const long tblLen = BaseTable::length(); + + if ( BaseTable::data == 0 ) + return false; + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. */ + return false; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + Element *lowEnd = BaseTable::data - 1; + Element *highEnd = BaseTable::data + tblLen; + + lower = mid - 1; + while ( lower != lowEnd && + compare(key, GET_KEY(*lower)) == 0 ) + lower--; + + upper = mid + 1; + while ( upper != highEnd && + compare(key, GET_KEY(*upper)) == 0 ) + upper++; + + low = (Element*)lower + 1; + high = (Element*)upper - 1; + return true; + } + } +} + +/** + * \brief Find an element with the given key. + * + * If the find succeeds then lastFound is set to the element found.
If the + * find fails then lastFound is set to the location where the key would be + * inserted. If there is more than one element in the table with the given key, + * then it is unspecified which element is returned as the match. + * + * \returns The element found on success, null on failure. + */ +template Element *BstTable:: + find( const Key &key, Element **lastFound ) const +{ + const Element *lower, *mid, *upper; + long keyRelation; + const long tblLen = BaseTable::length(); + + if ( BaseTable::data == 0 ) { + /* Nothing allocated. Last found gets the (null) insert location. */ + if ( lastFound != 0 ) + *lastFound = 0; + return 0; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key. Last found gets the insert location. */ + if ( lastFound != 0 ) + *lastFound = (Element*)lower; + return 0; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + /* Key is found. Last found gets the found record. */ + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return (Element*)mid; + } + } +} + +/** + * \brief Insert a new element constructed from the given key. + * + * If the key does not already exist in the table then a new element is + * constructed in place from key. If lastFound is given, it is set to the new + * element created. If the key exists already then nothing is inserted and + * lastFound is set to the existing element of the same key. + * + * \returns The new element created upon success, null upon failure. + */ +template Element *BstTable:: + insert(const Key &key, Element **lastFound) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos.
*/ + insertPos = lower - BaseTable::data; + + /* Do the insert. After makeRawSpaceFor, lower pointer is no good. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key); + + /* Set lastFound */ + if ( lastFound != 0 ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + + +/** + * \brief Insert a new element constructed from key even if the key exists + * already. + * + * If an element with the same key is found, the new element is placed at the + * position of that match. The element is constructed in place from key. + * + * \returns The new element created. + */ +template Element *BstTable:: + insertMulti(const Key &key) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(key, GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} + +/** + * \brief Insert each element from other. + * + * Always attempts to insert all elements even if the insert of some item from + * other fails. + * + * \returns True if all items inserted successfully, false if any insert + * failed. + */ +template bool BstTable:: + insert(const BstTable &other) +{ + bool allSuccess = true; + long otherLen = other.length(); + for ( long i = 0; i < otherLen; i++ ) { + Element *el = insert( other.data[i] ); + if ( el == 0 ) + allSuccess = false; + } + return allSuccess; +} + +/** + * \brief Insert each element from other even if the elements exist already.
+ * + * No individual insertMulti can fail. + */ +template void BstTable:: + insertMulti(const BstTable &other) +{ + long otherLen = other.length(); + for ( long i = 0; i < otherLen; i++ ) + insertMulti( other.data[i] ); +} + +#if ! defined( BSTSET ) + +/** + * \brief Insert the given element. + * + * If the key in the given element does not already exist in the table then a + * new element is inserted. The element copy constructor is used to place the + * element into the table. If lastFound is given, it is set to the new element + * created. If the insert fails then lastFound is set to the existing element + * of the same key. + * + * \returns The new element created upon success, null upon failure. + */ +template Element *BstTable:: + insert(const Element &el, Element **lastFound ) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(GET_KEY(el), GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != 0 ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. After makeRawSpaceFor, lower pointer is no good. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(el); + + /* Set lastFound */ + if ( lastFound != 0 ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + +/** + * \brief Insert the given element even if it exists already. 
+ * + * If the key in the given element exists already then the new element is + * placed next to some other element of the same key. InsertMulti cannot fail. + * The element copy constructor is used to place the element in the table. + * + * \returns The new element created. + */ +template Element *BstTable:: + insertMulti(const Element &el) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the fd in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = compare(GET_KEY(el), GET_KEY(*mid)); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(el); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} +#endif + + +#if defined( BSTMAP ) + +/** + * \brief Insert the given key-value pair. + * + * If the given key does not already exist in the table then the key-value + * pair is inserted. Copy constructors are used to place the pair in the + * table. If lastFound is given, it is set to the new entry created. If the + * insert fails then lastFound is set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. 
+ */ +template Element *BstTable:: + insert(const Key &key, const Value &val, Element **lastFound) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the fd in the array. + * Place to insert at is lower. */ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = Compare::compare(key, mid->key); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + if ( lastFound != NULL ) + *lastFound = (Element*)mid; + return 0; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key, val); + + /* Set lastFound */ + if ( lastFound != NULL ) + *lastFound = BaseTable::data + insertPos; + return BaseTable::data + insertPos; +} + + +/** + * \brief Insert the given key-value pair even if the key exists already. + * + * If the key exists already then the key-value pair is placed next to some + * other pair of the same key. InsertMulti cannot fail. Copy constructors are + * used to place the pair in the table. + * + * \returns The new element created. + */ +template Element *BstTable:: + insertMulti(const Key &key, const Value &val) +{ + const Element *lower, *mid, *upper; + long keyRelation, insertPos; + const long tblLen = BaseTable::length(); + + if ( tblLen == 0 ) { + /* If the table is empty then go straight to insert. */ + lower = BaseTable::data; + goto insert; + } + + lower = BaseTable::data; + upper = BaseTable::data + tblLen - 1; + while ( true ) { + if ( upper < lower ) { + /* Did not find the key in the array. + * Place to insert at is lower. 
*/ + goto insert; + } + + mid = lower + ((upper-lower)>>1); + keyRelation = Compare::compare(key, mid->key); + + if ( keyRelation < 0 ) + upper = mid - 1; + else if ( keyRelation > 0 ) + lower = mid + 1; + else { + lower = mid; + goto insert; + } + } + +insert: + /* Get the insert pos. */ + insertPos = lower - BaseTable::data; + + /* Do the insert. */ + BaseVector::makeRawSpaceFor(insertPos, 1); + new(BaseTable::data + insertPos) Element(key, val); + + /* Return the element inserted. */ + return BaseTable::data + insertPos; +} + +#endif + +#ifdef AAPL_NAMESPACE +} +#endif diff --git a/aapl/bstmap.h b/aapl/bstmap.h new file mode 100644 index 00000000..5154b86c --- /dev/null +++ b/aapl/bstmap.h @@ -0,0 +1,113 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTMAP_H +#define _AAPL_BSTMAP_H + +#include "compare.h" +#include "vector.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \brief Element for BstMap. + * + * Stores the key and value pair. + */ +template struct BstMapEl +{ + BstMapEl() {} + BstMapEl(const Key &key) : key(key) {} + BstMapEl(const Key &key, const Value &val) : key(key), value(val) {} + + /** \brief The key */ + Key key; + + /** \brief The value. 
*/ + Value value; +}; + +#ifdef AAPL_NAMESPACE +} +#endif + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstMap + * \brief Binary search table for key and value pairs. + * + * BstMap stores key and value pairs in each element. The key and value can be + * any type. A compare class for the key must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Key, class Value, \ + class Compare = CmpOrd, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize +#define BST_TEMPL_USE Key, Value, Compare, Resize +#define GET_KEY(el) ((el).key) +#define BstTable BstMap +#define Element BstMapEl +#define BSTMAP + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Element +#undef BSTMAP + +/** + * \fn BstMap::insert(const Key &key, BstMapEl **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then a new element + * having key is inserted. The key copy constructor and value default + * constructor are used to place the pair in the table. If lastFound is given, + * it is set to the new entry created. If the insert fails then lastFound is + * set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn BstMap::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then the new element having key is placed next + * to some other pair of the same key. InsertMulti cannot fail. The key copy + * constructor and the value default constructor are used to place the pair in + * the table. + * + * \returns The new element created. 
+ */ + +#endif /* _AAPL_BSTMAP_H */ diff --git a/aapl/bstset.h b/aapl/bstset.h new file mode 100644 index 00000000..ce710ee0 --- /dev/null +++ b/aapl/bstset.h @@ -0,0 +1,86 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTSET_H +#define _AAPL_BSTSET_H + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstSet + * \brief Binary search table for types that are the key. + * + * BstSet is suitable for types that comprise the entire key. Rather than look + * into the element to retrieve the key, the element is the key. A class that + * contains a comparison routine for the key must be given. + */ + +/*@}*/ + +#include "compare.h" +#include "vector.h" + +#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd, \ + class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Compare, class Resize +#define BST_TEMPL_USE Key, Compare, Resize +#define GET_KEY(el) (el) +#define BstTable BstSet +#define Element Key +#define BSTSET + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Element +#undef BSTSET + +/** + * \fn BstSet::insert(const Key &key, Key **lastFound) + * \brief Insert the given key. 
+ * + * If the given key does not already exist in the table then it is inserted. + * The key's copy constructor is used to place the item in the table. If + * lastFound is given, it is set to the new entry created. If the insert fails + * then lastFound is set to the existing key of the same value. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn BstSet::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then it is placed next to some other key of the + * same value. InsertMulti cannot fail. The key's copy constructor is used to + * place the item in the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_BSTSET_H */ diff --git a/aapl/bsttable.h b/aapl/bsttable.h new file mode 100644 index 00000000..9898ebff --- /dev/null +++ b/aapl/bsttable.h @@ -0,0 +1,84 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BSTTABLE_H +#define _AAPL_BSTTABLE_H + +#include "compare.h" +#include "vector.h" + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class BstTable + * \brief Binary search table for structures that contain a key. + * + * This is the basic binary search table. 
It can be used to contain a + * structure that has a key and possibly some data. The key should be a member + * of the element class and accessible with getKey(). A class containing the + * compare routine must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Element, class Key, \ + class Compare = CmpOrd, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize +#define BST_TEMPL_USE Element, Key, Compare, Resize +#define GET_KEY(el) ((el).getKey()) +#define BSTTABLE + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BSTTABLE + +/** + * \fn BstTable::insert(const Key &key, Element **lastFound) + * \brief Insert a new element with the given key. + * + * If the given key does not already exist in the table a new element is + * inserted with the given key. A constructor taking only const Key& is used + * to initialize the new element. If lastFound is given, it is set to the new + * element created. If the insert fails then lastFound is set to the existing + * element with the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn BstTable::insertMulti(const Key &key) + * \brief Insert a new element even if the key exists already. + * + * If the key exists already then the new element is placed next to some + * element with the same key. InsertMulti cannot fail. A constructor taking + * only const Key& is used to initialize the new element. + * + * \returns The new element created. + */ + +#endif /* _AAPL_BSTTABLE_H */ diff --git a/aapl/bubblesort.h b/aapl/bubblesort.h new file mode 100644 index 00000000..20e0f6f4 --- /dev/null +++ b/aapl/bubblesort.h @@ -0,0 +1,94 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. 
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_BUBBLESORT_H +#define _AAPL_BUBBLESORT_H + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class BubbleSort + * \brief Bubble sort an array of data. + * + * BubbleSort can be used to sort any array of objects of type T provided a + * compare class is given. BubbleSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * BubbleSort runs in O(n^2) time. It is most useful when sorting arrays that + * are nearly sorted. It is best when neighbouring pairs are out of place. + * BubbleSort is a stable sort, meaning that objects with the same key have + * their relative ordering preserved. + */ + +/*@}*/ + +/* BubbleSort. */ +template class BubbleSort + : public Compare +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); +}; + + +/** + * \brief Bubble sort an array of data. 
+ */ +template void BubbleSort:: + sort(T *data, long len) +{ + bool changed = true; + for ( long pass = 1; changed && pass < len; pass ++ ) { + changed = false; + for ( long i = 0; i < len-pass; i++ ) { + /* Do we swap pos with the next one? */ + if ( compare( data[i], data[i+1] ) > 0 ) { + char tmp[sizeof(T)]; + + /* Swap the two items. */ + memcpy( tmp, data+i, sizeof(T) ); + memcpy( data+i, data+i+1, sizeof(T) ); + memcpy( data+i+1, tmp, sizeof(T) ); + + /* Note that we made a change. */ + changed = true; + } + } + } +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_BUBBLESORT_H */ diff --git a/aapl/compare.h b/aapl/compare.h new file mode 100644 index 00000000..3d547b51 --- /dev/null +++ b/aapl/compare.h @@ -0,0 +1,273 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_COMPARE_H +#define _AAPL_COMPARE_H + +#include +#include "astring.h" +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \defgroup compare Compare + * \brief Basic compare classes. + * + * Compare classes are used by data structures that need to know the relative + * ordering of elements. 
To become a compare class, a class must implement a + * routine long compare(const T &key1, const T &key2) that behaves just like + * strcmp. + * + * Compare classes are passed to the template data structure as a template + * parameter and are inherited. In most cases the compare routine will base + * the key comparison only on the two keys and the compare routine can + * therefore be static. Though sometimes it is useful to include data in the + * compare class and use this data in the comparison. For example the compare + * class may contain a pointer to some other data structure to which the + * comparison is delegated. + * + * @{ + */ + +/** + * \brief Compare two null terminated character sequences. + * + * This comparison class is a wrapper for strcmp. + */ +template struct CmpStrTmpl +{ + /** + * \brief Compare two null terminated string types. + */ + static inline long compare( const char *k1, const char *k2 ) + { return strcmp(k1, k2); } + + static int compare( const StrTmpl &s1, const StrTmpl &s2 ) + { + if ( s1.length() < s2.length() ) + return -1; + else if ( s1.length() > s2.length() ) + return 1; + else + return memcmp( s1.data, s2.data, s1.length() ); + } +}; + +typedef CmpStrTmpl CmpStr; + +/** + * \brief Compare a type for which < and > are implemented. + * + * CmpOrd is suitable for simple types such as integers and pointers that by + * default have the less-than and greater-than operators defined. + */ +template struct CmpOrd +{ + /** + * \brief Compare two ordinal types. + * + * This compare routine copies its arguments in by value. + */ + static inline long compare(const T k1, const T k2) + { + if (k1 < k2) + return -1; + else if (k1 > k2) + return 1; + else + return 0; + } +}; + +/** + * \brief Compare two tables of type T + * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. 
+ * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd > struct CmpTable + : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + static inline long compare(const Table &t1, const Table &t2) + { + if ( t1.tabLen < t2.tabLen ) + return -1; + else if ( t1.tabLen > t2.tabLen ) + return 1; + else + { + T *i1 = t1.data, *i2 = t2.data; + long len = t1.tabLen, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two tables of type T -- non-static version. + * + * CmpTableNs is identical to CmpTable, however the compare routine is + * non-static. If the CompareT class contains a non-static compare, then this + * version must be used because a static member cannot invoke a non-static + * member. + * + * Table comparison is useful for keying a data structure on a vector or binary + * search table. T is the element type stored in the table. CompareT + * is the comparison structure used to compare the individual values in the + * table. + */ +template < class T, class CompareT = CmpOrd > struct CmpTableNs + : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + inline long compare(const Table &t1, const Table &t2) + { + if ( t1.tabLen < t2.tabLen ) + return -1; + else if ( t1.tabLen > t2.tabLen ) + return 1; + else + { + T *i1 = t1.data, *i2 = t2.data; + long len = t1.tabLen, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two implicitly shared tables of type T + * + * This table comparison is for data structures based on implicitly + * shared tables. 
+ * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. + * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd > struct CmpSTable : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + static inline long compare(const STable &t1, const STable &t2) + { + long t1Length = t1.length(); + long t2Length = t2.length(); + + /* Compare lengths. */ + if ( t1Length < t2Length ) + return -1; + else if ( t1Length > t2Length ) + return 1; + else { + /* Compare the table data. */ + T *i1 = t1.data, *i2 = t2.data; + for ( long pos = 0; pos < t1Length; + pos += 1, i1 += 1, i2 += 1 ) + { + long cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + +/** + * \brief Compare two implicitly shared tables of type T -- non-static + * version. + * + * This is a non-static table comparison for data structures based on + * implicitly shared tables. If the CompareT class contains a non-static + * compare, then this version must be used because a static member cannot + * invoke a non-static member. + * + * Table comparison is useful for keying a data structure on a vector or + * binary search table. T is the element type stored in the table. + * CompareT is the comparison structure used to compare the individual values + * in the table. + */ +template < class T, class CompareT = CmpOrd > struct CmpSTableNs + : public CompareT +{ + /** + * \brief Compare two tables storing type T. + */ + inline long compare(const STable &t1, const STable &t2) + { + long t1Length = t1.length(); + long t2Length = t2.length(); + + /* Compare lengths. */ + if ( t1Length < t2Length ) + return -1; + else if ( t1Length > t2Length ) + return 1; + else { + /* Compare the table data. 
*/ + T *i1 = t1.data, *i2 = t2.data; + for ( long pos = 0; pos < t1Length; + pos += 1, i1 += 1, i2 += 1 ) + { + long cmpResult = CompareT::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } +}; + + +/*@}*/ + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_COMPARE_H */ diff --git a/aapl/dlcommon.h b/aapl/dlcommon.h new file mode 100644 index 00000000..5ce9bd30 --- /dev/null +++ b/aapl/dlcommon.h @@ -0,0 +1,790 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This header is not wrapped in ifndef because it is not intended to + * be included by the user. */ + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +#if defined( DOUBLELIST_VALUE ) +/** + * \brief Double list element for DListVal. + * + * DListValEl stores the type T of DListVal by value. + */ +template struct DListValEl +{ + /** + * \brief Construct a DListValEl with a given value. + * + * The only constructor available initializes the value element. This + * enforces that DListVal elements are never created without having their + * value initialized by the user. T's copy constructor is used to copy the + * value in. 
+ */ + DListValEl( const T &val ) : value(val) { } + + /** + * \brief Value stored by the list element. + * + * Value is always copied into new list elements using the copy + * constructor. + */ + T value; + + /** + * \brief List previous pointer. + * + * Points to the previous item in the list. If this is the first item in + * the list, then prev is NULL. If this element is not in a list then + * prev is undefined. + */ + DListValEl *prev; + + /** + * \brief List next pointer. + * + * Points to the next item in the list. If this is the last item in the + * list, then next is NULL. If this element is not in a list then next is + * undefined. + */ + DListValEl *next; +}; +#else + +#ifndef __AAPL_DOUBLE_LIST_EL +#define __AAPL_DOUBLE_LIST_EL +/** + * \brief Double list element properties. + * + * This class can be inherited to make a class suitable to be a double list + * element. It simply provides the next and previous pointers. An alternative + * is to put the next and previous pointers in the class directly. + */ +template struct DListEl +{ + /** + * \brief List previous pointer. + * + * Points to the previous item in the list. If this is the first item in + * the list, then prev is NULL. If this element is not in a list then + * prev is undefined. + */ + Element *prev; + + /** + * \brief List next pointer. + * + * Points to the next item in the list. If this is the last item in the + * list, then next is NULL. If this element is not in a list then next is + * undefined. + */ + Element *next; +}; +#endif /* __AAPL_DOUBLE_LIST_EL */ + +#endif + +/* Doubly Linked List */ +template class DList +{ +public: + /** \brief Initialize an empty list. */ + DList() : head(0), tail(0), listLen(0) {} + + /** + * \brief Perform a deep copy of the list. + * + * The elements of the other list are duplicated and put into this list. + * Elements are copied using the copy constructor. 
+ */ + DList(const DList &other); + +#ifdef DOUBLELIST_VALUE + /** + * \brief Clear the double list contents. + * + * All elements are deleted. + */ + ~DList() { empty(); } + + /** + * \brief Assign another list into this list using a deep copy. + * + * The elements of the other list are duplicated and put into this list. + * Each list item is created using the copy constructor. If this list + * contains any elements before the copy, they are deleted first. + * + * \returns A reference to this. + */ + DList &operator=(const DList &other); + + /** + * \brief Transfer the contents of another list into this list. + * + * The elements of the other list moved in. The other list will be empty + * afterwards. If this list contains any elements before the copy, then + * they are deleted. + */ + void transfer(DList &other); +#else + /** + * \brief Abandon all elements in the list. + * + * List elements are not deleted. + */ + ~DList() {} + + /** + * \brief Perform a deep copy of the list. + * + * The elements of the other list are duplicated and put into this list. + * Each list item is created using the copy constructor. If this list + * contains any elements before the copy, they are abandoned. + * + * \returns A reference to this. + */ + DList &operator=(const DList &other); + + /** + * \brief Transfer the contents of another list into this list. + * + * The elements of the other list moved in. The other list will be empty + * afterwards. If this list contains any elements before the copy, they + * are abandoned. + */ + void transfer(DList &other); +#endif + + +#ifdef DOUBLELIST_VALUE + /** + * \brief Make a new element and prepend it to the front of the list. + * + * The item is copied into the new element using the copy constructor. + * Equivalent to list.addBefore(list.head, item). + */ + void prepend(const T &item); + + /** + * \brief Make a new element and append it to the end of the list. + * + * The item is copied into the new element using the copy constructor. 
+ * Equivalent to list.addAfter(list.tail, item). + */ + void append(const T &item); + + /** + * \brief Make a new element and insert it immediately after an element in + * the list. + * + * The item is copied into the new element using the copy constructor. If + * prev_el is NULL then the new element is prepended to the front of the + * list. If prev_el is not already in the list then undefined behaviour + * results. Equivalent to list.addAfter(prev_el, new DListValEl(item)). + */ + void addAfter(Element *prev_el, const T &item); + + /** + * \brief Make a new element and insert it immediately before an element + * in the list. + * + * The item is copied into the new element using the copy constructor. If + * next_el is NULL then the new element is appended to the end of the + * list. If next_el is not already in the list then undefined behaviour + * results. Equivalent to list.addBefore(next_el, new DListValEl(item)). + */ + void addBefore(Element *next_el, const T &item); +#endif + + /** + * \brief Prepend a single element to the front of the list. + * + * If new_el is already an element of some list, then undefined behaviour + * results. Equivalent to list.addBefore(list.head, new_el). + */ + void prepend(Element *new_el) { addBefore(head, new_el); } + + /** + * \brief Append a single element to the end of the list. + * + * If new_el is already an element of some list, then undefined behaviour + * results. Equivalent to list.addAfter(list.tail, new_el). + */ + void append(Element *new_el) { addAfter(tail, new_el); } + + /** + * \brief Prepend an entire list to the beginning of this list. + * + * All items are moved, not copied. Afterwards, the other list is empty. + * All items are prepended at once, so this is an O(1) operation. + * Equivalent to list.addBefore(list.head, dl). + */ + void prepend(DList &dl) { addBefore(head, dl); } + + /** + * \brief Append an entire list to the end of the list. + * + * All items are moved, not copied. 
Afterwards, the other list is empty. + * All items are appended at once, so this is an O(1) operation. + * Equivalent to list.addAfter(list.tail, dl). + */ + void append(DList &dl) { addAfter(tail, dl); } + + void addAfter(Element *prev_el, Element *new_el); + void addBefore(Element *next_el, Element *new_el); + + void addAfter(Element *prev_el, DList &dl); + void addBefore(Element *next_el, DList &dl); + + /** + * \brief Detach the head of the list + * + * The element detached is not deleted. If there is no head of the list + * (the list is empty) then undefined behaviour results. Equivalent to + * list.detach(list.head). + * + * \returns The element detached. + */ + Element *detachFirst() { return detach(head); } + + /** + * \brief Detach the tail of the list + * + * The element detached is not deleted. If there is no tail of the list + * (the list is empty) then undefined behaviour results. Equivalent to + * list.detach(list.tail). + * + * \returns The element detached. + */ + Element *detachLast() { return detach(tail); } + + /* Detaches an element from the list. Does not free any memory. */ + Element *detach(Element *el); + + /** + * \brief Detach and delete the first element in the list. + * + * If there is no first element (the list is empty) then undefined + * behaviour results. Equivalent to delete list.detach(list.head); + */ + void removeFirst() { delete detach( head ); } + + /** + * \brief Detach and delete the last element in the list. + * + * If there is no last element (the list is empty) then undefined + * behaviour results. Equivalent to delete list.detach(list.tail); + */ + void removeLast() { delete detach( tail ); } + + /** + * \brief Detach and delete an element from the list. + * + * If the element is not in the list, then undefined behaviour results. + * Equivalent to delete list.detach(el); + */ + void remove(Element *el) { delete detach( el ); } + + void empty(); + void abandon(); + + /** \brief The number of elements in the list.
*/ + long length() const { return listLen; } + + /** \brief Head and tail of the linked list. */ + Element *head, *tail; + + /** \brief The number of elements in the list. */ + long listLen; + + /* Convenience access. Returns the same value as length(). */ + long size() const { return listLen; } + + /* Forward this so a ref can be used. */ + struct Iter; + + /* Helper classes for setting the iterator from either end of a list or + * from the neighbour of another iterator. */ + struct IterFirst { IterFirst( const DList &l ) : l(l) { } const DList &l; }; + struct IterLast { IterLast( const DList &l ) : l(l) { } const DList &l; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + + /** + * \brief Double List Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct from a double list. */ + Iter( const DList &dl ) : ptr(dl.head) { } + Iter( Element *el ) : ptr(el) { } + Iter( const IterFirst &dlf ) : ptr(dlf.l.head) { } + Iter( const IterLast &dll ) : ptr(dll.l.tail) { } + Iter( const IterNext &dln ) : ptr(dln.i.ptr->BASE_EL(next)) { } + Iter( const IterPrev &dlp ) : ptr(dlp.i.ptr->BASE_EL(prev)) { } + + /* Assign from a double list. */ + Iter &operator=( const DList &dl ) { ptr = dl.head; return *this; } + Iter &operator=( Element *el ) { ptr = el; return *this; } + Iter &operator=( const IterFirst &af ) { ptr = af.l.head; return *this; } + Iter &operator=( const IterLast &al ) { ptr = al.l.tail; return *this; } + Iter &operator=( const IterNext &an ) { ptr = an.i.ptr->BASE_EL(next); return *this; } + Iter &operator=( const IterPrev &ap ) { ptr = ap.i.ptr->BASE_EL(prev); return *this; } + + /** \brief Less than end? */ + bool lte() const { return ptr != 0; } + + /** \brief At end? */ + bool end() const { return ptr == 0; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != 0; } + + /** \brief At beginning? */ + bool beg() const { return ptr == 0; } + + /** \brief At first element?
*/ + bool first() const { return ptr && ptr->BASE_EL(prev) == 0; } + + /** \brief At last element? */ + bool last() const { return ptr && ptr->BASE_EL(next) == 0; } + + /** \brief Implicit cast to Element*. */ + operator Element*() const { return ptr; } + + /** \brief Dereference operator returns Element&. */ + Element &operator *() const { return *ptr; } + + /** \brief Arrow operator returns Element*. */ + Element *operator->() const { return ptr; } + + /** \brief Move to next item. */ + inline Element *operator++() { return ptr = ptr->BASE_EL(next); } + + /** \brief Move to next item. */ + inline Element *increment() { return ptr = ptr->BASE_EL(next); } + + /** \brief Move to next item. */ + inline Element *operator++(int); + + /** \brief Move to previous item. */ + inline Element *operator--() { return ptr = ptr->BASE_EL(prev); } + + /** \brief Move to previous item. */ + inline Element *decrement() { return ptr = ptr->BASE_EL(prev); } + + /** \brief Move to previous item. */ + inline Element *operator--(int); + + /** \brief Return the next item. Does not modify this. */ + inline IterNext next() const { return IterNext(*this); } + + /** \brief Return the prev item. Does not modify this. */ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. */ + Element *ptr; + }; + + /** \brief Return first element. */ + IterFirst first() { return IterFirst(*this); } + + /** \brief Return last element. */ + IterLast last() { return IterLast(*this); } +}; + +/* Copy constructor, does a deep copy of other. */ +template DList:: + DList(const DList &other) : + head(0), tail(0), listLen(0) +{ + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } +} + +#ifdef DOUBLELIST_VALUE + +/* Assignment operator does deep copy. */ +template DList &DList:: + operator=(const DList &other) +{ + /* Free the old list. The value list assumes items were allocated on the + * heap by itself.
*/ + if ( this != &other ) { + empty(); + + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } + } + /* On self-assignment the list is left untouched; emptying it first + * would have destroyed the elements that were about to be copied. */ + return *this; +} + +template void DList:: + transfer(DList &other) +{ + /* Transferring a list into itself leaves it untouched. */ + if ( this == &other ) + return; + + /* Free the old list. The value list assumes items were allocated on the + * heap by itself. */ + empty(); + + head = other.head; + tail = other.tail; + listLen = other.listLen; + + other.abandon(); +} + +#else + +/* Assignment operator does deep copy. Elements already in this list are + * abandoned (not deleted) before the copy is made. */ +template DList &DList:: + operator=(const DList &other) +{ + /* Self-assignment is a no-op; abandoning first would lose the elements + * that are to be copied. */ + if ( this == &other ) + return *this; + + /* Forget the current contents without deleting them. */ + abandon(); + + Element *el = other.head; + while( el != 0 ) { + append( new Element(*el) ); + el = el->BASE_EL(next); + } + return *this; +} + +/* Take over the elements of other. Elements already in this list are + * abandoned (not deleted). */ +template void DList:: + transfer(DList &other) +{ + /* Transferring a list into itself leaves it untouched. */ + if ( this == &other ) + return; + + head = other.head; + tail = other.tail; + listLen = other.listLen; + + other.abandon(); +} + +#endif + +#ifdef DOUBLELIST_VALUE + +/* Prepend a new item. Inlining this bloats the caller with new overhead. */ +template void DList:: + prepend(const T &item) +{ + addBefore(head, new Element(item)); +} + +/* Append a new item. Inlining this bloats the caller with the new overhead. */ +template void DList:: + append(const T &item) +{ + addAfter(tail, new Element(item)); +} + +/* Add a new item after a prev element. Inlining this bloats the caller with + * the new overhead. */ +template void DList:: + addAfter(Element *prev_el, const T &item) +{ + addAfter(prev_el, new Element(item)); +} + +/* Add a new item before a next element. Inlining this bloats the caller with + * the new overhead. */ +template void DList:: + addBefore(Element *next_el, const T &item) +{ + addBefore(next_el, new Element(item)); +} + +#endif + +/* + * The larger iterator operators.
+ */ + +/* Postfix ++ */ +template Element *DList::Iter:: + operator++(int) +{ + Element *rtn = ptr; + ptr = ptr->BASE_EL(next); + return rtn; +} + +/* Postfix -- */ +template Element *DList::Iter:: + operator--(int) +{ + Element *rtn = ptr; + ptr = ptr->BASE_EL(prev); + return rtn; +} + +/** + * \brief Insert an element immediately after an element in the list. + * + * If prev_el is NULL then new_el is prepended to the front of the list. If + * prev_el is not in the list or if new_el is already in a list, then + * undefined behaviour results. + */ +template void DList:: + addAfter(Element *prev_el, Element *new_el) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->BASE_EL(prev) = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->BASE_EL(next) = head; + head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->BASE_EL(next) = prev_el->BASE_EL(next); + prev_el->BASE_EL(next) = new_el; + } + + /* Set reverse pointers. */ + if (new_el->BASE_EL(next) == 0) { + /* There is no next element. Set the tail pointer. */ + tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->BASE_EL(next)->BASE_EL(prev) = new_el; + } + + /* Update list length. */ + listLen++; +} + +/** + * \brief Insert an element immediately before an element in the list. + * + * If next_el is NULL then new_el is appended to the end of the list. If + * next_el is not in the list or if new_el is already in a list, then + * undefined behaviour results. + */ +template void DList:: + addBefore(Element *next_el, Element *new_el) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->BASE_EL(next) = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next element.
We are inserting at the tail. */ + new_el->BASE_EL(prev) = tail; + tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->BASE_EL(prev) = next_el->BASE_EL(prev); + next_el->BASE_EL(prev) = new_el; + } + + /* Set forward pointers. */ + if (new_el->BASE_EL(prev) == 0) { + /* There is no previous element. Set the head pointer.*/ + head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->BASE_EL(prev)->BASE_EL(next) = new_el; + } + + /* Update list length. */ + listLen++; +} + +/** + * \brief Insert an entire list immediately after an element in this list. + * + * Elements are moved, not copied. Afterwards, the other list is empty. If + * prev_el is NULL then the elements are prepended to the front of the list. + * If prev_el is not in the list then undefined behaviour results. All + * elements are inserted into the list at once, so this is an O(1) operation. + */ +template void DList:: + addAfter( Element *prev_el, DList &dl ) +{ + /* Do not bother if dl has no elements. */ + if ( dl.listLen == 0 ) + return; + + /* Set the previous pointer of dl.head to prev_el. We do + * this regardless of the state of the list. */ + dl.head->BASE_EL(prev) = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + dl.tail->BASE_EL(next) = head; + head = dl.head; + } + else { + /* There was a prev_el, we can access previous next. */ + dl.tail->BASE_EL(next) = prev_el->BASE_EL(next); + prev_el->BASE_EL(next) = dl.head; + } + + /* Set reverse pointers. */ + if (dl.tail->BASE_EL(next) == 0) { + /* There is no next element. Set the tail pointer. */ + tail = dl.tail; + } + else { + /* There is a next element. Set its prev pointer. */ + dl.tail->BASE_EL(next)->BASE_EL(prev) = dl.tail; + } + + /* Update the list length. */ + listLen += dl.listLen; + + /* Empty out dl.
*/ + dl.head = dl.tail = 0; + dl.listLen = 0; +} + +/** + * \brief Insert an entire list immediately before an element in this list. + * + * Elements are moved, not copied. Afterwards, the other list is empty. If + * next_el is NULL then the elements are appended to the end of the list. If + * next_el is not in the list then undefined behaviour results. All elements + * are inserted at once, so this is an O(1) operation. + */ +template void DList:: + addBefore( Element *next_el, DList &dl ) +{ + /* Do not bother if dl has no elements. */ + if ( dl.listLen == 0 ) + return; + + /* Set the next pointer of dl.tail to next_el. We do + * this regardless of the state of the list. */ + dl.tail->BASE_EL(next) = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next element. We are inserting at the tail. */ + dl.head->BASE_EL(prev) = tail; + tail = dl.tail; + } + else { + /* There is a next element and we can access next's previous. */ + dl.head->BASE_EL(prev) = next_el->BASE_EL(prev); + next_el->BASE_EL(prev) = dl.tail; + } + + /* Set forward pointers. */ + if (dl.head->BASE_EL(prev) == 0) { + /* There is no previous element. Set the head pointer.*/ + head = dl.head; + } + else { + /* There is a previous element, set its next pointer to dl.head. */ + dl.head->BASE_EL(prev)->BASE_EL(next) = dl.head; + } + + /* Update list length. */ + listLen += dl.listLen; + + /* Empty out dl. */ + dl.head = dl.tail = 0; + dl.listLen = 0; +} + + +/** + * \brief Detach an element from the list. + * + * The element is not deleted. If the element is not in the list, then + * undefined behaviour results. + * + * \returns The element detached. + */ +template Element *DList:: + detach(Element *el) +{ + /* Set forward pointers to skip over el. */ + if (el->BASE_EL(prev) == 0) + head = el->BASE_EL(next); + else { + el->BASE_EL(prev)->BASE_EL(next) = + el->BASE_EL(next); + } + + /* Set reverse pointers to skip over el.
*/ + if (el->BASE_EL(next) == 0) + tail = el->BASE_EL(prev); + else { + el->BASE_EL(next)->BASE_EL(prev) = + el->BASE_EL(prev); + } + + /* Update List length and return element we detached. */ + listLen--; + return el; +} + +/** + * \brief Clear the list by deleting all elements. + * + * Each item in the list is deleted. The list is reset to its initial state. + */ +template void DList::empty() +{ + Element *nextToGo = 0, *cur = head; + + while (cur != 0) + { + nextToGo = cur->BASE_EL(next); + delete cur; + cur = nextToGo; + } + head = tail = 0; + listLen = 0; +} + +/** + * \brief Clear the list by forgetting all elements. + * + * All elements are abandoned, not deleted. The list is reset to its initial + * state. + */ +template void DList::abandon() +{ + head = tail = 0; + listLen = 0; +} + +#ifdef AAPL_NAMESPACE +} +#endif diff --git a/aapl/dlist.h b/aapl/dlist.h new file mode 100644 index 00000000..eaf3e5d5 --- /dev/null +++ b/aapl/dlist.h @@ -0,0 +1,64 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_DLIST_H +#define _AAPL_DLIST_H + +/* Configure the shared implementation in dlcommon.h for the basic list: + * BASE_EL(name) expands to name, so the next and prev pointers are accessed + * directly on the element. The macros are undefined again after the include. */ +#define BASE_EL(name) name +#define DLMEL_TEMPDEF class Element +#define DLMEL_TEMPUSE Element +#define DList DList + +/** + * \addtogroup dlist + * @{ + */ + +/** + * \class DList + * \brief Basic doubly linked list. + * + * DList is the standard by-structure list type. This class requires the + * programmer to declare a list element type that has the necessary next and + * previous pointers in it. This can be achieved by inheriting from the + * DListEl class or by simply adding next and previous pointers directly into + * the list element class. + * + * DList does not assume ownership of elements in the list. If the elements + * are known to reside on the heap, the provided empty() routine can be used to + * delete all elements, however the destructor will not call this routine, it + * will simply abandon all the elements. It is up to the programmer to + * explicitly de-allocate items when necessary. + * + * \include ex_dlist.cpp + */ + +/*@}*/ + +#include "dlcommon.h" + +#undef BASE_EL +#undef DLMEL_TEMPDEF +#undef DLMEL_TEMPUSE +#undef DList + +#endif /* _AAPL_DLIST_H */ + diff --git a/aapl/dlistmel.h b/aapl/dlistmel.h new file mode 100644 index 00000000..34331393 --- /dev/null +++ b/aapl/dlistmel.h @@ -0,0 +1,71 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version.
+ * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_DLISTMEL_H +#define _AAPL_DLISTMEL_H + +/** + * \addtogroup dlist + * @{ + */ + +/** + * \class DListMel + * \brief Doubly linked list for elements that may appear in multiple lists. + * + * This class is similar to DList, except that the user defined list element + * can inherit from multiple DListEl classes and consequently be an element in + * multiple lists. In other words, DListMel allows a single instance of a data + * structure to be an element in multiple lists without the lists interfering + * with one another. + * + * For each list that an element class is to appear in, the element must have + * unique next and previous pointers that can be unambiguously referred to with + * some base class name. This name is given to DListMel as a template argument + * so it can use the correct next and previous pointers in its list + * operations. + * + * DListMel does not assume ownership of elements in the list. If the elements + * are known to reside on the heap and are not contained in any other list or + * data structure, the provided empty() routine can be used to delete all + * elements, however the destructor will not call this routine, it will simply + * abandon all the elements. It is up to the programmer to explicitly + * de-allocate items when it is safe to do so.
+ * + * \include ex_dlistmel.cpp + */ + +/*@}*/ + +/* Configure the shared implementation in dlcommon.h for DListMel: + * BASE_EL(name) expands to BaseEl::name, so the next and prev pointers are + * reached through the base class given as the second template argument. The + * macros are undefined again after the include. */ +#define BASE_EL(name) BaseEl::name +#define DLMEL_TEMPDEF class Element, class BaseEl +#define DLMEL_TEMPUSE Element, BaseEl +#define DList DListMel + +#include "dlcommon.h" + +#undef BASE_EL +#undef DLMEL_TEMPDEF +#undef DLMEL_TEMPUSE +#undef DList + +#endif /* _AAPL_DLISTMEL_H */ + diff --git a/aapl/dlistval.h b/aapl/dlistval.h new file mode 100644 index 00000000..6f249995 --- /dev/null +++ b/aapl/dlistval.h @@ -0,0 +1,71 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_DLISTVAL_H +#define _AAPL_DLISTVAL_H + +/** + * \addtogroup dlist + * @{ + */ + +/** + * \class DListVal + * \brief By-value doubly linked list. + * + * This class is a doubly linked list that does not require a list element + * type to be declared. The user instead gives a type that is to be stored in + * the list element. When inserting a new data item, the value is copied into + * a newly allocated element. This list is intended to behave and be utilized + * like the list template found in the STL. + * + * DListVal is different from the other lists in that it allocates elements + * itself.
The raw element insert interface is still exposed for convenience, + * however, the list assumes all elements in the list are allocated on the + * heap and are to be managed by the list. The destructor WILL delete the + * contents of the list. If the list is ever copied in from another list, the + * existing contents are deleted first. This is in contrast to DList and + * DListMel, which will never delete their contents to allow for statically + * allocated elements. + * + * \include ex_dlistval.cpp + */ + +/*@}*/ + +/* Configure the shared implementation in dlcommon.h for the by-value list: + * the class is named DListVal, its elements are DListValEl, and + * DOUBLELIST_VALUE enables the item-copying interface and the deleting + * destructor. The macros are undefined again after the include. */ +#define BASE_EL(name) name +#define DLMEL_TEMPDEF class T +#define DLMEL_TEMPUSE T +#define DList DListVal +#define Element DListValEl +#define DOUBLELIST_VALUE + +#include "dlcommon.h" + +#undef BASE_EL +#undef DLMEL_TEMPDEF +#undef DLMEL_TEMPUSE +#undef DList +#undef Element +#undef DOUBLELIST_VALUE + +#endif /* _AAPL_DLISTVAL_H */ + diff --git a/aapl/insertsort.h b/aapl/insertsort.h new file mode 100644 index 00000000..eb3e2649 --- /dev/null +++ b/aapl/insertsort.h @@ -0,0 +1,94 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_INSERTSORT_H +#define _AAPL_INSERTSORT_H + +/* For memcpy, which the sort uses to move elements. */ +#include <string.h> + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class InsertSort + * \brief Insertion sort an array of data. + * + * InsertSort can be used to sort any array of objects of type T provided a + * compare class is given. InsertSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * InsertSort runs in O(n^2) time. It is most useful when sorting small arrays, + * where it can outperform the O(n*log(n)) sorters due to its simplicity. + * InsertSort is not a stable sort. Elements with the same key will not have + * their relative ordering preserved. + */ + +/*@}*/ + +/* InsertSort. */ +template class InsertSort + : public Compare +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); +}; + + +/** + * \brief Insertion sort an array of data. + * + * The smallest remaining element is repeatedly located and swapped into the + * next position of the sorted prefix. + */ +template + void InsertSort::sort(T *data, long len) +{ + /* For each next largest spot in the sorted array... */ + for ( T *dest = data; dest < data+len-1; dest++ ) { + /* Find the next smallest element in the unsorted array. */ + T *smallest = dest; + for ( T *src = dest+1; src < data+len; src++ ) { + /* If src is smaller than the current smallest, then use it. The + * compare routine is inherited from the Compare template + * argument, so it must be reached through this. */ + if ( this->compare( *src, *smallest ) < 0 ) + smallest = src; + } + + if ( smallest != dest ) { + /* Swap dest, smallest.
*/ + char tmp[sizeof(T)]; + memcpy( tmp, dest, sizeof(T) ); + memcpy( dest, smallest, sizeof(T) ); + memcpy( smallest, tmp, sizeof(T) ); + } + } +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_INSERTSORT_H */ diff --git a/aapl/mergesort.h b/aapl/mergesort.h new file mode 100644 index 00000000..d017511f --- /dev/null +++ b/aapl/mergesort.h @@ -0,0 +1,140 @@ +/* + * Copyright 2001, 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_MERGESORT_H +#define _AAPL_MERGESORT_H + +#include "bubblesort.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class MergeSort + * \brief Merge sort an array of data. + * + * MergeSort can be used to sort any array of objects of type T provided a + * compare class is given. MergeSort is not in-place, it requires temporary + * storage equal to the size of the array. The temporary storage is allocated + * on the heap. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * MergeSort runs in worst case O(n*log(n)) time. In most cases it is slower + * than QuickSort because more copying is necessary.
But on the other hand, + * it is a stable sort, meaning that objects with the same key have their + * relative ordering preserved. Also, its worst case is better. MergeSort + * switches to a BubbleSort when the size of the array being sorted is small. + * This happens when directly sorting a small array or when MergeSort calls + * itself recursively on a small portion of a larger array. + */ + +/*@}*/ + + +/* MergeSort. */ +template class MergeSort + : public BubbleSort +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); + +private: + /* Recursive worker. */ + void doSort(T *tmpStor, T *data, long len); +}; + +#define _MS_BUBBLE_THRESH 16 + +/* Recursive mergesort worker. Split data, make recursive calls, merge + * results. tmpStor must have room for len items; the merged run is built + * there and then copied back over data. */ +template< class T, class Compare> void MergeSort:: + doSort(T *tmpStor, T *data, long len) +{ + if ( len <= 1 ) + return; + + if ( len <= _MS_BUBBLE_THRESH ) { + BubbleSort::sort( data, len ); + return; + } + + long mid = len / 2; + + doSort( tmpStor, data, mid ); + doSort( tmpStor + mid, data + mid, len - mid ); + + /* Merge the data. */ + T *endLower = data + mid, *lower = data; + T *endUpper = data + len, *upper = data + mid; + T *dest = tmpStor; + while ( true ) { + if ( lower == endLower ) { + /* Possibly upper left. */ + if ( upper != endUpper ) + memcpy( dest, upper, (endUpper - upper) * sizeof(T) ); + break; + } + else if ( upper == endUpper ) { + /* Only lower left. */ + if ( lower != endLower ) + memcpy( dest, lower, (endLower - lower) * sizeof(T) ); + break; + } + else { + /* Both upper and lower left. Taking from lower on a tie keeps + * the sort stable. The compare routine is inherited through a + * template-dependent base, so it must be reached through this. */ + if ( this->compare(*lower, *upper) <= 0 ) + memcpy( dest++, lower++, sizeof(T) ); + else + memcpy( dest++, upper++, sizeof(T) ); + } + } + + /* Copy back from the tmpStor array. */ + memcpy( data, tmpStor, sizeof( T ) * len ); +} + +/** + * \brief Merge sort an array of data.
+ */ +template< class T, class Compare> + void MergeSort::sort(T *data, long len) +{ + /* Allocate the tmp space needed by merge sort, sort and free. The space + * is allocated and freed as raw chars, not as an array of T. */ + T *tmpStor = (T*) new char[sizeof(T) * len]; + doSort( tmpStor, data, len ); + delete[] (char*) tmpStor; +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_MERGESORT_H */ diff --git a/aapl/quicksort.h b/aapl/quicksort.h new file mode 100644 index 00000000..9bb96efd --- /dev/null +++ b/aapl/quicksort.h @@ -0,0 +1,185 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_QUICKSORT_H +#define _AAPL_QUICKSORT_H + +#include "insertsort.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup sort + * @{ + */ + +/** + * \class QuickSort + * \brief Quick sort an array of data. + * + * QuickSort can be used to sort any array of objects of type T provided a + * compare class is given. QuickSort is in-place. It does not require any + * temporary storage. + * + * Objects are not made aware that they are being moved around in memory. + * Assignment operators, constructors and destructors are never invoked by the + * sort. + * + * QuickSort runs in O(n*log(n)) time in the average case.
It is faster than + * mergesort in the average case because it does less moving of data. The + * performance of quicksort depends mostly on the choice of pivot. This + * implementation picks the pivot as the median of first, middle, last. This + * choice of pivot avoids the O(n^2) worst case for input already sorted, but + * it is still possible to encounter the O(n^2) worst case. For example an + * array of identical elements will run in O(n^2). + * + * QuickSort is not a stable sort. Elements with the same key will not have + * their relative ordering preserved. QuickSort switches to an InsertSort + * when the size of the array being sorted is small. This happens when + * directly sorting a small array or when QuickSort calls itself recursively + * on a small portion of a larger array. + */ + +/*@}*/ + +/* QuickSort. */ +template class QuickSort : + public InsertSort +{ +public: + /* Sorting interface routine. */ + void sort(T *data, long len); + +private: + /* Recursive worker. */ + void doSort(T *start, T *end); + T *partition(T *start, T *end); + inline T *median(T *start, T *end); +}; + +#define _QS_INSERTION_THRESH 16 + +/* Finds the median of start, middle, end. Both start and end point at + * elements (end is inclusive). Returns a pointer to whichever of the three + * holds the median value. */ +template T *QuickSort:: + median(T *start, T *end) +{ + T *pivot, *mid = start + (end-start)/2; + + /* Choose the pivot. The compare routine is inherited through a + * template-dependent base, so it must be reached through this. */ + if ( this->compare(*start, *mid) < 0 ) { + if ( this->compare(*mid, *end) < 0 ) + pivot = mid; + else if ( this->compare(*start, *end) < 0 ) + pivot = end; + else + pivot = start; + } + else if ( this->compare(*start, *end) < 0 ) + pivot = start; + else if ( this->compare(*mid, *end) < 0 ) + pivot = end; + else + pivot = mid; + + return pivot; +} + +template T *QuickSort:: + partition(T *start, T *end) +{ + /* Use the median of start, middle, end as the pivot. First save + * it off then move the last element to the free spot.
*/ + char pcPivot[sizeof(T)]; + T *pivot = median(start, end); + + memcpy( pcPivot, pivot, sizeof(T) ); + if ( pivot != end ) + memcpy( pivot, end, sizeof(T) ); + + T *first = start-1; + T *last = end; + pivot = (T*) pcPivot; + + /* Shuffle element to the correct side of the pivot, ending + * up with the free spot where the pivot will go. The compare routine is + * inherited through a template-dependent base, so it must be reached + * through this. */ + while ( true ) { + /* Throw one element ahead to the free spot at last. */ + while ( true ) { + first += 1; + if ( first == last ) + goto done; + if ( this->compare( *first, *pivot ) > 0 ) { + memcpy(last, first, sizeof(T)); + break; + } + } + + /* Throw one element back to the free spot at first. */ + while ( true ) { + last -= 1; + if ( last == first ) + goto done; + if ( this->compare( *last, *pivot ) < 0 ) { + memcpy(first, last, sizeof(T)); + break; + } + } + } +done: + /* Put the pivot into the middle spot for it. */ + memcpy( first, pivot, sizeof(T) ); + return first; +} + + +template< class T, class Compare> void QuickSort:: + doSort(T *start, T *end) +{ + long len = end - start + 1; + if ( len > _QS_INSERTION_THRESH ) { + /* Use quicksort. */ + T *pivot = partition( start, end ); + doSort(start, pivot-1); + doSort(pivot+1, end); + } + else if ( len > 1 ) { + /* Array is small, use insertion sort. */ + InsertSort::sort( start, len ); + } +} + +/** + * \brief Quick sort an array of data. + */ +template< class T, class Compare> + void QuickSort::sort(T *data, long len) +{ + /* Call recursive worker. */ + doSort(data, data+len-1); +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_QUICKSORT_H */ diff --git a/aapl/resize.h b/aapl/resize.h new file mode 100644 index 00000000..24edc16e --- /dev/null +++ b/aapl/resize.h @@ -0,0 +1,344 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl.
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_RESIZE_H +#define _AAPL_RESIZE_H + +#include + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/* This step is expressed in units of T. Changing this requires changes to + * docs in ResizeLin constructor. */ +#define LIN_DEFAULT_STEP 256 + +/* + * Resizing macros giving different resize methods. The expansions are not parenthesized, so each is only safe as a complete expression. + */ + +/* If needed is greater than existing, give twice needed. */ +#define EXPN_UP( existing, needed ) \ + needed > existing ? (needed<<1) : existing + +/* If needed is less than 1 quarter existing, give twice needed. */ +#define EXPN_DOWN( existing, needed ) \ + needed < (existing>>2) ? (needed<<1) : existing + +/* If needed is greater than existing, give needed plus step. Note that step is not a macro parameter: it is whatever step names where the macro is expanded. */ +#define LIN_UP( existing, needed ) \ + needed > existing ? (needed+step) : existing + +/* If needed is less than existing - 2 * step then give needed plus step. */ +#define LIN_DOWN( existing, needed ) \ + needed < (existing-(step<<1)) ? (needed+step) : existing + +/* Return existing. */ +#define CONST_UP( existing, needed ) existing + +/* Return existing. */ +#define CONST_DOWN( existing, needed ) existing + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeLin + * \brief Linear table resizer. 
+ * + * When an up resize or a down resize is needed, ResizeLin allocates the space + * needed plus some user defined step. The result is that when growing the + * vector in a linear fashion, the number of resizes is also linear. + * + * If only up resizing is done, then there will never be more than step unused + * spaces in the vector. If down resizing is done as well, there will never be + * more than 2*step unused spaces in the vector. The up resizing and down + * resizing policies are offset to improve performance when repeatedly + * inserting and removing a small number of elements relative to the step. + * This scheme guarantees that repetitive inserting and removing of a small + * number of elements will never result in repetitive reallocation. + * + * The vectors pass sizes to the resizer in units of T, so the step gets + * interpreted as units of T. + */ + +/*@}*/ + +/* Linear resizing. */ +class ResizeLin +{ +protected: + /** + * \brief Default constructor. + * + * Initializes resize step to 256 units of the table type T. + */ + ResizeLin() : step(LIN_DEFAULT_STEP) { } + + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then allocate + * the space needed plus the step. The step is in units of T. + */ + inline long upResize( long existing, long needed ) + { return LIN_UP(existing, needed); } + + /** + * \brief Determine the new table size when down resizing. + * + * If space needed is less than the existing - 2*step, then allocate the + * space needed plus the step. The step is in units of T. + */ + inline long downResize( long existing, long needed ) + { return LIN_DOWN(existing, needed); } + +public: + /** + * \brief Step for linear resize. + * + * Amount of extra space in units of T added each time a resize must take + * place. This may be changed at any time. The step should be >= 0. 
+ */ + long step; +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeCtLin + * \brief Linear table resizer with compile time step. + * + * When an up resize or a down resize is needed, ResizeCtLin allocates the + * space needed plus some compile time defined step. The result is that when + * growing the vector in a linear fashion, the number of resizes is also + * linear. + * + * If only up resizing is done, then there will never be more than step unused + * spaces in the vector. If down resizing is done as well, there will never be + * more than 2*step unused spaces in the vector. The up resizing and down + * resizing policies are offset to improve performance when repeatedly + * inserting and removing a small number of elements relative to the step. + * This scheme guarantees that repetitive inserting and removing of a small + * number of elements will never result in repetitive reallocation. + * + * The vectors pass sizes to the resizer in units of T, so the step gets + * interpreted as units of T. + */ + +/*@}*/ + +/* Linear resizing with a compile time step. */ +template class ResizeCtLin +{ +protected: + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then allocate + * the space needed plus the step. The step is in units of T. + */ + inline long upResize( long existing, long needed ) + { return LIN_UP(existing, needed); } + + /** + * \brief Determine the new table size when down resizing. + * + * If space needed is less than the existing - 2*step, then allocate the + * space needed plus the step. The step is in units of T. + */ + inline long downResize( long existing, long needed ) + { return LIN_DOWN(existing, needed); } +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeConst + * \brief Constant table resizer. + * + * When an up resize is needed the existing size is always used. ResizeConst + * does not allow dynamic resizing. 
To use ResizeConst, the vector needs to be + * constructed with an initial allocation amount otherwise it will be + * unusable. + */ + +/*@}*/ + +/* Constant table resizing. */ +class ResizeConst +{ +protected: + /* Assert that no more is needed than exists. Return existing. */ + static inline long upResize( long existing, long needed ); + + /** + * \brief Determine the new table size when down resizing. + * + * Always returns the existing table size. + */ + static inline long downResize( long existing, long needed ) + { return CONST_DOWN(existing, needed); } +}; + +/** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed, then an assertion + * will fail. Otherwise returns the existing size. + */ +inline long ResizeConst::upResize( long existing, long needed ) +{ + assert( needed <= existing ); + return CONST_UP(existing, needed); +} + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeRunTime + * \brief Run time settable table resizer. + * + * ResizeRunTime can have its up and down resizing policies set at run time. + * Both up and down policies can be set independently to one of Exponential, + * Linear, or Constant. See the documentation for ResizeExpn, ResizeLin, and + * ResizeConst for the details of the resizing policies. + * + * The policies may be changed at any time. The default policies are + * both Exponential. + */ + +/*@}*/ + +/* Run time resizing. */ +class ResizeRunTime +{ +protected: + /** + * \brief Default constructor. + * + * The up and down resizing is initialized to Exponential. The step + * defaults to 256 units of T. + */ + inline ResizeRunTime(); + + /** + * \brief Resizing policies. + */ + enum ResizeType { + Exponential, /*!< Exponential resizing. */ + Linear, /*!< Linear resizing. */ + Constant /*!< Constant table size. 
*/ + }; + + inline long upResize( long existing, long needed ); + inline long downResize( long existing, long needed ); + +public: + /** + * \brief Step for linear resize. + * + * Amount of extra space in units of T added each time a resize must take + * place. This may be changed at any time. The step should be >= 0. + */ + long step; + + /** + * \brief Up resizing policy. + */ + ResizeType upResizeType; + + /** + * \brief Down resizing policy. + */ + ResizeType downResizeType; +}; + +inline ResizeRunTime::ResizeRunTime() +: + step( LIN_DEFAULT_STEP ), + upResizeType( Exponential ), + downResizeType( Exponential ) +{ +} + +/** + * \brief Determine the new table size when up resizing. + * + * Type of up resizing is determined by upResizeType. Exponential, Linear and + * Constant resizing is the same as that of ResizeExpn, ResizeLin and + * ResizeConst. Returns 0 when upResizeType matches no case. NOTE(review): the closing brace below is followed by a stray semicolon. + */ +inline long ResizeRunTime::upResize( long existing, long needed ) +{ + switch ( upResizeType ) { + case Exponential: + return EXPN_UP(existing, needed); + case Linear: + return LIN_UP(existing, needed); + case Constant: + assert( needed <= existing ); + return CONST_UP(existing, needed); + } + return 0; +}; + +/** + * \brief Determine the new table size when down resizing. + * + * Type of down resizing is determined by downResizeType. Exponential, Linear + * and Constant resizing is the same as that of ResizeExpn, ResizeLin and + * ResizeConst. Returns 0 when downResizeType matches no case. + */ +inline long ResizeRunTime::downResize( long existing, long needed ) +{ + switch ( downResizeType ) { + case Exponential: + return EXPN_DOWN(existing, needed); + case Linear: + return LIN_DOWN(existing, needed); + case Constant: + return CONST_DOWN(existing, needed); + } + return 0; +} + +/* Don't need these anymore. 
*/ +#undef EXPN_UP +#undef EXPN_DOWN +#undef LIN_UP +#undef LIN_DOWN +#undef CONST_UP +#undef CONST_DOWN + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_RESIZE_H */ diff --git a/aapl/sbstmap.h b/aapl/sbstmap.h new file mode 100644 index 00000000..9436a472 --- /dev/null +++ b/aapl/sbstmap.h @@ -0,0 +1,121 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTMAP_H +#define _AAPL_SBSTMAP_H + +#include "compare.h" +#include "svector.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \brief Element for SBstMap. + * + * Stores the key and value pair. + */ +template struct SBstMapEl +{ + SBstMapEl() {} + SBstMapEl(const Key &key) : key(key) {} + SBstMapEl(const Key &key, const Value &val) : key(key), value(val) {} + + /** \brief The key */ + Key key; + + /** \brief The value. Default-initialized by the key-only constructor. */ + Value value; +}; + +#ifdef AAPL_NAMESPACE +} +#endif + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstMap + * \brief Copy-on-write binary search table for key and value pairs. + * + * This is a map style binary search table that employs the copy-on-write + * mechanism for table data. SBstMap stores key and value pairs in each + * element. The key and value can be any type. 
A compare class for the key + * must be supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Key, class Value, \ + class Compare = CmpOrd, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Value, class Compare, class Resize +#define BST_TEMPL_USE Key, Value, Compare, Resize +#define GET_KEY(el) ((el).key) +#define BstTable SBstMap +#define Vector SVector +#define Table STable +#define Element SBstMapEl +#define BSTMAP +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef Element +#undef BSTMAP +#undef SHARED_BST + +/** + * \fn SBstMap::insert(const Key &key, BstMapEl **lastFound) + * \brief Insert the given key. + * + * If the given key does not already exist in the table then a new element + * having key is inserted. The key copy constructor and value default + * constructor are used to place the pair in the table. If lastFound is given, + * it is set to the new entry created. If the insert fails then lastFound is + * set to the existing pair of the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstMap::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then the new element having key is placed next + * to some other pair of the same key. InsertMulti cannot fail. The key copy + * constructor and the value default constructor are used to place the pair in + * the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_SBSTMAP_H */ diff --git a/aapl/sbstset.h b/aapl/sbstset.h new file mode 100644 index 00000000..fe8ddf6c --- /dev/null +++ b/aapl/sbstset.h @@ -0,0 +1,94 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. 
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTSET_H +#define _AAPL_SBSTSET_H + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstSet + * \brief Copy-on-write binary search table for types that are the key. + * + * This is a set style binary search table that employs the copy-on-write + * mechanism for storing table data. SBstSet is suitable for types that + * comprise the entire key. Rather than look into the element to retrieve the + * key, the element is the key. A class that contains a comparison routine + * for the key must be given. + */ + +/*@}*/ + +#include "compare.h" +#include "svector.h" + +#define BST_TEMPL_DECLARE class Key, class Compare = CmpOrd, \ + class Resize = ResizeExpn +#define BST_TEMPL_DEF class Key, class Compare, class Resize +#define BST_TEMPL_USE Key, Compare, Resize +#define GET_KEY(el) (el) +#define BstTable SBstSet +#define Vector SVector +#define Table STable +#define Element Key +#define BSTSET +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef Element +#undef BSTSET +#undef SHARED_BST + +/** + * \fn SBstSet::insert(const Key &key, Key **lastFound) + * \brief Insert the given key. 
+ * + * If the given key does not already exist in the table then it is inserted. + * The key's copy constructor is used to place the item in the table. If + * lastFound is given, it is set to the new entry created. If the insert fails + * then lastFound is set to the existing key of the same value. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstSet::insertMulti(const Key &key) + * \brief Insert the given key even if it exists already. + * + * If the key exists already then it is placed next to some other key of the + * same value. InsertMulti cannot fail. The key's copy constructor is used to + * place the item in the table. + * + * \returns The new element created. + */ + +#endif /* _AAPL_SBSTSET_H */ diff --git a/aapl/sbsttable.h b/aapl/sbsttable.h new file mode 100644 index 00000000..100b87ec --- /dev/null +++ b/aapl/sbsttable.h @@ -0,0 +1,93 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SBSTTABLE_H +#define _AAPL_SBSTTABLE_H + +#include "compare.h" +#include "svector.h" + +/** + * \addtogroup bst + * @{ + */ + +/** + * \class SBstTable + * \brief Copy-on-write binary search table for structures that contain a key. 
+ * + * This is a basic binary search table that employs a copy-on-write data + * storage mechanism. It can be used to contain a structure that has a key and + * possibly some data. The key should be a member of the element class and + * accessible with getKey(). A class containing the compare routine must be + * supplied. + */ + +/*@}*/ + +#define BST_TEMPL_DECLARE class Element, class Key, \ + class Compare = CmpOrd, class Resize = ResizeExpn +#define BST_TEMPL_DEF class Element, class Key, class Compare, class Resize +#define BST_TEMPL_USE Element, Key, Compare, Resize +#define GET_KEY(el) ((el).getKey()) +#define BstTable SBstTable +#define Vector SVector +#define Table STable +#define BSTTABLE +#define SHARED_BST + +#include "bstcommon.h" + +#undef BST_TEMPL_DECLARE +#undef BST_TEMPL_DEF +#undef BST_TEMPL_USE +#undef GET_KEY +#undef BstTable +#undef Vector +#undef Table +#undef BSTTABLE +#undef SHARED_BST + +/** + * \fn SBstTable::insert(const Key &key, Element **lastFound) + * \brief Insert a new element with the given key. + * + * If the given key does not already exist in the table a new element is + * inserted with the given key. A constructor taking only const Key& is used + * to initialize the new element. If lastFound is given, it is set to the new + * element created. If the insert fails then lastFound is set to the existing + * element with the same key. + * + * \returns The new element created upon success, null upon failure. + */ + +/** + * \fn SBstTable::insertMulti(const Key &key) + * \brief Insert a new element even if the key exists already. + * + * If the key exists already then the new element is placed next to some + * element with the same key. InsertMulti cannot fail. A constructor taking + * only const Key& is used to initialize the new element. + * + * \returns The new element created. 
+ */ + +#endif /* _AAPL_SBSTTABLE_H */ diff --git a/aapl/svector.h b/aapl/svector.h new file mode 100644 index 00000000..db3a5656 --- /dev/null +++ b/aapl/svector.h @@ -0,0 +1,1350 @@ +/* + * Copyright 2002, 2006 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_SVECTOR_H +#define _AAPL_SVECTOR_H + +#include +#include +#include +#include +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class SVector + * \brief Copy-on-write dynamic array. + * + * SVector is a variant of Vector that employs copy-on-write behaviour. The + * SVector copy constructor and = operator make shallow copies. If a vector + * that references shared data is modified with insert, replace, append, + * prepend, setAs or remove, a new copy is made so as not to interfere with + * the shared data. However, shared individual elements may be modified by + * bypassing the SVector interface. + * + * SVector is a dynamic array that can be used to contain complex data + * structures that have constructors and destructors as well as simple types + * such as integers and pointers. + * + * SVector supports inserting, overwriting, and removing single or multiple + * elements at once. 
Constructors and destructors are called wherever + * appropriate. For example, before an element is overwritten, its + * destructor is called. + * + * SVector provides automatic resizing of allocated memory as needed and + * offers different allocation schemes for controlling how the automatic + * allocation is done. Two senses of the length of the data are + * maintained: the amount of raw memory allocated to the vector and the number + * of actual elements in the vector. The various allocation schemes control + * how the allocated space is changed in relation to the number of elements in + * the vector. + */ + +/*@}*/ + +/* SVector */ +template < class T, class Resize = ResizeExpn > class SVector : + public STable, public Resize +{ +private: + typedef STable BaseTable; + +public: + /** + * \brief Initialize an empty vector with no space allocated. + * + * If a linear resizer is used, the step defaults to 256 units of T. For a + * runtime vector both up and down allocation schemes default to + * Exponential. + */ + SVector() { } + + /** + * \brief Create a vector that contains an initial element. + * + * The vector becomes one element in length. The element's copy + * constructor is used to place the value in the vector. + */ + SVector(const T &val) { setAs(&val, 1); } + + /** + * \brief Create a vector that contains an array of elements. + * + * The vector becomes len elements in length. Copy constructors are used + * to place the new elements in the vector. + */ + SVector(const T *val, long len) { setAs(val, len); } + + /* Shallow copy: shares the data of v and increments its reference count. */ + SVector( const SVector &v ); + + /** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ + ~SVector() { empty(); } + + /* Delete all items. */ + void empty(); + + /** + * \brief Deep copy another vector into this vector. 
+ * + * Copies the entire contents of the other vector into this vector. Any + * existing contents are first deleted. Equivalent to setAs. + */ + void deepCopy( const SVector &v ) { setAs(v.data, v.length()); } + + /* Perform a shallow copy of another vector. */ + SVector &operator=( const SVector &v ); + + + /*@{*/ + /** + * \brief Insert one element at position pos. + * + * Elements in the vector from pos onward are shifted one space to the + * right. The copy constructor is used to place the element into this + * vector. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative then it is treated as an offset + * relative to the length of the vector. + */ + void insert(long pos, const T &val) { insert(pos, &val, 1); } + + /* Insert an array of values. */ + void insert(long pos, const T *val, long len); + + /** + * \brief Insert all the elements from another vector at position pos. + * + * Elements in this vector from pos onward are shifted v.length() spaces + * to the right. The element's copy constructor is used to copy the items + * into this vector. The other vector is left unchanged. If pos is off the + * end of the vector, then undefined behaviour results. If pos is negative + * then it is treated as an offset relative to the length of the vector. + * Equivalent to vector.insert(pos, other.data, other.length()). + */ + void insert(long pos, const SVector &v) { insert(pos, v.data, v.length()); } + + /* Insert len copies of val into the vector. */ + void insertDup(long pos, const T &val, long len); + + /** + * \brief Insert one new element using the default constructor. + * + * Elements in the vector from pos onward are shifted one space to the right. + * The default constructor is used to init the new element. If pos is greater + * than the length of the vector then undefined behaviour results. If pos is + * negative then it is treated as an offset relative to the length of the + * vector. 
+ */ + void insertNew(long pos) { insertNew(pos, 1); } + + /* Insert len new items using default constructor. */ + void insertNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Remove one element at position pos. + * + * The element's destructor is called. Elements to the right of pos are + * shifted one space to the left to take up the free space. If pos is greater + * than or equal to the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ + void remove(long pos) { remove(pos, 1); } + + /* Delete a number of elements. */ + void remove(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Replace one element at position pos. + * + * If there is an existing element at position pos (if pos is less than the + * length of the vector) then its destructor is called before the space is + * used. The copy constructor is used to place the element into the vector. + * If pos is greater than the length of the vector then undefined behaviour + * results. If pos is negative then it is treated as an offset relative to + * the length of the vector. + */ + void replace(long pos, const T &val) { replace(pos, &val, 1); } + + /* Replace with an array of values. */ + void replace(long pos, const T *val, long len); + + /** + * \brief Replace at position pos with all the elements of another vector. + * + * Replace at position pos with all the elements of another vector. The other + * vector is left unchanged. If there are existing elements at the positions + * to be replaced, then destructors are called before the space is used. Copy + * constructors are used to place the elements into this vector. It is + * allowable for the pos and length of the other vector to specify a + * replacement that overwrites existing elements and creates new ones. If pos + * is greater than the length of the vector then undefined behaviour results. 
+ * If pos is negative, then it is treated as an offset relative to the length + * of the vector. + */ + void replace(long pos, const SVector &v) { replace(pos, v.data, v.length()); } + + /* Replace len items with len copies of val. */ + void replaceDup(long pos, const T &val, long len); + + /** + * \brief Replace at position pos with one new element. + * + * If there is an existing element at the position to be replaced (pos is + * less than the length of the vector) then the element's destructor is + * called before the space is used. The default constructor is used to + * initialize the new element. If pos is greater than the length of the + * vector then undefined behaviour results. If pos is negative, then it is + * treated as an offset relative to the length of the vector. + */ + void replaceNew(long pos) { replaceNew(pos, 1); } + + /* Replace len items at pos with newly constructed objects. */ + void replaceNew(long pos, long len); + /*@}*/ + + /*@{*/ + + /** + * \brief Set the contents of the vector to be val exactly. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * place the val in the vector. + */ + void setAs(const T &val) { setAs(&val, 1); } + + /* Set to the contents of an array. */ + void setAs(const T *val, long len); + + /** + * \brief Set the vector to exactly the contents of another vector. + * + * The vector becomes v.length() elements in length. Destructors are called + * on any existing elements. Copy constructors are used to place the new + * elements in the vector. + */ + void setAs(const SVector &v) { setAs(v.data, v.length()); } + + /* Set as len copies of item. */ + void setAsDup(const T &item, long len); + + /** + * \brief Set the vector to exactly one new item. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. 
The default constructor is used to + * init the new item. + */ + void setAsNew() { setAsNew(1); } + + /* Set as newly constructed objects using the default constructor. */ + void setAsNew(long len); + /*@}*/ + + /*@{*/ + /** + * \brief Append one element to the end of the vector. + * + * Copy constructor is used to place the element in the vector. Implemented as a replace at the current length. + */ + void append(const T &val) { replace(BaseTable::length(), &val, 1); } + + /** + * \brief Append len elements to the end of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void append(const T *val, long len) { replace(BaseTable::length(), val, len); } + + /** + * \brief Append the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place + * the elements in the vector. + */ + void append(const SVector &v) + { replace(BaseTable::length(), v.data, v.length()); } + + /** + * \brief Append len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void appendDup(const T &item, long len) { replaceDup(BaseTable::length(), item, len); } + + /** + * \brief Append a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void appendNew() { replaceNew(BaseTable::length(), 1); } + + /** + * \brief Append len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void appendNew(long len) { replaceNew(BaseTable::length(), len); } + /*@}*/ + + + /*@{*/ + /** + * \brief Prepend one element to the front of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void prepend(const T &val) { insert(0, &val, 1); } + + /** + * \brief Prepend len elements to the front of the vector. + * + * Copy constructors are used to place the elements in the vector. 
+ */ + void prepend(const T *val, long len) { insert(0, val, len); } + + /** + * \brief Prepend the contents of another vector. 
+ * + * The other vector is left unchanged. Copy constructors are used to place + * the elements in the vector. + */ + void prepend(const SVector &v) { insert(0, v.data, v.length()); } + + /** + * \brief Prepend len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void prependDup(const T &item, long len) { insertDup(0, item, len); } + + /** + * \brief Prepend a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void prependNew() { insertNew(0, 1); } + + /** + * \brief Prepend len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void prependNew(long len) { insertNew(0, len); } + /*@}*/ + + /* Convenience access. operator[] does no bounds check and hands out a mutable reference even on a const vector, so writes through it reach the shared data directly. */ + T &operator[](int i) const { return BaseTable::data[i]; } + long size() const { return BaseTable::length(); } + + /* Various classes for setting the iterator */ + struct Iter; + struct IterFirst { IterFirst( const SVector &v ) : v(v) { } const SVector &v; }; + struct IterLast { IterLast( const SVector &v ) : v(v) { } const SVector &v; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + + /** + * \brief Shared Vector Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Default construct: all three pointers are null. */ + Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { } + + /* Construct. */ + Iter( const SVector &v ); + Iter( const IterFirst &vf ); + Iter( const IterLast &vl ); + inline Iter( const IterNext &vn ); + inline Iter( const IterPrev &vp ); + + /* Assign. */ + Iter &operator=( const SVector &v ); + Iter &operator=( const IterFirst &vf ); + Iter &operator=( const IterLast &vl ); + inline Iter &operator=( const IterNext &vf ); + inline Iter &operator=( const IterPrev &vl ); + + /** \brief Less than end? */ + bool lte() const { return ptr != ptrEnd; } + + /** \brief At end? 
*/ + bool end() const { return ptr == ptrEnd; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != ptrBeg; } + + /** \brief At beginning? */ + bool beg() const { return ptr == ptrBeg; } + + /** \brief At first element? */ + bool first() const { return ptr == ptrBeg+1; } + + /** \brief At last element? */ + bool last() const { return ptr == ptrEnd-1; } + + /* Return the zero-based position, measured from the slot after ptrBeg. */ + long pos() const { return ptr - ptrBeg - 1; } + T &operator[](int i) const { return ptr[i]; } + + /** \brief Implicit cast to T*. */ + operator T*() const { return ptr; } + + /** \brief Dereference operator returns T&. */ + T &operator *() const { return *ptr; } + + /** \brief Arrow operator returns T*. */ + T *operator->() const { return ptr; } + + /** \brief Move to next item. */ + T *operator++() { return ++ptr; } + + /** \brief Move to next item. Returns the pointer held before the move. */ + T *operator++(int) { return ptr++; } + + /** \brief Move to next item. */ + T *increment() { return ++ptr; } + + /** \brief Move to previous item. */ + T *operator--() { return --ptr; } + + /** \brief Move to previous item. Returns the pointer held before the move. */ + T *operator--(int) { return ptr--; } + + /** \brief Move to previous item. */ + T *decrement() { return --ptr; } + + /** \brief Return the next item. Does not modify this. */ + inline IterNext next() const { return IterNext(*this); } + + /** \brief Return the previous item. Does not modify this. */ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. */ + T *ptr; + + /* For testing endpoints: when built from a non-empty vector, ptrBeg is one slot before the first element and ptrEnd one past the last. */ + T *ptrBeg, *ptrEnd; + }; + + /** \brief Return first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. 
*/ + IterLast last() { return IterLast( *this ); } + +protected: + void makeRawSpaceFor(long pos, long len); + + void setAsCommon(long len); + long replaceCommon(long pos, long len); + long insertCommon(long pos, long len); + + void upResize(long len); + void upResizeDup(long len); + void upResizeFromEmpty(long len); + void downResize(long len); + void downResizeDup(long len); +}; + +/** + * \brief Perform a shallow copy of the vector. + * + * Takes a reference to the contents of the other vector. + */ +template SVector:: + SVector(const SVector &v) +{ + /* Take a reference to other, if any data is allocated. */ + if ( v.data == 0 ) + BaseTable::data = 0; + else { + /* Get the source header, up the refcount and ref it. */ + STabHead *srcHead = ((STabHead*) v.data) - 1; + srcHead->refCount += 1; + BaseTable::data = (T*) (srcHead + 1); + } +} + +/** + * \brief Shallow copy another vector into this vector. + * + * Takes a reference to the other vector. The contents of this vector are + * first emptied. Assigning a vector to itself, or to a vector that already + * shares the same data, leaves it unchanged. + * + * \returns A reference to this. + */ +template SVector & + SVector:: operator=( const SVector &v ) +{ + /* Nothing to do if we already reference the same data (this covers self + * assignment). Emptying first would drop our reference, and possibly + * free the data, before we could take it again. */ + if ( BaseTable::data == v.data ) + return *this; + + /* First clean out the current contents. */ + empty(); + + /* Take a reference to other, if any data is allocated. */ + if ( v.data == 0 ) + BaseTable::data = 0; + else { + /* Get the source header, up the refcount and ref it. */ + STabHead *srcHead = ((STabHead*) v.data) - 1; + srcHead->refCount += 1; + BaseTable::data = (T*) (srcHead + 1); + } + return *this; +} + +/* Init a vector iterator with just a vector. */ +template SVector:: + Iter::Iter( const SVector &v ) +{ + long length; + if ( v.data == 0 || (length=(((STabHead*)v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+length; + } +} + +/* Init a vector iterator with the first of a vector.
*/ +template SVector:: + Iter::Iter( const IterFirst &vf ) +{ + long length; + if ( vf.v.data == 0 || (length=(((STabHead*)vf.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+length; + } +} + +/* Init a vector iterator with the last of a vector. An empty vector gives an + * iterator with all three pointers null. */ +template SVector:: + Iter::Iter( const IterLast &vl ) +{ + long length; + if ( vl.v.data == 0 || (length=(((STabHead*)vl.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+length-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+length; + } +} + +/* Init a vector iterator with the next of some other iterator. The end + * points are taken over unchanged. */ +template SVector:: + Iter::Iter( const IterNext &vn ) +: + ptr(vn.i.ptr+1), + ptrBeg(vn.i.ptrBeg), + ptrEnd(vn.i.ptrEnd) +{ +} + +/* Init a vector iterator with the prev of some other iterator. The end + * points are taken over unchanged. */ +template SVector:: + Iter::Iter( const IterPrev &vp ) +: + ptr(vp.i.ptr-1), + ptrBeg(vp.i.ptrBeg), + ptrEnd(vp.i.ptrEnd) +{ +} + +/* Set a vector iterator with some vector. The iterator starts on the first + * element, ptrBeg is one before the data and ptrEnd is one past it. An empty + * vector gives all three pointers null. */ +template typename SVector::Iter & + SVector::Iter::operator=( const SVector &v ) +{ + long length; + if ( v.data == 0 || (length=(((STabHead*)v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+length; + } + return *this; +} + +/* Set a vector iterator with the first element in a vector. */ +template typename SVector::Iter & + SVector::Iter::operator=( const IterFirst &vf ) +{ + long length; + if ( vf.v.data == 0 || (length=(((STabHead*)vf.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+length; + } + return *this; +} + +/* Set a vector iterator with the last element in a vector.
*/ +template typename SVector::Iter & + SVector::Iter::operator=( const IterLast &vl ) +{ + long length; + if ( vl.v.data == 0 || (length=(((STabHead*)vl.v.data)-1)->tabLen) == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+length-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+length; + } + return *this; +} + +/* Set a vector iterator with the next of some other iterator. */ +template typename SVector::Iter & + SVector::Iter::operator=( const IterNext &vn ) +{ + ptr = vn.i.ptr+1; + ptrBeg = vn.i.ptrBeg; + ptrEnd = vn.i.ptrEnd; + return *this; +} + +/* Set a vector iterator with the prev of some other iterator. */ +template typename SVector::Iter & + SVector::Iter::operator=( const IterPrev &vp ) +{ + ptr = vp.i.ptr-1; + ptrBeg = vp.i.ptrBeg; + ptrEnd = vp.i.ptrEnd; + return *this; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new length. Reads and writes allocLen. Does not read or write length. + * Assumes that there is some data allocated already. Throws std::bad_alloc + * if the reallocation fails, in which case the table is left as it was. */ +template void SVector:: + upResize(long len) +{ + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(head->allocLen, len); + + /* Did the data grow? */ + if ( newLen > head->allocLen ) { + /* Table exists already, resize it up. A failed realloc leaves the + * old block untouched, so do not modify the old header until the + * new block is known to be good. */ + STabHead *newHead = (STabHead*) realloc( head, sizeof(STabHead) + + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Record the new allocation and save the data pointer. */ + newHead->allocLen = newLen; + BaseTable::data = (T*) (newHead + 1); + } +} + +/* Allocates a new buffer for an up resize that requires a duplication of the + * data. Uses Resize::upResize to get the allocation length. Reads and writes + * allocLen. This upResize does write the new length. Assumes that there is + * some data allocated already. Throws std::bad_alloc if the allocation + * fails, in which case the vector stays attached to the shared data. */ +template void SVector:: + upResizeDup(long len) +{ + /* Get the current header.
*/ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(head->allocLen, len); + + /* Allocate the private table before touching the shared header, so that + * a failed allocation leaves the refcount unchanged. */ + STabHead *newHead = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Dereferencing the existing data, decrement the refcount. */ + head->refCount -= 1; + + newHead->refCount = 1; + newHead->allocLen = newLen; + newHead->tabLen = len; + + /* Save the data pointer. */ + BaseTable::data = (T*) (newHead + 1); +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new length. Reads and writes allocLen. This upresize DOES write length. + * Assumes that no data is allocated. */ +template void SVector:: + upResizeFromEmpty(long len) +{ + /* There is no table yet. If the len is zero, then there is no need to + * create a table. */ + if ( len > 0 ) { + /* Ask the resizer what the new length will be. */ + long newLen = Resize::upResize(0, len); + + /* If len is greater than zero then we are always allocating the table. */ + STabHead *head = (STabHead*) malloc( sizeof(STabHead) + + sizeof(T) * newLen ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header and save the data pointer. Note that we set the + * length here. This differs from the other upResizes. */ + head->refCount = 1; + head->allocLen = newLen; + head->tabLen = len; + BaseTable::data = (T*) (head + 1); + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new length. Reads and writes allocLen. Does not read or write length. + * Frees the table entirely if the resizer asks for zero space. */ +template void SVector:: + downResize(long len) +{ + /* If there is already no length, then there is nothing we can do. */ + if ( BaseTable::data != 0 ) { + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be.
*/ + long newLen = Resize::downResize( head->allocLen, len ); + + /* Did the data shrink? */ + if ( newLen < head->allocLen ) { + if ( newLen == 0 ) { + /* Simply free the data. */ + free( head ); + BaseTable::data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. + * A failed realloc leaves the old block untouched, so do not + * modify the old header until the new block is known. */ + STabHead *newHead = (STabHead*) realloc( head, sizeof(STabHead) + + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Save the new allocated length and the new data ptr. */ + newHead->allocLen = newLen; + BaseTable::data = (T*) (newHead + 1); + } + } + } +} + +/* Allocate a new buffer for a down resize and duplication of the array. The + * new array will be len long and allocation size will be determined using + * Resize::downResize with the old array's allocLen. Does not actually copy + * any data. Reads and writes allocLen and writes the new len. Throws + * std::bad_alloc if the allocation fails, in which case the vector stays + * attached to the shared data. */ +template void SVector:: + downResizeDup(long len) +{ + /* If there is already no length, then there is nothing we can do. */ + if ( BaseTable::data != 0 ) { + /* Get the current header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Ask the resizer what the new length will be. */ + long newLen = Resize::downResize( head->allocLen, len ); + + /* Allocate the private table before touching the shared header, so + * that a failed allocation leaves the refcount unchanged. */ + STabHead *newHead = (STabHead*) malloc( sizeof(STabHead) + sizeof(T) * newLen ); + if ( newHead == 0 ) + throw std::bad_alloc(); + + /* Detaching from the existing head, decrement the refcount. */ + head->refCount -= 1; + + /* Set up the new header. */ + newHead->refCount = 1; + newHead->allocLen = newLen; + newHead->tabLen = len; + + /* Save the data pointer. */ + BaseTable::data = (T*) (newHead + 1); + } +} + +/** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed.
+ */ +template void SVector:: + empty() +{ + if ( BaseTable::data != 0 ) { + /* Get the header and drop the refcount on the data. */ + STabHead *head = ((STabHead*) BaseTable::data) - 1; + head->refCount -= 1; + + /* If the refcount just went down to zero nobody else is referencing + * the data. */ + if ( head->refCount == 0 ) { + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < head->tabLen; pos++, i++ ) + pos->~T(); + + /* Free the data space. */ + free( head ); + } + + /* Clear the pointer. */ + BaseTable::data = 0; + } +} + +/* Prepare for setting the contents of the vector to some array len long. + * Handles reusing the existing space, detaching from a common space or + * growing from zero length automatically. */ +template void SVector:: + setAsCommon(long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If the refCount is one, then we can reuse the space. Otherwise we + * must detach from the referenced data create new space. */ + if ( head->refCount == 1 ) { + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < head->tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < head->tabLen ) + downResize( len ); + else if ( len > head->tabLen ) + upResize( len ); + + if ( BaseTable::data != 0 ) { + /* Get the header again and set the length. */ + head = ((STabHead*)BaseTable::data) - 1; + head->tabLen = len; + } + } + else { + /* Just detach from the data. */ + head->refCount -= 1; + BaseTable::data = 0; + + /* Make enough space. This will set the length. */ + upResizeFromEmpty( len ); + } + } + else { + /* The table is currently empty. Make enough space. This will set the + * length. */ + upResizeFromEmpty( len ); + } +} + +/** + * \brief Set the contents of the vector to be len elements exactly. + * + * The vector becomes len elements in length. 
Destructors are called on any + * existing elements in the vector. Copy constructors are used to place the + * new elements in the vector. + */ +template void SVector:: + setAs(const T *val, long len) +{ + /* Common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Copy data in, constructing each element in place from its source. */ + T *dst = BaseTable::data; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + + +/** + * \brief Set the vector to len copies of item. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * copy the item into the vector. + */ +template void SVector:: + setAsDup(const T &item, long len) +{ + /* Do the common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Copy item in one spot at a time. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + +/** + * \brief Set the vector to exactly len new items. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Default constructors are used to init the + * new items. + */ +template void SVector:: + setAsNew(long len) +{ + /* Do the common stuff for setting the array to len long. */ + setAsCommon( len ); + + /* Create items using default constructor. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/* Make space in vector for a replacement at pos of len items. Handles reusing + * existing space, detaching or growing from zero space. Returns pos with any + * negative offset resolved against the length. The len slots starting there + * are left unconstructed for the caller to fill. */ +template long SVector:: + replaceCommon(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to replace at then treat it as + * a position relative to the length. This doesn't have any meaning + * unless the length is at least one.
*/ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* The end is the one past the last item that we want to write to. */ + long i, endPos = pos + len; + + if ( head->refCount == 1 ) { + /* We can reuse the space. Make sure we have enough space. */ + if ( endPos > head->tabLen ) { + upResize( endPos ); + + /* Get the header again, whose addr may have changed after + * resizing. */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Delete any objects we need to delete: only the existing + * items from pos up to the old length. */ + T *item = BaseTable::data + pos; + for ( i = pos; i < head->tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + head->tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + T *item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + } + else { + /* Use endPos to calc the end of the vector. */ + long newLen = endPos; + if ( newLen < head->tabLen ) + newLen = head->tabLen; + + /* Duplicate and grow up to endPos. This will set the length. */ + upResizeDup( newLen ); + + /* Copy from src up to pos. Our local head was not updated by the + * call above, so it still refers to the shared header that we + * just detached from and head->tabLen is the old length. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++) + new(dst) T(*src); + + /* Copy any items after the replace range. */ + for ( i += len, src += len, dst += len; + i < head->tabLen; i++, dst++, src++ ) + new(dst) T(*src); + } + } + else { + /* There is no data initially, must grow from zero. This will set the + * new length. */ + upResizeFromEmpty( len ); + } + + return pos; +} + + +/** + * \brief Replace len elements at position pos. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. Copy constructors are used + * to place the elements into the vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results.
If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template void SVector:: + replace(long pos, const T *val, long len) +{ + /* Common work for replacing in the vector. Gives back the position with + * any negative offset resolved. */ + pos = replaceCommon( pos, len ); + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Replace at position pos with len copies of an item. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The copy constructor is + * used to place the element into this vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template void SVector:: + replaceDup(long pos, const T &val, long len) +{ + /* Common replacement stuff. */ + pos = replaceCommon( pos, len ); + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(val); +} + +/** + * \brief Replace at position pos with len new elements. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The default constructor is + * used to initialize the new elements. It is allowable for the pos and length + * to specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ +template void SVector:: + replaceNew(long pos, long len) +{ + /* Do the common replacement stuff.
*/ + pos = replaceCommon( pos, len ); + + /* Init the replaced range using the default constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/** + * \brief Remove len elements at position pos. + * + * Destructor is called on all elements removed. Elements to the right of pos + * are shifted len spaces to the left to take up the free space. If pos is + * greater than or equal to the length of the vector then undefined behavior + * results. If pos is negative then it is treated as an offset relative to the + * length of the vector. + */ +template void SVector:: + remove(long pos, long len) +{ + /* If there is no data, we can't delete anything anyways. */ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* The first position after the last item deleted. */ + long endPos = pos + len; + + /* The New data length. */ + long i, newLen = head->tabLen - len; + + if ( head->refCount == 1 ) { + /* We are the only ones using the data. We can reuse + * the existing space. */ + + /* The place in the data we are deleting at. */ + T *dst = BaseTable::data + pos; + + /* Call Destructors. */ + T *item = BaseTable::data + pos; + for ( i = 0; i < len; i += 1, item += 1 ) + item->~T(); + + /* Shift data over if necessary. The elements are moved as raw + * bytes, no constructors or destructors are run on them. */ + long lenToSlideOver = head->tabLen - endPos; + if ( len > 0 && lenToSlideOver > 0 ) + memmove(BaseTable::data + pos, dst + len, sizeof(T)*lenToSlideOver); + + /* Shrink the data if necessary. */ + downResize( newLen ); + + if ( BaseTable::data != 0 ) { + /* Get the header again (because of the resize) and set the + * new data length. */ + head = ((STabHead*)BaseTable::data) - 1; + head->tabLen = newLen; + } + } + else { + /* Must detach from the common data.
Just copy the non-deleted + * items from the common data. */ + + /* Duplicate and grow down to newLen. This will set the length. */ + downResizeDup( newLen ); + + /* Copy over just the non-deleted parts. Our local head still + * refers to the shared header that we just detached from. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( i += len, src += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } +} + +/* Shift over existing data. Handles reusing existing space, detaching or + * growing from zero space. Returns pos with any negative offset resolved + * against the length. The len slots starting there are left unconstructed + * for the caller to fill. */ +template long SVector:: + insertCommon(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* If we are given a negative position to insert at then treat it as a + * position relative to the length. This only has meaning if there is + * existing data. */ + if ( pos < 0 ) + pos = head->tabLen + pos; + + /* Calculate the new length. */ + long i, newLen = head->tabLen + len; + + if ( head->refCount == 1 ) { + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Get the header again, (the addr may have changed after + * resizing). */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < head->tabLen ) { + memmove( BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(head->tabLen - pos) ); + } + + /* Grow the length by the len inserted. */ + head->tabLen += len; + } + else { + /* Need to detach from the existing array. Copy over the other + * parts. This will set the length. */ + upResizeDup( newLen ); + + /* Copy over the parts around the insert. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( dst += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } + else { + /* There is no existing data.
Start from zero. This will set the + * length. */ + upResizeFromEmpty( len ); + } + + return pos; +} + + +/** + * \brief Insert len elements at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the elements into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template void SVector:: + insert(long pos, const T *val, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Copy data in element by element. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Insert len copies of item at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the element into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template void SVector:: + insertDup(long pos, const T &item, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Copy the data item in one at a time. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + + +/** + * \brief Insert len new elements using the default constructor. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * Default constructors are used to init the new elements. If pos is off the + * end of the vector then undefined behaviour results. If pos is negative then + * it is treated as an offset relative to the length of the vector.
+ */ +template void SVector:: + insertNew(long pos, long len) +{ + /* Do the common insertion stuff. */ + pos = insertCommon( pos, len ); + + /* Init new data with default constructors. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/* Makes space for len items, Does not init the items in any way. If pos is + * greater than the length of the vector then undefined behaviour results. + * Updates the length of the vector. */ +template void SVector:: + makeRawSpaceFor(long pos, long len) +{ + if ( BaseTable::data != 0 ) { + /* Get the header. */ + STabHead *head = ((STabHead*)BaseTable::data) - 1; + + /* Calculate the new length. */ + long i, newLen = head->tabLen + len; + + if ( head->refCount == 1 ) { + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Get the header again, (the addr may have changed after + * resizing). */ + head = ((STabHead*)BaseTable::data) - 1; + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < head->tabLen ) { + memmove( BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(head->tabLen - pos) ); + } + + /* Grow the length by the len inserted. */ + head->tabLen += len; + } + else { + /* Need to detach from the existing array. Copy over the other + * parts. This will set the length. */ + upResizeDup( newLen ); + + /* Copy over the parts around the insert. */ + const T *src = (T*) (head + 1); + T *dst = BaseTable::data; + for ( i = 0; i < pos; i++, dst++, src++ ) + new(dst) T(*src); + + /* ... and the second half. */ + for ( dst += len; i < head->tabLen; i++, src++, dst++ ) + new(dst) T(*src); + } + } + else { + /* There is no existing data. Start from zero. This will set the + * length. 
*/ + upResizeFromEmpty( len ); + } +} + + +#ifdef AAPL_NAMESPACE +} +#endif + + +#endif /* _AAPL_SVECTOR_H */ diff --git a/aapl/table.h b/aapl/table.h new file mode 100644 index 00000000..c1f2b7bd --- /dev/null +++ b/aapl/table.h @@ -0,0 +1,252 @@ +/* + * Copyright 2001, 2002 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_TABLE_H +#define _AAPL_TABLE_H + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class Table + * \brief Base class for dynamic arrays. + * + * Table is used as the common data storage class for vectors. It does not + * provide any methods to operate on the data and as such it is not intended + * to be used directly. It exists so that algorithms that operatate on dynamic + * arrays can be written without knowing about the various vector classes that + * my exist. + */ + +/*@}*/ + +/* Table class. */ +template class Table +{ +public: + /* Default Constructor. */ + inline Table(); + + /** + * \brief Get the length of the vector. + * + * \returns the length of the vector. + */ + long length() const + { return tabLen; } + + /** + * \brief Table data. + * + * The pointer to the elements in the vector. Modifying the vector may + * cause this pointer to change. 
+ */ + T *data; + + /** + * \brief Table length. + * + * The number of items of type T in the table. + */ + long tabLen; + + /** + * \brief Allocated length. + * + * The number of items for which there is room in the current allocation. + */ + long allocLen; +}; + +/** + * \brief Default constructor + * + * Initialize table data to empty. + */ +template inline Table::Table() +: + data(0), + tabLen(0), + allocLen(0) +{ +} + +/* Default shared table header class. */ +struct STabHead +{ + /** + * \brief Table length. + * + * The number of items of type T in the table. + */ + long tabLen; + + /** + * \brief Allocated length. + * + * The number of items for which there is room in the current allocation. + */ + long allocLen; + + /** + * \brief Ref Count. + * + * The number of shared vectors referencing this data. + */ + long refCount; +}; + +/** + * \addtogroup vector + * @{ + */ + +/** \class STable + * \brief Base class for implicitly shared dynamic arrays. + * + * STable is used as the common data storage class for shared vectors. It does + * not provide any methods to operate on the data and as such it is not + * intended to be used directly. It exists so that algorithms that operatate + * on dynamic arrays can be written without knowing about the various shared + * vector classes that my exist. + */ + +/*@}*/ + +/* STable class. */ +template class STable +{ +public: + /* Default Constructor. */ + inline STable(); + + /** + * \brief Get the length of the shared vector. + * + * \returns the length of the shared vector. + */ + long length() const + { return data == 0 ? 0 : (((STabHead*)data) - 1)->tabLen; } + + /** + * \brief Get header of the shared vector. + * + * \returns the header of the shared vector. + */ + STabHead *header() const + { return data == 0 ? 0 : (((STabHead*)data) - 1); } + + /** + * \brief Table data. + * + * The pointer to the elements in the vector. The shared table header is + * located just behind the data. 
Modifying the vector may cause this + * pointer to change. + */ + T *data; +}; + +/** + * \brief Default constructor + * + * Initialize shared table data to empty. + */ +template inline STable::STable() +: + data(0) +{ +} + +/* If needed is greater than existing, give twice needed. */ +#define EXPN_UP( existing, needed ) \ + needed > existing ? (needed<<1) : existing + +/* If needed is less than 1 quarter existing, give twice needed. */ +#define EXPN_DOWN( existing, needed ) \ + needed < (existing>>2) ? (needed<<1) : existing + +/** + * \addtogroup vector + * @{ + */ + +/** \class ResizeExpn + * \brief Exponential table resizer. + * + * ResizeExpn is the default table resizer. When an up resize is needed, space + * is doubled. When a down resize is needed, space is halved. The result is + * that when growing the vector in a linear fashion, the number of resizes of + * the allocated space behaves logarithmically. + * + * If only up resizes are done, there will never be more than 2 times the + * needed space allocated. If down resizes are done as well, there will never + * be more than 4 times the needed space allocated. ResizeExpn uses this 50% + * usage policy on up resizing and 25% usage policy on down resizing to + * improve performance when repeatedly inserting and removing a small number + * of elements relative to the size of the array. This scheme guarantees that + * repetitive inserting and removing of a small number of elements will never + * result in repetative reallocation. + * + * The sizes passed to the resizer from the vectors are in units of T. + */ + +/*@}*/ + +/* Exponential resizer. */ +class ResizeExpn +{ +protected: + /** + * \brief Determine the new table size when up resizing. + * + * If the existing size is insufficient for the space needed then allocate + * twice the space needed. Otherwise use the existing size. + * + * \returns The new table size. 
+ */ + static inline long upResize( long existing, long needed ) + { return EXPN_UP( existing, needed ); } + + /** + * \brief Determine the new table size when down resizing. + * + * If the space needed is less than one quarter of the existing size then + * allocate twice the space needed. Otherwise use the existing size. + * + * \returns The new table size. + */ + static inline long downResize( long existing, long needed ) + { return EXPN_DOWN( existing, needed ); } +}; + +#undef EXPN_UP +#undef EXPN_DOWN + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_TABLE_H */ diff --git a/aapl/vector.h b/aapl/vector.h new file mode 100644 index 00000000..835607b1 --- /dev/null +++ b/aapl/vector.h @@ -0,0 +1,1189 @@ +/* + * Copyright 2002, 2006 Adrian Thurston + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _AAPL_VECTOR_H +#define _AAPL_VECTOR_H + +#include +#include +#include +#include +#include "table.h" + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +/** + * \addtogroup vector + * @{ + */ + +/** \class Vector + * \brief Dynamic array. + * + * This is a typical vector implementation.
It is a dynamic array that can be + * used to contain complex data structures that have constructors and + * destructors as well as simple types such as integers and pointers. + * + * Vector supports inserting, overwriting, and removing single or multiple + * elements at once. Constructors and destructors are called wherever + * appropriate. For example, before an element is overwritten, its + * destructor is called. + * + * Vector provides automatic resizing of allocated memory as needed and offers + * different allocation schemes for controlling how the automatic allocation + * is done. Two senses of the length of the data are maintained: the + * amount of raw memory allocated to the vector and the number of actual + * elements in the vector. The various allocation schemes control how the + * allocated space is changed in relation to the number of elements in the + * vector. + * + * \include ex_vector.cpp + */ + +/*@}*/ + +template < class T, class Resize = ResizeExpn > class Vector + : public Table, public Resize +{ +private: + typedef Table BaseTable; + +public: + /** + * \brief Initialize an empty vector with no space allocated. + * + * If a linear resizer is used, the step defaults to 256 units of T. For a + * runtime vector both up and down allocation schemes default to + * Exponential. + */ + Vector() { } + + /** + * \brief Create a vector that contains an initial element. + * + * The vector becomes one element in length. The element's copy + * constructor is used to place the value in the vector. + */ + Vector(const T &val) { setAs(&val, 1); } + + /** + * \brief Create a vector that contains an array of elements. + * + * The vector becomes len elements in length. Copy constructors are used + * to place the new elements in the vector. + */ + Vector(const T *val, long len) { setAs(val, len); } + + /* Deep copy. */ + Vector( const Vector &v ); + + /* Free all mem used by the vector. */ + ~Vector() { empty(); } + + /* Delete all items.
*/ + void empty(); + + /* Abandon the contents of the vector without deleting. */ + void abandon(); + + /* Transfers the elements of another vector into this vector. First empties + * the current vector. */ + void transfer( Vector &v ); + + /* Perform a deep copy of another vector into this vector. */ + Vector &operator=( const Vector &v ); + + /* Stack operations. */ + void push( const T &t ) { append( t ); } + void pop() { remove( BaseTable::tabLen - 1 ); } + T &top() { return BaseTable::data[BaseTable::tabLen - 1]; } + + /*@{*/ + /** + * \brief Insert one element at position pos. + * + * Elements in the vector from pos onward are shifted one space to the + * right. The copy constructor is used to place the element into this + * vector. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative then it is treated as an offset + * relative to the length of the vector. + */ + void insert(long pos, const T &val) { insert(pos, &val, 1); } + + /* Insert an array of values. */ + void insert(long pos, const T *val, long len); + + /** + * \brief Insert all the elements from another vector at position pos. + * + * Elements in this vector from pos onward are shifted v.tabLen spaces to + * the right. The element's copy constructor is used to copy the items + * into this vector. The other vector is left unchanged. If pos is off the + * end of the vector, then undefined behaviour results. If pos is negative + * then it is treated as an offset relative to the length of the vector. + * Equivalent to vector.insert(pos, other.data, other.tabLen). + */ + void insert(long pos, const Vector &v) { insert(pos, v.data, v.tabLen); } + + /* Insert len copies of val into the vector. */ + void insertDup(long pos, const T &val, long len); + + /** + * \brief Insert one new element using the default constructor. + * + * Elements in the vector from pos onward are shifted one space to the + * right.
The default constructor is used to init the new element. If pos + * is greater than the length of the vector then undefined behaviour + * results. If pos is negative then it is treated as an offset relative to + * the length of the vector. + */ + void insertNew(long pos) { insertNew(pos, 1); } + + /* Insert len new items using default constructor. */ + void insertNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Remove one element at position pos. + * + * The element's destructor is called. Elements to the right of pos are + * shifted one space to the left to take up the free space. If pos is greater + * than or equal to the length of the vector then undefined behavior results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ + void remove(long pos) { remove(pos, 1); } + + /* Delete a number of elements. */ + void remove(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Replace one element at position pos. + * + * If there is an existing element at position pos (if pos is less than + * the length of the vector) then its destructor is called before the + * space is used. The copy constructor is used to place the element into + * the vector. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative then it is treated as + * an offset relative to the length of the vector. + */ + void replace(long pos, const T &val) { replace(pos, &val, 1); } + + /* Replace with an array of values. */ + void replace(long pos, const T *val, long len); + + /** + * \brief Replace at position pos with all the elements of another vector. + * + * Replace at position pos with all the elements of another vector. The + * other vector is left unchanged. If there are existing elements at the + * positions to be replaced, then destructors are called before the space + * is used. Copy constructors are used to place the elements into this + * vector. 
It is allowable for the pos and length of the other vector to + * specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ + void replace(long pos, const Vector &v) { replace(pos, v.data, v.tabLen); } + + /* Replace len items with len copies of val. */ + void replaceDup(long pos, const T &val, long len); + + /** + * \brief Replace at position pos with one new element. + * + * If there is an existing element at the position to be replaced (pos is + * less than the length of the vector) then the element's destructor is + * called before the space is used. The default constructor is used to + * initialize the new element. If pos is greater than the length of the + * vector then undefined behaviour results. If pos is negative, then it is + * treated as an offset relative to the length of the vector. + */ + void replaceNew(long pos) { replaceNew(pos, 1); } + + /* Replace len items at pos with newly constructed objects. */ + void replaceNew(long pos, long len); + /*@}*/ + + /*@{*/ + /** + * \brief Set the contents of the vector to be val exactly. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used + * to place the val in the vector. + */ + void setAs(const T &val) { setAs(&val, 1); } + + /* Set to the contents of an array. */ + void setAs(const T *val, long len); + + /** + * \brief Set the vector to exactly the contents of another vector. + * + * The vector becomes v.tabLen elements in length. Destructors are called + * on any existing elements. Copy constructors are used to place the new + * elements in the vector. + */ + void setAs(const Vector &v) { setAs(v.data, v.tabLen); } + + /* Set as len copies of item. 
*/ + void setAsDup(const T &item, long len); + + /** + * \brief Set the vector to exactly one new item. + * + * The vector becomes one element in length. Destructors are called on any + * existing elements in the vector. The default constructor is used to + * init the new item. + */ + void setAsNew() { setAsNew(1); } + + /* Set as newly constructed objects using the default constructor. */ + void setAsNew(long len); + /*@}*/ + + /*@{*/ + /** + * \brief Append one element to the end of the vector. + * + * Copy constructor is used to place the element in the vector. + */ + void append(const T &val) { replace(BaseTable::tabLen, &val, 1); } + + /** + * \brief Append len elements to the end of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void append(const T *val, long len) { replace(BaseTable::tabLen, val, len); } + + /** + * \brief Append the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place the + * elements in the vector. + */ + void append(const Vector &v) { replace(BaseTable::tabLen, v.data, v.tabLen); } + + /** + * \brief Append len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void appendDup(const T &item, long len) { replaceDup(BaseTable::tabLen, item, len); } + + /** + * \brief Append a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void appendNew() { replaceNew(BaseTable::tabLen, 1); } + + /** + * \brief Append len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void appendNew(long len) { replaceNew(BaseTable::tabLen, len); } + /*@}*/ + + /*@{*/ + /** \fn Vector::prepend(const T &val) + * \brief Prepend one element to the front of the vector. + * + * Copy constructor is used to place the element in the vector.
+ */ + void prepend(const T &val) { insert(0, &val, 1); } + + /** + * \brief Prepend len elements to the front of the vector. + * + * Copy constructors are used to place the elements in the vector. + */ + void prepend(const T *val, long len) { insert(0, val, len); } + + /** + * \brief Prepend the contents of another vector. + * + * The other vector is left unchanged. Copy constructors are used to place the + * elements in the vector. + */ + void prepend(const Vector &v) { insert(0, v.data, v.tabLen); } + + /** + * \brief Prepend len copies of item. + * + * The copy constructor is used to place the item in the vector. + */ + void prependDup(const T &item, long len) { insertDup(0, item, len); } + + /** + * \brief Prepend a single newly created item. + * + * The new element is initialized with the default constructor. + */ + void prependNew() { insertNew(0, 1); } + + /** + * \brief Prepend len newly created items. + * + * The new elements are initialized with the default constructor. + */ + void prependNew(long len) { insertNew(0, len); } + /*@}*/ + + /* Convenience access. */ + T &operator[](int i) const { return BaseTable::data[i]; } + long size() const { return BaseTable::tabLen; } + + /* Forward this so a ref can be used. */ + struct Iter; + + /* Various classes for setting the iterator */ + struct IterFirst { IterFirst( const Vector &v ) : v(v) { } const Vector &v; }; + struct IterLast { IterLast( const Vector &v ) : v(v) { } const Vector &v; }; + struct IterNext { IterNext( const Iter &i ) : i(i) { } const Iter &i; }; + struct IterPrev { IterPrev( const Iter &i ) : i(i) { } const Iter &i; }; + + /** + * \brief Vector Iterator. + * \ingroup iterators + */ + struct Iter + { + /* Construct, assign. */ + Iter() : ptr(0), ptrBeg(0), ptrEnd(0) { } + + /* Construct. */ + Iter( const Vector &v ); + Iter( const IterFirst &vf ); + Iter( const IterLast &vl ); + inline Iter( const IterNext &vn ); + inline Iter( const IterPrev &vp ); + + /* Assign. 
*/ + Iter &operator=( const Vector &v ); + Iter &operator=( const IterFirst &vf ); + Iter &operator=( const IterLast &vl ); + inline Iter &operator=( const IterNext &vf ); + inline Iter &operator=( const IterPrev &vl ); + + /** \brief Less than end? */ + bool lte() const { return ptr != ptrEnd; } + + /** \brief At end? */ + bool end() const { return ptr == ptrEnd; } + + /** \brief Greater than beginning? */ + bool gtb() const { return ptr != ptrBeg; } + + /** \brief At beginning? */ + bool beg() const { return ptr == ptrBeg; } + + /** \brief At first element? */ + bool first() const { return ptr == ptrBeg+1; } + + /** \brief At last element? */ + bool last() const { return ptr == ptrEnd-1; } + + /* Return the position. */ + long pos() const { return ptr - ptrBeg - 1; } + T &operator[](int i) const { return ptr[i]; } + + /** \brief Implicit cast to T*. */ + operator T*() const { return ptr; } + + /** \brief Dereference operator returns T&. */ + T &operator *() const { return *ptr; } + + /** \brief Arrow operator returns T*. */ + T *operator->() const { return ptr; } + + /** \brief Move to next item. */ + T *operator++() { return ++ptr; } + + /** \brief Move to next item. */ + T *operator++(int) { return ptr++; } + + /** \brief Move to next item. */ + T *increment() { return ++ptr; } + + /** \brief Move n items forward. */ + T *operator+=(long n) { return ptr+=n; } + + /** \brief Move to previous item. */ + T *operator--() { return --ptr; } + + /** \brief Move to previous item. */ + T *operator--(int) { return ptr--; } + + /** \brief Move to previous item. */ + T *decrement() { return --ptr; } + + /** \brief Move n items back. */ + T *operator-=(long n) { return ptr-=n; } + + /** \brief Return the next item. Does not modify this. */ + inline IterNext next() const { return IterNext(*this); } + + /** \brief Return the previous item. Does not modify this. */ + inline IterPrev prev() const { return IterPrev(*this); } + + /** \brief The iterator is simply a pointer. 
*/ + T *ptr; + + /* For testing endpoints. */ + T *ptrBeg, *ptrEnd; + }; + + /** \brief Return first element. */ + IterFirst first() { return IterFirst( *this ); } + + /** \brief Return last element. */ + IterLast last() { return IterLast( *this ); } + +protected: + void makeRawSpaceFor(long pos, long len); + + void upResize(long len); + void downResize(long len); +}; + +/* Init a vector iterator with just a vector. */ +template Vector::Iter::Iter( const Vector &v ) +{ + if ( v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+v.tabLen; + } +} + +/* Init a vector iterator with the first of a vector. */ +template Vector::Iter::Iter( + const IterFirst &vf ) +{ + if ( vf.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+vf.v.tabLen; + } +} + +/* Init a vector iterator with the last of a vector. */ +template Vector::Iter::Iter( + const IterLast &vl ) +{ + if ( vl.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+vl.v.tabLen-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+vl.v.tabLen; + } +} + +/* Init a vector iterator with the next of some other iterator. */ +template Vector::Iter::Iter( + const IterNext &vn ) +: + ptr(vn.i.ptr+1), + ptrBeg(vn.i.ptrBeg), + ptrEnd(vn.i.ptrEnd) +{ +} + +/* Init a vector iterator with the prev of some other iterator. */ +template Vector::Iter::Iter( + const IterPrev &vp ) +: + ptr(vp.i.ptr-1), + ptrBeg(vp.i.ptrBeg), + ptrEnd(vp.i.ptrEnd) +{ +} + +/* Set a vector iterator with some vector. */ +template typename Vector::Iter & + Vector::Iter::operator=( const Vector &v ) +{ + if ( v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = v.data; + ptrBeg = v.data-1; + ptrEnd = v.data+v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the first element in a vector. 
*/ +template typename Vector::Iter & + Vector::Iter::operator=( const IterFirst &vf ) +{ + if ( vf.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vf.v.data; + ptrBeg = vf.v.data-1; + ptrEnd = vf.v.data+vf.v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the last element in a vector. */ +template typename Vector::Iter & + Vector::Iter::operator=( const IterLast &vl ) +{ + if ( vl.v.tabLen == 0 ) + ptr = ptrBeg = ptrEnd = 0; + else { + ptr = vl.v.data+vl.v.tabLen-1; + ptrBeg = vl.v.data-1; + ptrEnd = vl.v.data+vl.v.tabLen; + } + return *this; +} + +/* Set a vector iterator with the next of some other iterator. */ +template typename Vector::Iter & + Vector::Iter::operator=( const IterNext &vn ) +{ + ptr = vn.i.ptr+1; + ptrBeg = vn.i.ptrBeg; + ptrEnd = vn.i.ptrEnd; + return *this; +} + +/* Set a vector iterator with the prev of some other iterator. */ +template typename Vector::Iter & + Vector::Iter::operator=( const IterPrev &vp ) +{ + ptr = vp.i.ptr-1; + ptrBeg = vp.i.ptrBeg; + ptrEnd = vp.i.ptrEnd; + return *this; +} + +/** + * \brief Forget all elements in the vector. + * + * The contents of the vector are reset to null without the space + * being freed. + */ +template void Vector:: + abandon() +{ + BaseTable::data = 0; + BaseTable::tabLen = 0; + BaseTable::allocLen = 0; +} + +/** + * \brief Transfer the contents of another vector into this vector. + * + * The dynamic array of the other vector is moved into this vector by + * reference. If this vector is non-empty then its contents are first deleted. + * Afterward the other vector will be empty. + */ +template void Vector:: + transfer( Vector &v ) +{ + empty(); + + BaseTable::data = v.data; + BaseTable::tabLen = v.tabLen; + BaseTable::allocLen = v.allocLen; + + v.abandon(); +} + +/** + * \brief Deep copy another vector into this vector. + * + * Copies the entire contents of the other vector into this vector. Any + * existing contents are first deleted. Equivalent to setAs.
+ * + * \returns A reference to this. + */ +template Vector &Vector:: + operator=( const Vector &v ) +{ + /* setAs destroys the existing elements before copying, so assigning a + * vector to itself must be skipped or it would copy destroyed items. */ + if ( this != &v ) + setAs(v.data, v.tabLen); + return *this; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new allocLen. Reads and writes allocLen. Does not read or write tabLen. On + * allocation failure std::bad_alloc is thrown with data and allocLen left as + * they were. */ +template void Vector:: + upResize(long len) +{ + /* Ask the resizer what the new allocLen will be. */ + long newLen = Resize::upResize(BaseTable::allocLen, len); + + /* Did the data grow? */ + if ( newLen > BaseTable::allocLen ) { + T *newData; + if ( BaseTable::data != 0 ) { + /* Table exists already, resize it up. Hold the result in a + * temporary: on failure realloc leaves the old block allocated. */ + newData = (T*) realloc( BaseTable::data, sizeof(T) * newLen ); + } + else { + /* Create the data. */ + newData = (T*) malloc( sizeof(T) * newLen ); + } + + if ( newData == 0 ) + throw std::bad_alloc(); + + /* Commit only once the allocation has succeeded. */ + BaseTable::data = newData; + BaseTable::allocLen = newLen; + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new allocLen. Reads and writes allocLen. Does not read or write tabLen. + * If shrinking fails std::bad_alloc is thrown with data and allocLen left as + * they were. */ +template void Vector:: + downResize(long len) +{ + /* Ask the resizer what the new allocLen will be. */ + long newLen = Resize::downResize( BaseTable::allocLen, len ); + + /* Did the data shrink? */ + if ( newLen < BaseTable::allocLen ) { + if ( newLen == 0 ) { + /* Simply free the data. */ + free( BaseTable::data ); + BaseTable::data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. Hold + * the result in a temporary: on failure realloc leaves the old + * block allocated and it must not be lost. */ + T *newData = (T*) realloc( BaseTable::data, sizeof(T) * newLen ); + if ( newData == 0 ) + throw std::bad_alloc(); + BaseTable::data = newData; + } + BaseTable::allocLen = newLen; + } +} + +/** + * \brief Perform a deep copy of the vector. + * + * The contents of the other vector are copied into this vector. This vector + * gets the same allocation size as the other vector. All items are copied + * using the element's copy constructor.
+ */ +template Vector:: + Vector(const Vector &v) +{ + BaseTable::tabLen = v.tabLen; + BaseTable::allocLen = v.allocLen; + + if ( BaseTable::allocLen > 0 ) { + /* Allocate needed space. */ + BaseTable::data = (T*) malloc(sizeof(T) * BaseTable::allocLen); + if ( BaseTable::data == 0 ) + throw std::bad_alloc(); + + /* If there are any items in the src data, copy them in. */ + T *dst = BaseTable::data, *src = v.data; + for (long pos = 0; pos < BaseTable::tabLen; pos++, dst++, src++ ) + new(dst) T(*src); + } + else { + /* Nothing allocated. */ + BaseTable::data = 0; + } +} + +/** \fn Vector::~Vector() + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ + + +/** + * \brief Free all memory used by the vector. + * + * The vector is reset to zero elements. Destructors are called on all + * elements in the vector. The space allocated for the vector is freed. + */ +template void Vector:: + empty() +{ + if ( BaseTable::data != 0 ) { + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Free the data space. */ + free( BaseTable::data ); + BaseTable::data = 0; + BaseTable::tabLen = BaseTable::allocLen = 0; + } +} + +/** + * \brief Set the contents of the vector to be len elements exactly. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Copy constructors are used to place the + * new elements in the vector. + */ +template void Vector:: + setAs(const T *val, long len) +{ + /* Call All destructors. */ + long i; + T *pos = BaseTable::data; + for ( i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. 
*/ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Copy data in. */ + T *dst = BaseTable::data; + const T *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Set the vector to len copies of item. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. The element's copy constructor is used to + * copy the item into the vector. + */ +template void Vector:: + setAsDup(const T &item, long len) +{ + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Copy item in one spot at a time. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); +} + +/** + * \brief Set the vector to exactly len new items. + * + * The vector becomes len elements in length. Destructors are called on any + * existing elements in the vector. Default constructors are used to init the + * new items. + */ +template void Vector:: + setAsNew(long len) +{ + /* Call All destructors. */ + T *pos = BaseTable::data; + for ( long i = 0; i < BaseTable::tabLen; pos++, i++ ) + pos->~T(); + + /* Adjust the allocated length. */ + if ( len < BaseTable::tabLen ) + downResize( len ); + else if ( len > BaseTable::tabLen ) + upResize( len ); + + /* Set the new data length to exactly len. */ + BaseTable::tabLen = len; + + /* Create items using default constructor. */ + T *dst = BaseTable::data; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + + +/** + * \brief Replace len elements at position pos. 
+ * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. Copy constructors are used + * to place the elements into the vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template void Vector:: + replace(long pos, const T *val, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + BaseTable::tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); +} + +/** + * \brief Replace at position pos with len copies of an item. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The copy constructor is + * used to place the element into this vector. It is allowable for the pos and + * length to specify a replacement that overwrites existing elements and + * creates new ones. 
If pos is greater than the length of the vector then + * undefined behaviour results. If pos is negative, then it is treated as an + * offset relative to the length of the vector. + */ +template void Vector:: + replaceDup(long pos, const T &val, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + BaseTable::tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Copy data in using copy constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(val); +} + +/** + * \brief Replace at position pos with len new elements. + * + * If there are existing elements at the positions to be replaced, then + * destructors are called before the space is used. The default constructor is + * used to initialize the new elements. It is allowable for the pos and length + * to specify a replacement that overwrites existing elements and creates new + * ones. If pos is greater than the length of the vector then undefined + * behaviour results. If pos is negative, then it is treated as an offset + * relative to the length of the vector. + */ +template void Vector:: + replaceNew(long pos, long len) +{ + long endPos, i; + T *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. 
*/ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > BaseTable::tabLen ) { + upResize( endPos ); + + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < BaseTable::tabLen; i++, item++ ) + item->~T(); + + /* We are extending the vector, set the new data length. */ + BaseTable::tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + item = BaseTable::data + pos; + for ( i = pos; i < endPos; i++, item++ ) + item->~T(); + } + + /* Init the new items using the default constructor. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); +} + +/** + * \brief Remove len elements at position pos. + * + * Destructor is called on all elements removed. Elements to the right of pos + * are shifted len spaces to the left to take up the free space. If pos is + * greater than or equal to the length of the vector then undefined behavior + * results. If pos is negative then it is treated as an offset relative to the + * length of the vector. + */ +template void Vector:: + remove(long pos, long len) +{ + long newLen, lenToSlideOver, endPos; + T *dst, *item; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* The first position after the last item deleted. */ + endPos = pos + len; + + /* The new data length. */ + newLen = BaseTable::tabLen - len; + + /* The place in the data we are deleting at. */ + dst = BaseTable::data + pos; + + /* Call Destructors. */ + item = dst; + for ( long i = 0; i < len; i += 1, item += 1 ) + item->~T(); + + /* Shift data over if necessary.
*/ + lenToSlideOver = BaseTable::tabLen - endPos; + if ( len > 0 && lenToSlideOver > 0 ) + memmove(dst, dst + len, sizeof(T)*lenToSlideOver); + + /* Shrink the data if necessary. */ + downResize( newLen ); + + /* Set the new data length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len elements at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the elements into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. + */ +template void Vector:: + insert(long pos, const T *val, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Copy data in element by element. */ + T *dst = BaseTable::data + pos; + const T *src = val; + for ( long i = 0; i < len; i++, dst++, src++ ) + new(dst) T(*src); + + /* Set the new length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len copies of item at position pos. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * The copy constructor is used to place the element into this vector. If pos + * is greater than the length of the vector then undefined behaviour results. + * If pos is negative then it is treated as an offset relative to the length + * of the vector. 
+ */ +template void Vector:: + insertDup(long pos, const T &item, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Copy the data item in one at a time. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(item); + + /* Set the new length. */ + BaseTable::tabLen = newLen; +} + +/** + * \brief Insert len new elements using the default constructor. + * + * Elements in the vector from pos onward are shifted len spaces to the right. + * Default constructors are used to init the new elements. If pos is off the + * end of the vector then undefined behaviour results. If pos is negative then + * it is treated as an offset relative to the length of the vector. + */ +template void Vector:: + insertNew(long pos, long len) +{ + /* If we are given a negative position to insert at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = BaseTable::tabLen + pos; + + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Init new data with default constructors. */ + T *dst = BaseTable::data + pos; + for ( long i = 0; i < len; i++, dst++ ) + new(dst) T(); + + /* Set the new length. 
*/ + BaseTable::tabLen = newLen; +} + +/* Makes space for len items, Does not init the items in any way. If pos is + * greater than the length of the vector then undefined behaviour results. + * Updates the length of the vector. */ +template void Vector:: + makeRawSpaceFor(long pos, long len) +{ + /* Calculate the new length. */ + long newLen = BaseTable::tabLen + len; + + /* Up resize, we are growing. */ + upResize( newLen ); + + /* Shift over data at insert spot if needed. */ + if ( len > 0 && pos < BaseTable::tabLen ) { + memmove(BaseTable::data + pos + len, BaseTable::data + pos, + sizeof(T)*(BaseTable::tabLen-pos)); + } + + /* Save the new length. */ + BaseTable::tabLen = newLen; +} + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_VECTOR_H */ diff --git a/colm.vim b/colm.vim new file mode 100644 index 00000000..b32441a5 --- /dev/null +++ b/colm.vim @@ -0,0 +1,89 @@ +" Vim syntax file +" +" Language: Colm +" Author: Adrian Thurston + +syntax clear + +" +" Regular Language Types +" + +" Identifiers +syntax match rlId "[a-zA-Z_][a-zA-Z_0-9]*" contained + +" Literals +syntax match rlLiteral "'\(\\.\|[^'\\]\)*'[i]*" contained +syntax match rlLiteral "\"\(\\.\|[^\"\\]\)*\"[i]*" contained +syntax match rlLiteral "\[\(\\.\|[^\]\\]\)*\]" contained + +" Numbers +syntax match rlNumber "[0-9][0-9]*" contained +syntax match rlNumber "0x[0-9a-fA-F][0-9a-fA-F]*" contained + +" Operators +syntax match rlOtherOps ":>" contained +syntax match rlOtherOps ":>>" contained +syntax match rlOtherOps "<:" contained + +syntax cluster rlTypes contains=rlId,rlLiteral,rlNumber,rlOtherOps +syntax region rlTypeRegion matchgroup=regionDelimiter start="/" end="/" + \ contains=@rlTypes + +syntax region cflTypeRegion matchgroup=regionDelimiter start="\[" end="\]" + \ contains=cflTypeRegion,patRegion,otLit,typeKeywords +syntax region patRegion matchgroup=String start="\"" end="\"" end="\n" + \ contains=char,cflTypeRegion + +syntax match char "[^\"\[]" contained +syntax match char "\\." 
contained + +syntax match otLit "\~.*$" +syntax match otLit "'\(\\.\|[^'\\]\)*'[i]*" + +" +" Other stuff +" + +syntax match tlComment "#.*$" +syntax match tlIdentifier "[a-zA-Z_][a-zA-Z_0-9]*" +syntax match tlNumber "[0-9][0-9]*" +syntax match tlNumber "nil" +syntax match tlNumber "true" +syntax match tlNumber "false" + +syntax keyword Type + \ commit include literal iter + \ namespace lex reducefirst global include + \ construct parse parse_stop match require + \ preeof + +syntax keyword typeKeywords + \ int str bool any ref vector map list ptr + +syntax keyword Keyword + \ reject else elsif return yield for while if + \ typeid in break + \ new deref + +syntax match tokenName "[a-zA-Z_][a-zA-Z_0-9]*" contained + +syntax region defTypes matchgroup=defKeywords + \ start="\" start="\" start="\" start="\" + \ matchgroup=Function end="[a-zA-Z_][a-zA-Z0-9_]*" end="/"me=e-1 + +" +" Specifying Groups +" +hi link tlComment Comment +hi link tlNumber Number +hi link otLit String +hi link rlNumber Number +hi link rlLiteral String +hi link defKeywords Type +hi link typeKeywords Type +hi link regionDelimiter Type +hi link char String +hi link tokenName Function + +let b:current_syntax = "colm" diff --git a/colm/Makefile.in b/colm/Makefile.in new file mode 100644 index 00000000..61b57867 --- /dev/null +++ b/colm/Makefile.in @@ -0,0 +1,123 @@ +# +# Copyright 2001-2007 Adrian Thurston +# + +# This file is part of Colm. +# +# Colm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Colm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Colm; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INCS += -I../common -I../aapl +DEFS += + +CFLAGS += -g -Wall -Wwrite-strings +LDFLAGS += + +# Frequently changed ones first. +CC_SRCS = \ + compile.cpp \ + bytecode.cpp \ + fsmrun.cpp \ + pdarun.cpp \ + input.cpp \ + lmparse.cpp \ + lmscan.cpp \ + parsetree.cpp \ + parsedata.cpp \ + fsmstate.cpp \ + fsmbase.cpp \ + fsmattach.cpp \ + fsmmin.cpp \ + fsmgraph.cpp \ + pdagraph.cpp \ + pdabuild.cpp \ + pdacodegen.cpp \ + fsmcodegen.cpp \ + redfsm.cpp \ + fsmexec.cpp \ + main.cpp \ + list.cpp \ + map.cpp \ + string.cpp \ + redbuild.cpp \ + closure.cpp \ + fsmap.cpp \ + dotgen.cpp \ + tree.cpp + +RUNTIME_SRC = fsmrun.cpp pdarun.cpp bytecode.cpp list.cpp \ + map.cpp string.cpp input.cpp tree.cpp +GEN_SRC = lmscan.cpp lmparse.h lmparse.cpp + +RUNTIME = runtime.a + +LIBS = ../common/common.a + +#************************************* + +PREFIX = @prefix@ + +BUILD_PARSERS = @BUILD_PARSERS@ + +# Programs +CXX = @CXX@ + +# Get objects and dependencies from sources. +OBJS = $(CC_SRCS:%.cpp=%.o) +DEPS = $(CC_SRCS:%.cpp=.%.d) +RUNTIME_OBJS = $(RUNTIME_SRC:%.cpp=%.o) + +# Rules. 
+all: colm $(RUNTIME) + +colm: $(GEN_SRC) $(OBJS) $(LIBS) + $(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + +$(RUNTIME): $(RUNTIME_OBJS) + ar -cr $@ $(RUNTIME_OBJS) + +ifeq ($(BUILD_PARSERS),true) + +lmparse.h: lmparse.kh + kelbt -o $@ $< + +lmparse.cpp: lmparse.kl lmparse.kh + kelbt -o $@ $< + +lmscan.cpp: lmparse.h + +lmscan.cpp: lmscan.rl + ragel -G2 -o $@ $< + +endif + +%.o: %.cpp + @$(CXX) -M $(DEFS) $(INCS) $< > .$*.d + $(CXX) -c $(CFLAGS) $(DEFS) $(INCS) -o $@ $< + +distclean: clean + rm -f Makefile + +ifeq ($(BUILD_PARSERS),true) +EXTRA_CLEAN = $(GEN_SRC) +endif + +clean: + rm -f tags .*.d *.o colm $(EXTRA_CLEAN) $(RUNTIME) + +install: all + install -d $(PREFIX)/bin + install -s colm $(PREFIX)/bin/colm + +-include $(DEPS) diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp new file mode 100644 index 00000000..30a76deb --- /dev/null +++ b/colm/bytecode.cpp @@ -0,0 +1,4092 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "bytecode.h" +#include "astring.h" +#include "pdarun.h" +#include "dlistval.h" +#include "fsmrun.h" +#include "pdarun.h" +#include +#include +#include +#include + +using std::cout; +using std::cerr; +using std::endl; + +#define push(i) (*(--sp) = (i)) +#define pop() (*sp++) +#define top() (*sp) +#define ptop() (sp) +#define popn(n) (sp += (n)) +#define pushn(n) (sp -= (n)) +#define local(o) (frame[o]) +#define plocal(o) (&frame[o]) +#define local_iframe(o) (iframe[o]) +#define plocal_iframe(o) (&iframe[o]) + +#define read_byte( i ) do { \ + i = ((uchar) *instr++); \ +} while(0) + +#define read_word( i ) do { \ + i = ((Word) *instr++); \ + i |= ((Word) *instr++) << 8; \ + i |= ((Word) *instr++) << 16; \ + i |= ((Word) *instr++) << 24; \ +} while(0) + +#define read_tree( i ) do { \ + Word w; \ + w = ((Word) *instr++); \ + w |= ((Word) *instr++) << 8; \ + w |= ((Word) *instr++) << 16; \ + w |= ((Word) *instr++) << 24; \ + i = (Tree*) w; \ +} while(0) + +#define read_half( i ) do { \ + i = ((Word) *instr++); \ + i |= ((Word) *instr++) << 8; \ +} while(0) + +static Tree **vm_stack; + +Kid *alloc_attrs( Program *prg, long length ) +{ + Kid *cur = 0; + for ( long i = 0; i < length; i++ ) { + Kid *next = cur; + cur = prg->kidPool.allocate(); + cur->next = next; + } + return cur; +} + +void free_attrs( Program *prg, Kid *attrs ) +{ + Kid *cur = attrs; + while ( cur != 0 ) { + Kid *next = cur->next; + prg->kidPool.free( cur ); + cur = next; + } +} + +void set_attr( Tree *tree, long pos, Tree *val ) +{ + Kid *cur = tree->child; + for ( long i = 0; i < pos; i++ ) + cur = cur->next; + cur->tree = val; +} + +Tree *get_attr( Tree *tree, long pos ) +{ + Kid *cur = tree->child; + for ( long i = 0; i < pos; i++ ) + cur = cur->next; + return cur->tree; +} + +Kid 
*copy_obj_data( Program *prg, long length, Kid *src ) +{ + Kid *cur = 0; + for ( long i = 0; i < length; i++ ) { + Kid *next = cur; + cur = prg->kidPool.allocate(); + cur->next = next; + } + + Kid *dest = cur; + for ( long i = 0; i < length; i++ ) { + dest->tree = src->tree; + dest = dest->next; + src = src->next; + } + return cur; +} + +Kid *kid_list_concat( Kid *list1, Kid *list2 ) +{ + if ( list1 == 0 ) + return list2; + else if ( list2 == 0 ) + return list1; + + Kid *dest = list1; + while ( dest->next != 0 ) + dest = dest->next; + dest->next = list2; + return list1; +} + +/* Type conversions. */ +List *list( Tree *tree ) { return (List*) tree; } +Map *map( Tree *tree ) { return (Map*) tree; } +Str *str( Tree *tree ) { return (Str*) tree; } + +void split_iter_cur( Tree **&sp, Program *prg, TreeIter *iter ); +Tree *split_tree( Program *prg, Tree *t ); +Tree *copy_real_tree( Program *prg, Tree *tree, Kid *oldNextDown, Kid *&newNextDown ); + +Stream *open_stream_file( Program *prg, FILE *file ) +{ + Stream *res = (Stream*)prg->mapElPool.allocate(); + res->id = LEL_ID_STREAM; + res->file = file; + res->in = new InputStreamFile( file ); + res->scanner = new FsmRun( prg->rtd->fsmTables ); + res->scanner->attachInputStream( res->in ); + return res; +} + +Stream *open_stream_fd( Program *prg, long fd ) +{ + Stream *res = (Stream*)prg->mapElPool.allocate(); + res->id = LEL_ID_STREAM; + res->in = new InputStreamFD( fd ); + res->scanner = new FsmRun( prg->rtd->fsmTables ); + res->scanner->attachInputStream( res->in ); + return res; +} + +Tree *open_file( Program *prg, Tree *name ) +{ + Head *head = ((Str*)name)->value; + FILE *file = fopen( string_data(head), "rb" ); + Tree *res = 0; + + if ( file != 0 ) { + res = (Tree*) open_stream_file( prg, file ); + res = prg->treePool.allocate(); + res->id = LEL_ID_STREAM; + ((Stream *)res)->file = file; + } + + return res; +} + +void check_parse_tree( Tree *tree ) +{ + if ( tree->refs != 1 ) + cerr << "tree->refs: " << tree->refs << 
endl; + + Kid *child = tree->child; + while ( child != 0 ) { + check_parse_tree( child->tree ); + child = child->next; + } +} + +void send( Tree **root, Program *prg, PdaRun *parser, Tree *tree, bool ignore ) +{ + /* If the tree already has an alg (it has been parsed) then we need to + * send a copy of it because the parsing that we are about to do requires + * a fresh alg. */ + if ( tree->alg != 0 ) { + #ifdef COLM_LOG_BYTECODE + cerr << "copying tree in send because alg is set" << endl; + #endif + Kid *unused = 0; + tree = copy_real_tree( prg, tree, 0, unused ); + tree_upref( tree ); + } + + assert( tree->alg == 0 ); + tree->alg = prg->algPool.allocate(); + + if ( tree->id >= prg->rtd->firstNonTermId ) + tree->id = prg->rtd->lelInfo[tree->id].termDupId; + + tree->alg->flags |= AF_ARTIFICIAL; + if ( ignore ) + tree->alg->flags |= AF_IGNORE; + + /* FIXME: Do we need to remove the ignore tokens + * at this point? Will it cause a leak? */ + + Kid *kid = prg->kidPool.allocate(); + kid->tree = tree; + + if ( parser->queue == 0 ) + parser->queue = parser->queueLast = kid; + else { + parser->queueLast->next = kid; + parser->queueLast = kid; + } +} + +Tree *make_token( Tree **root, Program *prg, PdaRun *parser, long nargs ) +{ + Tree **const sp = root; + Tree **base = ptop() + nargs; + + Int *idInt = (Int*)base[-1]; + Str *textStr = (Str*)base[-2]; + + long id = idInt->value; + Head *tokdata = string_copy( prg, textStr->value ); + + LangElInfo *lelInfo = prg->rtd->lelInfo; + Tree *tree; + + if ( lelInfo[id].ignore ) { + tree = prg->treePool.allocate(); + tree->refs = 1; + tree->id = id; + tree->tokdata = tokdata; + } + else { + long objectLength = lelInfo[id].objectLength; + Kid *attrs = alloc_attrs( prg, objectLength ); + + tree = prg->treePool.allocate(); + tree->id = id; + tree->refs = 1; + tree->tokdata = tokdata; + + tree->child = attrs; + + assert( nargs-2 <= objectLength ); + for ( long id = 0; id < nargs-2; id++ ) { + set_attr( tree, id, base[-3-id] ); + 
tree_upref( get_attr( tree, id) ); + } + } + return tree; +} + +Tree *make_tree( Tree **root, Program *prg, PdaRun *parser, int nargs ) +{ + Tree **const sp = root; + Tree **base = ptop() + nargs; + + Int *idInt = (Int*)base[-1]; + + long id = idInt->value; + LangElInfo *lelInfo = prg->rtd->lelInfo; + + Tree *tree = prg->treePool.allocate(); + tree->id = id; + tree->refs = 1; + + long objectLength = lelInfo[id].objectLength; + Kid *attrs = alloc_attrs( prg, objectLength ); + + Kid *last = 0, *child = 0; + for ( long id = 0; id < nargs-1; id++ ) { + Kid *kid = prg->kidPool.allocate(); + kid->tree = base[-2-id]; + tree_upref( kid->tree ); + + if ( last == 0 ) + child = kid; + else + last->next = kid; + + last = kid; + } + + tree->child = kid_list_concat( attrs, child ); + + return tree; +} + +Tree *parse( Tree **&sp, Program *prg, Stream *stream, + long parserId, long stopId, CodeVect *&cv ) +{ + PdaTables *tables = prg->rtd->parsers[parserId]; + PdaRun parser( sp, prg, tables, stream->scanner, stopId ); + parser.run(); + parser.commit(); + Tree *tree = parser.getParsedRoot( stopId > 0 ); + tree_upref( tree ); + parser.clean(); + + cv = new CodeVect; + cv->transfer( parser.allReverseCode ); + return tree; +} + +Tree *undo_parse( Tree **&sp, Program *prg, Stream *stream, + long parserId, Tree *tree, CodeVect *rev ) +{ + PdaTables *tables = prg->rtd->parsers[parserId]; + PdaRun parser( sp, prg, tables, stream->scanner, 0 ); + parser.undoParse( tree, rev ); + return 0; +} + +Tree *stream_pull( Program *prg, Stream *stream, Tree *length ) +{ + long len = ((Int*)length)->value; + Head *tokdata = stream->scanner->extractToken( len ); + + Str *str = (Str*) prg->treePool.allocate(); + str->id = LEL_ID_STR; + str->value = tokdata; + + return (Tree*)str; +} + +void undo_pull( Program *prg, Stream *stream, Tree *str ) +{ + const char *data = string_data( ( (Str*)str )->value ); + long length = string_length( ( (Str*)str )->value ); + stream->scanner->sendBackText( data, length 
); +} + +Word stream_push( Tree **&sp, Program *prg, Stream *stream, Tree *any ) +{ + std::stringstream ss; + print_tree( ss, sp, prg, any ); + stream->scanner->streamPush( ss.str().c_str(), ss.str().size()); + return ss.str().size(); +} + +void undo_stream_push( Tree **&sp, Program *prg, Stream *stream, Word len ) +{ + stream->scanner->undoStreamPush( len ); +} + + +void print_str( Head *str ) +{ + cout.write( (char*)(str->data), str->length ); +} + +void print_ignore_list( Tree **&sp, Program *prg, Tree *tree ) +{ + Kid *ignore = tree_ignore( prg, tree ); + + /* Record the root of the stack and push everything. */ + Tree **root = ptop(); + while ( tree_is_ignore( prg, ignore ) ) { + push( (SW)ignore ); + ignore = ignore->next; + } + + /* Pop them off and print. */ + while ( ptop() != root ) { + ignore = (Kid*) pop(); + print_tree( sp, prg, ignore->tree ); + } +} + +void print_kid( ostream &out, Tree **&sp, Program *prg, Kid *kid, bool printIgnore ) +{ + Tree **root = ptop(); + Kid *child; + +rec_call: + /* If not currently skipping ignore data, then print it. Ignore data can + * be associated with terminals and nonterminals. */ + if ( printIgnore && tree_ignore( prg, kid->tree ) != 0 ) { + /* Ignorelists are reversed. */ + print_ignore_list( sp, prg, kid->tree ); + printIgnore = false; + } + + if ( kid->tree->id < prg->rtd->firstNonTermId ) { + /* Always turn on ignore printing when we get to a token. 
*/ + printIgnore = true; + + if ( kid->tree->id == LEL_ID_INT ) + out << ((Int*)kid->tree)->value; + else if ( kid->tree->id == LEL_ID_BOOL ) { + if ( ((Int*)kid->tree)->value ) + out << "true"; + else + out << "false"; + } + else if ( kid->tree->id == LEL_ID_PTR ) + out << '#' << (void*) ((Pointer*)kid->tree)->value; + else if ( kid->tree->id == LEL_ID_STR ) + print_str( ((Str*)kid->tree)->value ); + else if ( kid->tree->id == LEL_ID_STREAM ) + out << '#' << (void*) ((Stream*)kid->tree)->file; + else if ( kid->tree->tokdata != 0 && + string_length( kid->tree->tokdata ) > 0 ) + { + out.write( string_data( kid->tree->tokdata ), + string_length( kid->tree->tokdata ) ); + } + } + else { + /* Non-terminal. */ + child = tree_child( prg, kid->tree ); + if ( child != 0 ) { + push( (SW)kid ); + kid = child; + while ( kid != 0 ) { + goto rec_call; + rec_return: + kid = kid->next; + } + kid = (Kid*)pop(); + } + } + + if ( ptop() != root ) + goto rec_return; +} + +void print_tree( Tree **&sp, Program *prg, Tree *tree ) +{ + if ( tree == 0 ) + cout << "NIL"; + else { + Kid kid; + kid.tree = tree; + kid.next = 0; + print_kid( cout, sp, prg, &kid, false ); + } +} + +void print_tree( ostream &out, Tree **&sp, Program *prg, Tree *tree ) +{ + if ( tree == 0 ) + cout << "NIL"; + else { + Kid kid; + kid.tree = tree; + kid.next = 0; + print_kid( out, sp, prg, &kid, false ); + } +} + +void xml_escape_data( const char *data, long len ) +{ + for ( int i = 0; i < len; i++ ) { + if ( 32 <= data[i] && data[i] <= 126 ) + cout << data[i]; + else + cout << "&#" << ((unsigned)data[i]) << ';'; + } +} + +void xml_print_kid( Tree **&sp, Program *prg, Kid *kid, int depth ) +{ + Tree **root = ptop(); + int i = 0; + +rec_call: + for ( i = 0; i < depth; i++ ) + cout << " "; + + if ( kid->tree == 0 ) + cout << "NIL" << endl; + else { + cout << '<' << prg->rtd->lelInfo[kid->tree->id].name; + if ( kid->tree->child != 0 ) { + cout << '>' << endl; + push( (SW) kid ); + kid = kid->tree->child; + while ( kid 
!= 0 ) { + depth++; + goto rec_call; + rec_return: + depth--; + kid = kid->next; + + /* If the parent kid is a repeat then skip this node and go + * right to the first child (repeated item). */ + if ( prg->rtd->lelInfo[((Kid*)top())->tree->id].repeat ) + kid = kid->tree->child; + } + kid = (Kid*) pop(); + + for ( i = 0; i < depth; i++ ) + cout << " "; + cout << "rtd->lelInfo[kid->tree->id].name << '>' << endl; + } + else if ( kid->tree->id == LEL_ID_PTR ) { + cout << '>' << (void*)((Pointer*)kid->tree)->value << + "rtd->lelInfo[kid->tree->id].name << '>' << endl; + } + else if ( kid->tree->id == LEL_ID_BOOL ) { + if ( ((Int*)kid->tree)->value ) + cout << ">truefalsertd->lelInfo[kid->tree->id].name << '>' << endl; + } + else if ( kid->tree->id == LEL_ID_INT ) { + cout << '>' << ((Int*)kid->tree)->value << + "rtd->lelInfo[kid->tree->id].name << '>' << endl; + } + else if ( kid->tree->id == LEL_ID_STR ) { + Head *head = (Head*) ((Str*)kid->tree)->value; + + cout << '>'; + xml_escape_data( (char*)(head->data), head->length ); + cout << "rtd->lelInfo[kid->tree->id].name << '>' << endl; + } + else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->firstNonTermId && + kid->tree->tokdata != 0 && + string_length( kid->tree->tokdata ) > 0 && + !prg->rtd->lelInfo[kid->tree->id].literal ) + { + cout << '>'; + xml_escape_data( string_data( kid->tree->tokdata ), + string_length( kid->tree->tokdata ) ); + cout << "rtd->lelInfo[kid->tree->id].name << '>' << endl; + } + else + cout << "/>" << endl; + } + + if ( ptop() != root ) + goto rec_return; +} + +void xml_print_tree( Tree **&sp, Program *prg, Tree *tree ) +{ + Kid kid; + kid.tree = tree; + kid.next = 0; + xml_print_kid( sp, prg, &kid, 0 ); +} + +Tree *get_rhs_el( Program *prg, Tree *lhs, long position ) +{ + Kid *pos = tree_child( prg, lhs ); + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + return pos->tree; +} + +Tree **alloc_obj_data( long length ) +{ + Tree **attrs = 0; + if ( length > 0 ) { + attrs = 
new Tree*[length]; + memset( attrs, 0, sizeof(Tree*)*length ); + } + return attrs; +} + +void set_field( Program *prg, Tree *tree, long field, Tree *value ) +{ + assert( tree->refs == 1 ); + if ( value != 0 ) + assert( value->refs >= 1 ); + set_attr( tree, field, value ); +} + +Tree *get_field( Tree *tree, Word field ) +{ + return get_attr( tree, field ); +} + +Tree *get_field_split( Program *prg, Tree *tree, Word field ) +{ + Tree *val = get_attr( tree, field ); + Tree *split = split_tree( prg, val ); + set_attr( tree, field, split ); + return split; +} + +void set_local( Tree **frame, long field, Tree *tree ) +{ + if ( tree != 0 ) + assert( tree->refs >= 1 ); + local(field) = tree; +} + +Tree *get_local_split( Program *prg, Tree **frame, long field ) +{ + Tree *val = local(field); + Tree *split = split_tree( prg, val ); + local(field) = split; + return split; +} + +Tree *get_ptr_val( Pointer *ptr ) +{ + return ptr->value->tree; +} + +Tree *get_ptr_val_split( Program *prg, Pointer *ptr ) +{ + Tree *val = ptr->value->tree; + Tree *split = split_tree( prg, val ); + ptr->value->tree = split; + return split; +} + +void list_free( Program *prg, List *list ) +{ + ListEl *el = list->head; + while ( el != 0 ) { + ListEl *next = el->next; + tree_downref( prg, el->value ); + prg->listElPool.free( el ); + el = next; + } + prg->mapElPool.free( (MapEl*)list ); +} + + +void map_free( Program *prg, Map *map ) +{ + MapEl *el = map->head; + while ( el != 0 ) { + MapEl *next = el->next; + tree_downref( prg, el->key ); + tree_downref( prg, el->tree ); + prg->mapElPool.free( el ); + el = next; + } + prg->mapElPool.free( (MapEl*)map ); +} + +void stream_free( Program *prg, Stream *s ) +{ + delete s->scanner; + delete s->in; + if ( s->file != 0 ) + fclose( s->file ); + prg->mapElPool.free( (MapEl*)s ); +} + +void downref_local_trees( Program *prg, Tree **frame, char *trees, long treesLen ) +{ + for ( long i = 0; i < treesLen; i++ ) { + #ifdef COLM_LOG_BYTECODE + cerr << "local tree 
/* Advance iter->ref to the next kid whose tree id equals iter->searchId (or
 * to any kid when searchId is the program's anyId), walking depth-first and
 * visiting a node before its children.
 *
 * The recursion is simulated with gotos and the VM stack: before descending
 * into a node's children the current ref.next and ref.kid are pushed, and
 * they are popped again once the child list is exhausted.
 *
 * tryFirst: when true the kid iter->ref currently points at is itself
 * tested; when false it is skipped and the search starts with its children.
 *
 * On a match the function returns immediately, leaving the pushed parent
 * entries on the stack. When nothing more is found iter->ref.kid is set to
 * 0. */
void iter_find( Program *prg, Tree **&sp, TreeIter *iter, bool tryFirst )
{
	bool anyTree = iter->searchId == prg->rtd->anyId;
	/* Stack position that marks "no simulated call frames outstanding". */
	Tree **top = iter->stackRoot;

rec_call:
	if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) )
		return;
	else if ( iter->ref.kid->tree->child != 0 ) {
		/* Save the current position, then descend to the first child. The
		 * saved pair on the stack becomes the child's parent reference. */
		push( (SW) iter->ref.next );
		push( (SW) iter->ref.kid );
		iter->ref.kid = iter->ref.kid->tree->child;
		iter->ref.next = (Ref*)ptop();
		while ( iter->ref.kid != 0 ) {
			/* Every node reached by descending is a candidate. */
			tryFirst = true;
			goto rec_call;
			/* Simulated return point: move on to the next sibling. */
			rec_return:
			iter->ref.kid = iter->ref.kid->next;
		}
		/* Children exhausted, restore the saved position. */
		iter->ref.kid = (Kid*)pop();
		iter->ref.next = (Ref*)pop();
	}

	/* Saved positions still on the stack: resume in the enclosing loop. */
	if ( top != ptop() )
		goto rec_return;

	/* Back at the stack root without a match: the search is finished. */
	iter->ref.kid = 0;
}
prg->trueVal : prg->falseVal ); +} + +Tree *tree_iter_next_child( Program *prg, Tree **&sp, TreeIter *iter ) +{ + assert( iter->stackSize == iter->stackRoot - ptop() ); + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the first child. */ + if ( iter->rootRef.kid->tree->child == 0 ) { + iter->ref.kid = 0; + iter->ref.next = 0; + } + else { + push( (SW) iter->rootRef.next ); + push( (SW) iter->rootRef.kid ); + iter->ref.kid = iter->rootRef.kid->tree->child; + iter->ref.next = (Ref*)ptop(); + } + } + else { + iter->ref.kid = iter->ref.kid->next; + } + + bool anyTree = iter->searchId == prg->rtd->anyId; + if ( ! anyTree ) { + /* Have a previous item, go to the next sibling. */ + while ( iter->ref.kid != 0 && iter->ref.kid->tree->id != iter->searchId ) { + iter->ref.kid = iter->ref.kid->next; + } + } + + iter->stackSize = iter->stackRoot - ptop(); + + return (iter->ref.kid ? prg->trueVal : prg->falseVal ); +} + +Tree *tree_iter_prev_child( Program *prg, Tree **&sp, TreeIter *iter ) +{ + assert( iter->stackSize == iter->stackRoot - ptop() ); + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the first child. */ + if ( iter->rootRef.kid->tree->child == 0 ) { + iter->ref.kid = 0; + iter->ref.next = 0; + } + else { + push( (SW) iter->rootRef.next ); + push( (SW) iter->rootRef.kid ); + + Kid *last = iter->rootRef.kid->tree->child; + while ( last->next != 0 ) + last = last->next; + + iter->ref.kid = last; + iter->ref.next = (Ref*)ptop(); + } + } + else { + /* Have a previous item, go to the prev sibling. */ + Kid *wasAt = iter->ref.kid; + Kid *parent = (Kid*) top(); + Kid *cur = 0, *next = parent->tree->child; + + while ( next != wasAt ) { + cur = next; + next = next->next; + } + + iter->ref.kid = cur; + } + + bool anyTree = iter->searchId == prg->rtd->anyId; + if ( ! anyTree ) { + /* Have a previous item, go to the next sibling. 
*/ + while ( iter->ref.kid != 0 && iter->ref.kid->tree->id != iter->searchId ) { + iter->ref.kid = iter->ref.kid->next; + } + } + + iter->stackSize = iter->stackRoot - ptop(); + + return (iter->ref.kid ? prg->trueVal : prg->falseVal ); +} + +void tree_iter_destroy( Tree **&sp, TreeIter *iter ) +{ + long curStackSize = iter->stackRoot - ptop(); + assert( iter->stackSize == curStackSize ); + popn( iter->stackSize ); +} + +void user_iter_destroy( Tree **&sp, UserIter *uiter ) +{ + /* We should always be coming from a yield. The current stack size will be + * nonzero and the stack size in the iterator will be correct. */ + long curStackSize = uiter->stackRoot - ptop(); + assert( uiter->stackSize == curStackSize ); + + long argSize = uiter->argSize; + + popn( uiter->stackRoot - ptop() ); + popn( sizeof(UserIter) / sizeof(Word) ); + popn( argSize ); +} + +Tree *tree_iter_deref_cur( TreeIter *iter ) +{ + return iter->ref.kid == 0 ? 0 : iter->ref.kid->tree; +} + +Tree *tree_search( Kid *kid, long id ) +{ + if ( kid->tree->id == id ) + return kid->tree; + + Tree *res = 0; + if ( kid->tree->child != 0 ) + res = tree_search( kid->tree->child, id ); + + if ( res == 0 && kid->next != 0 ) + res = tree_search( kid->next, id ); + + return res; +} + +Tree *tree_search( Tree *tree, long id ) +{ + Tree *res = 0; + if ( tree->id == id ) + res = tree; + else if ( tree->child != 0 ) + res = tree_search( tree->child, id ); + return res; +} + +Tree *copy_real_tree( Program *prg, Tree *tree, Kid *oldNextDown, Kid *&newNextDown ) +{ + assert( tree->refs >= 2 ); + + /* Need to keep a lookout for next down. If + * copying it, return the copy. */ + Tree *newTree = prg->treePool.allocate(); + + newTree->id = tree->id; + newTree->tokdata = string_copy( prg, tree->tokdata ); +// newTree->pos = tree->pos; + + tree->refs -= 1; + + /* Copy the child list, will handle attributes, ignores + * and the children. 
*/ + Kid *child = tree->child, *last = 0; + while ( child != 0 ) { + Kid *newChild = prg->kidPool.allocate(); + + /* Store the first child. */ + if ( newTree->child == 0 ) + newTree->child = newChild; + + /* Watch out for next down. */ + if ( child == oldNextDown ) + newNextDown = newChild; + + newChild->tree = child->tree; + newChild->next = 0; + + /* May be an attribute. */ + if ( newChild->tree != 0 ) + newChild->tree->refs += 1; + + if ( last != 0 ) + last->next = newChild; + + child = child->next; + last = newChild; + } + + return newTree; +} + +List *copy_list( Program *prg, List *list, Kid *oldNextDown, Kid *&newNextDown ) +{ + if ( list->refs > 1 ) { + #ifdef COLM_LOG_BYTECODE + cerr << "splitting list: " << list << " refs: " << + list->refs << endl; + #endif + + /* Not a need copy. */ + List *newList = (List*)prg->mapElPool.allocate(); + newList->id = list->genericInfo->langElId; + newList->genericInfo = list->genericInfo; + + list->refs -= 1; + + ListEl *src = list->head; + while( src != 0 ) { + ListEl *newEl = prg->listElPool.allocate(); + newEl->value = src->value; + tree_upref( newEl->value ); + + newList->append( newEl ); + + /* Watch out for next down. */ + if ( (Kid*)src == oldNextDown ) + newNextDown = (Kid*)newEl; + + src = src->next; + } + + list = newList; + } + return list; +} + + +Map *copy_map( Program *prg, Map *map, Kid *oldNextDown, Kid *&newNextDown ) +{ + if ( map->refs > 1 ) { + #ifdef COLM_LOG_BYTECODE + cerr << "splitting map: " << map << " refs: " << + map->refs << endl; + #endif + + Map *newMap = (Map*)prg->mapElPool.allocate(); + newMap->id = map->genericInfo->langElId; + newMap->genericInfo = map->genericInfo; + newMap->treeSize = map->treeSize; + newMap->root = 0; + + /* If there is a root, copy the tree. 
*/ + if ( map->root != 0 ) { + newMap->root = newMap->copyBranch( prg, map->root, + oldNextDown, newNextDown ); + } + + map->refs -= 1; + + for ( MapEl *el = newMap->head; el != 0; el = el->next ) { + assert( map->genericInfo->typeArg == TYPE_TREE ); + tree_upref( el->tree ); + } + + map = newMap; + } + return map; +} + +Tree *copy_tree( Program *prg, Tree *tree, Kid *oldNextDown, Kid *&newNextDown ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + long genericId = lelInfo[tree->id].genericId; + if ( genericId > 0 ) { + GenericInfo *generic = &prg->rtd->genericInfo[genericId]; + if ( generic->type == GEN_LIST ) + tree = (Tree*) copy_list( prg, (List*) tree, oldNextDown, newNextDown ); + else if ( generic->type == GEN_MAP ) + tree = (Tree*) copy_map( prg, (Map*) tree, oldNextDown, newNextDown ); + else + assert(false); + } + else if ( tree->id == LEL_ID_PTR ) + assert(false); + else if ( tree->id == LEL_ID_BOOL ) + assert(false); + else if ( tree->id == LEL_ID_INT ) + assert(false); + else if ( tree->id == LEL_ID_STR ) + assert(false); + else + tree = copy_real_tree( prg, tree, oldNextDown, newNextDown ); + + assert( tree->refs == 0 ); + return tree; +} + +Tree *split_tree( Program *prg, Tree *tree ) +{ + if ( tree != 0 ) { + assert( tree->refs >= 1 ); + + if ( tree->refs > 1 ) { + #ifdef COLM_LOG_BYTECODE + cerr << "splitting tree: " << tree << " refs: " << + tree->refs << endl; + #endif + + Kid *oldNextDown = 0, *newNextDown = 0; + tree = copy_tree( prg, tree, oldNextDown, newNextDown ); + tree_upref( tree ); + } + + assert( tree->refs == 1 ); + } + return tree; +} + +Tree *create_generic( Program *prg, Word genericId ) +{ + GenericInfo *genericInfo = &prg->rtd->genericInfo[genericId]; + Tree *newGeneric = 0; + switch ( genericInfo->type ) { + case GEN_MAP: { + Map *map = (Map*)prg->mapElPool.allocate(); + map->id = genericInfo->langElId; + map->genericInfo = genericInfo; + newGeneric = (Tree*) map; + break; + } + case GEN_LIST: { + List *list = 
(List*)prg->mapElPool.allocate(); + list->id = genericInfo->langElId; + list->genericInfo = genericInfo; + newGeneric = (Tree*) list; + break; + } + default: + assert(false); + return 0; + } + + return newGeneric; +} + +bool map_insert( Program *prg, Map *map, Tree *key, Tree *element ) +{ + MapEl *mapEl = map->insert( prg, key ); + + if ( mapEl != 0 ) { + mapEl->tree = element; + return true; + } + + return false; +} + +void map_unremove( Program *prg, Map *map, Tree *key, Tree *element ) +{ + MapEl *mapEl = map->insert( prg, key ); + assert( mapEl != 0 ); + mapEl->tree = element; +} + +Tree *map_uninsert( Program *prg, Map *map, Tree *key ) +{ + MapEl *el = map->detach( key ); + Tree *val = el->tree; + prg->mapElPool.free( el ); + return val; +} + +Tree *map_store( Program *prg, Map *map, Tree *key, Tree *element ) +{ + Tree *oldTree = 0; + MapEl *elInTree = 0; + MapEl *mapEl = map->insert( prg, key, &elInTree ); + + if ( mapEl != 0 ) + mapEl->tree = element; + else { + /* Element with key exists. Overwriting the value. */ + oldTree = elInTree->tree; + elInTree->tree = element; + } + + return oldTree; +} + +Tree *map_unstore( Program *prg, Map *map, Tree *key, Tree *existing ) +{ + Tree *stored = 0; + if ( existing == 0 ) { + MapEl *mapEl = map->detach( key ); + stored = mapEl->tree; + prg->mapElPool.free( mapEl ); + } + else { + MapEl *mapEl = map->find( key ); + stored = mapEl->tree; + mapEl->tree = existing; + } + return stored; +} + +Tree *map_find( Map *map, Tree *key ) +{ + MapEl *mapEl = map->find( key ); + return mapEl == 0 ? 
0 : mapEl->tree; +} + +long map_length( Map *map ) +{ + return map->length(); +} + +long list_length( List *list ) +{ + return list->length(); +} + +void list_append( Program *prg, List *list, Tree *val ) +{ + assert( list->refs == 1 ); + if ( val != 0 ) + assert( val->refs >= 1 ); + ListEl *listEl = prg->listElPool.allocate(); + listEl->value = val; + list->append( listEl ); +} + +Tree *list_remove_end( Program *prg, List *list ) +{ + Tree *tree = list->tail->value; + prg->listElPool.free( list->detachLast() ); + return tree; +} + +Tree *get_list_mem( List *list, Word field ) +{ + Tree *result = 0; + switch ( field ) { + case 0: + result = list->head->value; + break; + case 1: + result = list->tail->value; + break; + default: + assert( false ); + break; + } + return result; +} + +Tree *get_list_mem_split( Program *prg, List *list, Word field ) +{ + Tree *sv = 0; + switch ( field ) { + case 0: + sv = split_tree( prg, list->head->value ); + list->head->value = sv; + break; + case 1: + sv = split_tree( prg, list->tail->value ); + list->tail->value = sv; + break; + default: + assert( false ); + break; + } + return sv; +} + +Tree *set_list_mem( List *list, Half field, Tree *value ) +{ + assert( list->refs == 1 ); + if ( value != 0 ) + assert( value->refs >= 1 ); + + Tree *existing = 0; + switch ( field ) { + case 0: + existing = list->head->value; + list->head->value = value; + break; + case 1: + existing = list->tail->value; + list->tail->value = value; + break; + default: + assert( false ); + break; + } + return existing; +} + +struct TreePair +{ + TreePair() : key(0), val(0) {} + + Tree *key; + Tree *val; +}; + +TreePair map_remove( Program *prg, Map *map, Tree *key ) +{ + MapEl *mapEl = map->find( key ); + TreePair result; + if ( mapEl != 0 ) { + map->detach( mapEl ); + result.key = mapEl->key; + result.val = mapEl->tree; + prg->mapElPool.free( mapEl ); + } + + return result; +} + +void split_ref( Tree **&sp, Program *prg, Ref *fromRef ) +{ + /* Go up the chain of 
kids, turing the pointers down. */ + Ref *last = 0, *ref = fromRef, *next = 0; + while ( ref->next != 0 ) { + next = ref->next; + ref->next = last; + last = ref; + ref = next; + } + ref->next = last; + + /* Now traverse the list, which goes down. */ + while ( ref != 0 ) { + if ( ref->kid->tree->refs > 1 ) { + #ifdef COLM_LOG_BYTECODE + cerr << "splitting tree: " << ref->kid << " refs: " << + ref->kid->tree->refs << endl; + #endif + + Ref *nextDown = ref->next; + while ( nextDown != 0 && nextDown->kid == ref->kid ) + nextDown = nextDown->next; + + Kid *oldNextKidDown = nextDown != 0 ? nextDown->kid : 0; + Kid *newNextKidDown = 0; + + Tree *newTree = copy_tree( prg, ref->kid->tree, + oldNextKidDown, newNextKidDown ); + tree_upref( newTree ); + + while ( ref != 0 && ref != nextDown ) { + next = ref->next; + ref->next = 0; + + ref->kid->tree = newTree; + ref = next; + } + + /* Correct kid pointers down from ref. */ + while ( nextDown != 0 && nextDown->kid == oldNextKidDown ) { + nextDown->kid = newNextKidDown; + nextDown = nextDown->next; + } + } + else { + /* Reset the list as we go down. 
*/ + next = ref->next; + ref->next = 0; + ref = next; + } + } +} + +void split_iter_cur( Tree **&sp, Program *prg, TreeIter *iter ) +{ + if ( iter->ref.kid == 0 ) + return; + + split_ref( sp, prg, &iter->ref ); +} + +void set_ref_value( Ref *ref, Tree *v ) +{ + Kid *firstKid = ref->kid; + while ( ref != 0 && ref->kid == firstKid ) { + ref->kid->tree = v; + ref = ref->next; + } +} + +long cmp_tree( const Tree *tree1, const Tree *tree2 ) +{ + long cmpres = 0; + if ( tree1 == 0 ) { + if ( tree2 == 0 ) + return 0; + else + return -1; + } + else if ( tree2 == 0 ) + return 1; + else if ( tree1->id < tree2->id ) + return -1; + else if ( tree1->id > tree2->id ) + return 1; + else if ( tree1->id == LEL_ID_PTR ) { + if ( ((Pointer*)tree1)->value < ((Pointer*)tree2)->value ) + return -1; + else if ( ((Pointer*)tree1)->value > ((Pointer*)tree2)->value ) + return 1; + } + else if ( tree1->id == LEL_ID_INT ) { + if ( ((Int*)tree1)->value < ((Int*)tree2)->value ) + return -1; + else if ( ((Int*)tree1)->value > ((Int*)tree2)->value ) + return 1; + } + else if ( tree1->id == LEL_ID_STR ) { + cmpres = cmp_string( ((Str*)tree1)->value, ((Str*)tree2)->value ); + if ( cmpres != 0 ) + return cmpres; + } + else { + if ( tree1->tokdata == 0 && tree2->tokdata != 0 ) + return -1; + else if ( tree1->tokdata != 0 && tree2->tokdata == 0 ) + return 1; + else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) { + cmpres = cmp_string( tree1->tokdata, tree2->tokdata ); + if ( cmpres != 0 ) + return cmpres; + } + } + + Kid *kid1 = tree1->child; + Kid *kid2 = tree2->child; + + while ( true ) { + if ( kid1 == 0 && kid2 == 0 ) + return 0; + else if ( kid1 == 0 && kid2 != 0 ) + return -1; + else if ( kid1 != 0 && kid2 == 0 ) + return 1; + else { + cmpres = cmp_tree( kid1->tree, kid2->tree ); + if ( cmpres != 0 ) + return cmpres; + } + kid1 = kid1->next; + kid2 = kid2->next; + } +} + +/* This must traverse in the same order that the bindId assignments are done + * in. 
*/ +bool match_pattern( Tree **bindings, Program *prg, int pat, Kid *kid, bool checkNext ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + + #ifdef COLM_LOG_MATCH + LangElInfo *lelInfo = prg->rtd->lelInfo; + cerr << "match_pattern " << ( pat == -1 ? "NULL" : lelInfo[nodes[pat].id].name ) << + " vs " << ( kid == 0 ? "NULL" : lelInfo[kid->tree->id].name ) << endl; + #endif + + /* match node, recurse on children. */ + if ( pat != -1 && kid != 0 ) { + if ( nodes[pat].id == kid->tree->id ) { + /* If the pattern node has data, then this means we need to match + * the data against the token data. */ + if ( nodes[pat].data != 0 ) { + /* Check the length of token text. */ + if ( nodes[pat].length != string_length( kid->tree->tokdata ) ) + return false; + + /* Check the token text data. */ + if ( nodes[pat].length > 0 && memcmp( nodes[pat].data, + string_data( kid->tree->tokdata ), nodes[pat].length ) != 0 ) + return false; + } + + /* No failure, all okay. */ + if ( nodes[pat].bindId > 0 ) { + #ifdef COLM_LOG_MATCH + cerr << "bindId: " << nodes[pat].bindId << endl; + #endif + bindings[nodes[pat].bindId] = kid->tree; + } + + /* If we didn't match a terminal duplicate of a nonterm then check + * down the children. */ + if ( !nodes[pat].stop ) { + /* Check for failure down child branch. */ + bool childCheck = match_pattern( bindings, prg, + nodes[pat].child, tree_child( prg, kid->tree ), true ); + if ( ! childCheck ) + return false; + } + + /* If checking next, then look for failure there. */ + if ( checkNext ) { + bool nextCheck = match_pattern( bindings, prg, + nodes[pat].next, kid->next, true ); + if ( ! nextCheck ) + return false; + } + + return true; + } + } + else if ( pat == -1 && kid == 0 ) { + /* Both null is a match. 
*/ + return 1; + } + + return false; +} + +Tree *construct_integer( Program *prg, long i ) +{ + Int *integer = (Int*) prg->treePool.allocate(); + integer->id = LEL_ID_INT; + integer->value = i; + + return (Tree*)integer; +} + +Tree *construct_string( Program *prg, Head *s ) +{ + Str *str = (Str*) prg->treePool.allocate(); + str->id = LEL_ID_STR; + str->value = s; + + return (Tree*)str; +} + +Tree *construct_pointer( Program *prg, Tree *tree ) +{ + Kid *kid = prg->kidPool.allocate(); + kid->tree = tree; + kid->next = prg->heap; + prg->heap = kid; + + Pointer *pointer = (Pointer*) prg->treePool.allocate(); + pointer->id = LEL_ID_PTR; + pointer->value = kid; + + return (Tree*)pointer; +} + +Tree *construct_term( Program *prg, Word id, Head *tokdata ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + + Tree *tree = prg->treePool.allocate(); + tree->id = id; + tree->refs = 0; + tree->tokdata = tokdata; + + int objectLength = lelInfo[tree->id].objectLength; + tree->child = alloc_attrs( prg, objectLength ); + + return tree; +} + +Kid *construct_replacement_kid( Tree **bindings, Program *prg, Kid *prev, int pat ); + +Kid *construct_ignore_list( Program *prg, long pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + long ignore = nodes[pat].ignore; + + Kid *first = 0, *last = 0; + while ( ignore >= 0 ) { + Head *ignoreData = string_alloc_const( prg, nodes[ignore].data, nodes[ignore].length ); + + Tree *ignTree = prg->treePool.allocate(); + ignTree->refs = 1; + ignTree->id = nodes[ignore].id; + ignTree->tokdata = ignoreData; + + Kid *ignKid = prg->kidPool.allocate(); + ignKid->tree = ignTree; + ignKid->next = 0; + + if ( last == 0 ) + first = ignKid; + else + last->next = ignKid; + + ignore = nodes[ignore].next; + last = ignKid; + } + + return first; +} + +/* Returns an uprefed tree. Saves us having to downref and bindings to zero to + * return a zero-ref tree. 
*/ +Tree *construct_replacement_tree( Tree **bindings, Program *prg, int pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + LangElInfo *lelInfo = prg->rtd->lelInfo; + Tree *tree = 0; + + if ( nodes[pat].bindId > 0 ) { + /* All bindings have been uprefed. */ + tree = bindings[nodes[pat].bindId]; + + long ignore = nodes[pat].ignore; + if ( ignore >= 0 ) { + tree = split_tree( prg, tree ); + tree->child = construct_ignore_list( prg, pat ); + } + } + else { + tree = prg->treePool.allocate(); + tree->id = nodes[pat].id; + tree->refs = 1; + tree->tokdata = nodes[pat].length == 0 ? 0 : + string_alloc_const( prg, + nodes[pat].data, nodes[pat].length ); + + int objectLength = lelInfo[tree->id].objectLength; + + Kid *attrs = alloc_attrs( prg, objectLength ); + Kid *ignore = construct_ignore_list( prg, pat ); + Kid *child = construct_replacement_kid( bindings, prg, + 0, nodes[pat].child ); + + tree->child = kid_list_concat( attrs, + kid_list_concat( ignore, child ) ); + } + + return tree; +} + +Kid *construct_replacement_kid( Tree **bindings, Program *prg, Kid *prev, int pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + Kid *kid = 0; + + if ( pat != -1 ) { + kid = prg->kidPool.allocate(); + kid->tree = construct_replacement_tree( bindings, prg, pat ); + + /* Recurse down next. 
*/ + Kid *next = construct_replacement_kid( bindings, prg, + kid, nodes[pat].next ); + + kid->next = next; + } + + return kid; +} + +bool test_false( Program *prg, Tree *tree ) +{ + bool flse = ( + tree == 0 || + tree == prg->falseVal || + tree->id == LEL_ID_INT && ((Int*)tree)->value == 0 ); + return flse; +} + +/* + * Execution environment + */ + +Program::Program( bool ctxDepParsing, RuntimeData *rtd ) +: + ctxDepParsing(ctxDepParsing), + rtd(rtd), + global(0), + heap(0), + stdinVal(0), + stdoutVal(0), + stderrVal(0) +{ + Int *trueInt = (Int*) treePool.allocate(); + trueInt->id = LEL_ID_BOOL; + trueInt->refs = 1; + trueInt->value = 1; + + Int *falseInt = (Int*) treePool.allocate(); + falseInt->id = LEL_ID_BOOL; + falseInt->refs = 1; + falseInt->value = 0; + + trueVal = (Tree*)trueInt; + falseVal = (Tree*)falseInt; +} + +void Program::clearGlobal() +{ + /* Downref all the fields in the global object. */ + for ( int g = 0; g < rtd->globalSize; g++ ) { + //assert( get_attr( global, g )->refs == 1 ); + tree_downref( this, get_attr( global, g ) ); + } + + /* Free the global object. */ + if ( rtd->globalSize > 0 ) + free_attrs( this, global->child ); + treePool.free( global ); +} + +void Program::freshGlobal() +{ + if ( global != 0 ) + clearGlobal(); + + Tree *tree = treePool.allocate(); + tree->child = alloc_attrs( this, rtd->globalSize ); + tree->refs = 1; + global = tree; +} + +void Program::clear() +{ + #ifdef COLM_LOG_BYTECODE + cerr << "clearing the prg" << endl; + #endif + + clearGlobal(); + + /* Clear the heap. 
*/ + Kid *a = heap; + while ( a != 0 ) { + Kid *next = a->next; + tree_downref( this, a->tree ); + kidPool.free( a ); + a = next; + } + + //assert( trueVal->refs == 1 ); + //assert( falseVal->refs == 1 ); + tree_downref( this, trueVal ); + tree_downref( this, falseVal ); + + tree_downref( this, (Tree*)stdinVal ); + tree_downref( this, (Tree*)stdoutVal ); + tree_downref( this, (Tree*)stderrVal ); + + long kidLost = kidPool.numlost(); + if ( kidLost ) + cerr << "warning lost kids: " << kidLost << endl; + + long treeLost = treePool.numlost(); + if ( treeLost ) + cerr << "warning lost trees: " << treeLost << endl; + + long algLost = algPool.numlost(); + if ( algLost ) + cerr << "warning lost algs: " << algLost << endl; + + long listLost = listElPool.numlost(); + if ( listLost ) + cerr << "warning lost listEls: " << listLost << endl; + + long mapLost = mapElPool.numlost(); + if ( mapLost ) + cerr << "warning lost mapEls: " << mapLost << endl; + + kidPool.clear(); + treePool.clear(); + algPool.clear(); + listElPool.clear(); + mapElPool.clear(); + + //reverseCode.empty(); + + memset( vm_stack, 0, sizeof(Tree*) * VM_STACK_SIZE); +} + +void Program::run() +{ + assert( sizeof(Int) <= sizeof(Tree) ); + assert( sizeof(Str) <= sizeof(Tree) ); + assert( sizeof(Pointer) <= sizeof(Tree) ); + assert( sizeof(Map) <= sizeof(MapEl) ); + assert( sizeof(List) <= sizeof(MapEl) ); + assert( sizeof(Stream) <= sizeof(MapEl) ); + + //vm_stack = new Tree*[VM_STACK_SIZE]; + vm_stack = (Tree**)mmap( 0, sizeof(Tree*)*VM_STACK_SIZE, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0 ); + Tree **root = &vm_stack[VM_STACK_SIZE]; + + if ( rtd->rootCodeLen > 0 ) { + CodeVect reverseCode; + CodeVect allReverseCode; + Execution execution( this, reverseCode, 0, rtd->rootCode, 0, 0 ); + execution.execute( root ); + + /* Pull out the reverse code and free it. 
*/ + #ifdef COLM_LOG_BYTECODE + cerr << "freeing the root reverse code" << endl; + #endif + + bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); + if ( hasrcode ) + rcode_downref( root, this, allReverseCode.data ); + } +} + +Execution::Execution( Program *prg, CodeVect &reverseCode, + PdaRun *parser, Code *code, Tree *lhs, Head *matchText ) +: + prg(prg), + parser(parser), + code(code), + frame(0), iframe(0), + lhs(lhs), + matchText(matchText), + reject(false), + reverseCode(reverseCode), + rcodeUnitLen(0) +{ + if ( lhs != 0 ) { + assert( lhs->refs == 1 ); + } +} + +void rcode_downref_all( Tree **stack_root, Program *prg, CodeVect *rev ) +{ + while ( rev->length() > 0 ) { + /* Read the length */ + Code *prcode = rev->data + rev->length() - 4; + Word len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = rev->length() - len - 4; + prcode = rev->data + start; + + /* Execute it. */ + rcode_downref( stack_root, prg, prcode ); + + /* Backup over it. 
*/ + rev->tabLen -= len + 4; + } +} + +void rcode_downref( Tree **stack_root, Program *prg, Code *instr ) +{ +again: + switch ( *instr++ ) { + case IN_PARSE_BKT: { + Half parserId; + Tree *stream, *tree; + Word wrev; + read_half( parserId ); + read_tree( stream ); + read_tree( tree ); + read_word( wrev ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PARSE_BKT " << parserId << endl; + #endif + + parsed_downref( stack_root, prg, tree ); + rcode_downref_all( stack_root, prg, (CodeVect*)wrev ); + tree_downref( prg, stream ); + tree_downref( prg, tree ); + break; + } + case IN_STREAM_PULL_BKT: { + Tree *stream, *str; + read_tree( stream ); + read_tree( str ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PULL_BKT" << endl; + #endif + + tree_downref( prg, stream ); + tree_downref( prg, str ); + break; + } + case IN_STREAM_PUSH_BKT: { + Tree *stream; + Word len; + read_tree( stream ); + read_word( len ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PUSH_BKT" << endl; + #endif + + // FIXME: Implement + break; + } + case IN_LOAD_GLOBAL_BKT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_GLOBAL_BKT" << endl; + #endif + break; + } + case IN_GET_FIELD_BKT: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_FIELD_BKT " << field << endl; + #endif + break; + } + case IN_SET_FIELD_BKT: { + short field; + Tree *val; + read_half( field ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_FIELD_BKT " << field << endl; + #endif + + tree_downref( prg, val ); + break; + } + case IN_PTR_DEREF_BKT: { + Tree *ptr; + read_tree( ptr ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PTR_DEREF_BKT" << endl; + #endif + + tree_downref( prg, ptr ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + Word oldval; + read_word( oldval ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_TOKEN_DATA_BKT " << endl; + #endif + + Head *head = (Head*)oldval; + string_free( prg, head ); + break; + } + case IN_LIST_APPEND_BKT: { + #ifdef COLM_LOG_BYTECODE + 
cerr << "IN_LIST_APPEND_BKT" << endl; + #endif + break; + } + case IN_LIST_REMOVE_END_BKT: { + Tree *val; + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_REMOVE_END_BKT" << endl; + #endif + + tree_downref( prg, val ); + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LIST_MEM_BKT " << field << endl; + #endif + break; + } + case IN_SET_LIST_MEM_BKT: { + Half field; + Tree *val; + read_half( field ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LIST_MEM_BKT " << field << endl; + #endif + + tree_downref( prg, val ); + break; + } + case IN_MAP_INSERT_BKT: { + uchar inserted; + Tree *key; + read_byte( inserted ); + read_tree( key ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_INSERT_BKT" << endl; + #endif + + tree_downref( prg, key ); + break; + } + case IN_MAP_STORE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_STORE_BKT" << endl; + #endif + + tree_downref( prg, key ); + tree_downref( prg, val ); + break; + } + case IN_MAP_REMOVE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_REMOVE_BKT" << endl; + #endif + + tree_downref( prg, key ); + tree_downref( prg, val ); + break; + } + case IN_STOP: { + return; + } + default: { + cerr << "UNKNOWN INSTRUCTION: " << (ulong)instr[-1] << + " -- reverse code downref" << endl; + exit(1); + break; + } + } + goto again; +} + +void Execution::execute( Tree **root ) +{ + Tree **sp = root; + + /* If we have a lhs push it to the stack. */ + bool haveLhs = lhs != 0; + if ( haveLhs ) + push( lhs ); + + /* Execution loop. */ + execute( sp, code ); + + /* Take the lhs off the stack. */ + if ( haveLhs ) + lhs = (Tree*) pop(); + + assert( sp == root ); +} + +bool makeReverseCode( CodeVect &all, CodeVect &reverseCode ) +{ + /* Do we need to revert the left hand side? 
*/ + + /* Check if there was anything generated. */ + if ( reverseCode.length() == 0 ) + return false; + + long prevAllLength = all.length(); + + /* Go backwards, group by group, through the reverse code. Push each group + * to the global reverse code stack. */ + Code *p = reverseCode.data + reverseCode.length(); + while ( p != reverseCode.data ) { + p--; + long len = *p; + p = p - len; + all.append( p, len ); + } + + /* Stop, then place a total length in the global stack. */ + all.append( IN_STOP ); + long length = all.length() - prevAllLength; + all.appendWord( length ); + + /* Clear the revere code buffer. */ + reverseCode.tabLen = 0; + + return true; +} + +void Execution::rexecute( Tree **root, Code *rcode, CodeVect &allRev ) +{ + /* Read the length */ + Code *prcode = allRev.data + allRev.length() - 4; + Word len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = allRev.length() - len - 4; + prcode = allRev.data + start; + + /* Execute it. */ + Tree **sp = root; + execute( sp, prcode ); + assert( sp == root ); + + /* Backup over it. 
*/ + allRev.tabLen -= len + 4; +} + +void Execution::execute( Tree **&sp, Code *instr ) +{ +again: + switch ( *instr++ ) { + case IN_LOAD_NIL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_NIL" << endl; + #endif + + push( 0 ); + break; + } + case IN_LOAD_TRUE: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_TRUE" << endl; + #endif + + tree_upref( prg->trueVal ); + push( prg->trueVal ); + break; + } + case IN_LOAD_FALSE: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_FALSE" << endl; + #endif + + tree_upref( prg->falseVal ); + push( prg->falseVal ); + break; + } + case IN_LOAD_INT: { + Word i; + read_word( i ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_INT " << i << endl; + #endif + + Tree *tree = construct_integer( prg, i ); + tree_upref( tree ); + push( tree ); + break; + } + case IN_LOAD_STR: { + Word offset; + read_word( offset ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_STR " << offset << endl; + #endif + + Head *lit = make_literal( prg, offset ); + Tree *tree = construct_string( prg, lit ); + tree_upref( tree ); + push( tree ); + break; + } + case IN_PRINT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PRINT" << endl; + #endif + + Tree *tree = pop(); + print_tree( sp, prg, tree ); + tree_downref( prg, tree ); + break; + } + case IN_PRINT_XML: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PRINT_XML" << endl; + #endif + + Tree *tree = pop(); + xml_print_tree( sp, prg, tree ); + tree_downref( prg, tree ); + break; + } + case IN_LOAD_GLOBAL_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_GLOBAL_R" << endl; + #endif + + tree_upref( prg->global ); + push( prg->global ); + break; + } + case IN_LOAD_GLOBAL_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_GLOBAL_WV" << endl; + #endif + + tree_upref( prg->global ); + push( prg->global ); + + /* Set up the reverse instruction. 
*/ + reverseCode.append( IN_LOAD_GLOBAL_BKT ); + rcodeUnitLen = 1; + break; + } + case IN_LOAD_GLOBAL_BKT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LOAD_GLOBAL_BKT" << endl; + #endif + + tree_upref( prg->global ); + push( prg->global ); + break; + } + case IN_INIT_RHS_EL: { + Half position; + short field; + read_half( position ); + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_INIT_RHS_EL " << position << " " << field << endl; + #endif + + Tree *val = get_rhs_el( prg, lhs, position ); + tree_upref( val ); + local(field) = val; + break; + } + case IN_UITER_ADVANCE: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_ADVANCE " << field << endl; + #endif + + /* Get the iterator. */ + UserIter *uiter = (UserIter*) local(field); + + long stackSize = uiter->stackRoot - ptop(); + assert( uiter->stackSize == stackSize ); + + /* Fix the return instruction pointer. */ + uiter->stackRoot[-IFR_AA + IFR_RIN] = (SW)instr; + + instr = uiter->resume; + frame = uiter->frame; + iframe = &uiter->stackRoot[-IFR_AA]; + break; + } + case IN_UITER_GET_CUR_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_GET_CUR_R " << field << endl; + #endif + + UserIter *uiter = (UserIter*) local(field); + Tree *val = uiter->ref.kid->tree; + tree_upref( val ); + push( val ); + break; + } + case IN_UITER_GET_CUR_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_GET_CUR_WC " << field << endl; + #endif + + UserIter *uiter = (UserIter*) local(field); + split_ref( sp, prg, &uiter->ref ); + Tree *split = uiter->ref.kid->tree; + tree_upref( split ); + push( split ); + break; + } + case IN_UITER_SET_CUR_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_SET_CUR_WC " << field << endl; + #endif + + Tree *t = pop(); + UserIter *uiter = (UserIter*) local(field); + split_ref( sp, prg, &uiter->ref ); + Tree *old = uiter->ref.kid->tree; + 
uiter->ref.kid->tree = t; + tree_downref( prg, old ); + break; + } + case IN_GET_LOCAL_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LOCAL_R " << field << endl; + #endif + + Tree *val = local(field); + tree_upref( val ); + push( val ); + break; + } + case IN_GET_LOCAL_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LOCAL_WC " << field << endl; + #endif + + Tree *split = get_local_split( prg, frame, field ); + tree_upref( split ); + push( split ); + break; + } + case IN_SET_LOCAL_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LOCAL_WC " << field << endl; + #endif + + Tree *val = pop(); + tree_downref( prg, local(field) ); + set_local( frame, field, val ); + break; + } + case IN_SAVE_RET: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SAVE_RET " << endl; + #endif + + Tree *val = pop(); + local(FR_RV) = val; + break; + } + case IN_GET_LOCAL_REF_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LOCAL_REF_R " << field << endl; + #endif + + Ref *ref = (Ref*) plocal(field); + Tree *val = ref->kid->tree; + tree_upref( val ); + push( val ); + break; + } + case IN_GET_LOCAL_REF_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LOCAL_REF_WC " << field << endl; + #endif + + Ref *ref = (Ref*) plocal(field); + split_ref( sp, prg, ref ); + Tree *val = ref->kid->tree; + tree_upref( val ); + push( val ); + break; + } + case IN_SET_LOCAL_REF_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LOCAL_REF_WC " << field << endl; + #endif + + Tree *val = pop(); + Ref *ref = (Ref*) plocal(field); + split_ref( sp, prg, ref ); + set_ref_value( ref, val ); + break; + } + case IN_GET_FIELD_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_FIELD_R " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, 
obj ); + + Tree *val = get_field( obj, field ); + tree_upref( val ); + push( val ); + break; + } + case IN_GET_FIELD_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_FIELD_WC " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *split = get_field_split( prg, obj, field ); + tree_upref( split ); + push( split ); + break; + } + case IN_GET_FIELD_WV: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_FIELD_WV " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *split = get_field_split( prg, obj, field ); + tree_upref( split ); + push( split ); + + /* Set up the reverse instruction. */ + reverseCode.append( IN_GET_FIELD_BKT ); + reverseCode.appendHalf( field ); + rcodeUnitLen += 3; + break; + } + case IN_GET_FIELD_BKT: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_FIELD_BKT " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *split = get_field_split( prg, obj, field ); + tree_upref( split ); + push( split ); + break; + } + case IN_SET_FIELD_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_FIELD_WC " << field << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + tree_downref( prg, obj ); + + /* Downref the old value. */ + Tree *prev = get_field( obj, field ); + tree_downref( prg, prev ); + + set_field( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_WV: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_FIELD_WV " << field << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + tree_downref( prg, obj ); + + /* Save the old value, then set the field. */ + Tree *prev = get_field( obj, field ); + set_field( prg, obj, field, val ); + + /* Set up the reverse instruction. 
*/ + reverseCode.append( IN_SET_FIELD_BKT ); + reverseCode.appendHalf( field ); + reverseCode.appendWord( (Word)prev ); + rcodeUnitLen += 7; + reverseCode.append( rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_SET_FIELD_BKT: { + short field; + Tree *val; + read_half( field ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_FIELD_BKT " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + /* Downref the old value. */ + Tree *prev = get_field( obj, field ); + tree_downref( prg, prev ); + + set_field( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_LEAVE_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_FIELD_LEAVE_WC " << field << endl; + #endif + + /* Note that we don't downref the object here because we are + * leaving it on the stack. */ + Tree *obj = pop(); + Tree *val = pop(); + + /* Downref the old value. */ + Tree *prev = get_field( obj, field ); + tree_downref( prg, prev ); + + /* Set the field. */ + set_field( prg, obj, field, val ); + + /* Leave the object on the top of the stack. 
*/ + push( obj ); + break; + } + case IN_POP: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_POP" << endl; + #endif + + Tree *val = pop(); + tree_downref( prg, val ); + break; + } + case IN_STR_ATOI: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STR_ATOI" << endl; + #endif + + Str *str = (Str*)pop(); + Word res = str_atoi( str->value ); + Tree *integer = construct_integer( prg, res ); + tree_upref( integer ); + push( integer ); + tree_downref( prg, (Tree*)str ); + break; + } + case IN_INT_TO_STR: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_INT_TO_STR" << endl; + #endif + + Int *i = (Int*)pop(); + Head *res = int_to_str( prg, i->value ); + Tree *str = construct_string( prg, res ); + tree_upref( str ); + push( str ); + tree_downref( prg, (Tree*) i ); + break; + } + case IN_CONCAT_STR: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_CONCAT_STR" << endl; + #endif + + Str *s2 = (Str*)pop(); + Str *s1 = (Str*)pop(); + Head *res = concat_str( s1->value, s2->value ); + Tree *str = construct_string( prg, res ); + tree_upref( str ); + tree_downref( prg, (Tree*)s1 ); + tree_downref( prg, (Tree*)s2 ); + push( str ); + break; + } + case IN_STR_UORD8: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STR_UORD8" << endl; + #endif + + Str *str = (Str*)pop(); + Word res = str_uord8( str->value ); + Tree *tree = construct_integer( prg, res ); + tree_upref( tree ); + push( tree ); + tree_downref( prg, (Tree*)str ); + break; + } + case IN_STR_UORD16: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STR_UORD16" << endl; + #endif + + Str *str = (Str*)pop(); + Word res = str_uord16( str->value ); + Tree *tree = construct_integer( prg, res ); + tree_upref( tree ); + push( tree ); + tree_downref( prg, (Tree*)str ); + break; + } + + case IN_STR_LENGTH: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STR_LENGTH" << endl; + #endif + + Str *str = (Str*)pop(); + long len = string_length( str->value ); + Tree *res = construct_integer( prg, len ); + tree_upref( res ); + push( res ); + tree_downref( prg, (Tree*)str ); + break; 
+ } + case IN_JMP_FALSE: { + short dist; + read_half( dist ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_JMP_FALSE " << dist << endl; + #endif + + Tree *tree = pop(); + if ( test_false( prg, tree ) ) + instr += dist; + tree_downref( prg, tree ); + break; + } + case IN_JMP_TRUE: { + short dist; + read_half( dist ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_JMP_TRUE " << dist << endl; + #endif + + Tree *tree = pop(); + if ( !test_false( prg, tree ) ) + instr += dist; + tree_downref( prg, tree ); + break; + } + case IN_JMP: { + short dist; + read_half( dist ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_JMP " << dist << endl; + #endif + + instr += dist; + break; + } + case IN_REJECT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_REJECT" << endl; + #endif + reject = true; + break; + } + + /* + * Binary comparison operators. + */ + case IN_TST_EQL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_EQL" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r ? prg->falseVal : prg->trueVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_NOT_EQL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_NOT_EQL" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_LESS: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_LESS" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r < 0 ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_LESS_EQL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_LESS_EQL" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r <= 0 ? 
prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + /* Must not fall through into IN_TST_GRTR, which would pop two more operands. */ + break; + } + case IN_TST_GRTR: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_GRTR" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r > 0 ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_GRTR_EQL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_GRTR_EQL" << endl; + #endif + + Tree *o2 = (Tree*)pop(); + Tree *o1 = (Tree*)pop(); + long r = cmp_tree( o1, o2 ); + Tree *val = r >= 0 ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_LOGICAL_AND: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_LOGICAL_AND" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long v2 = !test_false( prg, o2 ); + long v1 = !test_false( prg, o1 ); + Word r = v1 && v2; + Tree *val = r ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_TST_LOGICAL_OR: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TST_LOGICAL_OR" << endl; + #endif + + Tree *o2 = pop(); + Tree *o1 = pop(); + long v2 = !test_false( prg, o2 ); + long v1 = !test_false( prg, o1 ); + Word r = v1 || v2; + Tree *val = r ? prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, o1 ); + tree_downref( prg, o2 ); + break; + } + case IN_NOT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_NOT" << endl; + #endif + + Tree *tree = (Tree*)pop(); + long r = test_false( prg, tree ); + Tree *val = r ? 
prg->trueVal : prg->falseVal; + tree_upref( val ); + push( val ); + tree_downref( prg, tree ); + break; + } + + case IN_ADD_INT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_ADD_INT" << endl; + #endif + + Int *o2 = (Int*)pop(); + Int *o1 = (Int*)pop(); + long r = o1->value + o2->value; + Tree *tree = construct_integer( prg, r ); + tree_upref( tree ); + push( tree ); + tree_downref( prg, (Tree*)o1 ); + tree_downref( prg, (Tree*)o2 ); + break; + } + case IN_MULT_INT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MULT_INT" << endl; + #endif + + Int *o2 = (Int*)pop(); + Int *o1 = (Int*)pop(); + long r = o1->value * o2->value; + Tree *tree = construct_integer( prg, r ); + tree_upref( tree ); + push( tree ); + tree_downref( prg, (Tree*)o1 ); + tree_downref( prg, (Tree*)o2 ); + break; + } + case IN_SUB_INT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SUB_INT" << endl; + #endif + + Int *o2 = (Int*)pop(); + Int *o1 = (Int*)pop(); + long r = o1->value - o2->value; + Tree *tree = construct_integer( prg, r ); + tree_upref( tree ); + push( tree ); + tree_downref( prg, (Tree*)o1 ); + tree_downref( prg, (Tree*)o2 ); + break; + } + case IN_DUP_TOP: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_DUP_TOP" << endl; + #endif + + Tree *val = top(); + tree_upref( val ); + push( val ); + break; + } + case IN_TRITER_FROM_REF: { + short field; + Half searchTypeId; + read_half( field ); + read_half( searchTypeId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_FROM_REF " << field << " " << searchTypeId << endl; + #endif + + Ref rootRef; + rootRef.kid = (Kid*)pop(); + rootRef.next = (Ref*)pop(); + void *mem = plocal(field); + new(mem) TreeIter( rootRef, searchTypeId, ptop() ); + break; + } + case IN_TRITER_DESTROY: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_DESTROY " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + tree_iter_destroy( sp, iter ); + break; + } + case IN_TREE_SEARCH: { + Word id; + read_word( id ); + + #ifdef 
COLM_LOG_BYTECODE + cerr << "IN_TREE_SEARCH " << id << endl; + #endif + + Tree *tree = pop(); + Tree *res = tree_search( tree, id ); + tree_upref( res ); + push( res ); + tree_downref( prg, tree ); + break; + } + case IN_TRITER_ADVANCE: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_ADVANCE " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + Tree *res = tree_iter_advance( prg, sp, iter ); + tree_upref( res ); + push( res ); + break; + } + case IN_TRITER_NEXT_CHILD: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_NEXT_CHILD " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + Tree *res = tree_iter_next_child( prg, sp, iter ); + tree_upref( res ); + push( res ); + break; + } + case IN_TRITER_PREV_CHILD: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_PREV_CHILD " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + Tree *res = tree_iter_prev_child( prg, sp, iter ); + tree_upref( res ); + push( res ); + break; + } + case IN_TRITER_GET_CUR_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_GET_CUR_R " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + Tree *tree = tree_iter_deref_cur( iter ); + tree_upref( tree ); + push( tree ); + break; + } + case IN_TRITER_GET_CUR_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_GET_CUR_WC " << field << endl; + #endif + + TreeIter *iter = (TreeIter*) plocal(field); + split_iter_cur( sp, prg, iter ); + Tree *tree = tree_iter_deref_cur( iter ); + tree_upref( tree ); + push( tree ); + break; + } + case IN_TRITER_SET_CUR_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_SET_CUR_WC " << field << endl; + #endif + + Tree *tree = pop(); + TreeIter *iter = (TreeIter*) plocal(field); + 
split_iter_cur( sp, prg, iter ); + Tree *old = tree_iter_deref_cur( iter ); + set_triter_cur( iter, tree ); + tree_downref( prg, old ); + break; + } + case IN_MATCH: { + Half patternId; + read_half( patternId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MATCH " << patternId << endl; + #endif + + Tree *tree = pop(); + + /* Run the match, push the result. */ + int rootNode = prg->rtd->patReplInfo[patternId].offset; + + /* Bindings are indexed starting at 1. Zero bindId to represent no + * binding. We make a space for it here rather than do math at + * access them. */ + long numBindings = prg->rtd->patReplInfo[patternId].numBindings; + Tree *bindings[1+numBindings]; + memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); + + Kid kid; + kid.tree = tree; + kid.next = 0; + bool matched = match_pattern( bindings, prg, rootNode, &kid, false ); + + if ( !matched ) + memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); + else { + for ( int b = 1; b <= numBindings; b++ ) + assert( bindings[b] != 0 ); + } + + #ifdef COLM_LOG_MATCH + cerr << "match result: " << matched << endl; + #endif + + Tree *result = matched ? tree : 0; + tree_upref( result ); + push( result ? tree : 0 ); + for ( int b = 1; b <= numBindings; b++ ) { + tree_upref( bindings[b] ); + push( bindings[b] ); + } + + tree_downref( prg, tree ); + break; + } + case IN_PARSE: { + Half parserId, stopId; + read_half( parserId ); + read_half( stopId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PARSE " << parserId << " " << stopId << endl; + #endif + + /* Comes back from parse upreffed. */ + CodeVect *cv; + Tree *stream = pop(); + Tree *res = parse( sp, prg, (Stream*)stream, parserId, stopId, cv ); + push( res ); + + /* Single unit. 
*/ + tree_upref( res ); + reverseCode.append( IN_PARSE_BKT ); + reverseCode.appendHalf( parserId ); + reverseCode.appendWord( (Word) stream ); + reverseCode.appendWord( (Word) res ); + reverseCode.appendWord( (Word) cv ); + reverseCode.append( 15 ); + break; + } + case IN_STREAM_PULL: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PULL" << endl; + #endif + Tree *len = pop(); + Tree *stream = pop(); + Tree *string = stream_pull( prg, (Stream*)stream, len ); + tree_upref( string ); + push( string ); + + /* Single unit. */ + tree_upref( string ); + reverseCode.append( IN_STREAM_PULL_BKT ); + reverseCode.appendWord( (Word) stream ); + reverseCode.appendWord( (Word) string ); + reverseCode.append( 9 ); + + tree_downref( prg, len ); + break; + } + case IN_STREAM_PULL_BKT: { + Tree *stream, *string; + read_tree( stream ); + read_tree( string ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PULL_BKT" << endl; + #endif + + undo_pull( prg, (Stream*)stream, string ); + tree_downref( prg, stream ); + tree_downref( prg, string ); + break; + } + case IN_STREAM_PUSH: { + /* FIXME: Need to check the refcounting here. */ + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PUSH" << endl; + #endif + Tree *tree = pop(); + Tree *stream = pop(); + Word len = stream_push( sp, prg, + (Stream*)stream, tree ); + push( 0 ); + + /* Single unit. 
*/ + reverseCode.append( IN_STREAM_PUSH_BKT ); + reverseCode.appendWord( (Word)stream ); + reverseCode.appendWord( len ); + reverseCode.append( 9 ); + + tree_downref( prg, tree ); + break; + } + case IN_STREAM_PUSH_BKT: { + Tree *stream; + Word len; + read_tree( stream ); + read_word( len ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STREAM_PUSH_BKT" << endl; + #endif + + undo_stream_push( sp, prg, (Stream*)stream, len ); + break; + } + case IN_PARSE_BKT: { + Half parserId; + Tree *stream, *tree; + Word wrev; + read_half( parserId ); + read_tree( stream ); + read_tree( tree ); + read_word( wrev ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PARSE_BKT " << parserId << endl; + #endif + + undo_parse( sp, prg, (Stream*)stream, parserId, tree, (CodeVect*)wrev ); + tree_downref( prg, stream ); + break; + } + case IN_CONSTRUCT: { + Half patternId; + read_half( patternId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_CONSTRUCT " << patternId << endl; + #endif + + int rootNode = prg->rtd->patReplInfo[patternId].offset; + + /* Note that bindIds are indexed at one. Add one spot for them. */ + int numBindings = prg->rtd->patReplInfo[patternId].numBindings; + Tree *bindings[1+numBindings]; + + for ( int b = 1; b <= numBindings; b++ ) { + bindings[b] = pop(); + assert( bindings[b] != 0 ); + } + + Tree *replTree = 0; + PatReplNode *nodes = prg->rtd->patReplNodes; + LangElInfo *lelInfo = prg->rtd->lelInfo; + long genericId = lelInfo[nodes[rootNode].id].genericId; + if ( genericId > 0 ) { + replTree = create_generic( prg, genericId ); + tree_upref( replTree ); + } + else { + replTree = construct_replacement_tree( bindings, + prg, rootNode ); + } + + push( replTree ); + break; + } + case IN_CONSTRUCT_TERM: { + Half tokenId; + read_half( tokenId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_CONSTRUCT_TERM " << tokenId << endl; + #endif + + /* Pop the string we are constructing the token from. 
*/ + Str *str = (Str*)pop(); + Tree *res = construct_term( prg, tokenId, str->value ); + tree_upref( res ); + push( res ); + break; + } + case IN_MAKE_TOKEN: { + uchar nargs; + read_byte( nargs ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAKE_TOKEN " << (ulong) nargs << endl; + #endif + + Tree *result = make_token( sp, prg, parser, nargs ); + for ( long i = 0; i < nargs; i++ ) + tree_downref( prg, pop() ); + push( result ); + break; + } + case IN_MAKE_TREE: { + uchar nargs; + read_byte( nargs ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAKE_TREE " << (ulong) nargs << endl; + #endif + + Tree *result = make_tree( sp, prg, parser, nargs ); + for ( long i = 0; i < nargs; i++ ) + tree_downref( prg, pop() ); + push( result ); + break; + } + case IN_SEND: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SEND" << endl; + #endif + + Tree *tree = pop(); + send( sp, prg, parser, tree, false ); + push( 0 ); + break; + } + case IN_IGNORE: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_IGNORE" << endl; + #endif + + Tree *tree = pop(); + send( sp, prg, parser, tree, true ); + push( 0 ); + break; + } + case IN_TREE_NEW: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TREE_NEW " << endl; + #endif + + Tree *tree = pop(); + Tree *res = construct_pointer( prg, tree ); + tree_upref( res ); + push( res ); + break; + } + case IN_PTR_DEREF_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PTR_DEREF_R" << endl; + #endif + + Pointer *ptr = (Pointer*)pop(); + tree_downref( prg, (Tree*)ptr ); + + Tree *dval = get_ptr_val( ptr ); + tree_upref( dval ); + push( dval ); + break; + } + case IN_PTR_DEREF_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PTR_DEREF_WC" << endl; + #endif + + Pointer *ptr = (Pointer*)pop(); + tree_downref( prg, (Tree*)ptr ); + + Tree *dval = get_ptr_val_split( prg, ptr ); + tree_upref( dval ); + push( dval ); + break; + } + case IN_PTR_DEREF_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PTR_DEREF_WV" << endl; + #endif + + Pointer *ptr = (Pointer*)pop(); + /* Don't downref the pointer 
since it is going into the reverse + * instruction. */ + + Tree *dval = get_ptr_val_split( prg, ptr ); + tree_upref( dval ); + push( dval ); + + /* This is an initial global load. Need to reverse execute it. */ + reverseCode.append( IN_PTR_DEREF_BKT ); + reverseCode.appendWord( (Word) ptr ); + rcodeUnitLen = 5; + break; + } + case IN_PTR_DEREF_BKT: { + Word p; + read_word( p ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_PTR_DEREF_BKT" << endl; + #endif + + Pointer *ptr = (Pointer*)p; + + Tree *dval = get_ptr_val_split( prg, ptr ); + tree_upref( dval ); + push( dval ); + + tree_downref( prg, (Tree*)ptr ); + break; + } + case IN_REF_FROM_LOCAL: { + short int field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_REF_FROM_LOCAL " << field << endl; + #endif + + /* First push the null next pointer, then the kid pointer. */ + Tree **ptr = plocal(field); + push( 0 ); + push( (SW)ptr ); + break; + } + case IN_REF_FROM_REF: { + short int field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_REF_FROM_REF " << field << endl; + #endif + + Ref *ref = (Ref*)plocal(field); + push( (SW)ref ); + push( (SW)ref->kid ); + break; + } + case IN_TRITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_TRITER_REF_FROM_CUR " << field << endl; + #endif + + /* Push the next pointer first, then the kid. */ + TreeIter *iter = (TreeIter*) plocal(field); + push( (SW)&iter->ref ); + push( (SW)iter->ref.kid ); + break; + } + case IN_UITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_REF_FROM_CUR " << field << endl; + #endif + + /* Push the next pointer first, then the kid. 
*/ + UserIter *uiter = (UserIter*) local(field); + push( (SW)uiter->ref.next ); + push( (SW)uiter->ref.kid ); + break; + } + case IN_GET_TOKEN_DATA_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_TOKEN_DATA_R" << endl; + #endif + + Tree *tree = (Tree*) pop(); + Head *data = string_copy( prg, tree->tokdata ); + Tree *str = construct_string( prg, data ); + tree_upref( str ); + push( str ); + tree_downref( prg, tree ); + break; + } + case IN_SET_TOKEN_DATA_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_TOKEN_DATA_WC" << endl; + #endif + + Tree *tree = pop(); + Tree *val = pop(); + Head *head = string_copy( prg, ((Str*)val)->value ); + string_free( prg, tree->tokdata ); + tree->tokdata = head; + + tree_downref( prg, tree ); + tree_downref( prg, val ); + break; + } + case IN_SET_TOKEN_DATA_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_TOKEN_DATA_WV" << endl; + #endif + + Tree *tree = pop(); + Tree *val = pop(); + + Head *oldval = tree->tokdata; + Head *head = string_copy( prg, ((Str*)val)->value ); + tree->tokdata = head; + + /* Set up reverse code. The old token data is not freed here; it is + * stored as the argument of the reverse instruction, which puts it + * back on the tree. */ + reverseCode.append( IN_SET_TOKEN_DATA_BKT ); + reverseCode.appendWord( (Word)oldval ); + rcodeUnitLen += 5; + reverseCode.append( rcodeUnitLen ); + + tree_downref( prg, tree ); + tree_downref( prg, val ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_TOKEN_DATA_BKT " << endl; + #endif + + Word oldval; + read_word( oldval ); + + Tree *tree = pop(); + Head *head = (Head*)oldval; + string_free( prg, tree->tokdata ); + tree->tokdata = head; + tree_downref( prg, tree ); + break; + } + case IN_GET_TOKEN_POS_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_TOKEN_POS_R" << endl; + #endif + + Tree *tree = (Tree*) pop(); + Tree *integer = construct_integer( prg, 0 ); + tree_upref( integer ); + push( integer ); + tree_downref( prg, tree ); + + /* Requires a new implementation. 
*/ + assert( false ); + break; + } + case IN_GET_MATCH_LENGTH_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_MATCH_LENGTH_R" << endl; + #endif + Tree *integer = construct_integer( prg, string_length(matchText) ); + tree_upref( integer ); + push( integer ); + break; + } + case IN_GET_MATCH_TEXT_R: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_MATCH_TEXT_R" << endl; + #endif + Head *s = string_copy( prg, matchText ); + Tree *tree = construct_string( prg, s ); + tree_upref( tree ); + push( tree ); + break; + } + case IN_LIST_LENGTH: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_LENGTH" << endl; + #endif + + List *list = (List*) pop(); + long len = list_length( list ); + Tree *res = construct_integer( prg, len ); + tree_upref( res ); + push( res ); + /* Release the reference that was popped off the stack, as + * IN_STR_LENGTH and IN_MAP_LENGTH do for their operand. */ + tree_downref( prg, (Tree*)list ); + break; + } + case IN_LIST_APPEND_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_APPEND_WV" << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + + tree_downref( prg, obj ); + + list_append( prg, (List*)obj, val ); + tree_upref( prg->trueVal ); + push( prg->trueVal ); + + /* Set up reverse code. Needs no args. 
*/ + reverseCode.append( IN_LIST_APPEND_BKT ); + rcodeUnitLen += 1; + reverseCode.append( rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_LIST_APPEND_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_APPEND_WC" << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + + tree_downref( prg, obj ); + + list_append( prg, (List*)obj, val ); + tree_upref( prg->trueVal ); + push( prg->trueVal ); + break; + } + case IN_LIST_APPEND_BKT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_APPEND_BKT" << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *tree = list_remove_end( prg, (List*)obj ); + tree_downref( prg, tree ); + break; + } + case IN_LIST_REMOVE_END_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_REMOVE_END_WC" << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *end = list_remove_end( prg, (List*)obj ); + push( end ); + break; + } + case IN_LIST_REMOVE_END_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_REMOVE_END_WV" << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *end = list_remove_end( prg, (List*)obj ); + push( end ); + + /* Set up reverse. The result comes off the list downrefed. + * Need it up referenced for the reverse code too. 
*/ + tree_upref( end ); + reverseCode.append( IN_LIST_REMOVE_END_BKT ); + reverseCode.appendWord( (Word)end ); + rcodeUnitLen += 5; + reverseCode.append( rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_LIST_REMOVE_END_BKT: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_LIST_REMOVE_END_BKT" << endl; + #endif + + Tree *val; + read_tree( val ); + + Tree *obj = pop(); + tree_downref( prg, obj ); + + list_append( prg, (List*)obj, val ); + break; + } + case IN_GET_LIST_MEM_R: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LIST_MEM_R " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *val = get_list_mem( list(obj), field ); + tree_upref( val ); + push( val ); + break; + } + case IN_GET_LIST_MEM_WC: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LIST_MEM_WC " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *val = get_list_mem_split( prg, list(obj), field ); + tree_upref( val ); + push( val ); + break; + } + case IN_GET_LIST_MEM_WV: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LIST_MEM_WV " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *val = get_list_mem_split( prg, list(obj), field ); + tree_upref( val ); + push( val ); + + /* Set up the reverse instruction. 
*/ + reverseCode.append( IN_GET_LIST_MEM_BKT ); + reverseCode.appendHalf( field ); + rcodeUnitLen += 3; + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_LIST_MEM_BKT " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *res = get_list_mem_split( prg, list(obj), field ); + tree_upref( res ); + push( res ); + break; + } + case IN_SET_LIST_MEM_WC: { + Half field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LIST_MEM_WC " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *val = pop(); + Tree *existing = set_list_mem( list(obj), field, val ); + tree_downref( prg, existing ); + break; + } + case IN_SET_LIST_MEM_WV: { + Half field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LIST_MEM_WV " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *val = pop(); + Tree *existing = set_list_mem( list(obj), field, val ); + + /* Set up the reverse instruction. */ + reverseCode.append( IN_SET_LIST_MEM_BKT ); + reverseCode.appendHalf( field ); + reverseCode.appendWord( (Word)existing ); + rcodeUnitLen += 7; + reverseCode.append( rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_SET_LIST_MEM_BKT: { + Half field; + Tree *val; + read_half( field ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_SET_LIST_MEM_BKT " << field << endl; + #endif + + Tree *obj = pop(); + tree_downref( prg, obj ); + + Tree *undid = set_list_mem( list(obj), field, val ); + tree_downref( prg, undid ); + break; + } + case IN_MAP_INSERT_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_INSERT_WV" << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + Tree *key = pop(); + + tree_downref( prg, obj ); + + bool inserted = map_insert( prg, (Map*)obj, key, val ); + Tree *result = inserted ? 
prg->trueVal : prg->falseVal; + tree_upref( result ); + push( result ); + + /* Set up the reverse instruction. If the insert fails still need + * to pop the loaded map object. Just use the reverse instruction + * since it's nice to see it in the logs. */ + + /* Need to upref key for storage in reverse code. */ + tree_upref( key ); + reverseCode.append( IN_MAP_INSERT_BKT ); + reverseCode.append( inserted ); + reverseCode.appendWord( (Word)key ); + rcodeUnitLen += 6; + reverseCode.append( rcodeUnitLen ); + + if ( ! inserted ) { + tree_downref( prg, key ); + tree_downref( prg, val ); + } + break; + } + case IN_MAP_INSERT_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_INSERT_WC" << endl; + #endif + + Tree *obj = pop(); + Tree *val = pop(); + Tree *key = pop(); + + tree_downref( prg, obj ); + + bool inserted = map_insert( prg, (Map*)obj, key, val ); + Tree *result = inserted ? prg->trueVal : prg->falseVal; + tree_upref( result ); + push( result ); + + if ( ! inserted ) { + tree_downref( prg, key ); + tree_downref( prg, val ); + } + break; + } + case IN_MAP_INSERT_BKT: { + uchar inserted; + Tree *key; + read_byte( inserted ); + read_tree( key ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_INSERT_BKT" << endl; + #endif + + Tree *obj = pop(); + if ( inserted ) { + Tree *val = map_uninsert( prg, (Map*)obj, key ); + tree_downref( prg, key ); + tree_downref( prg, val ); + } + + tree_downref( prg, obj ); + tree_downref( prg, key ); + break; + } + case IN_MAP_STORE_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_STORE_WC" << endl; + #endif + + Tree *obj = pop(); + Tree *element = pop(); + Tree *key = pop(); + + Tree *existing = map_store( prg, (Map*)obj, key, element ); + Tree *result = existing == 0 ? 
prg->trueVal : prg->falseVal; + tree_upref( result ); + push( result ); + + tree_downref( prg, obj ); + if ( existing != 0 ) { + tree_downref( prg, key ); + tree_downref( prg, existing ); + } + break; + } + case IN_MAP_STORE_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_STORE_WV" << endl; + #endif + + Tree *obj = pop(); + Tree *element = pop(); + Tree *key = pop(); + + Tree *existing = map_store( prg, (Map*)obj, key, element ); + Tree *result = existing == 0 ? prg->trueVal : prg->falseVal; + tree_upref( result ); + push( result ); + + /* Set up the reverse instruction. */ + tree_upref( key ); + tree_upref( existing ); + reverseCode.append( IN_MAP_STORE_BKT ); + reverseCode.appendWord( (Word)key ); + reverseCode.appendWord( (Word)existing ); + rcodeUnitLen += 9; + reverseCode.append( rcodeUnitLen ); + /* FLUSH */ + + tree_downref( prg, obj ); + if ( existing != 0 ) { + tree_downref( prg, key ); + tree_downref( prg, existing ); + } + break; + } + case IN_MAP_STORE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_STORE_BKT" << endl; + #endif + + Tree *obj = pop(); + Tree *stored = map_unstore( prg, (Map*)obj, key, val ); + + tree_downref( prg, stored ); + if ( val == 0 ) + tree_downref( prg, key ); + + tree_downref( prg, obj ); + tree_downref( prg, key ); + break; + } + case IN_MAP_REMOVE_WC: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_REMOVE_WC" << endl; + #endif + + Tree *obj = pop(); + Tree *key = pop(); + TreePair pair = map_remove( prg, (Map*)obj, key ); + + push( pair.val ); + + tree_downref( prg, obj ); + tree_downref( prg, key ); + tree_downref( prg, pair.key ); + break; + } + case IN_MAP_REMOVE_WV: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_REMOVE_WV" << endl; + #endif + + Tree *obj = pop(); + Tree *key = pop(); + TreePair pair = map_remove( prg, (Map*)obj, key ); + + tree_upref( pair.val ); + push( pair.val ); + + /* Reverse instruction. 
*/ + reverseCode.append( IN_MAP_REMOVE_BKT ); + reverseCode.appendWord( (Word)pair.key ); + reverseCode.appendWord( (Word)pair.val ); + rcodeUnitLen += 9; + reverseCode.append( rcodeUnitLen ); + + tree_downref( prg, obj ); + tree_downref( prg, key ); + break; + } + case IN_MAP_REMOVE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_REMOVE_BKT" << endl; + #endif + + /* Either both or neither. */ + assert( ( key == 0 ) xor ( val != 0 ) ); + + Tree *obj = pop(); + if ( key != 0 ) + map_unremove( prg, (Map*)obj, key, val ); + + tree_downref( prg, obj ); + break; + } + case IN_MAP_LENGTH: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_LENGTH" << endl; + #endif + + Tree *obj = pop(); + long len = map_length( (Map*)obj ); + Tree *res = construct_integer( prg, len ); + tree_upref( res ); + push( res ); + + tree_downref( prg, obj ); + break; + } + case IN_MAP_FIND: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_MAP_FIND" << endl; + #endif + + Tree *obj = pop(); + Tree *key = pop(); + Tree *result = map_find( (Map*)obj, key ); + tree_upref( result ); + push( result ); + + tree_downref( prg, obj ); + tree_downref( prg, key ); + break; + } + case IN_INIT_LOCALS: { + Half size; + read_half( size ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_INIT_LOCALS " << size << endl; + #endif + + frame = ptop(); + pushn( size ); + memset( ptop(), 0, sizeof(Word) * size ); + break; + } + case IN_POP_LOCALS: { + Half frameId, size; + read_half( frameId ); + read_half( size ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_POP_LOCALS " << frameId << " " << size << endl; + #endif + + FrameInfo *fi = &prg->rtd->frameInfo[frameId]; + downref_local_trees( prg, frame, fi->trees, fi->treesLen ); + popn( size ); + break; + } + case IN_CALL: { + Half funcId; + read_half( funcId ); + + FunctionInfo *fi = &prg->rtd->functionInfo[funcId]; + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_CALL " << fi->name << endl; + #endif + + push( 0 ); /* Return value. 
*/ + push( (SW)instr ); + push( (SW)frame ); + + instr = prg->rtd->frameInfo[fi->frameId].code; + frame = ptop(); + break; + } + case IN_YIELD: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_YIELD" << endl; + #endif + + Kid *kid = (Kid*)pop(); + Ref *next = (Ref*)pop(); + UserIter *uiter = (UserIter*) plocal_iframe( IFR_AA ); + + if ( kid == 0 || kid->tree == 0 || + kid->tree->id == uiter->searchId || + uiter->searchId == prg->rtd->anyId ) + { + /* Store the yielded value. */ + uiter->ref.kid = kid; + uiter->ref.next = next; + uiter->stackSize = uiter->stackRoot - ptop(); + uiter->resume = instr; + uiter->frame = frame; + + /* Restore the instruction, frame and iframe pointers. */ + instr = (Code*) local_iframe(IFR_RIN); + frame = (Tree**) local_iframe(IFR_RFR); + iframe = (Tree**) local_iframe(IFR_RIF); + + /* Return the yield result on the top of the stack. */ + Tree *result = uiter->ref.kid != 0 ? prg->trueVal : prg->falseVal; + tree_upref( result ); + push( result ); + } + break; + } + case IN_UITER_CREATE: { + short field; + Half funcId, searchId; + read_half( field ); + read_half( funcId ); + read_half( searchId ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_CREATE " << field << " " << + funcId << " " << searchId << endl; + #endif + + FunctionInfo *fi = prg->rtd->functionInfo + funcId; + UserIter *uiter = uiter_create( sp, prg, fi, searchId ); + local(field) = (SW) uiter; + + /* This setup is similar to a call, only the frame structure + * is slightly different for user iterators. We aren't going to do + * the call. We don't need to set up the return ip because the + * uiter advance will set it. The frame we need to do because it + * is set once for the lifetime of the iterator. */ + push( 0 ); /* Return instruction pointer. */ + push( (SW)iframe ); /* Return iframe. */ + push( (SW)frame ); /* Return frame. */ + + /* Now set up the first yield. 
*/ + uiter->ref.kid = 0; + uiter->stackSize = uiter->stackRoot - ptop(); + uiter->resume = prg->rtd->frameInfo[fi->frameId].code; + uiter->frame = &uiter->stackRoot[-IFR_AA]; + + upref_uiter_args( frame, fi->argSize ); + break; + } + case IN_UITER_DESTROY: { + short field; + read_half( field ); + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_UITER_DESTROY " << field << endl; + #endif + + UserIter *uiter = (UserIter*) local(field); + user_iter_destroy( sp, uiter ); + break; + } + case IN_RET: { + Half funcId; + read_half( funcId ); + + FunctionInfo *fui = &prg->rtd->functionInfo[funcId]; + + #ifdef COLM_LOG_BYTECODE + cerr << "IN_RET " << fui->name << endl; + #endif + + FrameInfo *fi = &prg->rtd->frameInfo[fui->frameId]; + downref_local_trees( prg, frame, fi->trees, fi->treesLen ); + + popn( fui->frameSize ); + frame = (Tree**) pop(); + instr = (Code*) pop(); + Tree *retVal = pop(); + popn( fui->argSize ); + push( retVal ); + break; + } + case IN_OPEN_FILE: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_OPEN_FILE" << endl; + #endif + + Tree *name = pop(); + Tree *res = open_file( prg, name ); + tree_upref( res ); + push( res ); + tree_downref( prg, name ); + break; + } + case IN_GET_STDIN: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_GET_STDIN" << endl; + #endif + + /* Pop the root object. */ + tree_downref( prg, pop() ); + if ( prg->stdinVal == 0 ) { + prg->stdinVal = open_stream_fd( prg, 0 ); + tree_upref( (Tree*)prg->stdinVal ); + } + + tree_upref( (Tree*)prg->stdinVal ); + push( (Tree*)prg->stdinVal ); + break; + } + case IN_STOP: { + #ifdef COLM_LOG_BYTECODE + cerr << "IN_STOP" << endl; + #endif + + cout.flush(); + return; + } + + /* Halt is a default instruction given by the compiler when it is + * asked to generate an instruction it doesn't have. It is deliberate + * and can represent "not implemented" or "compiler error" because a + * variable holding instructions was not properly initialized. 
*/ + case IN_HALT: { + cerr << "IN_HALT -- compiler did something wrong" << endl; + exit(1); + break; + } + default: { + cerr << "UNKNOWN INSTRUCTION: " << (ulong)instr[-1] << + " -- something is wrong" << endl; + exit(1); + break; + } + } + goto again; +} diff --git a/colm/bytecode.h b/colm/bytecode.h new file mode 100644 index 00000000..0bdcab99 --- /dev/null +++ b/colm/bytecode.h @@ -0,0 +1,615 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _BYTECODE_H +#define _BYTECODE_H + +#include "vector.h" +#include "resize.h" +#include "dlist.h" +#include "config.h" +#include "avlmap.h" + +#include + +using std::cerr; +using std::endl; +using std::ostream; + +typedef unsigned long ulong; +typedef unsigned char uchar; + +#define read_word_p( i, p ) do { \ + i = ((Word) p[0]); \ + i |= ((Word) p[1]) << 8; \ + i |= ((Word) p[2]) << 16; \ + i |= ((Word) p[3]) << 24; \ +} while(0) + +#define IN_LOAD_INT 0x01 +#define IN_LOAD_STR 0x02 +#define IN_LOAD_NIL 0x03 +#define IN_LOAD_TRUE 0xa3 +#define IN_LOAD_FALSE 0xa4 + +#define IN_ADD_INT 0x04 +#define IN_SUB_INT 0x06 +#define IN_MULT_INT 0x05 + +#define IN_TST_EQL 0xa0 +#define IN_TST_NOT_EQL 0xa1 +#define IN_TST_LESS 0x0e +#define IN_TST_GRTR 0x10 +#define IN_TST_LESS_EQL 0x0f +#define 
IN_TST_GRTR_EQL 0x11 +#define IN_TST_LOGICAL_AND 0x12 +#define IN_TST_LOGICAL_OR 0x13 + +#define IN_NOT 0x16 + +#define IN_JMP 0x0c +#define IN_JMP_FALSE 0x0b +#define IN_JMP_TRUE 0x0d + +#define IN_STR_ATOI 0x14 +#define IN_STR_LENGTH 0x15 +#define IN_CONCAT_STR 0x17 + +#define IN_INIT_LOCALS 0x18 +#define IN_POP_LOCALS 0xb0 +#define IN_POP 0x19 +#define IN_DUP_TOP 0x1a +#define IN_REJECT 0x1b +#define IN_MATCH 0x1c +#define IN_CONSTRUCT 0x1d +#define IN_TREE_NEW 0x1f + +#define IN_GET_LOCAL_R 0x20 +#define IN_GET_LOCAL_WC 0x21 +#define IN_SET_LOCAL_WC 0x22 + +#define IN_GET_LOCAL_REF_R 0x23 +#define IN_GET_LOCAL_REF_WC 0x24 +#define IN_SET_LOCAL_REF_WC 0x25 + +#define IN_SAVE_RET 0x26 + +#define IN_GET_FIELD_R 0x27 +#define IN_GET_FIELD_WC 0x28 +#define IN_GET_FIELD_WV 0x29 +#define IN_GET_FIELD_BKT 0x2a + +#define IN_SET_FIELD_WV 0x2b +#define IN_SET_FIELD_WC 0x2c +#define IN_SET_FIELD_BKT 0x2d +#define IN_SET_FIELD_LEAVE_WC 0x2e + +#define IN_GET_MATCH_LENGTH_R 0x2f +#define IN_GET_MATCH_TEXT_R 0x30 + +#define IN_GET_TOKEN_DATA_R 0x31 +#define IN_SET_TOKEN_DATA_WC 0x32 +#define IN_SET_TOKEN_DATA_WV 0x33 +#define IN_SET_TOKEN_DATA_BKT 0x34 + +#define IN_GET_TOKEN_POS_R 0x35 + +#define IN_INIT_RHS_EL 0x3b + +#define IN_TRITER_FROM_REF 0x3c +#define IN_TRITER_ADVANCE 0x3d +#define IN_TRITER_NEXT_CHILD 0x98 +#define IN_TRITER_PREV_CHILD 0x9b +#define IN_TRITER_GET_CUR_R 0x3e +#define IN_TRITER_GET_CUR_WC 0x3f +#define IN_TRITER_SET_CUR_WC 0x40 +#define IN_TRITER_DESTROY 0x41 + +#define IN_UITER_DESTROY 0x52 +#define IN_UITER_CREATE 0x53 +#define IN_UITER_ADVANCE 0x54 +#define IN_UITER_GET_CUR_R 0x55 +#define IN_UITER_GET_CUR_WC 0x56 +#define IN_UITER_SET_CUR_WC 0x57 + +#define IN_TREE_SEARCH 0x58 + +#define IN_LOAD_GLOBAL_R 0x59 +#define IN_LOAD_GLOBAL_WV 0x5a +#define IN_LOAD_GLOBAL_BKT 0x5b + +#define IN_PTR_DEREF_R 0x5e +#define IN_PTR_DEREF_WV 0x5f +#define IN_PTR_DEREF_WC 0x60 +#define IN_PTR_DEREF_BKT 0x61 + +#define IN_REF_FROM_LOCAL 0x62 +#define 
IN_REF_FROM_REF 0x97 +#define IN_TRITER_REF_FROM_CUR 0x63 +#define IN_UITER_REF_FROM_CUR 0x64 + +#define IN_MAP_LENGTH 0x65 +#define IN_MAP_FIND 0x66 +#define IN_MAP_INSERT_WV 0x67 +#define IN_MAP_INSERT_WC 0x68 +#define IN_MAP_INSERT_BKT 0x69 +#define IN_MAP_STORE_WV 0x6a +#define IN_MAP_STORE_WC 0x6b +#define IN_MAP_STORE_BKT 0x6c +#define IN_MAP_REMOVE_WV 0x6d +#define IN_MAP_REMOVE_WC 0x6e +#define IN_MAP_REMOVE_BKT 0x6f + +#define IN_LIST_LENGTH 0x70 +#define IN_LIST_APPEND_WV 0x71 +#define IN_LIST_APPEND_WC 0x72 +#define IN_LIST_APPEND_BKT 0x73 +#define IN_LIST_REMOVE_END_WV 0x74 +#define IN_LIST_REMOVE_END_WC 0x75 +#define IN_LIST_REMOVE_END_BKT 0x76 + +#define IN_GET_LIST_MEM_R 0x77 +#define IN_GET_LIST_MEM_WC 0x78 +#define IN_GET_LIST_MEM_WV 0x79 +#define IN_GET_LIST_MEM_BKT 0x7a +#define IN_SET_LIST_MEM_WV 0x7b +#define IN_SET_LIST_MEM_WC 0x7c +#define IN_SET_LIST_MEM_BKT 0x7d + +#define IN_VECTOR_LENGTH 0x7e +#define IN_VECTOR_APPEND_WV 0x7f +#define IN_VECTOR_APPEND_WC 0x80 +#define IN_VECTOR_APPEND_BKT 0x81 +#define IN_VECTOR_INSERT_WV 0x82 +#define IN_VECTOR_INSERT_WC 0x83 +#define IN_VECTOR_INSERT_BKT 0x84 + +#define IN_PRINT 0x87 +#define IN_PRINT_XML 0x88 + +#define IN_HALT 0x8a + +#define IN_CALL 0x8b +#define IN_RET 0x8c +#define IN_YIELD 0x8d +#define IN_STOP 0x8e + +#define IN_STR_UORD8 0x8f +#define IN_STR_SORD8 0x90 +#define IN_STR_UORD16 0x91 +#define IN_STR_SORD16 0x92 +#define IN_STR_UORD32 0x93 +#define IN_STR_SORD32 0x94 + +#define IN_INT_TO_STR 0x99 + +#define IN_CREATE_TOKEN 0x95 +#define IN_MAKE_TOKEN 0x96 +#define IN_MAKE_TREE 0xb2 +#define IN_CONSTRUCT_TERM 0x9a +#define IN_PARSE 0xb1 +#define IN_PARSE_BKT 0xb3 +#define IN_STREAM_PULL 0xb4 +#define IN_STREAM_PULL_BKT 0xb5 +#define IN_STREAM_PUSH 0xbc +#define IN_STREAM_PUSH_BKT 0xbd +#define IN_SEND 0xb6 +#define IN_IGNORE 0xb7 + +#define IN_OPEN_FILE 0xb8 +#define IN_GET_STDIN 0xb9 +#define IN_GET_STDOUT 0xba +#define IN_GET_STDERR 0xbb + + +/* Types */ +#define TYPE_NIL 0x01 
+#define TYPE_TREE 0x02 +#define TYPE_REF 0x03 +#define TYPE_PTR 0x04 +#define TYPE_ITER 0x05 + +/* Types of Generics. */ +#define GEN_LIST 0x10 +#define GEN_MAP 0x11 +#define GEN_VECTOR 0x12 + +/* Allocation, number of items. */ +#define FRESH_BLOCK 8128 + +/* Virtual machine stack size, number of pointers. + * This will be mmapped. */ +#define VM_STACK_SIZE (4*1024ll*1024ll) + +/* Known language element ids. */ +#define LEL_ID_PTR 1 +#define LEL_ID_BOOL 2 +#define LEL_ID_INT 3 +#define LEL_ID_STR 4 +#define LEL_ID_STREAM 5 + +#define AF_GENERATED 0x1 +#define AF_COMMITTED 0x2 +#define AF_REV_FREED 0x4 +#define AF_ARTIFICIAL 0x8 +#define AF_NAMED 0x10 +#define AF_GROUP_MEM 0x20 +#define AF_IGNORE 0x40 +#define AF_HAS_RCODE 0x80 + +/* + * Call stack. + */ + +/* Number of spots in the frame, after the args. */ +#define FR_AA 3 + +/* Positions relative to the frame pointer. */ +#define FR_RV 2 /* return value */ +#define FR_RI 1 /* return instruction */ +#define FR_RF 0 /* return frame pointer */ + +/* + * Calling Convention: + * a1 + * a2 + * a3 + * ... + * return value FR_RV + * return instr FR_RI + * return frame FR_RF + */ + +/* + * User iterator call stack. + * Adds an iframe pointer, removes the return value. + */ + +/* Number of spots in the frame, after the args. */ +#define IFR_AA 3 + +/* Positions relative to the frame pointer. 
*/ +#define IFR_RIN 2 /* return instruction */ +#define IFR_RIF 1 /* return iframe pointer */ +#define IFR_RFR 0 /* return frame pointer */ + + +struct Kid; +struct Tree; +struct Alg; +struct ListEl; +struct MapEl; +struct PdaTables; +struct RuntimeData; +struct FsmRun; +struct PdaRun; +struct Program; +struct List; +struct Map; +struct Stream; + +typedef unsigned char Code; +typedef unsigned long Word; /* CodeVect stores only the low 32 bits of a Word */ +typedef unsigned long Half; /* CodeVect stores only the low 16 bits of a Half */ + +typedef Tree *SW; +typedef Tree **StackPtr; +typedef Tree **&StackRef; + +Tree **alloc_obj_data( long length ); + +Kid *alloc_attrs( Program *prg, long length ); +void free_attrs( Program *prg, Kid *attrs ); +void set_attr( Tree *tree, long pos, Tree *val ); +Tree *get_attr( Tree *tree, long pos ); + +/* Return the size of a type in words. The size must be a whole number of words. */ +template int sizeof_in_words() +{ + assert( (sizeof(T) % sizeof(Word)) == 0 ); + return sizeof(T) / sizeof(Word); +} + +/* + * Code Vector: a byte vector with helpers that write halves and words least significant byte first. + */ +struct CodeVect : public Vector +{ + void appendHalf( Half half ) + { + /* Appends two bytes, least significant first. Not optimal. */ + append( half & 0xff ); + append( (half>>8) & 0xff ); + } + + void appendWord( Word word ) + { + /* Appends four bytes, least significant first. Not optimal. */ + append( word & 0xff ); + append( (word>>8) & 0xff ); + append( (word>>16) & 0xff ); + append( (word>>24) & 0xff ); + } + + void setHalf( long pos, Half half ) + { + /* Overwrites the two bytes at pos and pos+1 in place. Not optimal. */ + data[pos] = half & 0xff; + data[pos+1] = (half>>8) & 0xff; + } + + void insertHalf( long pos, Half half ) + { + /* Inserts two bytes starting at pos, least significant first. Not optimal. */ + insert( pos, half & 0xff ); + insert( pos+1, (half>>8) & 0xff ); + } + + void insertWord( long pos, Word word ) + { + /* Inserts four bytes starting at pos, least significant first. Not optimal. */ + insert( pos, word & 0xff ); + insert( pos+1, (word>>8) & 0xff ); + insert( pos+2, (word>>16) & 0xff ); + insert( pos+3, (word>>24) & 0xff ); + } + + void insertTree( long pos, Tree *tree ) + { insertWord( pos, (Word) tree ); } /* NOTE(review): insertWord keeps only the low 32 bits, so the pointer is truncated where pointers are wider than 32 bits -- confirm against the code that reads it back. */ +}; + +/* + * Strings + */ + +/* Header located just before string data. 
*/ +struct Head +{ + const char *data; + long length; +}; + +struct Program; +struct Stream; + +Head *string_alloc_new( Program *prg, const char *data, long length ); +Head *string_alloc_const( Program *prg, const char *data, long length ); +Head *string_copy( Program *prg, Head *head ); +void string_free( Program *prg, Head *head ); + +void print_str( Head *str ); + +long string_length( Head *str ); +const char *string_data( Head *str ); +void string_shorten( Head *tokdata, long newlen ); +void ignore_data( Tree *tree, char *dest ); +long ignore_length( Tree *tree ); +void free_obj_data( Program *prg, Kid *attrs ); +Head *concat_str( Head *s1, Head *s2 ); +Word cmp_string( Head *s1, Head *s2 ); +Word str_atoi( Head *str ); +Head *int_to_str( Program *prg, Word i ); +Word str_uord16( Head *head ); +Word str_uord8( Head *head ); +Head *make_literal( Program *prg, long litoffset ); +void rcode_downref( Tree **stack_root, Program *prg, Code *instr ); +void rcode_downref_all( Tree **stack_root, Program *prg, CodeVect *cv ); +void xml_print_tree( Tree **&sp, Program *prg, Tree *tree ); +void xml_print_kid( Tree **&sp, Program *prg, Kid *kid, int depth ); +void parsed_downref( Tree **root, Program *prg, Tree *tree ); +Stream *open_stream( Program *prg, FILE *file ); +Tree *construct_string( Program *prg, Head *s ); +void list_free( Program *prg, List *list ); +void ignore_free( Program *prg, Tree *tree ); +void map_free( Program *prg, Map *map ); +void stream_free( Program *prg, Stream *s ); + +void tree_downref( Program *prg, Tree *tree ); +void tree_upref( Tree *tree ); +Kid *tree_child( Program *prg, Tree *tree ); +Kid *tree_extract_child( Program *prg, Tree *tree ); +Kid *tree_ignore( Program *prg, Tree *tree ); +void print_tree( Tree **&sp, Program *prg, Tree *tree ); +void print_tree( ostream &out, Tree **&sp, Program *prg, Tree *tree ); +bool tree_is_ignore( Program *prg, Kid *kid ); +Kid *kid_list_concat( Kid *list1, Kid *list2 ); + +/* + * Maps + */ +struct 
GenericInfo +{ + long type; + long typeArg; + long keyOffset; + long keyType; + long langElId; +}; + +long cmp_tree( const Tree *tree1, const Tree *tree2 ); + +/* + * Runtime environment + */ + +struct PoolItem +{ + PoolItem *next; +}; + +template struct PoolBlock +{ + T data[FRESH_BLOCK]; + PoolBlock *next; +}; + +template struct PoolAlloc +{ + PoolAlloc() : + head(0), nextel(FRESH_BLOCK), pool(0) + {} + + T *allocate(); + void free( T *el ); + void clear(); + long numlost(); + + PoolBlock *head; /* most recently allocated block; older blocks follow via next */ + long nextel; /* next unused slot in head; FRESH_BLOCK forces allocate() to start a new block */ + PoolItem *pool; /* free list of items handed back through free() */ +}; + +template T *PoolAlloc::allocate() +{ + //#ifdef COLM_LOG_BYTECODE + //cerr << "allocating in: " << __PRETTY_FUNCTION__ << endl; + //#endif + + T *newEl = 0; + if ( pool == 0 ) { + if ( nextel == FRESH_BLOCK ) { + #ifdef COLM_LOG_BYTECODE + cerr << "allocating " << FRESH_BLOCK << " Elements of type T" << endl; + #endif + + PoolBlock *newBlock = new PoolBlock; + newBlock->next = head; + head = newBlock; + nextel = 0; + } + newEl = &head->data[nextel++]; + } + else { + newEl = (T*)pool; + pool = pool->next; + } + memset( newEl, 0, sizeof(T) ); + return newEl; +} + +template void PoolAlloc::free( T *el ) +{ + //#ifdef COLM_LOG_BYTECODE + //cerr << "freeing in: " << __PRETTY_FUNCTION__ << endl; + //#endif + + memset( el, 0, sizeof(T) ); + PoolItem *pi = (PoolItem*) el; + pi->next = pool; + pool = pi; +} + +template void PoolAlloc::clear() +{ + PoolBlock *block = head; + while ( block != 0 ) { + PoolBlock *next = block->next; + delete block; + block = next; + } + + head = 0; + nextel = FRESH_BLOCK; /* no block is left: match the constructor so the next allocate() makes a fresh block instead of indexing a null head */ + pool = 0; +} + +template long PoolAlloc::numlost() +{ + /* Count the number of items allocated. */ + long lost = 0; + PoolBlock *block = head; + if ( block != 0 ) { + lost = nextel; + block = block->next; + while ( block != 0 ) { + lost += FRESH_BLOCK; + block = block->next; + } + } + + /* Subtract. Items that are on the free list. 
*/ + PoolItem *pi = pool; + while ( pi != 0 ) { + lost -= 1; + pi = pi->next; + } + + return lost; +} + +struct Int; + +struct Program +{ + Program( bool ctxDepParsing, RuntimeData *rtd ); + + bool ctxDepParsing; + RuntimeData *rtd; + Tree *global; + + PoolAlloc kidPool; + PoolAlloc treePool; + PoolAlloc algPool; + PoolAlloc listElPool; + PoolAlloc mapElPool; + + Tree *trueVal; + Tree *falseVal; + + void run(); + void clear(); + void clearGlobal(); + void freshGlobal(); + + Kid *heap; + + Stream *stdinVal; + Stream *stdoutVal; + Stream *stderrVal; +}; + +struct Execution +{ + Execution( Program *prg, CodeVect &reverseCode, + PdaRun *parser, Code *code, Tree *lhs, Head *matchText ); + + Program *prg; + PdaTables *pdaTables; + PdaRun *parser; + Code *code; + Tree **frame; + Tree **iframe; + Tree *lhs; + + Head *matchText; + bool reject; + + /* Reverse code. */ + CodeVect &reverseCode; + long rcodeUnitLen; + + void execute( Tree **root ); + void rexecute( Tree **root, Code *revcode, CodeVect &allRev ); + void execute( Tree **&sp, Code *instr ); + void rdownref( Code *instr ); +}; + +#endif diff --git a/colm/closure.cpp b/colm/closure.cpp new file mode 100644 index 00000000..3a10d030 --- /dev/null +++ b/colm/closure.cpp @@ -0,0 +1,450 @@ +/* + * Copyright 2005 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "colm.h" +#include "parsedata.h" + +#include "vector.h" +#include +#include +#include + +using std::endl; +using std::cerr; + +void ParseData::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, + PdaTrans *expandFrom, Definition *prod ) +{ + /* We use dot sets for finding unique states. In the future, should merge + * dots sets with the stateSet pointer (only need one of these). */ + assert( dest != prodState ); + dest->dotSet.insert( prodState->dotSet ); + + /* Get the epsilons, context, out priorities. */ + dest->pendingCommits.insert( prodState->pendingCommits ); + //if ( prodState->pendingCommits.length() > 0 ) + // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; + + if ( prodState->transMap.length() > 0 ) { + assert( prodState->transMap.length() == 1 ); + PdaTrans *srcTrans = prodState->transMap[0].value; + + /* Look for the source in the destination. */ + TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey ); + if ( destTel == 0 ) { + /* Make a new state and transition to it. */ + PdaState *newState = pdaGraph->addState(); + PdaTrans *newTrans = new PdaTrans(); + + /* Attach the new transition to the new state. */ + newTrans->lowKey = srcTrans->lowKey; + pdaGraph->attachTrans( dest, newState, newTrans ); + pdaGraph->addInTrans( newTrans, srcTrans ); + + /* The transitions we make during lr0 closure are all shifts. */ + assert( newTrans->isShift ); + assert( srcTrans->isShift ); + + /* The new state must have its state set setup. */ + newState->stateSet = new PdaStateSet; + newState->stateSet->insert( srcTrans->toState ); + + /* Insert the transition into the map. Be sure to set destTel, it + * is needed below. 
*/ + dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel ); + + /* If the item is a non-term, queue it for closure. */ + KlangEl *langEl = langElIndex[srcTrans->lowKey]; + if ( langEl != 0 && langEl->type == KlangEl::NonTerm ) { + pdaGraph->transClosureQueue.append( newTrans ); + //cerr << "put to trans closure queue" << endl; + } + } + else { + //cerr << "merging transitions" << endl; + destTel->value->toState->stateSet->insert( srcTrans->toState ); + pdaGraph->addInTrans( destTel->value, srcTrans ); + } + + /* If this is an expansion then we may need to bring in commits. */ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl; + destTel->value->commits.insert( expandFrom->commits ); + + expandFrom->commits.empty(); + } + } + else { + /* ProdState does not have any transitions out. It is at the end of a + * production. */ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl; + for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ ) + dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) ); + + expandFrom->commits.empty(); + } + } +} + +void ParseData::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ) +{ + /* State should not already be closed. */ + assert( !state->inClosedMap ); + + /* This is used each time we invoke closure, it must be cleared. */ + pdaGraph->transClosureQueue.abandon(); + + /* Drag in the core items. */ + for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ ) + lr0BringInItem( pdaGraph, state, *ssi, 0, 0 ); + + /* Now bring in the derived items. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst(); + //cerr << "have a transition to derive" << endl; + + /* Get the langEl. 
*/ + KlangEl *langEl = langElIndex[toClose->lowKey]; + + /* Make graphs for all of the productions that the non + * terminal goes to that are not already in the state's dotSet. */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Bring in the start state of the production. */ + lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod ); + } + } + + /* Try and insert into the closed dict. */ + DotSetMapEl *lastFound; + if ( pdaGraph->closedMap.insert( state, &lastFound ) ) { + /* Insertion into closed dict succeeded. There is no state with the + * same dot set. The state is now closed. It is guaranteed a spot in + * the closed dict and it will never go away (states never deleted + * during closure). */ + pdaGraph->stateClosedList.append( state ); + state->inClosedMap = true; + + /* Add all of the states in the out transitions to the closure queue. + * This will give us a breadth first search of the graph. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Get the state the transEl goes to. */ + PdaState *targ = trans->value->toState; + + /* If the state on this transition has not already been slated + * for closure, then add it to the queue. */ + if ( !targ->onClosureQueue && !targ->inClosedMap ) { + pdaGraph->stateClosureQueue.append( targ ); + targ->onClosureQueue = true; + } + } + } + else { + /* Insertion into closed dict failed. There is an existing state + * with the same dot set. Get the existing state. */ + pdaGraph->inTransMove( lastFound, state ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + pdaGraph->stateList.detach( tel->value->toState ); + delete tel->value->toState; + delete tel->value; + } + pdaGraph->stateList.detach( state ); + delete state; + } +} + +/* Invoke closure on the graph. We use a queue here to achieve a breadth + * first search of the tree we build. Note, there are back edges in this + * tree. 
They are the edges made when upon closure, a dot set exists + * already. */ +void ParseData::lr0CloseAllStates( PdaGraph *pdaGraph ) +{ + /* While there are items on the closure queue. */ + while ( pdaGraph->stateClosureQueue.length() > 0 ) { + /* Pop the first item off. */ + PdaState *state = pdaGraph->stateClosureQueue.detachFirst(); + state->onClosureQueue = false; + + /* Invoke closure upon the state. */ + lr0InvokeClosure( pdaGraph, state ); + } +} + +void ParseData::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, + PdaState *state, long prodId ) +{ + ProdIdPairSet &pendingCommits = state->pendingCommits; + for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) { + if ( pi->onReduce == prodId ) + trans->commits.insert( pi->length ); + } +} + +void ParseData::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) { + TransMapEl *transEl = expandTo->transMap.find( fkey->key ); + + if ( transEl != 0 ) { + /* Set up the follow transition. */ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, fkey->value ); + } + else { + /* Set up the follow transition. 
*/ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = fkey->key; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, fkey->value ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } + } +} + +long PdaTrans::maxPrior() +{ + long prior = LONG_MIN; + if ( isShift && shiftPrior > prior ) + prior = shiftPrior; + for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) { + if ( red->value > prior ) + prior = red->value; + } + return prior; +} + +void ParseData::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ) +{ + /* Finding non-terminals into the state. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + KlangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == KlangEl::NonTerm ) { + /* Finding the following transitions. */ + FollowToAdd followKeys; + for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) { + int fkey = fout->key; + KlangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == KlangEl::Term ) { + long prior = fout->value->maxPrior(); + followKeys.insert( fkey, prior ); + } + } + + if ( followKeys.length() > 0 ) + lalr1AddFollow2( pdaGraph, in, followKeys ); + } + } +} + +void ParseData::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, + long followKey, long prior ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + TransMapEl *transEl = expandTo->transMap.find( followKey ); + if ( transEl != 0 ) { + /* Add in the reductions, or in the shift. 
*/ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, prior ); + } + else { + /* Set up the follow transition. */ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = followKey; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, prior ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } +} + +void ParseData::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) +{ + PdaState *state = trans->fromState; + int fkey = trans->lowKey; + KlangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == KlangEl::Term ) { + /* Finding non-terminals into the state. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + KlangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == KlangEl::NonTerm ) { + //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl; + long prior = trans->maxPrior(); + lalr1AddFollow2( pdaGraph, in, fkey, prior ); + } + } + } +} + +/* Add follow sets to an LR(0) graph to make it LALR(1). */ +void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl ) +{ + /* Make the state that all reduction actions go to. Since a reduction pops + * states of the stack and sets the new target state, this state is + * actually never reached. Just here to link the trans to. */ + actionDestState = pdaGraph->addState(); + pdaGraph->setFinState( actionDestState ); + + /* Get the entry into the graph and traverse over start. */ + PdaState *overStart = pdaGraph->followFsm( pdaGraph->startState, rootEl->rootDef->fsm ); + + /* Add _eof after the initial _start. 
*/ + PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, + eofKlangEl->id, eofKlangEl->id ); + eofTrans->isShift = true; + + /* This was used during lr0 table construction. */ + pdaGraph->transClosureQueue.abandon(); + + /* Need to pass over every state initially. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + lalr1AddFollow1( pdaGraph, state ); + + /* While the closure queue has items, pop them off and add follow + * characters. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + /* Pop the first item off and add Follow for it . */ + PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst(); + lalr1AddFollow1( pdaGraph, trans ); + } +} + +void ParseData::linkExpansions( PdaGraph *pdaGraph ) +{ + pdaGraph->setStateNumbers(); + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + /* Find transitions out on non terminals. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + long key = trans->key; + KlangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == KlangEl::NonTerm ) { + /* For each production that the non terminal expand to ... */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Follow the production and add to the trans's expand to set. */ + PdaState *followRes = pdaGraph->followFsm( state, prod->fsm ); + + //KlangEl *lel = langElIndex[key]; + //cerr << state->stateNum << ", "; + //if ( lel != 0 ) + // cerr << lel->data; + //else + // cerr << (char)key; + //cerr << " -> " << (*fto)->stateNum << " on " << + // prod->data << " (fss = " << fin.pos() << ")" << endl; + trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) ); + } + } + } + } +} + +/* Add terminal versions of all nonterminal transitions. 
*/ +void ParseData::addDupTerms( PdaGraph *pdaGraph ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + PdaTransList newTransitions; /* collected first, attached after the scan of transMap is finished */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + KlangEl *lel = langElIndex[trans->value->lowKey]; + if ( lel != 0 && lel->type == KlangEl::NonTerm ) { /* a key with no language element has nothing to duplicate; same guard as lr0BringInItem and linkExpansions */ + PdaTrans *dupTrans = new PdaTrans; + dupTrans->lowKey = lel->termDup->id; + dupTrans->isShift = true; + + /* Save the target state in toState. In the next loop when we + * attach the transition we must clear this because the + * attaching code requires the transition to be unattached. */ + dupTrans->toState = trans->value->toState; + newTransitions.append( dupTrans ); + + /* Commit code used? */ + //transferCommits( pdaGraph, followTrans, expandTo, prodId ); + } + } + + for ( PdaTrans *dup = newTransitions.head; dup != 0; ) { + PdaTrans *next = dup->next; + PdaState *toState = dup->toState; + dup->toState = 0; + pdaGraph->attachTrans( state, toState, dup ); + state->transMap.insert( dup->lowKey, dup ); + dup = next; + } + } +} + +/* Generate a LALR(1) graph. */ +void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl ) +{ + /* Make the initial graph. */ + pdaGraph->langElIndex = langElIndex; + + PdaState *start = pdaGraph->addState(); + pdaGraph->setStartState( start ); + + start->stateSet = new PdaStateSet; + start->stateSet->insert( rootEl->rootDef->fsm->startState ); + + /* Queue the start state for closure. */ + start->onClosureQueue = true; + pdaGraph->stateClosureQueue.append( start ); + + /* Run the lr0 closure. */ + lr0CloseAllStates( pdaGraph ); + + /* Add terminal versions of all nonterminal transitions. */ + addDupTerms( pdaGraph ); + + /* Link production expansions to the place they expand to. */ + linkExpansions( pdaGraph ); + + /* Walk the graph adding follow sets to the LR(0) graph. */ + lalr1AddFollowSets( pdaGraph, rootEl ); + +// /* Set the commit on the final eof shift. 
*/ +// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); +// PdaTrans *eofTrans = overStart->toState->findTrans( eofKlangEl->id ); +// eofTrans->afterShiftCommits.insert( 2 ); +} diff --git a/colm/colm.h b/colm/colm.h new file mode 100644 index 00000000..9d906c49 --- /dev/null +++ b/colm/colm.h @@ -0,0 +1,92 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _COLM_H +#define _COLM_H + +#include +#include +#include +#include +#include + +#include "config.h" +#include "avltree.h" +#include "common.h" + +#define PROGNAME "colm" + +/* IO filenames and stream. */ +extern bool printPrintables; +extern bool graphvizDone; + +extern int gblErrorCount; + +std::ostream &error(); + +/* IO filenames and stream. */ +extern const char *outputFileName; +extern std::ostream *outStream; +extern bool generateGraphviz; +extern bool branchPointInfo; +extern bool addUniqueEmptyProductions; + +extern int gblErrorCount; +extern char startDefName[]; + +/* Error reporting. 
*/ +std::ostream &error(); +std::ostream &error( int first_line, int first_column ); +std::ostream &warning( ); +std::ostream &warning( int first_line, int first_column ); + +struct exit_object { }; +extern exit_object endp; +void operator<<( std::ostream &out, exit_object & ); + +extern std::ostream *outStream; +extern bool printStatistics; + +extern int gblErrorCount; +extern char machineMain[]; + +/* Location in an input file. */ +struct InputLoc +{ + const char *fileName; + int line; + int col; +}; + +/* Error reporting. */ +std::ostream &error(); +std::ostream &error( const InputLoc &loc ); +std::ostream &warning( const InputLoc &loc ); + +void scan( char *fileName, std::istream &input, std::ostream &output ); +void terminateAllParsers( ); +void checkMachines( ); + +void xmlEscapeHost( std::ostream &out, char *data, int len ); +void openOutput(); +void escapeLiteralString( std::ostream &out, const char *data ); + +#endif /* _COLM_H */ diff --git a/colm/compile.cpp b/colm/compile.cpp new file mode 100644 index 00000000..ebe1fbdc --- /dev/null +++ b/colm/compile.cpp @@ -0,0 +1,2492 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "bytecode.h" +#include "parsedata.h" +#include +#include + +using std::cout; +using std::cerr; +using std::endl; + +void ParseData::initUniqueTypes( ) +{ + uniqueTypeNil = new UniqueType( TYPE_NIL ); + uniqueTypePtr = new UniqueType( TYPE_TREE, ptrKlangEl ); + uniqueTypeBool = new UniqueType( TYPE_TREE, boolKlangEl ); + uniqueTypeInt = new UniqueType( TYPE_TREE, intKlangEl ); + uniqueTypeStr = new UniqueType( TYPE_TREE, strKlangEl ); + uniqueTypeStream = new UniqueType( TYPE_TREE, streamKlangEl ); + uniqueTypeAny = new UniqueType( TYPE_TREE, anyKlangEl ); + + uniqeTypeMap.insert( uniqueTypeNil ); + uniqeTypeMap.insert( uniqueTypePtr ); + uniqeTypeMap.insert( uniqueTypeBool ); + uniqeTypeMap.insert( uniqueTypeInt ); + uniqeTypeMap.insert( uniqueTypeStr ); + uniqeTypeMap.insert( uniqueTypeStream ); + uniqeTypeMap.insert( uniqueTypeAny ); +} + +IterDef::IterDef( Type type ) : + type(type), + func(0), + useFuncId(false), + useSearchUT(false) +{ + if ( type == Tree ) { + inCreate = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_ADVANCE; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + } + else if ( type == Child ) { + inCreate = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_NEXT_CHILD; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + } + else if ( type == RevChild ) { + inCreate = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_PREV_CHILD; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + 
inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + } + else + assert(false); +} + +/* Construct the definition of an iterator backed by the function func. It + * uses the IN_UITER_* instructions and sets both useFuncId and useSearchUT. */ +IterDef::IterDef( Type type, Function *func ) : + type(type), + func(func), + useFuncId(true), + useSearchUT(true), + inCreate(IN_UITER_CREATE), + inDestroy(IN_UITER_DESTROY), + inAdvance(IN_UITER_ADVANCE), + inGetCurR(IN_UITER_GET_CUR_R), + inGetCurWC(IN_UITER_GET_CUR_WC), + inSetCurWC(IN_UITER_SET_CUR_WC), + inRefFromCur(IN_UITER_REF_FROM_CUR) +{} + +/* Create a method that takes no parameters and insert it into the method map + * of obj under name. Returns the new ObjMethod. */ +ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, + const String &name, int methIdWC, int methIdWV, bool isConst ) +{ + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWC, methIdWV, 0, 0, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +/* Same as the overload without parameters, for a method with the single + * parameter type arg1. */ +ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, + const String &name, int methIdWC, int methIdWV, UniqueType *arg1, bool isConst ) +{ + UniqueType *args[] = { arg1 }; + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWC, methIdWV, 1, args, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +/* Same as the overload without parameters, for a method with the two + * parameter types arg1 and arg2. */ +ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, + const String &name, int methIdWC, int methIdWV, + UniqueType *arg1, UniqueType *arg2, bool isConst ) +{ + UniqueType *args[] = { arg1, arg2 }; + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWC, methIdWV, 2, args, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +/* Return the entry of iterDefSet that matches IterDef( type, func ), + * inserting it first if the set does not contain it yet. */ +IterDef *ParseData::findIterDef( IterDef::Type type, Function *func ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type, func ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type, func ) ); + return &el->key; +} + +/* The same find-or-insert for an iterator type that has no function. */ +IterDef *ParseData::findIterDef( IterDef::Type type ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type ) ); + return &el->key; +} + +UniqueType *ParseData::findUniqueType( int 
typeId ) +{ + UniqueType searchKey( typeId ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +/* Return the unique type for ( typeId, langEl ). It is created and inserted + * into uniqeTypeMap the first time it is asked for. */ +UniqueType *ParseData::findUniqueType( int typeId, KlangEl *langEl ) +{ + UniqueType searchKey( typeId, langEl ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, langEl ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +/* Return the unique type for ( typeId, iterDef ). It is created and inserted + * into uniqeTypeMap the first time it is asked for. */ +UniqueType *ParseData::findUniqueType( int typeId, IterDef *iterDef ) +{ + UniqueType searchKey( typeId, iterDef ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, iterDef ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +/* Resolve name in the namespace selected by qual and return its unique type + * as a pointer, reference or tree type, depending on isPtr and isRef. An + * error is reported if the namespace or the name cannot be found. */ +UniqueType *TypeRef::lookupTypePart( ParseData *pd, + NamespaceQual *qual, const String &name ) +{ + /* Look up the qualification and then the name. */ + Namespace *nspace = qual->getQual( pd ); + + if ( nspace == 0 ) + error(loc) << "do not have region for resolving reference" << endp; + + /* Search for the token in the region by name. */ + SymbolMapEl *inDict = nspace->symbolMap.find( name ); + if ( inDict != 0 ) { + long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) ); + return pd->findUniqueType( typeId, inDict->value ); + } + + error(loc) << "unknown type in typeof expression" << endp; + return 0; +} + +/* Resolve this type reference to a UniqueType. The result is stored in + * uniqueType, so later calls return it without repeating the lookup. */ +UniqueType *TypeRef::lookupType( ParseData *pd ) +{ + if ( uniqueType != 0 ) + return uniqueType; + + if ( iterDef != 0 ) + uniqueType = pd->findUniqueType( TYPE_ITER, iterDef ); + else if ( factor != 0 ) + uniqueType = pd->findUniqueType( TYPE_TREE, factor->langEl ); + else { + String name = typeName; + /* Optional and repeat types are looked up under a prefixed name. */ + if ( isOpt ) + name.setAs( 32, "_opt_%s", name.data ); + else if ( isRepeat ) + name.setAs( 32, "_repeat_%s", name.data ); + + /* Not an iterator. May be a reference. 
*/ + uniqueType = lookupTypePart( pd, nspaceQual, name ); + } + + return uniqueType; +} + +/* Return the field called name, or 0 if the object has no such field. */ +ObjField *ObjectDef::findField( String name ) +{ + ObjFieldMapEl *objDefMapEl = objFieldMap->find( name ); + if ( objDefMapEl != 0 ) + return objDefMapEl->value; + return 0; +} + +/* Return the method called name, or 0 if the object has no such method. */ +ObjMethod *ObjectDef::findMethod( String name ) +{ + ObjMethodMapEl *objMethodMapEl = objMethodMap->find( name ); + if ( objMethodMapEl != 0 ) + return objMethodMapEl->value; + return 0; +} + +/* Return the size, in words, of a field of type fieldUT: the size of a + * TreeIter for the built-in iterators, two for a reference and one for + * everything else, user iterators included. */ +long sizeOfField( UniqueType *fieldUT ) +{ + long size = 0; + if ( fieldUT->typeId == TYPE_ITER ) { + /* Select on the iterator type. */ + if ( fieldUT->iterDef->type == IterDef::Tree || + fieldUT->iterDef->type == IterDef::Child || + fieldUT->iterDef->type == IterDef::RevChild ) + size = sizeof(TreeIter) / sizeof(Word); + else if ( fieldUT->iterDef->type == IterDef::User ) { + /* User iterators are just a pointer to the UserIter struct. The + * struct needs to go right beneath the call to the user iterator + * so it can be found by a yield. It is therefore allocated on the + * stack right before the call. */ + size = 1; + } + else { + assert(false); + } + } + else if ( fieldUT->typeId == TYPE_REF ) + size = 2; + else + size = 1; + + return size; +} + +/* Mark the field as referenced and make sure it has been initialized. */ +void ObjectDef::referenceField( ParseData *pd, ObjField *field ) +{ + field->beenReferenced = true; + initField( pd, field ); +} + +/* Give the field its offset and instructions the first time it is seen. A + * frame field gets the negated running offset. A field of any other object + * takes the current nextOffset, which is then advanced by the field size. */ +void ObjectDef::initField( ParseData *pd, ObjField *field ) +{ + if ( !field->beenInitialized ) { + field->beenInitialized = true; + UniqueType *fieldUT = field->typeRef->lookupType( pd ); + + if ( type == FrameType ) { + nextOffset += sizeOfField( fieldUT ); + field->offset = -nextOffset; + + pd->initLocalInstructions( field ); + } + else { + field->offset = nextOffset; + nextOffset += sizeOfField( fieldUT ); + + /* Initialize the instructions. 
*/ + pd->initFieldInstructions( field ); + } + } +} + +/* Emit the instruction that loads the field el of inObject and return the + * type of the loaded value. For an iterator field the current value of the + * iterator is loaded and the search type of the iterator is returned. */ +UniqueType *LangVarRef::loadFieldInstr( ParseData *pd, CodeVect &code, + ObjectDef *inObject, ObjField *el, bool forWriting, bool revert ) +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + UniqueType *elUT = el->typeRef->lookupType( pd ); + + /* Select the get instruction according to the kind of access. */ + if ( forWriting ) { + /* The instruction, depends on whether or not we are reverting. */ + if ( elUT->typeId == TYPE_ITER ) + code.append( elUT->iterDef->inGetCurWC ); + else if ( revert ) + code.append( el->inGetWV ); + else + code.append( el->inGetWC ); + } + else { + /* Loading something for reading. */ + if ( elUT->typeId == TYPE_ITER ) + code.append( elUT->iterDef->inGetCurR ); + else + code.append( el->inGetR ); + } + + if ( el->useOffset ) { + /* Gets of locals and fields require offsets. Fake vars like token + * data and lhs don't require it. */ + code.appendHalf( el->offset ); + } + + /* If we are dealing with an iterator then dereference it. */ + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchTypeRef->lookupType( pd ); + + return elUT; +} + +/* Return the object definition of the language element behind a tree or + * reference type. Any other type trips the assert. */ +ObjectDef *objDefFromUT( ParseData *pd, UniqueType *ut ) +{ + ObjectDef *objDef = 0; + if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_REF ) + objDef = ut->langEl->objectDef; + else { + /* This should have generated a compiler error. */ + assert(false); + } + return objDef; +} + +/* Emit the loads for every item of the qualification, starting the field + * search in rootObj. lastPtrInQual is the position of the last pointer in + * the qualification and is negative when there is none. */ +void LangVarRef::loadQualification( ParseData *pd, CodeVect &code, + ObjectDef *rootObj, int lastPtrInQual, bool forWriting, bool revert ) +{ + /* Start the search from the root object. */ + ObjectDef *searchObjDef = rootObj; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. 
*/ + ObjFieldMapEl *objDefMapEl = searchObjDef->objFieldMap->find( qi->data ); + if ( objDefMapEl == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + ObjField *el = objDefMapEl->value; + + /* A field that has an active reference must not be written through. */ + if ( forWriting && el->refActive ) + error(qi->loc) << "reference active, cannot write to object" << endp; + + bool lfForWriting = forWriting; + bool lfRevert = revert; + + /* If there is a pointer in the qualification, we need to compute + * forWriting and revert. */ + if ( lastPtrInQual >= 0 ) { + if ( qi.pos() <= lastPtrInQual ) { + /* If we are before or at the pointer we are strictly read + * only, regardless of the origin. */ + lfForWriting = false; + lfRevert = false; + } + else { + /* If we are past the pointer then we are always reverting + * because the object is global. Forwriting is as passed in. + * */ + lfRevert = true; + } + } + + UniqueType *qualUT = loadFieldInstr( pd, code, searchObjDef, + el, lfForWriting, lfRevert ); + + if ( qi->type == QualItem::Dot ) { + /* Cannot dot a pointer. Iterator yes (access of the iterator not + * the current) */ + if ( qualUT->typeId == TYPE_PTR ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->type == QualItem::Arrow ) { + if ( qualUT->typeId == TYPE_PTR ) { + /* Always dereference pointers when used for qualification. If + * this is the last one then we must start with the reverse + * execution business. */ + if ( qi.pos() == lastPtrInQual && forWriting ) { + /* This is like a global load. */ + code.append( IN_PTR_DEREF_WV ); + } + else { + /* If reading or not yet the last in ref then we only need a + * reading deref. 
*/ + code.append( IN_PTR_DEREF_R ); + } + + qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); + } + else { + error(loc) << "arrow operator cannot be used to access this type" << endp; + } + } + + searchObjDef = objDefFromUT( pd, qualUT ); + } +} + +/* Emit the load of the global object followed by the qualification loads. + * The qualification is always loaded with revert set. */ +void LangVarRef::loadGlobalObj( ParseData *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) +{ + /* Start the search in the global object. */ + ObjectDef *rootObj = pd->globalObjectDef; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing and no reference was found in the qualification + * then load the global with a revert. */ + code.append( IN_LOAD_GLOBAL_WV ); + } + else + code.append( IN_LOAD_GLOBAL_R ); + + loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true ); +} + +/* Emit the qualification loads starting from the current local frame, with + * revert not set. */ +void LangVarRef::loadLocalObj( ParseData *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) +{ + /* Start the search in the local frame. */ + ObjectDef *rootObj = pd->curLocalFrame; + loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, false ); +} + +/* True if the reference resolves in the current local frame: the first item + * of its qualification is a local field or, when there is no qualification, + * its name is a local field or a local method. */ +bool LangVarRef::isLocalRef( ParseData *pd ) +{ + if ( qual->length() > 0 ) { + if ( pd->curLocalFrame->objFieldMap->find( qual->data[0].data ) != 0 ) + return true; + } + else if ( pd->curLocalFrame->objFieldMap->find( name ) != 0 ) + return true; + else if ( pd->curLocalFrame->objMethodMap->find( name ) != 0 ) + return true; + + return false; +} + +/* Load the containing object from the local frame for a local reference and + * from the global object otherwise. */ +void LangVarRef::loadObj( ParseData *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) +{ + if ( isLocalRef( pd ) ) + loadLocalObj( pd, code, lastPtrInQual, forWriting ); + else + loadGlobalObj( pd, code, lastPtrInQual, forWriting ); +} + +/* Walk the qualification from rootDef without emitting code. The result + * holds the position of the last pointer, the position of the first const + * part (each is -1 when absent) and the object reached at the end. */ +VarRefLookup LangVarRef::lookupQualification( ParseData *pd, ObjectDef *rootDef ) const +{ + int lastPtrInQual = -1; + ObjectDef *searchObjDef = rootDef; + int firstConstPart = -1; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. 
*/ + ObjFieldMapEl *objDefMapEl = searchObjDef->objFieldMap->find( qi->data ); + if ( objDefMapEl == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + ObjField *el = objDefMapEl->value; + + /* Lookup the type of the field. */ + UniqueType *qualUT = el->typeRef->lookupType( pd ); + + /* If we are dealing with an iterator then dereference it. */ + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchTypeRef->lookupType( pd ); + + /* Is it const? */ + if ( firstConstPart < 0 && el->isConst ) + firstConstPart = qi.pos(); + + /* Check for pointers. When loop is done we will have the last one + * present, if any. */ + if ( qualUT->typeId == TYPE_PTR ) + lastPtrInQual = qi.pos(); + + if ( qi->type == QualItem::Dot ) { + /* Cannot dot a pointer. Iterator yes (access of the iterator + * not the current) */ + if ( qualUT->typeId == TYPE_PTR ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->type == QualItem::Arrow ) { + /* An arrow on a pointer continues the search in the tree type of + * the same language element. */ + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchTypeRef->lookupType( pd ); + else if ( qualUT->typeId == TYPE_PTR ) + qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); + } + + searchObjDef = objDefFromUT( pd, qualUT ); + } + + return VarRefLookup( lastPtrInQual, firstConstPart, searchObjDef ); +} + +/* Resolve the qualification starting from the current local frame for a + * local reference and from the global object otherwise. */ +VarRefLookup LangVarRef::lookupObj( ParseData *pd ) +{ + ObjectDef *rootDef; + if ( isLocalRef( pd ) ) + rootDef = pd->curLocalFrame; + else + rootDef = pd->globalObjectDef; + + return lookupQualification( pd, rootDef ); +} + +/* Resolve the qualification and then find the field called name in the + * object that was reached. Reports an error if there is no such field. */ +VarRefLookup LangVarRef::lookupField( ParseData *pd ) +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Lookup the field. 
*/ + ObjFieldMapEl *objDefMapEl = lookup.inObject->objFieldMap->find( name ); + if ( objDefMapEl == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + ObjField *field = objDefMapEl->value; + + lookup.objField = field; + lookup.uniqueType = field->typeRef->lookupType( pd ); + + if ( field->typeRef->searchTypeRef != 0 ) + lookup.iterSearchUT = field->typeRef->searchTypeRef->lookupType( pd ); + + return lookup; +} + +VarRefLookup LangVarRef::lookupMethod( ParseData *pd ) +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. */ + assert( lookup.inObject->objMethodMap != 0 ); + ObjMethod *method = lookup.inObject->findMethod( name ); + if ( method == 0 ) + error(loc) << "cannot find " << name << "(...) in object" << endp; + + lookup.objMethod = method; + lookup.uniqueType = method->returnUT; + + return lookup; +} + +void LangVarRef::setFieldInstr( ParseData *pd, CodeVect &code, + ObjectDef *inObject, ObjField *el, UniqueType *exprUT, bool revert ) +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + if ( revert ) + code.append( el->inSetWV ); + else + code.append( el->inSetWC ); + + /* Maybe write out an offset. */ + if ( el->useOffset ) + code.appendHalf( el->offset ); +} + +bool castAssignment( ParseData *pd, CodeVect &code, UniqueType *destUT, + UniqueType *destSearchUT, UniqueType *srcUT ) +{ + if ( destUT == srcUT ) + return true; + + /* Casting trees to any. */ + if ( destUT->typeId == TYPE_TREE && destUT->langEl == pd->anyKlangEl && + srcUT->typeId == TYPE_TREE ) + return true; + + /* Setting a reference from a tree. */ + if ( destUT->typeId == TYPE_REF && srcUT->typeId == TYPE_TREE && + destUT->langEl == srcUT->langEl ) + return true; + + /* Setting an iterator from a tree. */ + if ( destUT->typeId == TYPE_ITER && srcUT->typeId == TYPE_TREE && + destSearchUT->langEl == srcUT->langEl ) + return true; + + /* Assigning nil to a tree. 
*/ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_NIL ) + return true; + + /* Assigning nil to a pointer. */ + if ( destUT->typeId == TYPE_PTR && srcUT->typeId == TYPE_NIL ) + return true; + + return false; +} + +/* Find the field called name in inObject and emit the instruction that sets + * it. Reports an error if there is no such field. */ +void LangVarRef::setField( ParseData *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *exprUT, bool revert ) +{ + ObjFieldMapEl *objDefMapEl = inObject->objFieldMap->find( name ); + if ( objDefMapEl == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + ObjField *el = objDefMapEl->value; + setFieldInstr( pd, code, inObject, el, exprUT, revert ); +} + +/* Emit the instruction that sets the current value of the iterator stored in + * the field called name, followed by the field offset. The instruction comes + * from the iterator definition of objUT. exprType and revert are not used. */ +void LangVarRef::setFieldIter( ParseData *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) +{ + ObjFieldMapEl *objDefMapEl = inObject->objFieldMap->find( name ); + if ( objDefMapEl == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + ObjField *el = objDefMapEl->value; + code.append( objUT->iterDef->inSetCurWC ); + code.appendHalf( el->offset ); +} + +/* Emit the code that loads the variable: first the object it lives in, then + * the field itself. Returns the type of the loaded value. */ +UniqueType *LangVarRef::evaluate( ParseData *pd, CodeVect &code, bool forWriting ) +{ + /* Lookup the loadObj. */ + VarRefLookup lookup = lookupField( pd ); + + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + + /* Load the field. */ + UniqueType *ut = loadFieldInstr( pd, code, lookup.inObject, + lookup.objField, forWriting, false ); + + return ut; +} + +/* Emit the code that takes a reference to this variable, mark the field as + * having an active reference and return the field referenced. */ +ObjField *LangVarRef::evaluateRef( ParseData *pd, CodeVect &code ) +{ + /* Lookup the loadObj. */ + VarRefLookup lookup = lookupField( pd ); + + /* NOTE(review): the two diagnostics below end with endl, while the other + * errors in this file end with endp. Confirm that continuing after them + * is intended. */ + if ( lookup.inObject->type != ObjectDef::FrameType ) + error(loc) << "can only take references of local variables" << endl; + + if ( lookup.objField->refActive ) + error(loc) << "reference current active, cannot take another" << endl; + + /* Ensure that the field is referenced. 
*/ + lookup.inObject->referenceField( pd, lookup.objField ); + + /* Note that we could have modified children. */ + lookup.objField->refActive = true; + + /* Pick the instruction by what the field holds: an iterator, a reference + * or a plain local. Each is followed by the field offset. */ + if ( lookup.objField->typeRef->iterDef != 0 ) { + code.append( lookup.objField->typeRef->iterDef->inRefFromCur ); + code.appendHalf( lookup.objField->offset ); + } + else if ( lookup.objField->typeRef->isRef ) { + code.append( IN_REF_FROM_REF ); + code.appendHalf( lookup.objField->offset ); + } + else { + code.append( IN_REF_FROM_LOCAL ); + code.appendHalf( lookup.objField->offset ); + } + + return lookup.objField; +} + +/* Emit the code for the arguments of a call to the method found by lookup, + * checking the argument count and types. Returns an array allocated with + * new[] that has one entry per argument: the field referenced for a + * reference parameter and 0 for any other parameter. */ +ObjField **LangVarRef::evaluateArgs( ParseData *pd, CodeVect &code, + VarRefLookup &lookup, ExprVect *args ) +{ + /* Parameter list is given only for user defined methods. Otherwise it + * will be null. */ + ParameterList *paramList = lookup.objMethod->paramList; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs != lookup.objMethod->numParams ) + error(loc) << "wrong number of arguments" << endp; + + /* This is for storing the object fields used by references. */ + ObjField **paramRefs = new ObjField*[numArgs]; + memset( paramRefs, 0, sizeof(ObjField*) * numArgs ); + + /* Evaluate and push the args. */ + if ( args != 0 ) { + /* If we have the parameter list, initialize an iterator. */ + ParameterList::Iter p; + paramList != 0 && ( p = *paramList ); + + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = *pe; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + /* Make sure we are dealing with a variable reference. */ + if ( expression->type != LangExpr::TermType ) + error(loc) << "not a term: argument must be a local variable" << endp; + if ( expression->term->type != LangTerm::VarRefType ) + error(loc) << "not a variable: argument must be a local variable" << endp; + + /* Lookup the field. 
*/ + LangVarRef *varRef = expression->term->varRef; + + ObjField *refOf = varRef->evaluateRef( pd, code ); + paramRefs[pe.pos()] = refOf; + } + else { + UniqueType *exprUT = expression->evaluate( pd, code ); + + if ( !castAssignment( pd, code, paramUT, 0, exprUT ) ) + error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp; + } + + /* Advance the parameter list iterator if we have it. */ + paramList != 0 && p.increment(); + } + } + + return paramRefs; +} + +/* Clear the refActive flag of every field recorded in paramRefs. The array + * is expected to have one entry per parameter of the method. */ +void LangVarRef::resetActiveRefs( ParseData *pd, VarRefLookup &lookup, ObjField **paramRefs ) +{ + /* Entries are 0 for the arguments that were not passed by reference. */ + for ( long p = 0; p < lookup.objMethod->numParams; p++ ) { + if ( paramRefs[p] != 0 ) + paramRefs[p]->refActive = false; + } +} + + +/* Emit the call itself: the load of the object the method is called on, if + * the method uses one, then the call opcode in its reverting or plain form + * and the function id if the method uses one. */ +void LangVarRef::callOperation( ParseData *pd, CodeVect &code, VarRefLookup &lookup ) +{ + /* This is for writing if it is a non-const builtin. */ + bool forWriting = lookup.objMethod->func == 0 && + !lookup.objMethod->isConst; + + if ( lookup.objMethod->useCallObj ) { + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + } + + /* Check if we need to revert the function. If it operates on a reference + * or if it is not local then we need to revert it. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); + + /* The call instruction. */ + if ( revert ) + code.append( lookup.objMethod->opcodeWV ); + else + code.append( lookup.objMethod->opcodeWC ); + + if ( lookup.objMethod->useFuncId ) + code.appendHalf( lookup.objMethod->funcId ); +} + +/* Emit the code for a method call: the arguments, then the call. Returns the + * return type of the method. */ +UniqueType *LangVarRef::evaluateCall( ParseData *pd, CodeVect &code, ExprVect *args ) +{ + /* Evaluate the object. */ + VarRefLookup lookup = lookupMethod( pd ); + + /* Evaluate and push the arguments. */ + ObjField **paramRefs = evaluateArgs( pd, code, lookup, args ); + + /* Write the call opcode. 
*/ + callOperation( pd, code, lookup ); + + /* The references taken for the arguments are over once the call has been + * emitted. */ + resetActiveRefs( pd, lookup, paramRefs ); + delete[] paramRefs; + + /* Return the type to the expression. */ + return lookup.uniqueType; +} + +/* Emit the code for a pattern match against the variable varRef, followed by + * the stores into the variables the pattern binds. Returns the type of the + * variable matched against, which must be a tree type. */ +UniqueType *LangTerm::evaluateMatch( ParseData *pd, CodeVect &code ) +{ + /* Assign bind ids to the variables bound by the pattern. */ + for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { + if ( item->varRef != 0 ) + item->bindId = pattern->nextBindId++; + } + + UniqueType *ut = varRef->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE ) + error(varRef->loc) << "expected match against a tree type" << endp; + + /* Store the language element type in the pattern. This is needed by + * the pattern parser. */ + pattern->langEl = ut->langEl; + + code.append( IN_MATCH ); + code.appendHalf( pattern->patRepId ); + + /* The bound variables are set walking the pattern items in reverse. */ + for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) { + if ( item->varRef != 0 ) { + /* Compute the unique type. */ + UniqueType *exprType = pd->findUniqueType( TYPE_TREE, item->factor->langEl ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = item->varRef->lookupField( pd ); + + item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + item->varRef->setField( pd, code, lookup.inObject, exprType, false ); + } + } + + return ut; +} + +/* Emit the code for a new expression. The operand must be a tree and the + * result is the pointer type of the same language element. */ +UniqueType *LangTerm::evaluateNew( ParseData *pd, CodeVect &code ) +{ + /* Evaluate the expression. */ + UniqueType *ut = expr->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE ) + error() << "new can only be applied to tree types" << endp; + + code.append( IN_TREE_NEW ); + return pd->findUniqueType( TYPE_PTR, ut->langEl ); +} + +/* Emit the stores for the field initializers of a construct expression whose + * result has the type replUT. */ +void LangTerm::assignFieldArgs( ParseData *pd, CodeVect &code, UniqueType *replUT ) +{ + /* Now assign the field initializations. Note that we need to do this in + * reverse because the last expression evaluated is at the top of the + * stack. 
*/ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + ObjectDef *objDef = objDefFromUT( pd, replUT ); + /* Note the reverse traversal. */ + for ( FieldInitVect::Iter pi = fieldInitArgs->last(); pi.gtb(); pi-- ) { + FieldInit *fieldInit = *pi; + ObjFieldMapEl *el = objDef->objFieldMap->find( fieldInit->name ); + if ( el == 0 ) { + error(fieldInit->loc) << "failed to find init name " << + fieldInit->name << " in object" << endp; + } + + /* Lookup the type of the field and compare it to the type of the + * expression. */ + ObjField *field = el->value; + UniqueType *fieldUT = field->typeRef->lookupType( pd ); + if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) ) + error(fieldInit->loc) << "type mismatch in initialization" << endp; + + /* The set field instruction must leave the object on the top of + * the stack. + * NOTE(review): field->offset is used here without the + * referenceField call that setFieldInstr makes first. Confirm + * that the field has been initialized by this point. */ + code.append( IN_SET_FIELD_LEAVE_WC ); + code.appendHalf( field->offset ); + } + } +} + +/* Emit the code that constructs a tree from the replacement: the field + * initializer expressions, the variables used in the replacement, the + * construct instruction and finally the field stores. Returns the type named + * by typeRef, which must be a tree type. */ +UniqueType *LangTerm::evaluateTreeConstruct( ParseData *pd, CodeVect &code ) +{ + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { + FieldInit *fieldInit = *pi; + fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); + } + } + + /* Assign bind ids to the variables in the replacement. */ + for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) { + if ( item->varRef != 0 ) + item->bindId = replacement->nextBindId++; + } + + /* Evaluate variable references. */ + for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) { + if ( item->type == ReplItem::VarRefType ) { + UniqueType *ut = item->varRef->evaluate( pd, code ); + + if ( ut->typeId != TYPE_TREE ) + error() << "variables used in replacements must be trees" << endp; + + item->langEl = ut->langEl; + } + } + + /* Construct the tree using the tree information stored in the compiled + * code. 
*/ + code.append( IN_CONSTRUCT ); + code.appendHalf( replacement->patRepId ); + + /* Lookup the type of the replacement and store it in the replacement + * object so that replacement parsing has a target. */ + UniqueType *replUT = typeRef->lookupType( pd ); + if ( replUT->typeId == TYPE_TREE ) + replacement->langEl = replUT->langEl; + else + error(loc) << "don't know how to construct this type" << endp; + + assignFieldArgs( pd, code, replUT ); + + return replUT; +} + + +/* Emit the code that constructs a term directly from the single variable in + * the replacement, instead of from a parsed replacement. Returns the type + * named by typeRef. */ +UniqueType *LangTerm::evaluateTermConstruct( ParseData *pd, CodeVect &code ) +{ + /* Going to make this replacement directly. Take it out of the list of + * replacements so that we don't try to parse it. */ + pd->replList.remove( replacement ); + + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { + FieldInit *fieldInit = *pi; + fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); + } + } + + UniqueType *replUT = typeRef->lookupType( pd ); + + /* Evaluate the expression that we are constructing the term with and make + * the term. 
*/ + ReplItem *replItem = replacement->list->head; + replItem->varRef->evaluate( pd, code ); + code.append( IN_CONSTRUCT_TERM ); + code.appendHalf( replUT->langEl->id ); + + assignFieldArgs( pd, code, replUT ); + return replUT; +} + +/* True if the construct can be done directly from a string: the type is a + * tree type whose language element id is below firstNonTermId and the + * replacement consists of exactly one variable reference of type str. */ +bool LangTerm::constructTermFromString( ParseData *pd ) +{ + UniqueType *replUT = typeRef->lookupType( pd ); + if ( replUT->typeId == TYPE_TREE && replUT->langEl->id < pd->firstNonTermId ) { + if ( replacement->list->length() == 1 ) { + ReplItem *replItem = replacement->list->head; + if ( replItem->type == ReplItem::VarRefType ) { + VarRefLookup lookup = replItem->varRef->lookupField( pd ); + if ( lookup.uniqueType == pd->uniqueTypeStr ) + return true; + } + } + } + return false; +} + +/* Emit the code for a construct expression and return the constructed + * type. */ +UniqueType *LangTerm::evaluateConstruct( ParseData *pd, CodeVect &code ) +{ + /* If the type is a token and the replacement contains just a string then + * construct a token using the text of the string. Otherwise do a normal + * tree construct. */ + if ( constructTermFromString( pd ) ) + return evaluateTermConstruct( pd, code ); + else + return evaluateTreeConstruct( pd, code ); +} + +/* Emit the code for a parse expression. The type named by typeRef is the + * type to parse and the single argument must be a stream. When stop is set + * the id of the language element is emitted after the parser id, otherwise + * zero is emitted in its place. Returns the parsed type. */ +UniqueType *LangTerm::evaluateParse( ParseData *pd, CodeVect &code, bool stop ) +{ + UniqueType *ut = typeRef->lookupType( pd ); + /* NOTE(review): this diagnostic ends with endl, while the ones below end + * with endp. Confirm that continuing after it is intended. */ + if ( ut->typeId != TYPE_TREE ) + error(loc) << "can only parse trees" << endl; + + /* Should be one arg, a stream. */ + if ( args == 0 || args->length() != 1 ) + error(loc) << "expecting one argument" << endp; + + UniqueType *argUT = args->data[0]->evaluate( pd, code ); + if ( argUT != pd->uniqueTypeStream ) + error(loc) << "single argument must be a stream" << endp; + + /* Allocate a parser id. This will cause a parser to be built for + * the type. 
*/ + ut->langEl->parserId = pd->nextParserId++; + + code.append( IN_PARSE ); + code.appendHalf( ut->langEl->parserId ); + if ( stop ) + code.appendHalf( ut->langEl->id ); + else + code.appendHalf( 0 ); + return ut; +} + +/* Emit the code for this term, selected by the term type, and return the + * type of its value. Returns 0 if the term type is not handled. */ +UniqueType *LangTerm::evaluate( ParseData *pd, CodeVect &code ) +{ + switch ( type ) { + case VarRefType: + return varRef->evaluate( pd, code ); + case MethodCallType: + return varRef->evaluateCall( pd, code, args ); + case NilType: + code.append( IN_LOAD_NIL ); + return pd->uniqueTypeNil; + case TrueType: + code.append( IN_LOAD_TRUE ); + return pd->uniqueTypeBool; + case FalseType: + code.append( IN_LOAD_FALSE ); + return pd->uniqueTypeBool; + case MakeTokenType: + return evaluateMakeToken( pd, code ); + case MakeTreeType: + return evaluateMakeTree( pd, code ); + case NumberType: { + /* The text of the number is converted with atoi and loaded as a + * word operand. */ + unsigned int n = atoi( data ); + code.append( IN_LOAD_INT ); + code.appendWord( n ); + return pd->uniqueTypeInt; + } + case StringType: { + String interp; + bool unused; + prepareLitString( interp, unused, data, InputLoc() ); + + /* Make sure we have this string. A string that is newly inserted is + * given the index length()-1 of the map. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( interp, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + return pd->uniqueTypeStr; + } + case MatchType: + return evaluateMatch( pd, code ); + case ParseType: + return evaluateParse( pd, code, false ); + case ParseStopType: + return evaluateParse( pd, code, true ); + case ConstructType: + return evaluateConstruct( pd, code ); + case NewType: + return evaluateNew( pd, code ); + case TypeIdType: { + /* Look up the type and load the id of its language element. */ + UniqueType *ut = typeRef->lookupType( pd ); + if ( ut->typeId != TYPE_TREE ) + error() << "typeid can only be applied to tree types" << endp; + + code.append( IN_LOAD_INT ); + code.appendWord( ut->langEl->id ); + return pd->uniqueTypeInt; + } + case SearchType: { + /* Evaluate the expression. 
*/ + UniqueType *ut = typeRef->lookupType( pd ); + if ( ut->typeId != TYPE_TREE ) + error(loc) << "can only search for tree types" << endp; + + UniqueType *treeUT = varRef->evaluate( pd, code ); + if ( treeUT->typeId != TYPE_TREE ) + error(loc) << "search can be applied only to tree types" << endl; + + code.append( IN_TREE_SEARCH ); + code.appendWord( ut->langEl->id ); + return ut; + }; + } + return 0; +} + +UniqueType *LangExpr::evaluate( ParseData *pd, CodeVect &code ) +{ + switch ( type ) { + case BinaryType: { + switch ( op ) { + case '+': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_ADD_INT ); + return pd->uniqueTypeInt; + } + + if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) { + code.append( IN_CONCAT_STR ); + return pd->uniqueTypeStr; + } + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '-': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_SUB_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '*': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_MULT_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an multiplication " + "operator for these types" << endp; + break; + } + case OP_DoubleEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + code.append( IN_TST_EQL ); + return pd->uniqueTypeBool; + } + case OP_NotEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = 
right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + code.append( IN_TST_NOT_EQL ); + return pd->uniqueTypeBool; + } + /* The four relational operators below do not check the types of their + * operands. */ + case '<': { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_LESS ); + return pd->uniqueTypeBool; + } + case '>': { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_GRTR ); + return pd->uniqueTypeBool; + } + case OP_LessEql: { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_LESS_EQL ); + return pd->uniqueTypeBool; + } + case OP_GrtrEql: { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_GRTR_EQL ); + return pd->uniqueTypeBool; + } + case OP_LogicalAnd: { + /* Evaluate the left and duplicate it. */ + left->evaluate( pd, code ); + code.append( IN_DUP_TOP ); + + /* Jump over the right if false, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Evaluate the right, add the test. Store it separately. */ + right->evaluate( pd, code ); + code.append( IN_TST_LOGICAL_AND ); + + /* Set the distance of the jump. The 3 is presumably the size of the + * jump opcode plus its half operand. TODO confirm. */ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + case OP_LogicalOr: { + /* Evaluate the left and duplicate it. */ + left->evaluate( pd, code ); + code.append( IN_DUP_TOP ); + + /* Jump over the right if true, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_TRUE ); + code.appendHalf( 0 ); + + /* Evaluate the right, add the test. */ + right->evaluate( pd, code ); + code.append( IN_TST_LOGICAL_OR ); + + /* Set the distance of the jump. 
*/ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + } + + assert(false); + return 0; + } + case UnaryType: { + switch ( op ) { + case '!': { + /* Evaluate the left and duplicate it. */ + right->evaluate( pd, code ); + code.append( IN_NOT ); + return pd->uniqueTypeBool; + } + case OP_Deref: { + UniqueType *ut = right->evaluate( pd, code ); + if ( ut->typeId != TYPE_PTR ) + error(loc) << "can only dereference pointers" << endl; + + code.append( IN_PTR_DEREF_R ); + ut = pd->findUniqueType( TYPE_TREE, ut->langEl ); + return ut; + } + default: + assert(false); + } + return 0; + } + case TermType: { + return term->evaluate( pd, code ); + } + } + return 0; +} + +void LangVarRef::assignValue( ParseData *pd, CodeVect &code, + UniqueType *exprUT ) +{ + /* Lookup the left hand side of the assignment. */ + VarRefLookup lookup = lookupField( pd ); + + if ( lookup.objField->refActive ) + error(loc) << "reference active, cannot write to object" << endp; + + if ( lookup.firstConstPart >= 0 ) { + error(loc) << "left hand side qualification \"" << + qual->data[lookup.firstConstPart].data << "\" is const" << endp; + } + + if ( lookup.objField->isConst ) + error(loc) << "field \"" << name << "\" is const" << endp; + + /* Check the types of the assignment and possibly cast. */ + UniqueType *objUT = lookup.objField->typeRef->lookupType( pd ); + assert( lookup.uniqueType == lookup.objField->typeRef->lookupType( pd ) ); + if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) ) + error(loc) << "type mismatch in assignment" << endp; + + /* Decide if we need to revert the assignment. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); + + /* Load the object and generate the field setting code. 
*/ + loadObj( pd, code, lookup.lastPtrInQual, true ); + + if ( lookup.uniqueType->typeId == TYPE_ITER ) + setFieldIter( pd, code, lookup.inObject, lookup.uniqueType, exprUT, false ); + else + setField( pd, code, lookup.inObject, exprUT, revert ); +} + +/* Generate the code for a make_token call: evaluate every argument in order, + * requiring an int first and a str second, then append IN_MAKE_TOKEN and the + * argument count. Returns the any type. */ +UniqueType *LangTerm::evaluateMakeToken( ParseData *pd, CodeVect &code ) +{ +// if ( pd->compileContext != ParseData::CompileTranslation ) +// error(loc) << "make_token can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs < 2 ) + error(loc) << "need at least two arguments" << endp; + + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. */ + UniqueType *exprUT = (*pe)->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, id, must be an int" << endp; + + if ( pe.pos() == 1 && exprUT != pd->uniqueTypeStr ) + error(loc) << "second arg, length, must be a string" << endp; + } + + /* The arguments have been evaluated. Append the make-token instruction + * followed by the argument count. */ + code.append( IN_MAKE_TOKEN ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +/* Generate the code for a make_tree call, which is accepted only while + * compiling a translation block. The first argument must be an int. Appends + * IN_MAKE_TREE and the argument count and returns the any type. */ +UniqueType *LangTerm::evaluateMakeTree( ParseData *pd, CodeVect &code ) +{ + if ( pd->compileContext != ParseData::CompileTranslation ) + error(loc) << "make_tree can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs < 1 ) + error(loc) << "need at least one argument" << endp; + + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. */ + UniqueType *exprUT = (*pe)->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, nonterm id, must be an int" << endp; + } + + /* The token is now created, send it. 
*/ + code.append( IN_MAKE_TREE ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +/* Generate the loop of a for-iterator statement: advance, test, body, jump + * back to the top, then destroy the iterator and take the loop variable out + * of scope. While the body is compiled pd->loopCleanup holds the destroy code + * of any enclosing loop cleanup plus this iterator's. */ +void LangStmt::compileForIterBody( ParseData *pd, CodeVect &code, + ObjField *iterObjField, LangVarRef *iterVarRef, + UniqueType *iterUT ) +{ + /* Remember the top of the loop. */ + long top = code.length(); + + /* Advance the iterator. */ + code.append( iterUT->iterDef->inAdvance ); + code.appendHalf( iterObjField->offset ); + + /* Test: jump past the loop body if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* + * Set up the loop cleanup code. + */ + + /* Set up the current loop cleanup. */ + CodeVect loopCleanup; + if ( pd->loopCleanup != 0 ) + loopCleanup.setAs( *pd->loopCleanup ); + + /* Add the cleanup for the current loop. */ + loopCleanup.append( iterUT->iterDef->inDestroy ); + loopCleanup.appendHalf( iterObjField->offset ); + + /* Push the loop cleanup. */ + CodeVect *oldLoopCleanup = pd->loopCleanup; + pd->loopCleanup = &loopCleanup; + + /* Compile the contents. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + /* Pop the loop cleanup. */ + pd->loopCleanup = oldLoopCleanup; + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. */ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); + + /* Destroy the iterator. 
*/ + code.append( iterUT->iterDef->inDestroy ); + code.appendHalf( iterObjField->offset ); + + unscopeIterVariable( pd, iterObjField ); +} + +/* Declare the loop variable of a for-iterator statement in the current local + * frame, with the given iterator type. It is an error if the name is already + * present in the frame. Returns the new field. */ +ObjField *LangStmt::createIterVariable( ParseData *pd, TypeRef *iterTypeRef ) +{ + /* Check for redeclaration. */ + if ( pd->curLocalFrame->objFieldMap->find( name ) != 0 ) + error(loc) << "variable " << name << " redeclared" << endp; + + /* Create the field and insert it into the field map. */ + ObjField *iterObjField = new ObjField( loc, iterTypeRef, name ); + pd->curLocalFrame->objFieldMap->insert( name, iterObjField ); + pd->curLocalFrame->initField( pd, iterObjField ); + return iterObjField; +} + +/* Remove the loop variable's name from the current local frame's field map. + * The iterObjField argument is not used; the lookup is by name. */ +void LangStmt::unscopeIterVariable( ParseData *pd, ObjField *iterObjField ) +{ + pd->curLocalFrame->objFieldMap->detach( name ); +} + +/* Build a call to the default iterator, "triter", taking the statement's + * variable reference as its only argument. Used when the for statement names + * a variable rather than an iterator call. */ +LangTerm *LangStmt::chooseDefaultIter( ParseData *pd ) +{ + /* Lookup the lang term and decide what iterator to use based + * on its type. */ + VarRefLookup lookup = langTerm->varRef->lookupField( pd ); + + if ( lookup.inObject->type != ObjectDef::FrameType ) + error(loc) << "root of iteration must be a local" << endp; + + LangVarRef *callVarRef = 0; + if ( lookup.uniqueType->typeId == TYPE_TREE || + lookup.uniqueType->typeId == TYPE_REF || + lookup.uniqueType->typeId == TYPE_ITER || + lookup.uniqueType->typeId == TYPE_PTR ) + { + /* The iterator name. */ + callVarRef = new LangVarRef( loc, new QualItemVect, "triter" ); + } + else { + error(loc) << "there is no default iterator for a " + "root of that type" << endp; + } + + /* The parameters. 
*/ + ExprVect *callExprVect = new ExprVect; + LangExpr *callExpr = new LangExpr( new LangTerm( + LangTerm::VarRefType, langTerm->varRef ) ); + callExprVect->append( callExpr ); + + LangTerm *callLangTerm = new LangTerm( callVarRef, callExprVect ); + + return callLangTerm; +} + +/* Compile a for-iterator statement: resolve the iterator being called, + * declare the loop variable, push the call arguments, append the iterator's + * create instruction with its operands, then compile the loop itself. */ +void LangStmt::compileForIter( ParseData *pd, CodeVect &code ) +{ + /* Anything other than a method call gets the default iterator. */ + if ( langTerm->type != LangTerm::MethodCallType ) + langTerm = chooseDefaultIter( pd ); + + /* The type we are searching for. */ + UniqueType *searchUT = typeRef->lookupType( pd ); + + /* + * Declare the iterator variable. + */ + VarRefLookup lookup = langTerm->varRef->lookupMethod( pd ); + if ( lookup.objMethod->iterDef == 0 ) { + error(loc) << "attempt to iterate using something " + "that is not an iterator" << endp; + } + + /* Type ref and object field for the iterator. */ + TypeRef *iterTypeRef = new TypeRef( loc, lookup.objMethod->iterDef, typeRef ); + ObjField *iterObjField = createIterVariable( pd, iterTypeRef ); + + /* + * Create the iterator from the local var. + */ + + LangVarRef *iterVarRef = new LangVarRef( loc, new QualItemVect, name ); + UniqueType *iterUT = iterTypeRef->lookupType( pd ); + + /* Evaluate and push the arguments. */ + ObjField **paramRefs = langTerm->varRef->evaluateArgs( + pd, code, lookup, langTerm->args ); + + /* The create instruction takes the loop variable's offset, then the + * function id when the method has a function, then the id of the + * searched-for type when the iterator uses one. */ + code.append( iterUT->iterDef->inCreate ); + code.appendHalf( iterObjField->offset ); + if ( lookup.objMethod->func != 0 ) + code.appendHalf( lookup.objMethod->func->funcId ); + + if ( iterUT->iterDef->useSearchUT ) { + if ( searchUT->typeId == TYPE_PTR ) + code.appendHalf( pd->uniqueTypePtr->langEl->id ); + else + code.appendHalf( searchUT->langEl->id ); + } + + compileForIterBody( pd, code, iterObjField, iterVarRef, iterUT ); + + langTerm->varRef->resetActiveRefs( pd, lookup, paramRefs ); + delete[] paramRefs; +} + +/* Compile a while statement: test, conditional jump past the body, body, + * jump back to the test. Pending break jumps are pointed at the end of the + * loop. */ +void LangStmt::compileWhile( ParseData *pd, CodeVect &code ) +{ + /* Generate code for the while test. Remember the top. 
*/ + long top = code.length(); + expr->evaluate( pd, code ); + + /* Jump past the while block if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Compile the while block. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. */ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); +} + +/* Generate the code for one statement, dispatching on the statement type. + * Jumps whose target is not yet known are appended with a zero distance and + * patched later, either here or by the enclosing loop or function. */ +void LangStmt::compile( ParseData *pd, CodeVect &code ) +{ + switch ( type ) { + case PrintType: + case PrintXMLType: { + /* The evaluated types are stored but not otherwise used here. */ + UniqueType **types = new UniqueType*[exprPtrVect->length()]; + + /* Push the args backwards. */ + for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) + types[pex.pos()] = (*pex)->evaluate( pd, code ); + + /* Run the printing forwards. */ + if ( type == PrintType ) { + for ( ExprVect::Iter pex = *exprPtrVect; pex.lte(); pex++ ) + code.append( IN_PRINT ); + } + else { + for ( ExprVect::Iter pex = *exprPtrVect; pex.lte(); pex++ ) + code.append( IN_PRINT_XML ); + } + + delete[] types; + + break; + } + case ExprType: { + /* Evaluate the expression, then pop it immediately. */ + expr->evaluate( pd, code ); + code.append( IN_POP ); + break; + } + case IfType: { + long jumpFalse, jumpPastElse, distance; + + /* Evaluate the test. */ + expr->evaluate( pd, code ); + + /* Jump past the if block if false. We don't know the distance + * yet so store the location of the jump. 
*/ + jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Compile the if true branch. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + if ( elsePart != 0 ) { + /* Jump past the else code for the if true branch. */ + jumpPastElse = code.length(); + code.append( IN_JMP ); + code.appendHalf( 0 ); + } + + /* Set the distance for the jump false case. */ + distance = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, distance ); + + if ( elsePart != 0 ) { + /* Compile the else branch. */ + for ( StmtList::Iter stmt = *elsePart; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + /* Set the distance for jump over the else part. */ + distance = code.length() - jumpPastElse - 3; + code.setHalf( jumpPastElse+1, distance ); + } + + break; + } + case RejectType: { + code.append( IN_REJECT ); + break; + } + case WhileType: { + compileWhile( pd, code ); + break; + } + case AssignType: { + /* Evaluate the expression. */ + UniqueType *exprUT = expr->evaluate( pd, code ); + + /* Do the assignment. */ + varRef->assignValue( pd, code, exprUT ); + break; + } + case ForIterType: { + compileForIter( pd, code ); + break; + } + case ReturnType: { + /* Evaluate the expression. */ + UniqueType *exprUT = expr->evaluate( pd, code ); + + /* The value must be castable to the function's declared type. */ + UniqueType *resUT = pd->curFunction->typeRef->lookupType( pd ); + if ( !castAssignment( pd, code, resUT, 0, exprUT ) ) + error(loc) << "return value wrong type" << endp; + + code.append( IN_SAVE_RET ); + + /* The loop cleanup code. */ + if ( pd->loopCleanup != 0 ) + code.append( *pd->loopCleanup ); + + /* Jump to the return label. The distance will be filled in + * later. */ + pd->returnJumps.append( code.length() ); + code.append( IN_JMP ); + code.appendHalf( 0 ); + break; + } + case BreakType: { + /* Record the position of the jump. The loop compile functions fill + * in the distance for every entry in pd->breakJumps. */ + pd->breakJumps.append( code.length() ); + code.append( IN_JMP ); + code.appendHalf( 0 ); + break; + } + case YieldType: { + /* take a reference and yield it. 
Immediately reset the referece. */ + ObjField *objField = varRef->evaluateRef( pd, code ); + objField->refActive = false; + code.append( IN_YIELD ); + break; + } + } +} + +void CodeBlock::compile( ParseData *pd, CodeVect &code ) +{ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); +} + +void ParseData::addProdRedObjectVar( ObjectDef *localFrame, KlangEl *nonTerm ) +{ + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm ); + TypeRef *typeRef = new TypeRef( InputLoc(), prodNameUT ); + ObjField *el = new ObjField( InputLoc(), typeRef, "lhs" ); + + /* Is the only item pushed to the stack just before a reduction action is + * executed. We rely on a zero offset. */ + el->beenReferenced = true; + el->beenInitialized = true; + el->isLhsEl = true; + el->offset = 0; + + initLocalInstructions( el ); + + localFrame->objFieldMap->insert( el->name, el ); +} + +void ParseData::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ) +{ + long position = 1; + for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) { + if ( rhsEl->type == PdaFactor::ReferenceType ) { + TypeRef *typeRef = new TypeRef( rhsEl->loc, rhsEl->nspaceQual, rhsEl->refName ); + + /* Use an offset of zero. For frame objects we compute the offset on + * demand. */ + String name( 8, "r%d", position ); + ObjField *el = new ObjField( InputLoc(), typeRef, name ); + rhsEl->objField = el; + + /* Right hand side elements are constant. */ + el->isConst = true; + el->isRhsEl = true; + + /* Only ever fetch for reading since they are constant. 
*/ + el->inGetR = IN_GET_LOCAL_R; + + localFrame->objFieldMap->insert( el->name, el ); + } + } +} + +void ParseData::addProdRHSLoads( Definition *prod, long codeInsertPos ) +{ + CodeVect code; + long position = 0; + for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, position++ ) { + if ( rhsEl->type == PdaFactor::ReferenceType ) { + if ( rhsEl->objField->beenReferenced ) { + code.append ( IN_INIT_RHS_EL ); + code.appendHalf( position ); + code.appendHalf( rhsEl->objField->offset ); + } + } + } + prod->redBlock->code.insert( codeInsertPos, code ); +} + +void ParseData::addMatchLength( ObjectDef *frame, KlangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "match_length" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_MATCH_LENGTH_R; + frame->objFieldMap->insert( el->name, el ); +} + +void ParseData::addMatchText( ObjectDef *frame, KlangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the field and insert it into the map. 
*/ + ObjField *el = new ObjField( InputLoc(), typeRef, "match_text" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_MATCH_TEXT_R; + frame->objFieldMap->insert( el->name, el ); +} + +/* Give the field the get and set instructions used for object fields. */ +void ParseData::initFieldInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_FIELD_R; + el->inGetWC = IN_GET_FIELD_WC; + el->inGetWV = IN_GET_FIELD_WV; + el->inSetWC = IN_SET_FIELD_WC; + el->inSetWV = IN_SET_FIELD_WV; +} + +/* Give the field the get and set instructions used for local variables. The + * WV variants are not assigned here. */ +void ParseData::initLocalInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_LOCAL_R; + el->inGetWC = IN_GET_LOCAL_WC; + el->inSetWC = IN_SET_LOCAL_WC; +} + +/* Give the field the get and set instructions used for locals that are + * references. The WV variants are not assigned here. */ +void ParseData::initLocalRefInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_LOCAL_REF_R; + el->inGetWC = IN_GET_LOCAL_REF_WC; + el->inSetWC = IN_SET_LOCAL_REF_WC; +} + +/* Create the builtin "int" object definition, attach it to the int language + * element and register its to_string method. */ +void ParseData::initIntObject( ) +{ + ObjFieldMap *fieldMap = new ObjFieldMap; + ObjMethodMap *methodMap = new ObjMethodMap; + intObj = new ObjectDef( ObjectDef::BuiltinType, "int", + fieldMap, methodMap, nextObjectId++ ); + intKlangEl->objectDef = intObj; + + initFunction( uniqueTypeStr, intObj, "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true ); +} + +/* Add a constant length field to the object. + * Opcode supplied by the caller. */ +void ParseData::addLengthField( ObjectDef *objDef, Code getLength ) +{ + /* Create the "length" field. 
*/ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + ObjField *el = new ObjField( InputLoc(), typeRef, "length" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = getLength; + + objDef->objFieldMap->insert( el->name, el ); +} + +/* Create the builtin "str" object definition, attach it to the str language + * element, and register its int-returning conversion methods and its length + * field. */ +void ParseData::initStrObject( ) +{ + ObjFieldMap *fieldMap = new ObjFieldMap; + ObjMethodMap *methodMap = new ObjMethodMap; + strObj = new ObjectDef( ObjectDef::BuiltinType, "str", + fieldMap, methodMap, nextObjectId++ ); + strKlangEl->objectDef = strObj; + + initFunction( uniqueTypeInt, strObj, "atoi", IN_STR_ATOI, IN_STR_ATOI, true ); + initFunction( uniqueTypeInt, strObj, "uord8", IN_STR_UORD8, IN_STR_UORD8, true ); + initFunction( uniqueTypeInt, strObj, "sord8", IN_STR_SORD8, IN_STR_SORD8, true ); + initFunction( uniqueTypeInt, strObj, "uord16", IN_STR_UORD16, IN_STR_UORD16, true ); + initFunction( uniqueTypeInt, strObj, "sord16", IN_STR_SORD16, IN_STR_SORD16, true ); + initFunction( uniqueTypeInt, strObj, "uord32", IN_STR_UORD32, IN_STR_UORD32, true ); + initFunction( uniqueTypeInt, strObj, "sord32", IN_STR_SORD32, IN_STR_SORD32, true ); + addLengthField( strObj, IN_STR_LENGTH ); +} + +/* Create the builtin "stream" object definition and attach it to the stream + * language element. No methods or fields are registered on it. */ +void ParseData::initStreamObject( ) +{ + ObjFieldMap *fieldMap = new ObjFieldMap; + ObjMethodMap *methodMap = new ObjMethodMap; + streamObj = new ObjectDef( ObjectDef::BuiltinType, "stream", + fieldMap, methodMap, nextObjectId++ ); + streamKlangEl->objectDef = streamObj; + +// initFunction( uniqueTypeInt, strObj, "atoi", IN_STR_ATOI, IN_STR_ATOI, true ); +// initFunction( uniqueTypeInt, strObj, "uord8", IN_STR_UORD8, IN_STR_UORD8, true ); +// initFunction( uniqueTypeInt, strObj, "sord8", IN_STR_SORD8, IN_STR_SORD8, true ); +// initFunction( uniqueTypeInt, strObj, "uord16", IN_STR_UORD16, IN_STR_UORD16, true ); +// initFunction( uniqueTypeInt, strObj, "sord16", IN_STR_SORD16, IN_STR_SORD16, true ); +// initFunction( uniqueTypeInt, strObj, "uord32", IN_STR_UORD32, 
IN_STR_UORD32, true ); +// initFunction( uniqueTypeInt, strObj, "sord32", IN_STR_SORD32, IN_STR_SORD32, true ); +// addLengthField( strObj, IN_STR_LENGTH ); +} + +/* Make a new str field named "data" that is read and written with the token + * data instructions instead of an offset. The caller inserts it into an + * object's field map. */ +ObjField *ParseData::makeDataEl() +{ + /* Create the "data" field. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + ObjField *el = new ObjField( InputLoc(), typeRef, "data" ); + + /* Setting beenReferenced to true prevents us from assigning instructions + * and an offset to the field. */ + + el->beenReferenced = true; + el->beenInitialized = true; + el->useOffset = false; + el->inGetR = IN_GET_TOKEN_DATA_R; + el->inSetWC = IN_SET_TOKEN_DATA_WC; + el->inSetWV = IN_SET_TOKEN_DATA_WV; + return el; +} + +/* Make a new constant int field named "pos" that is read with + * IN_GET_TOKEN_POS_R instead of an offset. The caller inserts it into an + * object's field map. */ +ObjField *ParseData::makePosEl() +{ + /* Create the "pos" field. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + ObjField *el = new ObjField( InputLoc(), typeRef, "pos" ); + + /* Setting beenReferenced to true prevents us from assigning instructions + * and an offset to the field. */ + + el->isConst = true; + el->beenReferenced = true; + el->beenInitialized = true; + el->useOffset = false; + el->inGetR = IN_GET_TOKEN_POS_R; + return el; +} + +/* Create the builtin "token" object definition with "data" and "pos" fields. + * User terminals without an object definition share it; user terminals that + * already have one get their own "data" and "pos" fields added. */ +void ParseData::initTokenObjects( ) +{ + /* Make a default object Definition. */ + ObjFieldMap *fieldMap = new ObjFieldMap; + ObjMethodMap *methodMap = new ObjMethodMap; + tokenObj = new ObjectDef( ObjectDef::BuiltinType, "token", fieldMap, + methodMap, nextObjectId++ ); + + ObjField *dataEl = makeDataEl(); + tokenObj->objFieldMap->insert( dataEl->name, dataEl ); + + ObjField *posEl = makePosEl(); + tokenObj->objFieldMap->insert( posEl->name, posEl ); + + + /* Give all user terminals the token object type. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isUserTerm ) { + if ( lel->objectDef == 0 ) + lel->objectDef = tokenObj; + else { + /* Create the "data" field. */ + ObjField *dataEl = makeDataEl(); + lel->objectDef->objFieldMap->insert( dataEl->name, dataEl ); + + /* Create the "pos" field. 
*/ + ObjField *posEl = makePosEl(); + lel->objectDef->objFieldMap->insert( posEl->name, posEl ); + } + } + } +} + +/* Collect into 'trees' the offsets of the referenced tree-typed and + * pointer-typed fields of the current local frame, leaving out "lhs". */ +void ParseData::findLocalTrees( CharSet &trees ) +{ + /* We exclude "lhs" from being downrefed because we need to use it after + * the frame is cleaned and so it must survive. */ + for ( ObjFieldMap::Iter of = *curLocalFrame->objFieldMap; of.lte(); of++ ) { + ObjField *el = of->value; + if ( !el->isLhsEl && el->beenReferenced ) { + UniqueType *ut = el->typeRef->lookupType( this ); + if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_PTR ) + trees.insert( el->offset ); + } + } +} + +/* Compile the reduction action of a production. The code is wrapped in + * IN_INIT_LOCALS / IN_POP_LOCALS and ends with IN_STOP; the loads of the + * referenced right hand side elements are inserted just after the frame + * allocation once the block has been compiled. */ +void ParseData::compileReductionCode( Definition *prod ) +{ + CodeBlock *block = prod->redBlock; + + /* Init the compilation context. */ + compileContext = CompileReduction; + curLocalFrame = block->localFrame; + block->frameId = nextFrameId++; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + long afterAllocFrame = block->code.length(); + + /* Compile the reduce block. */ + block->compile( this, block->code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + block->code.setHalf( 1, frameSize ); + + /* Insert the right hand side loads after the frame allocation. */ + addProdRHSLoads( prod, afterAllocFrame ); + + block->code.append( IN_POP_LOCALS ); + block->code.appendHalf( block->frameId ); + block->code.appendHalf( frameSize ); + + block->code.append( IN_STOP ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Compile the translation block of a language element. match_length, + * match_text and the pull, push, send and send_ignore functions are added to + * the block's local frame before it is compiled. */ +void ParseData::compileTranslateBlock( KlangEl *langEl ) +{ + CodeBlock *block = langEl->transBlock; + + /* Set up compilation context. */ + compileContext = CompileTranslation; + curLocalFrame = block->localFrame; + block->frameId = nextFrameId++; + + /* References to the reduce item. 
*/ + addMatchLength( curLocalFrame, langEl ); + addMatchText( curLocalFrame, langEl ); + initFunction( uniqueTypeStr, curLocalFrame, "pull", + IN_STREAM_PULL, IN_STREAM_PULL, uniqueTypeStream, uniqueTypeInt, true ); + initFunction( uniqueTypeInt, curLocalFrame, "push", + IN_STREAM_PUSH, IN_STREAM_PUSH, uniqueTypeStream, uniqueTypeAny, true ); + + initFunction( uniqueTypeInt, curLocalFrame, "send", + IN_SEND, IN_SEND, uniqueTypeAny, true ); + initFunction( uniqueTypeInt, curLocalFrame, "send_ignore", + IN_IGNORE, IN_IGNORE, uniqueTypeAny, true ); + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + + /* Compile the translation block. */ + block->compile( this, block->code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + block->code.setHalf( 1, frameSize ); + + block->code.append( IN_POP_LOCALS ); + block->code.appendHalf( block->frameId ); + block->code.appendHalf( frameSize ); + + block->code.append( IN_STOP ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Compile the pre-eof block of a token region. It is compiled in the + * translation context, with only the send and send_ignore functions added to + * the block's local frame. */ +void ParseData::compilePreEof( TokenRegion *region ) +{ + CodeBlock *block = region->preEofBlock; + + /* Set up compilation context. */ + compileContext = CompileTranslation; + curLocalFrame = region->preEofBlock->localFrame; + block->frameId = nextFrameId++; + + /* References to the reduce item. 
*/ +// addMatchLength( curLocalFrame, langEl ); +// addMatchText( curLocalFrame, langEl ); +// initFunction( uniqueTypeStr, curLocalFrame, "pull", +// IN_STREAM_PULL, IN_STREAM_PULL, uniqueTypeStream, uniqueTypeInt, true ); + + initFunction( uniqueTypeInt, curLocalFrame, "send", + IN_SEND, IN_SEND, uniqueTypeAny, true ); + initFunction( uniqueTypeInt, curLocalFrame, "send_ignore", + IN_IGNORE, IN_IGNORE, uniqueTypeAny, true ); + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + + /* Compile the pre-eof block. */ + block->compile( this, block->code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + block->code.setHalf( 1, frameSize ); + + block->code.append( IN_POP_LOCALS ); + block->code.appendHalf( block->frameId ); + block->code.appendHalf( frameSize ); + + block->code.append( IN_STOP ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Compile the root code block using the root local frame. The code is + * wrapped in IN_INIT_LOCALS / IN_POP_LOCALS and ends with IN_STOP. */ +void ParseData::compileRootBlock( ) +{ + CodeBlock *block = rootCodeBlock; + + /* Set up the compile context. No locals are needed for the root code + * block, but we need an empty local frame for the compile. */ + compileContext = CompileRoot; + curLocalFrame = rootLocalFrame; + block->frameId = nextFrameId++; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + + block->compile( this, block->code ); + + /* We have the frame size now. Store it in frame init. 
*/ + long frameSize = curLocalFrame->size(); + block->code.setHalf( 1, frameSize ); + + block->code.append( IN_POP_LOCALS ); + block->code.appendHalf( block->frameId ); + block->code.appendHalf( frameSize ); + + block->code.append( IN_STOP ); + + /* Make the local trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Initialize every field of every language element that has an object + * definition, then every field of the global object. */ +void ParseData::initAllLanguageObjects() +{ + /* Init all user object fields (need consistent size). */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + ObjectDef *obj = lel->objectDef; + if ( obj != 0 ) { + /* Init all fields of the object. */ + for ( ObjFieldMap::Iter f = *obj->objFieldMap; f.lte(); f++ ) + obj->initField( this, f->value ); + } + } + + /* Init all fields of the global object. */ + for ( ObjFieldMap::Iter f = *globalObjectDef->objFieldMap; f.lte(); f++ ) + globalObjectDef->initField( this, f->value ); +} + +/* Register the length field and the find, insert, store and remove methods + * on a map generic's object definition. */ +void ParseData::initMapFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_MAP_LENGTH ); + initFunction( gen->utArg, gen->objDef, "find", + IN_MAP_FIND, IN_MAP_FIND, gen->keyUT, true ); + initFunction( uniqueTypeInt, gen->objDef, "insert", + IN_MAP_INSERT_WC, IN_MAP_INSERT_WV, gen->keyUT, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "store", + IN_MAP_STORE_WC, IN_MAP_STORE_WV, gen->keyUT, gen->utArg, false ); + initFunction( gen->utArg, gen->objDef, "remove", + IN_MAP_REMOVE_WC, IN_MAP_REMOVE_WV, gen->keyUT, false ); +} + +/* Register the length field and the methods of a list generic. "push" uses + * the same instructions as "append", and "pop" the same as "remove_end". */ +void ParseData::initListFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_LIST_LENGTH ); + + initFunction( uniqueTypeInt, gen->objDef, "append", + IN_LIST_APPEND_WC, IN_LIST_APPEND_WV, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "push", + IN_LIST_APPEND_WC, IN_LIST_APPEND_WV, gen->utArg, false ); + + initFunction( gen->utArg, gen->objDef, "remove_end", + IN_LIST_REMOVE_END_WC, IN_LIST_REMOVE_END_WV, false ); + initFunction( gen->utArg, gen->objDef, "pop", + IN_LIST_REMOVE_END_WC, IN_LIST_REMOVE_END_WV, false ); +} + +/* Add a field of the list's element type, with the given name and offset, + * that is accessed with the list member instructions. */ +void 
ParseData::initListField( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. */ + TypeRef *typeRef = new TypeRef( InputLoc(), gen->utArg ); + ObjField *el = new ObjField( InputLoc(), typeRef, name ); + + el->inGetR = IN_GET_LIST_MEM_R; + el->inGetWC = IN_GET_LIST_MEM_WC; + el->inGetWV = IN_GET_LIST_MEM_WV; + el->inSetWC = IN_SET_LIST_MEM_WC; + el->inSetWV = IN_SET_LIST_MEM_WV; + + gen->objDef->objFieldMap->insert( el->name, el ); + + el->useOffset = true; + el->beenReferenced = true; + el->beenInitialized = true; + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +/* Add the "head", "tail" and "top" fields to a list generic. "top" has the + * same offset as "tail". */ +void ParseData::initListFields( GenericType *gen ) +{ + initListField( gen, "head", 0 ); + initListField( gen, "tail", 1 ); + initListField( gen, "top", 1 ); +} + +/* Register the length field and the append and insert methods on a vector + * generic's object definition. */ +void ParseData::initVectorFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_VECTOR_LENGTH ); + initFunction( uniqueTypeInt, gen->objDef, "append", + IN_VECTOR_APPEND_WC, IN_VECTOR_APPEND_WV, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "insert", + IN_VECTOR_INSERT_WC, IN_VECTOR_INSERT_WV, uniqueTypeInt, gen->utArg, false ); +} + +/* For every generic in every namespace: resolve its type argument (and key + * type for maps), create its builtin object definition, register the + * functions and fields for its kind, and attach the definition to the + * generic's language element. */ +void ParseData::resolveGenericTypes() +{ + for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) { + for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) { + gen->utArg = gen->typeArg->lookupType( this ); + + if ( gen->typeId == GEN_MAP ) + gen->keyUT = gen->keyTypeArg->lookupType( this ); + + ObjFieldMap *fieldMap = new ObjFieldMap; + ObjMethodMap *methodMap = new ObjMethodMap; + gen->objDef = new ObjectDef( ObjectDef::BuiltinType, + gen->name, fieldMap, methodMap, nextObjectId++ ); + + switch ( gen->typeId ) { + case GEN_MAP: + initMapFunctions( gen ); + break; + case GEN_LIST: + initListFunctions( gen ); + initListFields( gen ); + break; + case GEN_VECTOR: + initVectorFunctions( gen ); + break; + } + + gen->langEl->objectDef = gen->objDef; + } + } +} + +/* Set up a function's parameters as fields of its local frame, compute their + * offsets, and insert the function into the global object's method map. User + * iterators are additionally given an iterator definition. */ +void ParseData::makeFuncVisible( Function 
*func, bool isUserIter ) +{ + /* Need an object for the local frame. */ + curLocalFrame = func->codeBlock->localFrame; + func->localFrame = func->codeBlock->localFrame; + + /* Set up the parameters. */ + long paramPos = 0, paramListSize = 0; + UniqueType **paramUTs = new UniqueType*[func->paramList->length()]; + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + paramUTs[paramPos] = param->typeRef->lookupType( this ); + + if ( func->localFrame->objFieldMap->find( param->name ) != 0 ) + error(param->loc) << "parameter " << param->name << " redeclared" << endp; + + func->localFrame->objFieldMap->insert( param->name, param ); + param->beenInitialized = true; + param->pos = paramPos; + + /* Initialize the object field as a local variable. We also want trees + * downreffed. */ + if ( paramUTs[paramPos]->typeId == TYPE_REF ) + initLocalRefInstructions( param ); + else + initLocalInstructions( param ); + + paramListSize += sizeOfField( paramUTs[paramPos] ); + paramPos += 1; + } + + /* Param offset is relative to one past the last item in the array of + * words containing the args. */ + long paramOffset = 0; + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + + /* Moving downward, and need the offset to point to the lower half of + * the argument. */ + paramOffset -= sizeOfField( paramUTs[param->pos] ); + + /* How much space do we need to make for call overhead. */ + long frameAfterArgs = isUserIter ? IFR_AA : FR_AA; + + /* Going up first we have the frame data, then maybe + * the user iterator, then the args from high to low. */ + param->offset = frameAfterArgs + + ( isUserIter ? ( sizeof(UserIter) / sizeof(Word) ) : 0 ) + + paramListSize + paramOffset; + } + + func->paramListSize = paramListSize; + func->paramUTs = paramUTs; + + /* Insert the function into the global function map. */ + UniqueType *returnUT = func->typeRef != 0 ? 
+ func->typeRef->lookupType(this) : uniqueTypeInt; + ObjMethod *objMethod = new ObjMethod( returnUT, func->name, + IN_CALL, IN_CALL, + func->paramList->length(), paramUTs, func->paramList, false ); + objMethod->funcId = func->funcId; + objMethod->useFuncId = true; + objMethod->useCallObj = false; + objMethod->func = func; + + if ( isUserIter ) { + IterDef *uiter = findIterDef( IterDef::User, func ); + objMethod->iterDef = uiter; + } + + globalObjectDef->objMethodMap->insert( func->name, objMethod ); +} + +void ParseData::compileUserIter( Function *func ) +{ + CodeBlock *block = func->codeBlock; + + compileContext = CompileFunction; + curFunction = func; + block->frameId = nextFrameId++; + + makeFuncVisible( func, true ); + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + + /* Compile the block. */ + block->compile( this, block->code ); + + /* We have the frame size now. Set it in the alloc frame instruction. */ + int frameSize = func->localFrame->size(); + block->code.setHalf( 1, frameSize ); + + /* Check for a trailing yield statement. */ + if ( block->stmtList->length() == 0 || + block->stmtList->tail->type != LangStmt::YieldType ) + { + /* No trailing yield: load nil and yield it. */ + block->code.append( IN_LOAD_NIL ); + block->code.append( IN_YIELD ); + } + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); + + /* FIXME: Need to deal with the freeing of local trees. */ +} + +void ParseData::compileFunction( Function *func ) +{ + CodeBlock *block = func->codeBlock; + + compileContext = CompileFunction; + curFunction = func; + block->frameId = nextFrameId++; + + makeFuncVisible( func, false ); + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. 
*/ + block->code.append( IN_INIT_LOCALS ); + block->code.appendHalf( 0 ); + + /* Compile the block. */ + block->compile( this, block->code ); + + /* We have the frame size now. Set it in the alloc frame instruction. */ + int frameSize = func->localFrame->size(); + block->code.setHalf( 1, frameSize ); + + /* Check for a trailing return statement. */ + if ( block->stmtList->length() == 0 || + block->stmtList->tail->type != LangStmt::ReturnType ) + { + /* No trailing return: load nil and save it as the return value. */ + block->code.append( IN_LOAD_NIL ); + block->code.append( IN_SAVE_RET ); + } + + /* Compute the jump distance for the return jumps. */ + for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) { + long distance = block->code.length() - *rj - 3; + block->code.setHalf( *rj+1, distance ); + } + + /* Reset the vector of return jumps. */ + returnJumps.empty(); + + /* Return cleans up the stack (including the args) and leaves the return + * value on the top. */ + block->code.append( IN_RET ); + block->code.appendHalf( func->funcId ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +void ParseData::makeDefaultIterators() +{ + /* Tree iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyKlangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "triter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Tree ); + objMethod->iterDef = triter; + } + + /* Child iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyKlangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Child ); + objMethod->iterDef = triter; + } + + /* Reverse child iterator. 
*/ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyKlangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "rev_child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevChild ); + objMethod->iterDef = triter; + } +} + +void ParseData::addStdin() +{ + /* Make the type ref. The stdin field is typed as a stream. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStream ); + + /* Create the constant, offset-less field and insert it into the global object's field map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "stdin" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDIN; + globalObjectDef->objFieldMap->insert( el->name, el ); +} + +void ParseData::addStdout() +{ + /* Make the type ref. NOTE(review): typed as str while stdin is typed as a stream; confirm this is intended. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the constant, offset-less field and insert it into the global object's field map. The name matches the stdin and stderr siblings. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "stdout" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDOUT; + globalObjectDef->objFieldMap->insert( el->name, el ); +} + +void ParseData::addStderr() +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the field and insert it into the map. 
*/ + ObjField *el = new ObjField( InputLoc(), typeRef, "stderr" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDERR; + globalObjectDef->objFieldMap->insert( el->name, el ); +} + +void ParseData::initGlobalFunctions() +{ + ObjMethod *method; + + method = initFunction( uniqueTypeStream, globalObjectDef, "open_file", + IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, true ); + method->useCallObj = false; + + addStdin(); + addStdout(); + addStderr(); +} + +void ParseData::compileByteCode() +{ + initUniqueTypes(); + initIntObject(); + initStrObject(); + initStreamObject(); + initTokenObjects(); + makeDefaultIterators(); + initAllLanguageObjects(); + resolveGenericTypes(); + + initGlobalFunctions(); + + /* The function info structure relies on functions being compiled first, + * then iterators. */ + + /* Compile functions and user iterators, in function list order. */ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { + if ( f->isUserIter ) + compileUserIter( f ); + else + compileFunction( f ); + } + + /* Compile the reduction code. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->redBlock != 0 ) + compileReductionCode( prod ); + } + + /* Compile the token translation code. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->transBlock != 0 ) + compileTranslateBlock( lel ); + } + + /* Compile preeof blocks. */ + for ( RegionList::Iter r = regionList; r.lte(); r++ ) { + if ( r->preEofBlock != 0 ) + compilePreEof( r ); + } + + /* Compile the init code. */ + compileRootBlock( ); +} diff --git a/colm/dotgen.cpp b/colm/dotgen.cpp new file mode 100644 index 00000000..d362d714 --- /dev/null +++ b/colm/dotgen.cpp @@ -0,0 +1,369 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "dotgen.h" +#include "parsedata.h" +#include "pdacodegen.h" + +using namespace std; +#if 0 + +std::ostream &GraphvizDotGen::KEY( Key key ) +{ + if ( printPrintables && key.isPrintable() ) { + // Output values as characters, ensuring we escape the quote (") character + char cVal = (char) key.getVal(); + out << "'"; + switch ( cVal ) { + case '"': case '\\': + out << "\\" << cVal; + break; + default: + out << cVal; + break; + } + out << "'"; + } + else { + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); + } + + return out; +} + +std::ostream &GraphvizDotGen::TRANS_ACTION( RedState *fromState, RedTrans *trans ) +{ + int n = 0; + RedAction *actions[3]; /* At most three: from-state, transition and to-state actions. */ + + if ( fromState->fromStateAction != 0 ) + actions[n++] = fromState->fromStateAction; + if ( trans->action != 0 ) + actions[n++] = trans->action; + if ( trans->targ != 0 && trans->targ->toStateAction != 0 ) + actions[n++] = trans->targ->toStateAction; + + if ( n > 0 ) + out << " / "; + + /* Loop over the collected actions and write out what's there. 
*/ + for ( int a = 0; a < n; a++ ) { + for ( GenActionTable::Iter actIt = actions[a]->key.first(); actIt.lte(); actIt++ ) { + GenAction *action = actIt->value; + out << action->nameOrLoc(); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } + return out; +} + +std::ostream &GraphvizDotGen::ACTION( RedAction *action ) +{ + /* Write the action names, or their source locations, separated by commas. */ + out << " / "; + for ( GenActionTable::Iter actIt = action->key.first(); actIt.lte(); actIt++ ) { + GenAction *action = actIt->value; /* Shadows the RedAction parameter inside the loop body. */ + if ( action->name != 0 ) + out << action->name; + else + out << action->loc.line << ":" << action->loc.col; + if ( !actIt.last() ) + out << ", "; + } + return out; +} + +std::ostream &GraphvizDotGen::ONCHAR( Key lowKey, Key highKey ) +{ + if ( lowKey > keyOps->maxKey ) { + GenCondSpace *condSpace = redFsm->findCondSpace( lowKey, highKey ); + Key values = ( lowKey - condSpace->baseKey ) / keyOps->alphSize(); + + lowKey = keyOps->minKey + + (lowKey - condSpace->baseKey - keyOps->alphSize() * values.getVal()); + highKey = keyOps->minKey + + (highKey - condSpace->baseKey - keyOps->alphSize() * values.getVal()); + KEY( lowKey ); + if ( lowKey != highKey ) { + out << ".."; + KEY( highKey ); + } + out << "("; + + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + bool set = values & (1 << csi.pos()); + if ( !set ) + out << "!"; + out << (*csi)->nameOrLoc(); + if ( !csi.last() ) + out << ", "; + } + out << ")"; + } + else { + /* Output the key. Possibly a range. */ + KEY( lowKey ); + if ( highKey != lowKey ) { + out << ".."; + KEY( highKey ); + } + } + return out; +} + +void GraphvizDotGen::writeTransList( RedState *state ) +{ + /* Build the set of unique transitions out of this state. */ + RedTransPtrSet stTransSet; + for ( RedTransList::Iter tel = state->outRange; tel.lte(); tel++ ) { + /* If we haven't seen the transition before, then move forward + * emitting all the ranges that share the same transition. 
*/ + if ( stTransSet.insert( tel->value ) ) { + /* Write out the from and to states. */ + out << "\t" << state->id << " -> "; + + if ( tel->value->targ == 0 ) + out << "err_" << state->id; + else + out << tel->value->targ->id; + + /* Begin the label. */ + out << " [ label = \""; + ONCHAR( tel->lowKey, tel->highKey ); + + /* Walk the transition list, finding the same. */ + for ( RedTransList::Iter mtel = tel.next(); mtel.lte(); mtel++ ) { + if ( mtel->value == tel->value ) { + out << ", "; + ONCHAR( mtel->lowKey, mtel->highKey ); + } + } + + /* Write the action and close the transition. */ + TRANS_ACTION( state, tel->value ); + out << "\" ];\n"; + } + } + + /* Write the default transition. */ + if ( state->defTrans != 0 ) { + /* Write out the from and to states. */ + out << "\t" << state->id << " -> "; + + if ( state->defTrans->targ == 0 ) + out << "err_" << state->id; + else + out << state->defTrans->targ->id; + + /* Begin the label. */ + out << " [ label = \"DEF"; + + /* Write the action and close the transition. */ + TRANS_ACTION( state, state->defTrans ); + out << "\" ];\n"; + } +} + +void GraphvizDotGen::writeDotFile( ) +{ + out << + "digraph " << fsmName << " {\n" + " rankdir=LR;\n"; + + /* Define the pseudo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << " node [ shape = point ];\n"; + out << " ENTRY;\n"; + + /* Pseudo states for entry points in the entry map. */ + for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) { + RedState *state = redFsm->allStates + *en; + out << " en_" << state->id << ";\n"; + } + + /* Pseudo states for states with eof actions. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) + out << " eof_" << st->id << ";\n"; + } + + out << " node [ shape = circle, height = 0.2 ];\n"; + + /* Pseudo states for states with a default or range transition that goes to error. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + bool needsErr = false; + if ( st->defTrans != 0 && st->defTrans->targ == 0 ) + needsErr = true; + else { + for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) { + if ( tel->value->targ == 0 ) { + needsErr = true; + break; + } + } + } + + if ( needsErr ) + out << " err_" << st->id << " [ label=\"\"];\n"; + } + + /* Attributes common to all nodes, plus double circle for final states. */ + out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n"; + + /* List final states. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->isFinal ) + out << " " << st->id << ";\n"; + } + + /* List transitions. */ + out << " node [ shape = circle ];\n"; + + /* Walk the states. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + writeTransList( st ); + + /* Transitions into the start state. */ + out << " ENTRY -> " << redFsm->startState->id << " [ label = \"IN"; + out << "\" ];\n"; + + /* Transitions into the entry points. */ + for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) { + RedState *state = redFsm->allStates + *en; + char *name = redFsm->entryPointNames[en.pos()]; + out << " en_" << state->id << " -> " << state->id << + " [ label = \"" << name << "\" ];\n"; + } + + /* EOF action transitions. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + out << " " << st->id << " -> eof_" << + st->id << " [ label = \"EOF"; + ACTION( st->eofAction ) << "\" ];\n"; + } + } + + out << + "}\n"; +} + +void GraphvizDotGen::finishRagelDef() +{ + if ( !graphvizDone ) { + graphvizDone = true; + + /* For dot file generation we want to pick default transitions. */ + redFsm->chooseDefaultSpan(); + + /* Write the dot file out. 
*/ + writeDotFile(); + } +} + + +void PdaCodeGen::writeTransList( PdaState *state ) +{ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Write out the from and to states. */ + out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum; + + /* Begin the label. Use the language element's data when there is one, otherwise the key as a character. */ + out << " [ label = \""; + long key = trans->key; + KlangEl *lel = pd->langElIndex[key]; + if ( lel != 0 ) + out << lel->data; + else + out << (char)key; + + if ( trans->value->actions.length() > 0 ) { + out << " / "; + for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { + switch ( *act & 0x3 ) { /* Low two bits select the label: 1 is S, 2 is R, 3 is SR. */ + case 1: + out << "S(" << trans->value->actOrds[act.pos()] << ")"; + break; + case 2: { + out << "R(" << pd->prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + } + case 3: { + out << "SR(" << pd->prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + }} + if ( ! act.last() ) + out << ", "; + } + } + + out << "\" ];\n"; + } +} + +void PdaCodeGen::writeDotFile( PdaGraph *graph ) +{ + out << + "digraph " << parserName << " {\n" + " rankdir=LR;\n"; + + /* Define the pseudo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << " node [ shape = point ];\n"; + out << " ENTRY;\n"; + + out << " node [ shape = circle, height = 0.2 ];\n"; + + /* Attributes common to all nodes, plus double circle for final states. */ + out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n"; + + /* List final states. */ + for ( PdaStateSet::Iter st = graph->finStateSet; st.lte(); st++ ) + out << " " << (*st)->stateNum << ";\n"; + + /* List transitions. */ + out << " node [ shape = circle ];\n"; + + /* Walk the states. */ + for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) + writeTransList( st ); + + /* Transitions into the start state. 
*/ + out << " ENTRY -> " << graph->startState->stateNum << " [ label = \"START\" ];\n"; + + out << + "}\n"; +} + +void PdaCodeGen::writeDotFile( ) +{ + writeDotFile( pd->pdaGraph ); +} + +#endif + diff --git a/colm/dotgen.h b/colm/dotgen.h new file mode 100644 index 00000000..c0cd31e7 --- /dev/null +++ b/colm/dotgen.h @@ -0,0 +1,51 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _GVDOTGEN_H +#define _GVDOTGEN_H + +#include <iostream> + +#if 0 /* The Graphviz generator class below is compiled out. */ + +class GraphvizDotGen : public CodeGenData +{ +public: + GraphvizDotGen( ostream &out ) : CodeGenData(out) { } + + /* Print an fsm to out stream. */ + void writeTransList( RedState *state ); + void writeDotFile( ); + + virtual void finishRagelDef(); + +private: + /* Writing labels and actions. */ + std::ostream &ONCHAR( Key lowKey, Key highKey ); + std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans ); + std::ostream &ACTION( RedAction *action ); + std::ostream &KEY( Key key ); +}; + +#endif + + +#endif /* _GVDOTGEN_H */ diff --git a/colm/fsmap.cpp b/colm/fsmap.cpp new file mode 100644 index 00000000..d843474f --- /dev/null +++ b/colm/fsmap.cpp @@ -0,0 +1,854 @@ +/* + * Copyright 2002-2004 Adrian Thurston + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include "fsmgraph.h" +#include <iostream> + +using std::cerr; +using std::endl; + +CondData *condData = 0; +KeyOps *keyOps = 0; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the actions from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) /* Insert nActs ordering/action pairs taken from the two parallel arrays. */ +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +bool ActionTable::hasAction( Action *action ) /* Linear scan for an entry whose value is the given action. */ +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into an action table. */ +void LmActionTable::setAction( int ordering, TokenDef *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. 
*/ + insertMulti( ordering, action ); +} + +/* Set all the actions from another action table in this table. */ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the ordering is larger or equal (later in time). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. NOTE(review): no outPrior is set by this function as written; confirm. 
*/ +void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk all transitions out of the start state, skipping those that go to error. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of all transitions in a graph. Walks the out list of + * every state, skipping transitions that go to error. */ +void FsmGraph::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. 
Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. NOTE(review): the out action table is not touched by this function as written; confirm. */ +void FsmGraph::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk the start state's transitions, setting functions. Transitions to error are skipped. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states. */ +void FsmGraph::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmGraph::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. 
*/ +void FsmGraph::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add functions to the longest match action table for constructing scanners. */ +void FsmGraph::longMatchAction( int ordering, TokenDef *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->lmActionTable.setAction( ordering, lmPart ); + } +} + +void FsmGraph::fillGaps( FsmState *state ) /* Attach a transition with a null target over every key range the out list does not cover. */ +{ + if ( state->outList.length() == 0 ) { + /* Add the range on the lower and upper bound. */ + attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( keyOps->minKey < trans->lowKey ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, keyOps->minKey, highKey ); + } + + /* Move the first source transition back into the out list. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. */ + Key nextKey = lastHigh; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < trans->lowKey ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Move the source transition back into the out list. 
*/ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. */ + if ( lastHigh < keyOps->maxKey ) { + /* Advance to the first key of the trailing gap. */ + lastHigh.increment(); + + attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); + } + } +} + +void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set the error action in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set the error actions in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setActions( other ); + } +} + + +/* Give a target state for error transitions. */ +void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) { + /* The trans goes to error, redirect it. 
*/ + redirectErrorTrans( trans->fromState, target, trans ); + trans->actionTable.setActions( orderings, actions, nActs ); + } + } +} + +void FsmGraph::transferErrorActions( FsmState *state, int transferPoint ) +{ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. The index is not advanced after a removal. */ + setErrorAction( state, act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transferring and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. */ + startState->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in all states. */ +void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} + +void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in states that are not final and not the start state. */ +void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Set the action in all states that are not the start state and not + * final. The start state is not isolated first. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmGraph::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in all states. */ +void FsmGraph::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmGraph::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the EOF action table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in the states that have transitions into a final state. */ +void FsmGraph::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmGraph::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in all states. */ +void FsmGraph::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmGraph::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmGraph::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ + +void FsmGraph::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before staring. + * Returns the maximum number of order numbers used. */ +int FsmGraph::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. */ + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + + return maxUsed; +} + +/* Remove all priorities. */ +void FsmGraph::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. 
*/ + state->outPriorTable.empty(); + + /* Clear transition data from the out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + trans->priorTable.empty(); + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. */ +void FsmGraph::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest-match action table for the transition. */ + for ( LmActionTable::Iter action = trans->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out action table. */ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the orderings of the error action table. Its elements carry + * the ordering in an 'ordering' field rather than in 'key'. */ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys eof action table. 
*/ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and verify that non final states do not have out + * data, that the SB_BOTH state bits are cleared, and that there are no states + * with zero foreign in transitions. Only states on stateList are visited and + * all checks are assertions. */ +void FsmGraph::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & SB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSet.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared. */ + assert( (state->stateBits & SB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two transitions according to their relative priority. Only keys + * present in both tables are compared. Returns -1 if the first table has the + * lower priority on the first such key that differs, 1 if it has the higher + * priority, and 0 if no shared key differs. Since the base transition has no + * priority associated with it, the default is to return equal. */ +int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Check keys. A key found on one side only is skipped. */ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) + return -1; + else if ( pd1->desc->priority > pd2->desc->priority ) + return 1; + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. */ + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. 
Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 ) +{ + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Priorities and longest match actions are equal, so the action tables + * decide the result. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transition is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves, need to make a copy of the source tables so + * that a table is never read while it is being written to. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. */ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compare the properties of states that are embedded by users. 
Compares out + * priorities, out transitions, to, from, out, error and eof action tables. */ +int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 ) +{ + /* Compare the out priority table. The first table that differs decides + * the result; later tables are only consulted on a tie. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. */ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. */ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out condition sets. */ + cmpRes = CmpActionSet::compare( state1->outCondSet, + state2->outCondSet ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test eof action tables. */ + return CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); +} + + +/* Invoked when a state loses its final state status and the leaving + * transition embedding data should be deleted. */ +void FsmGraph::clearOutData( FsmState *state ) +{ + /* Kill the out actions, out conditions and out priorities. */ + state->outActionTable.empty(); + state->outCondSet.empty(); + state->outPriorTable.empty(); +} + +/* Report whether the state carries any leaving data: out actions, out + * conditions or out priorities. */ bool FsmGraph::hasOutData( FsmState *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSet.length() > 0 || + state->outPriorTable.length() > 0 ); +} + +/* + * Setting Conditions. 
+ */ + + +void logNewExpansion( Expansion *exp ); +void logCondSpace( CondSpace *condSpace ); + +/* Look up the condition space for condSet, creating and registering it if + * it does not exist yet. Returns the existing or the new space. */ CondSpace *FsmGraph::addCondSpace( const CondSet &condSet ) +{ + CondSpace *condSpace = condData->condSpaceMap.find( condSet ); + if ( condSpace == 0 ) { + /* The new space starts at the next free key and reserves + * 2^(number of conditions) * alphabet size keys after it. + * NOTE(review): the shift is done in int, so a very large condition + * set would overflow it -- confirm the set size is bounded. */ + Key baseKey = condData->nextCondKey; + condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize(); + + condSpace = new CondSpace( condSet ); + condSpace->baseKey = baseKey; + condData->condSpaceMap.insert( condSpace ); + + #ifdef COLM_LOG_CONDS + cerr << "adding new condition space" << endl; + cerr << " condition set: "; + logCondSpace( condSpace ); + cerr << endl; + cerr << " baseKey: " << baseKey.getVal() << endl; + #endif + } + return condSpace; +} + +/* Embed a condition in the start state, after isolating it. */ void FsmGraph::startFsmCondition( Action *condAction ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + embedCondition( startState, condAction ); +} + +/* Embed a condition in every state on the state list. */ void FsmGraph::allTransCondition( Action *condAction ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + embedCondition( state, condAction ); +} + +/* Add a condition to the out condition set of every final state. */ void FsmGraph::leaveFsmCondition( Action *condAction ) +{ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outCondSet.insert( condAction ); +} diff --git a/colm/fsmattach.cpp b/colm/fsmattach.cpp new file mode 100644 index 00000000..201cdd76 --- /dev/null +++ b/colm/fsmattach.cpp @@ -0,0 +1,425 @@ +/* + * Copyright 2001 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "fsmgraph.h" + +#include +using namespace std; + +/* Insert a transition into an inlist. The head must be supplied. The + * transition is pushed onto the front of the list. */ +void FsmGraph::attachToInList( FsmState *from, FsmState *to, + FsmTrans *&head, FsmTrans *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* Keep track of the foreign in transitions of to. Self loops + * (from == to) are not counted. */ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move it from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +void FsmGraph::detachFromInList( FsmState *from, FsmState *to, + FsmTrans *&head, FsmTrans *trans ) +{ + /* Detach in the inTransList. A transition with no predecessor is the + * head, so the head moves on to its successor. */ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of the foreign in transitions of to. Self loops + * (from == to) were never counted. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move it + * from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +/* Attach states on the default transition, range list or on out/in list key. + * First makes a new transition. 
If there is already a transition out from + * fromState on the default, then will assertion fail. */ +FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + FsmTrans *retVal = new FsmTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. It is appended, + * so the caller is responsible for the order of the keys. */ + from->outList.append( retVal ); + + /* Set the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + /* Attach using inList as the head pointer. A null target has no in + * list to join. */ + if ( to != 0 ) + attachToInList( from, to, to->inList.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. Does not handle adding the transition into the out list. + * The transition must be completely unattached on entry. */ +void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. The transition must currently have no target. */ +void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. 
*/ +void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + /* The caller must pass the states the transition is attached to. */ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + /* Detach using to's inList pointer as the head. */ + detachFromInList( from, to, to->inList.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void FsmGraph::detachState( FsmState *state ) +{ + /* Detach the in transitions from the inList list of transitions. Each + * detach removes the head, so the loop ends when the list is empty. */ + while ( state->inList.head != 0 ) { + /* Get pointers to the trans and the state. */ + FsmTrans *trans = state->inList.head; + FsmState *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + + /* Ok to delete the transition. */ + fromState->outList.detach( trans ); + delete trans; + } + + /* Remove the entry points in on the machine. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach out range transitions. The next position is fetched before the + * current transition is deleted. */ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + TransList::Iter next = trans.next(); + detachTrans( state, trans->toState, trans ); + delete trans; + trans = next; + } + + /* The transitions themselves were deleted in the loop above; abandon + * the out list's now stale pointers. */ + state->outList.abandon(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. 
*/ +FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans ) +{ + /* Make a new transition. Its keys are not set here and it is not added + * to any out list; both are left to the caller. */ + FsmTrans *newTrans = new FsmTrans(); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newTrans ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newTrans, srcTrans ); + + return newTrans; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. Returns + * destTrans, which is updated in place. */ +FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. Does the + * existing trans go to the state we are to attach to? ie, are we to + * simply double up the transition? */ + FsmState *toState = srcTrans->toState; + FsmState *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Add the src + * trans into the dest trans. We don't need to merge in the from out + * trans data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... A + * state with a dict element stands for a set of states, so that + * whole set is taken instead of the state itself. */ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( md.stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. 
This means that we + * need to fill in it's transitions sometime in the future. We + * don't do that now (ie, do not recurse). */ + FsmState *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Add to the fill list. */ + md.fillListAppend( combinState ); + } + + /* Get the target state of the dict element that was inserted or + * found. */ + FsmState *targ = lastFound->targState; + + /* Detach the state from existing state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. A transition with a null toState is an error + * transition. In every case the result is destTrans, updated in place. */ +FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + FsmTrans *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error, we need to detach and reattach. */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. */ + retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority then dest then + * src is ignored. 
If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + FsmTrans *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. + * NOTE(review): dest is detached here but not deleted in this + * function, and the returned transition replaces it -- confirm + * that something else releases dest, otherwise it leaks. */ + detachTrans( from, destTrans->toState, destTrans ); + retTrans = dupTrans( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( md, from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. */ +void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ) +{ + /* The destination list. The result is built up here and swapped into + * dest once the walk is complete. */ + TransList destList; + + /* Set up an iterator to stop at breaks. */ + PairIter outPair( dest->outList.head, srcList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + FsmTrans *destTrans = outPair.s1Tel.trans; + destTrans->lowKey = outPair.s1Tel.lowKey; + destTrans->highKey = outPair.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: { + /* Src range may get crossed with dest's default transition. 
*/ + FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s2Tel.lowKey; + newTrans->highKey = outPair.s2Tel.highKey; + destList.append( newTrans ); + break; + } + case RangeOverlap: { + /* Exact overlap, cross them. The result may be the dest + * transition itself or a new transition that replaces it. */ + FsmTrans *newTrans = crossTransitions( md, dest, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s1Tel.lowKey; + newTrans->highKey = outPair.s1Tel.highKey; + destList.append( newTrans ); + break; + } + case BreakS1: { + /* Since we are always writing to the dest trans, the dest needs + * to be copied when it is broken. The copy goes into the first + * half of the break to "break it off". */ + outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); + break; + } + case BreakS2: /* Nothing to do when the src range is broken. */ + break; + } + } + + /* Abandon the old outList and transfer destList into it. */ + dest->outList.transfer( destList ); +} + + +/* Move all the transitions that go into src so that they go into dest. The + * start state status and the entry points of src move to dest as well. */ +void FsmGraph::inTransMove( FsmState *dest, FsmState *src ) +{ + /* Do not try to move in trans to and from the same state. */ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into src, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the transitions in inList. Each detach removes the head of the + * list, so the loop ends when src has no in transitions left. */ + while ( src->inList.head != 0 ) { + /* Get trans and from state. */ + FsmTrans *trans = src->inList.head; + FsmState *fromState = trans->fromState; + + /* Detach from src, reattach to dest. 
*/ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} diff --git a/colm/fsmbase.cpp b/colm/fsmbase.cpp new file mode 100644 index 00000000..6db6e8da --- /dev/null +++ b/colm/fsmbase.cpp @@ -0,0 +1,602 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "fsmgraph.h" + +/* Simple singly linked list append routine for the fill list. The new state + * goes to the end of the list. The list is threaded through alg.next of the + * states. */ +void MergeData::fillListAppend( FsmState *state ) +{ + /* The new state is the last element, so it has no successor. */ + state->alg.next = 0; + + if ( stfillHead == 0 ) { + /* List is empty, state becomes head and tail. */ + stfillHead = state; + stfillTail = state; + } + else { + /* List is not empty, state goes after last element. */ + stfillTail->alg.next = state; + stfillTail = state; + } +} + +/* Graph constructor. Makes a graph with no start state and no error state. */ +FsmGraph::FsmGraph() +: + /* No start state. */ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world. */ + misfitAccounting(false), + + lmRequiresErrorState(false) +{ +} + +/* Copy all graph data including transitions. */ +FsmGraph::FsmGraph( const FsmGraph &graph ) +: + /* Lists start empty. 
Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + /* The error state is not carried over from the source graph. */ errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false), + + lmRequiresErrorState(graph.lmRequiresErrorState) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + FsmState *newState = new FsmState( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. */ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* The transition still points to the original target in the src + * machine. The target's duplicate is in the statemap. A null + * target stays null. */ + FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. The old target is + * cleared first because attachTrans asserts that it is unset. */ + trans->toState = 0; + attachTrans( state, toState, trans ); + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. NOTE(review): startState is dereferenced without a null + * check, so the source graph is assumed to have a start state -- confirm + * that graphs without one are never copied. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set. 
*/ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmGraph::~FsmGraph() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Empty the out transition list of the state. */ + state->outList.empty(); + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its SB_ISFINAL bit set and the state + * is added to the finStateSet. Does nothing if the state is already final. */ +void FsmGraph::setFinState( FsmState *state ) +{ + /* Is it already a fin state? */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. The state has its SB_ISFINAL bit cleared and the + * state is removed from the final state set. Does nothing if the state is + * already non-final. */ +void FsmGraph::unsetFinState( FsmState *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & SB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ SB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set a state as the start state. The graph must not currently have a start + * state (asserted). Counts as one foreign in transition to the state. */ +void FsmGraph::setStartState( FsmState *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} +/* Clear the start state. The graph must currently have a start state + * (asserted). Undoes the foreign in transition counted by setStartState. */ +void FsmGraph::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the entry's count of foreign entries.
*/ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmGraph::setEntry( int id, FsmState *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Record the id to state mapping in the entry point map. The result of + * the insert is not checked. */ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status for that id. Assumes that the id is indeed mapped to state: + * the search below has no bound check and the result of findMulti is not + * tested. */ +void FsmGraph::unsetEntry( int id, FsmState *state ) +{ + /* Find the entries keyed on id, then scan forward from the first one for + * the entry that maps to state. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states.
Assumes that the id is indeed + * mapped to a state. */ +void FsmGraph::unsetEntry( int id ) +{ + /* Find the entries keyed on id. The loop below treats enLow through + * enHigh as an inclusive range. The result of findMulti is not tested, + * hence the assumption that id is mapped. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. */ + entryPoints.removeMulti( enLow, enHigh ); +} + +/* Move the entry point id from the state from to the state to. Assumes that + * id is currently mapped to from: the search has no bound check. If to is + * already labelled with id then its foreign in transition count is left + * alone. */ +void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from ) +{ + /* Find the entry in the entry map. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine.
*/ +void FsmGraph::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. A state that appears in + * the map more than once is handled on its first visit, after which + * its id set is empty and later visits skip it. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Append the epsilon transition id to the epsilonTrans list of every final + * state. */ +void FsmGraph::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmGraph::markReachableFromHere( FsmState *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + markReachableFromHere( trans->toState ); + } +} +/* Like markReachableFromHere, but the walk does not step into final states: + * a final target is neither marked nor traversed by this walk. The state + * passed in is always marked, final or not. */ +void FsmGraph::markReachableFromHereStopFinal( FsmState *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions.
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + FsmState *toState = trans->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmGraph::markReachableFromHereReverse( FsmState *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on the source state of every in transition. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + markReachableFromHereReverse( trans->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. Returns + * true only when the start state has no in transitions and no entry ids. */ +bool FsmGraph::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inList.head != 0 ) + return false; + + /* If there are any entry points then the state is not isolated. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. The map values are copied as is, so they keep + * pointing at other's states. */ +void FsmGraph::copyInEntryPoints( FsmGraph *other ) +{ + /* Use insert multi because names are not unique.
*/ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} + +/* Clear the SB_ISFINAL bit on every final state and empty the final state + * set. Unlike unsetFinState, this does not call clearOutData. */ +void FsmGraph::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ SB_ISFINAL; + finStateSet.empty(); +} +/* Or finStateBits into the stateBits of every final state. */ +void FsmGraph::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. All checks + * are asserts. */ +void FsmGraph::verifyIntegrity() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + assert( trans->fromState == state ); + + /* Walk the inlist and assert toState is correct. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + assert( trans->toState == state ); + } +} +/* Assert that every state on the state list can be reached from the start + * state or from an entry point. The marks used for the walk are cleared + * again before returning. */ +void FsmGraph::verifyReachability() +{ + /* Mark all the states that can be reached + * through the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} +/* Assert that every state on the state list is either the start state or has + * a path to a final state. The marks used for the walk are cleared again + * before returning. */ +void FsmGraph::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= SB_ISMARKED; + + /* Make sure everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it.
*/ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} +/* Recursive step of the depth first ordering. Appends state to the state + * list unless it is already flagged SB_ONLIST, then visits the targets of + * its out transitions. */ +void FsmGraph::depthFirstOrdering( FsmState *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & SB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= SB_ONLIST; + stateList.append( state ); + + /* Recurse on the targets of all out transitions. */ + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->toState != 0 ) + depthFirstOrdering( tel->toState ); + } +} + +/* Ordering states by transition connections. Rebuilds the state list in + * depth first order and asserts that no state was lost in the process. */ +void FsmGraph::depthFirstOrdering() +{ + /* Clear the on-list flags. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~SB_ONLIST; + + /* Clear out the state list, we will rebuild it. The old length is kept + * only for the assert at the end. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the error state (if there is one, it + * goes on first), the start state and all other entry points. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. */ +void FsmGraph::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. The + * original tail is remembered so the walk stops there and does not revisit + * the states it has just moved. */ + FsmState *state = 0; + FsmState *next = stateList.head; + FsmState *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end?
*/ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +void FsmGraph::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + + +bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans ) +{ + /* Might go directly to error state. */ + if ( trans->toState == 0 ) + return true; + + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( keyOps->minKey < trans->lowKey ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + FsmTrans *prev = trans->prev; + Key nextKey = prev->highKey; + nextKey.increment(); + if ( nextKey < trans->lowKey ) + return true; + } + return false; +} + +bool FsmGraph::checkErrTransFinish( FsmState *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. */ + FsmTrans *last = state->outList.tail; + if ( last->highKey < keyOps->maxKey ) + return true; + } + return 0; +} + +bool FsmGraph::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp new file mode 100644 index 00000000..025c6dce --- /dev/null +++ b/colm/fsmcodegen.cpp @@ -0,0 +1,1089 @@ +/* + * Copyright 2001-2006 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "parsedata.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "dotgen.h" +#include "bstmap.h" +#include "fsmrun.h" +#include +#include +#include + + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + + +/* Init code gen with in parameters. */ +FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, + RedFsm *redFsm, FsmTables *fsmTables ) +: + sourceFileName(sourceFileName), + fsmName(fsmName), + out(out), + redFsm(redFsm), + fsmTables(fsmTables), + codeGenErrCount(0), + dataPrefix(true), + writeFirstFinal(true), + writeErr(true) +{ +} + +unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + return arrayType->size; +} + +string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + + string ret = arrayType->data1; + if ( arrayType->data2 != 0 ) { + ret += " "; + ret += arrayType->data2; + } + return ret; +} + + +/* Write out the fsm name. */ +string FsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. */ +string FsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + +/* Write out the array of actions. 
*/ +std::ostream &FsmCodeGen::ACTIONS_ARRAY() +{ + /* The array starts with a single 0 entry. After that each action table + * is written as its length followed by the ids of its actions. */ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + /* No separator after the very last value of the array. */ + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +/* Expression for the cs variable: ACCESS() followed by the name cs. */ +string FsmCodeGen::CS() +{ + ostringstream ret; + /* ACCESS() supplies whatever prefix is needed to reach the variable. */ + ret << ACCESS() << "cs"; + return ret.str(); +} +/* Expression for the key to switch on, machine wide: _widec if the machine + * has any conditions, otherwise the plain key. */ +string FsmCodeGen::GET_WIDE_KEY() +{ + if ( redFsm->anyConditions() ) + return "_widec"; + else + return GET_KEY(); +} +/* Expression for the key to switch on in one state: _widec if the state has + * a non-empty stateCondList, otherwise the plain key. */ +string FsmCodeGen::GET_WIDE_KEY( RedState *state ) +{ + if ( state->stateCondList.length() > 0 ) + return "_widec"; + else + return GET_KEY(); +} +/* Expression for the current key: a dereference of P(). */ +string FsmCodeGen::GET_KEY() +{ + ostringstream ret; + /* Expression for retrieving the key, use simple dereference. */ + ret << "(*" << P() << ")"; + return ret.str(); +} + +/* Write out level number of tabs. Makes the nested binary search nice + * looking. A level of zero or less gives the empty string. */ +string FsmCodeGen::TABS( int level ) +{ + string result; + while ( level-- > 0 ) + result += "\t"; + return result; +} + +/* Write out a key from the fsm code gen. Depends on wether or not the key is + * signed.
*/ +string FsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + /* Signed alphabets, and host languages without an explicit unsigned + * marker, get the plain value. Otherwise the value is written as an + * unsigned long with a u suffix. */ + if ( keyOps->isSigned || !hostLang->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << 'u'; + return ret.str(); +} +/* Emit an assignment of the longest match id of the item's part to ACT(). */ +void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = " << item->longestMatchPart->longestMatchId << ";"; +} +/* Emit an assignment of P() plus one to TOKEND(). The item is not used. */ +void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. */ + ret << TOKEND() << " = " << P() << "+1;"; +} +void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) +{ /* Emit an assignment of 0 to TOKSTART(). The item is not used. */ + ret << TOKSTART() << " = 0;"; +} +/* Emit an assignment of 0 to ACT(). The item is not used. */ +void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = 0;"; +} +/* Emit an assignment of P() to TOKSTART(). The item is not used. */ +void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} +/* Emit a call that sends the token: sendIgnore for tokens flagged ignore, + * sendToken otherwise. The token id is the only argument. */ +void FsmCodeGen::EMIT_TOKEN( ostream &ret, KlangEl *token ) +{ + if ( token->ignore ) + ret << " sendIgnore( " << token->id << " );\n"; + else + ret << " sendToken( " << token->id << " );\n"; +} +/* Emit the longest match switch: reset P() to TOKEND(), then switch on ACT() + * with one case per token def that is flagged inLmSelect. The targState and + * inFinish parameters are not used. */ +void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, + int targState, int inFinish ) +{ + ret << + " " << P() << " = " << TOKEND() << ";\n" + " switch( " << ACT() << " ) {\n"; + + /* If the switch handles error then we also forced the error state. It + * will exist.
*/ + if ( item->tokenRegion->lmSwitchHandlesError ) { + /* Act id 0: rewind P() to the token start and go to the error state. */ + ret << " case 0: " << P() << " = " << TOKSTART() << + "; goto st" << redFsm->errState->id << ";\n"; + } + + for ( TokenDefList::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + assert( lmi->token != 0 ); + ret << " case " << lmi->longestMatchId << ":\n"; + EMIT_TOKEN( ret, lmi->token ); + ret << " break;\n"; + } + } + + /* Close the switch and jump back to _resume. */ + ret << + " }\n" + "\t" + " goto _resume;\n"; +} +/* Emit the code for a match on the last character: advance P() by one, send + * the token of the item's part and jump to _resume. The part must have a + * token (asserted). */ +void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->token != 0 ); + + ret << " " << P() << " += 1;\n"; + EMIT_TOKEN( ret, item->longestMatchPart->token ); + ret << " goto _resume;\n"; +} +/* Emit the code for a match on the next character: send the token of the + * item's part and jump to _resume, leaving P() alone. The part must have a + * token (asserted). */ +void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->token != 0 ); + + EMIT_TOKEN( ret, item->longestMatchPart->token ); + ret << " goto _resume;\n"; +} +/* Emit the code for a match that lags behind: reset P() to TOKEND(), send + * the token of the item's part and jump to _resume. The part must have a + * token (asserted). */ +void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->token != 0 ); + + ret << " " << P() << " = " << TOKEND() << ";\n"; + EMIT_TOKEN( ret, item->longestMatchPart->token ); + ret << " goto _resume;\n"; +} + + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions than handle language specific items in the tree.
*/ +void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Dispatch on the item type. There is no default case, so an item + * type not listed here emits nothing. */ + switch ( item->type ) { + case InlineItem::Text: + /* Not expected in these lists. */ + assert( false ); + break; + case InlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case InlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case InlineItem::LmInitTokStart: + /* Not expected in these lists. */ + assert( false ); + break; + case InlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case InlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case InlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish ); + break; + case InlineItem::LmOnLast: + LM_ON_LAST( ret, item ); + break; + case InlineItem::LmOnNext: + LM_ON_NEXT( ret, item ); + break; + case InlineItem::LmOnLagBehind: + LM_ON_LAG_BEHIND( ret, item ); + break; + } + } +} + +/* Write out paths in line directives. The only character escaped is the + * backslash, which is doubled. Everything else is copied through. */ +string FsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} +/* Emit one action as a braced block: the action's inline list, followed by a + * mark_enter or mark_leave assignment when the action has an objField. */ +void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ) +{ + /* Write the block and close it off.
*/ + ret << "\t{"; + INLINE_LIST( ret, action->inlineList, targState, inFinish ); + + if ( action->objField ) { + /* Record P() in the mark array slot given by the field offset. Mark + * types other than MarkEnter and MarkLeave emit nothing here. */ + ObjField *field = action->objField; + if ( action->markType == MarkEnter ) + ret << "mark_enter[" << field->offset << "] = " << P() << ";\n"; + else if ( action->markType == MarkLeave ) + ret << "mark_leave[" << field->offset << "] = " << P() << ";\n"; + } + + ret << "}\n"; + +} +/* Emit a condition: a newline followed by the condition's inline list, with + * target state 0 and inFinish false. */ +void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << "\n"; + INLINE_LIST( ret, condition->inlineList, 0, false ); +} +/* The id of the error state as a decimal string, or -1 if the machine has no + * error state. */ +string FsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} +/* The id of the first final state as a decimal string. If there is no first + * final state then redFsm->nextStateId is written instead. */ +string FsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} +/* Prefix for generated data names: the fsm name and an underscore when the + * dataPrefix switch is on, otherwise the empty string. */ +string FsmCodeGen::DATA_PREFIX() +{ + if ( dataPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type: data1 of keyOps->alphType, followed by a + * space and data2 when data2 is set. */ +string FsmCodeGen::ALPH_TYPE() +{ + string ret = keyOps->alphType->data1; + if ( keyOps->alphType->data2 != 0 ) { + ret += " "; + ret += + keyOps->alphType->data2; + } + return ret; +} + +/* Emit the alphabet data type.
*/ +string FsmCodeGen::WIDE_ALPH_TYPE() +{ + string ret; + /* If the largest key of the machine fits the alphabet then the alphabet + * type is used as is. Otherwise pick a host type that subsumes the + * largest key; one is asserted to exist. */ + if ( redFsm->maxKey <= keyOps->maxKey ) + ret = ALPH_TYPE(); + else { + long long maxKeyVal = redFsm->maxKey.getLongLong(); + HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); + assert( wideType != 0 ); + + ret = wideType->data1; + if ( wideType->data2 != 0 ) { + ret += " "; + ret += wideType->data2; + } + } + return ret; +} + +/* The const qualifier text, with a trailing space. */ +string FsmCodeGen::PTR_CONST() +{ + return "const "; +} +/* Write the opening of a static const array definition of the given element + * type and name. */ +std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} +/* Write the closing of an array definition. */ +std::ostream &FsmCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} +/* Write the declarator of a static const variable. No initializer and no + * terminating semicolon are written. */ +std::ostream &FsmCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} +/* The unsigned integer type name. */ +string FsmCodeGen::UINT( ) +{ + return "unsigned int"; +} +/* Expression adding offset to ptr. No parentheses are added. */ +string FsmCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} +/* A cast to type: the type name in parentheses. */ +string FsmCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} +/* Write a switch case for every action that is referenced as a to-state + * action (numToStateRefs above zero). */ +std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} +/* Write a switch case for every action that is referenced as a from-state + * action (numFromStateRefs above zero). */ +std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break.
*/ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} +/* Write a switch case for every action that is referenced from a transition + * (numTransRefs above zero). */ +std::ostream &FsmCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} +/* Emit the tests for the single-key transitions of a state: an if for one + * key, a switch for several, nothing at all when the state has none. */ +void FsmCodeGen::emitSingleSwitch( RedState *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "\tif ( " << GET_WIDE_KEY(state) << " == " << + KEY(data[0].lowKey) << " )\n\t\t"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value, 0) << "\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; + + /* Write out the single indices. */ + for ( int j = 0; j < numSingles; j++ ) { + out << "\t\tcase " << KEY(data[j].lowKey) << ": "; + TRANS_GOTO(data[j].value, 0) << "\n"; + } + + /* Close off the transition switch. */ + out << "\t}\n"; + } +} +/* Emit a nested if/else binary search over the range transitions of a state, + * covering the elements low through high of outRange. The level parameter + * is the nesting depth and controls the indentation of the output. */ +void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet.
*/ + bool limitLow = data[mid].lowKey == keyOps->minKey; + bool limitHigh = data[mid].highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. Test below, test above, and + * otherwise the key is in the range at mid. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. NOTE(review): mid is computed + * as (low + high) >> 1, so for low <= high this case looks + * unreachable, since mid == high would imply mid == low. Confirm + * before relying on it. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + + /* If the high key is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << + KEY(data[mid].lowKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet.
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } +} +/* Emit the code that computes _widec for one state condition. _widec is + * first set to the base key of the condition space plus the offset of the + * current key from the minimum key. Then, for each condition in the space, + * an if on that condition adds (1 << position) times the alphabet size. */ +void FsmCodeGen::COND_TRANSLATE( GenStateCond *stateCond, int level ) +{ + GenCondSpace *condSpace = stateCond->condSpace; + out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(level) << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } +} +/* Emit a nested if/else binary search over the state conditions of a state, + * covering the elements low through high of stateCondVect. Where the key + * falls in a condition's range the _widec translation is emitted. The tests + * use the plain key, not the wide key. */ +void FsmCodeGen::emitCondBSearch( RedState *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + GenStateCond **data = state->stateCondVect.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = data[mid]->lowKey == keyOps->minKey; + bool limitHigh = data[mid]->highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid.
*/ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + + /* If the high key is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid]->lowKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet.
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " )\n {"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_TRANSLATE(data[mid], level); + } + } +} + +std::ostream &FsmCodeGen::STATE_GOTOS() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. 
*/ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + return out; +} + +unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +std::ostream &FsmCodeGen::TO_STATE_ACTIONS() +{ + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = TO_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +std::ostream &FsmCodeGen::FROM_STATE_ACTIONS() +{ + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = FROM_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state ) +{ + /* Emit any transitions that have actions and that go to this state. 
*/ + for ( int it = 0; it < state->numInTrans; it++ ) { + RedTrans *trans = state->inTrans[it]; + if ( trans->action != 0 && trans->labelNeeded ) { + /* Write the label for the transition so it can be jumped to. */ + out << "tr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. */ + for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + + out << "\tgoto st" << trans->targ->id << ";\n"; + } + } + + return 0; +} + +/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each + * state. */ +void FsmCodeGen::GOTO_HEADER( RedState *state ) +{ + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Give the state a switch case. */ + out << "case " << state->id << ":\n"; + + /* Advance and test buffer pos. */ + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto out" << state->id << ";\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Record the prev state if necessary. */ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +void FsmCodeGen::STATE_GOTO_ERROR() +{ + /* In the error state we need to emit some stuff that usually goes into + * the header. 
*/ + RedState *state = redFsm->errState; + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + /* We do not need a case label here because the the error state is checked + * at the head of the loop. */ + + /* Break out here. */ + out << " goto out" << state->id << ";\n"; +} + + +/* Emit the goto to take for a given transition. */ +std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level ) +{ + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + return out; +} + +std::ostream &FsmCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << " case " << st->id << ": out" << st->id << ": "; + if ( st->eofTrans != 0 ) { + out << "if ( " << PE() << " == " << PEOF() << " ) {"; + TRANS_GOTO( st->eofTrans, 0 ); + out << "\n"; + out << "}"; + } + + /* Exit. */ + out << CS() << " = " << st->id << "; goto out; \n"; + } + return out; +} + +/* Set up labelNeeded flag for each state. */ +void FsmCodeGen::setLabelsNeeded() +{ + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) + redFsm->errState->labelNeeded = true; + + /* Walk all transitions and set only those that have targs. */ + for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* If there is no action with a next statement, then the label will be + * needed. 
*/ + if ( trans->action == 0 || !trans->action->anyNextStmt() ) + trans->targ->labelNeeded = true; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; +} + +void FsmCodeGen::writeData() +{ + out << "static const int " << START() << " = " << START_STATE_ID() << ";\n"; + out << "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n"; + out << "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n"; + out << "\n"; + + out << "long " << entryByRegion() << "[] = {\n\t"; + for ( int i = 0; i < fsmTables->numRegions; i++ ) { + out << fsmTables->entryByRegion[i]; + + if ( i < fsmTables->numRegions-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << + "FsmTables fsmTables_start =\n" + "{\n" + " 0, " /* actions */ + " 0, " /* keyOffsets */ + " 0, " /* transKeys */ + " 0, " /* singleLengths */ + " 0, " /* rangeLengths */ + " 0, " /* indexOffsets */ + " 0, " /* transTargsWI */ + " 0, " /* transActionsWI */ + " 0, " /* toStateActions */ + " 0, " /* fromStateActions */ + " 0, " /* eofActions */ + " 0,\n" /* eofTargs */ + " " << entryByRegion() << ",\n" + + "\n" + " 0, " /* numStates */ + " 0, " /* numActions */ + " 0, " /* numTransKeys */ + " 0, " /* numSingleLengths */ + " 0, " /* numRangeLengths */ + " 0, " /* numIndexOffsets */ + " 0, " /* numTransTargsWI */ + " 0,\n" /* numTransActionsWI */ + " " << redFsm->regionToEntry.length() << ",\n" + "\n" + " " << START() << ",\n" + " " << FIRST_FINAL() << ",\n" + " " << ERROR() << ",\n" + "\n" + " 0,\n" /* actionSwitch */ + " 0\n" /* numActionSwitch */ + "};\n" + "\n"; +} + +void FsmCodeGen::writeInit() +{ + out << + " " << CS() << " = " << START() << ";\n"; + + /* If there are any calls, then the stack top needs initialization. 
*/ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "\t" << TOP() << " = 0;\n"; + + out << + " " << TOKSTART() << " = 0;\n" + " " << TOKEND() << " = 0;\n" + " " << ACT() << " = 0;\n"; + + out << "\n"; +} + +void FsmCodeGen::writeExec() +{ + setLabelsNeeded(); + + out << + "void FsmRun::execute()\n" + "{\n" + "_resume:\n"; + + if ( redFsm->errState != 0 ) { + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto out;\n"; + } + + out << + " if ( p == pe )\n" + " goto out_switch;\n" + " --" << P() << ";\n" + "\n" + " switch ( " << CS() << " )\n {\n"; + STATE_GOTOS() << + " }\n"; + + out << + "out_switch:\n" + " switch ( " << CS() << " )\n {\n"; + EXIT_STATES() << + " }\n"; + + out << + " out: {}\n" + "}\n" + "\n"; +} + +void FsmCodeGen::writeCode() +{ + redFsm->depthFirstOrdering(); + + out << + "#include \n" + "#include \n" + "#include \n" + "#include \"config.h\"\n" + "\n" + "\n"; + + writeData(); + writeExec(); + + out << + "int main( int argc, char **argv )\n" + "{\n" + " Program program( true, &main_runtimeData );\n" + " program.freshGlobal();\n" + " program.run();\n" + " program.clear();\n" + " return 0;\n" + "}\n" + "\n"; + + out.flush(); +} + +ostream &FsmCodeGen::source_warning( const InputLoc &loc ) +{ + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &FsmCodeGen::source_error( const InputLoc &loc ) +{ + codeGenErrCount += 1; + assert( sourceFileName != 0 ); + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h new file mode 100644 index 00000000..ff804e88 --- /dev/null +++ b/colm/fsmcodegen.h @@ -0,0 +1,214 @@ +/* + * Copyright 2001-2006 Adrian Thurston + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMCODEGEN_H +#define _FSMCODEGEN_H + +#include +#include +#include +#include "common.h" +#include "parsedata.h" +#include "redfsm.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL 8 + +/* Forwards. */ +struct RedFsm; +struct RedState; +struct GenAction; +struct NameInst; +struct RedAction; +struct LongestMatch; +struct TokenDef; +struct InlineList; +struct InlineItem; +struct FsmRun; +struct NameInst; +struct FsmCodeGen; + +typedef unsigned long ulong; +typedef unsigned char uchar; + + +/* + * The interface to the parser + */ + +std::ostream *openOutput( char *inputFile ); + +inline string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +/* + * class FsmCodeGen + */ +class FsmCodeGen +{ +public: + FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, + RedFsm *redFsm, FsmTables *fsmTables ); + +protected: + string FSM_NAME(); + string START_STATE_ID(); + ostream &ACTIONS_ARRAY(); + string GET_WIDE_KEY(); + string GET_WIDE_KEY( RedState *state ); + string TABS( int level ); + string KEY( Key key ); + string LDIR_PATH( char *path ); + void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ); + void CONDITION( ostream &ret, GenAction *condition 
); + string ALPH_TYPE(); + string WIDE_ALPH_TYPE(); + string ARRAY_TYPE( unsigned long maxVal ); + + string ARR_OFF( string ptr, string offset ); + string CAST( string type ); + string UINT(); + string GET_KEY(); + + string ACCESS() { return ""; } + + string P() { return ACCESS() + "p"; } + string PE() { return ACCESS() + "pe"; } + string PEOF() { return ACCESS() + "peof"; } + + string BUF() { return ACCESS() + "buf"; } + string HAVE() { return ACCESS() + "have"; } + string IGNLEN() { return ACCESS() + "ignlen"; } + + string CS(); + string STACK() { return ACCESS() + "stack"; } + string TOP() { return ACCESS() + "top"; } + string TOKSTART() { return ACCESS() + "tokstart"; } + string TOKEND() { return ACCESS() + "tokend"; } + string ACT() { return ACCESS() + "act"; } + + string DATA_PREFIX(); + + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + + string entryByRegion() { return DATA_PREFIX() + "entryByRegion"; } + + + void INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ); + void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish ); + void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, InlineItem *item ); + void INIT_TOKSTART( ostream &ret, InlineItem *item ); + void INIT_ACT( ostream &ret, InlineItem *item ); + void SET_TOKSTART( ostream &ret, InlineItem *item ); + void SET_TOKEND( ostream &ret, InlineItem *item ); + void GET_TOKEND( ostream &ret, InlineItem *item ); + void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish ); + void LM_ON_LAST( ostream &ret, InlineItem *item ); + void LM_ON_NEXT( ostream &ret, InlineItem *item ); + void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ); + void EXEC_TOKEND( ostream &ret ); + void 
EMIT_TOKEN( ostream &ret, KlangEl *token ); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + string PTR_CONST(); + ostream &OPEN_ARRAY( string type, string name ); + ostream &CLOSE_ARRAY(); + ostream &STATIC_VAR( string type, string name ); + + string CTRL_FLOW(); + + ostream &source_warning(const InputLoc &loc); + ostream &source_error(const InputLoc &loc); + + unsigned int arrayTypeSize( unsigned long maxVal ); + +/* subclass */ + +public: + const char *sourceFileName; + const char *fsmName; + ostream &out; + RedFsm *redFsm; + FsmTables *fsmTables; + int codeGenErrCount; + + /* Write options. */ + bool dataPrefix; + bool writeFirstFinal; + bool writeErr; + + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &STATE_GOTOS(); + std::ostream &TRANSITIONS(); + std::ostream &EXEC_FUNCS(); + + unsigned int TO_STATE_ACTION( RedState *state ); + unsigned int FROM_STATE_ACTION( RedState *state ); + + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + + void COND_TRANSLATE( GenStateCond *stateCond, int level ); + void emitCondBSearch( RedState *state, int level, int low, int high ); + void STATE_CONDS( RedState *state, bool genDefault ); + + void emitSingleSwitch( RedState *state ); + void emitRangeBSearch( RedState *state, int level, int low, int high ); + + std::ostream &EXIT_STATES(); + std::ostream &TRANS_GOTO( RedTrans *trans, int level ); + std::ostream &FINISH_CASES(); + + void writeData(); + void writeInit(); + void writeExec(); + void writeCode(); + +protected: + bool useAgainLabel(); + + /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for + * each state. */ + bool IN_TRANS_ACTIONS( RedState *state ); + void GOTO_HEADER( RedState *state ); + void STATE_GOTO_ERROR(); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded(); +}; + +#endif /* _FSMCODEGEN_H */ diff --git a/colm/fsmexec.cpp b/colm/fsmexec.cpp new file mode 100644 index 00000000..80370890 --- /dev/null +++ b/colm/fsmexec.cpp @@ -0,0 +1,209 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "config.h" +#include "fsmrun.h" +#include "redfsm.h" +#include "parsedata.h" +#include "parsetree.h" +#include "pdarun.h" +#include "colm.h" + +void FsmRun::execAction( GenAction *genAction ) +{ + for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + assert(false); + break; + case InlineItem::LmSetActId: + act = item->longestMatchPart->longestMatchId; + break; + case InlineItem::LmSetTokEnd: + tokend = p + 1; + break; + case InlineItem::LmInitTokStart: + assert(false); + break; + case InlineItem::LmInitAct: + act = 0; + break; + case InlineItem::LmSetTokStart: + tokstart = p; + break; + case InlineItem::LmSwitch: + /* If the switch handles error then we also forced the error state. It + * will exist. 
*/ + p = tokend; + if ( item->tokenRegion->lmSwitchHandlesError && act == 0 ) { + p = tokstart; + cs = tables->errorState; + } + else { + for ( TokenDefList::Iter lmi = item->tokenRegion->tokenDefList; + lmi.lte(); lmi++ ) + { + if ( lmi->inLmSelect && act == lmi->longestMatchId ) + emitToken( lmi->token ); + } + } + gotoResume = true; + break; + case InlineItem::LmOnLast: + p += 1; + emitToken( item->longestMatchPart->token ); + gotoResume = true; + break; + case InlineItem::LmOnNext: + emitToken( item->longestMatchPart->token ); + gotoResume = true; + break; + case InlineItem::LmOnLagBehind: + p = tokend; + emitToken( item->longestMatchPart->token ); + gotoResume = true; + break; + } + } + + if ( genAction->objField ) { + ObjField *field = genAction->objField; + if ( genAction->markType == MarkEnter ) + mark_enter[field->offset] = p; + else if ( genAction->markType == MarkLeave ) + mark_leave[field->offset] = p; + } +} + +void FsmRun::execute() +{ + int _klen; + unsigned int _trans; + const long *_acts; + unsigned int _nacts; + const char *_keys; + +_resume: + if ( cs == tables->errorState ) + goto out; + + if ( p == pe ) + goto out; + +_loop_head: + _acts = tables->actions + tables->fromStateActions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( tables->actionSwitch[*_acts++] ); + + _keys = tables->transKeys + tables->keyOffsets[cs]; + _trans = tables->indexOffsets[cs]; + + _klen = tables->singleLengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = tables->rangeLengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = 
_keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += ((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + cs = tables->transTargsWI[_trans]; + + if ( tables->transActionsWI[_trans] == 0 ) + goto _again; + + gotoResume = false; + _acts = tables->actions + tables->transActionsWI[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( tables->actionSwitch[*_acts++] ); + if ( gotoResume ) + goto _resume; + +_again: + _acts = tables->actions + tables->toStateActions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( tables->actionSwitch[*_acts++] ); + + if ( cs == tables->errorState ) + goto out; + + if ( ++p != pe ) + goto _loop_head; +out: + if ( p == peof ) { + gotoResume = false; + _acts = tables->actions + tables->eofActions[cs]; + _nacts = (unsigned int) *_acts++; + + if ( tables->eofTargs[cs] >= 0 ) + cs = tables->eofTargs[cs]; + + while ( _nacts-- > 0 ) + execAction( tables->actionSwitch[*_acts++] ); + if ( gotoResume ) + goto _resume; + } +} + + diff --git a/colm/fsmgraph.cpp b/colm/fsmgraph.cpp new file mode 100644 index 00000000..6b955ad6 --- /dev/null +++ b/colm/fsmgraph.cpp @@ -0,0 +1,1399 @@ +/* + * Copyright 2001, 2002, 2006 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "config.h" +#include "fsmgraph.h" +#include "mergesort.h" + +using std::cerr; +using std::endl; + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +FsmState *FsmGraph::addState() +{ + /* Make the new state to return. */ + FsmState *state = new FsmState(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +void FsmGraph::concatFsm( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Case insensitive version of concatFsm. */ +void FsmGraph::concatFsmCI( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. 
*/ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + + KeySet keySet; + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. IsSigned + * determines if the integers are to be considered as signed or unsigned ints. */ +void FsmGraph::concatFsm( Key chr ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach on the character. */ + attachNewTrans( startState, end, chr, chr ); +} + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between the them. The set + * should be ordered correctly accroding to KeyOps and should not contain + * any duplicates. */ +void FsmGraph::orFsm( Key *set, int len ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( set[i-1] < set[i] ); + + /* Attach on all the integers in the given string of ints. */ + for ( int i = 0; i < len; i++ ) + attachNewTrans( startState, end, set[i], set[i] ); +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. 
*/ +void FsmGraph::rangeFsm( Key low, Key high ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach using the range of characters. */ + attachNewTrans( startState, end, low, high ); +} + +/* Construct a machine that a repeated range of characters. */ +void FsmGraph::rangeStarFsm( Key low, Key high) +{ + /* One state which is final and is the start state. */ + setStartState( addState() ); + setFinState( startState ); + + /* Attach start to start using range of characters. */ + attachNewTrans( startState, startState, low, high ); +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +void FsmGraph::lambdaFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); + setFinState( startState ); +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +void FsmGraph::emptyFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); +} + +void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->priorTable.setPriors( srcState->outPriorTable ); + } + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. 
Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +void FsmGraph::starOp( ) +{ + /* For the merging process. */ + MergeData md; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { + if ( *st != startState ) + mergeStatesLeaving( md, *st, startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( startState->isFinState() ) + mergeStatesLeaving( md, startState, startState ); + + /* Now ensure the new start state is a final state. */ + setFinState( startState ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +void FsmGraph::repeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one does absolutely nothing. 
*/ + if ( times == 1 ) + return; + + /* Make a machine to make copies from. */ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmGraph *dup = new FsmGraph( *copyFrom ); + doConcat( dup, 0, false ); + } + + /* Now use the copyFrom on the end. */ + doConcat( copyFrom, 0, false ); +} + +void FsmGraph::optionalRepeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + setFinState( startState ); + return; + } + + /* Make a machine to make copies from. */ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + setFinState( startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. */ + FsmGraph *dup = new FsmGraph( *copyFrom ); + dup->setFinBits( SB_GRAPH2 ); + doConcat( dup, &lastFinSet, true ); + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + FsmState *fs = finStateSet[i]; + if ( fs->stateBits & SB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~SB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. 
*/ + doConcat( copyFrom, &lastFinSet, true ); +} + + +/* Fsm concatentation worker. Supports treating the concatentation as optional, + * which essentially leaves the final states of machine one as final. */ +void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional ) +{ + /* For the merging process. */ + StateSet finStateSetCopy, startStateSet; + MergeData md; + + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + FsmState *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. */ + if ( !optional ) + unsetAllFinStates(); + finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + FsmState *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + mergeStatesLeaving( md, state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! 
state->isFinState() ) + clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. */ +void FsmGraph::concatOp( FsmGraph *other ) +{ + /* Assert same signedness and return graph concatenation op. */ + doConcat( other, 0, false ); +} + + +void FsmGraph::doOr( FsmGraph *other ) +{ + /* For the merging process. */ + MergeData md; + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states loose their start state status. */ + unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + setStartState( addState() ); + + /* Merge the start states. */ + mergeStates( md, startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + fillInStates( md ); +} + +/* Unions other with this machine. Other is deleted. */ +void FsmGraph::unionOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. 
*/ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call Worker routine. */ + doOr( other ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Intersects other with this machine. Other is deleted. */ +void FsmGraph::intersectOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other. */ + setFinBits( SB_GRAPH1 ); + other->setFinBits( SB_GRAPH2 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +/* Set subtracts other machine from this machine. Other is deleted. */ +void FsmGraph::subtractOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( SB_GRAPH1 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. 
Employs + * a depth first search through the graph of epsilon transitions. */ +void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if it does not resolve, ignore it. */ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the state being expanded (from) or states already in root's eptVect. */ + FsmState *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} + +void FsmGraph::shadowReadWriteStates( MergeData &md ) +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through eptVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + FsmState *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. 
*/ + if ( targ->isolatedShadow == 0 ) { + FsmState *shadow = addState(); + mergeStates( md, shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. */ + ept->targ = targ->isolatedShadow; + } + } + } + } +} + +void FsmGraph::resolveEpsilonTrans( MergeData &md ) +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to the same state. */ + shadowReadWriteStates( md ); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( md, st, ept->targ ); + else + mergeStates( md, st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} + +void FsmGraph::epsilonOp() +{ + /* For merging process. */ + MergeData md; + + setMisfitAccounting( true ); + + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + resolveEpsilonTrans( md ); + + /* Epsilons can cause merges which leave behind unreachable states. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Make a new machine by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. 
*/ +void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers ) +{ + /* For the merging process. */ + MergeData md; + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines lose start state status. */ + unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + setStartState( addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + FsmState *newStart = addState(); + setStartState( newStart ); + + /* The start state is in an owning machine class all its own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. 
*/ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. */ + mergeStates( md, newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back. */ + StateSet finStateSetCopy = finStateSet; + + /* Now all final states are unset. */ + unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + FsmState *finState = addState(); + setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all its own. */ + setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + resolveEpsilonTrans( md ); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & SB_ISFINAL) ) + clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + removeUnreachableStates(); +} + +void FsmGraph::globOp( FsmGraph **others, int numOthers ) +{ + /* All other machines lose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. 
This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +void FsmGraph::deterministicEntry() +{ + /* For the merging process. */ + MergeData md; + + /* States may lose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. */ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + FsmState *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( md, newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. 
*/ +void FsmGraph::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_GRAPH1; + } +} + +/* Unset any final states that have only one of the two SB_BOTH bits set, then clear those bits. */ +void FsmGraph::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. */ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_BOTH && + (state->stateBits & SB_BOTH) != SB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_BOTH; + } +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. So isolate the start state by separating it out such that it + * only has start stateness as its entry point. */ +void FsmGraph::isolateStartState( ) +{ + /* For the merging process. */ + MergeData md; + + /* Bail out if the start state is already isolated. 
*/ + if ( isStartStateIsolated() ) + return; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Stfill and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( md.stateDict.treeSize == 0 ); + assert( md.stfillHead == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +#ifdef COLM_LOG_CONDS +void logCondSpace( CondSpace *condSpace ) +{ + if ( condSpace == 0 ) + cerr << ""; + else { + for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) { + if ( ! csi.last() ) + cerr << ','; + (*csi)->actionName( cerr ); + } + } +} + +void logNewExpansion( Expansion *exp ) +{ + cerr << "created expansion:" << endl; + cerr << " range: " << exp->lowKey.getVal() << " .. 
" << + exp->highKey.getVal() << endl; + + cerr << " fromCondSpace: "; + logCondSpace( exp->fromCondSpace ); + cerr << endl; + cerr << " fromVals: " << exp->fromVals << endl; + + cerr << " toCondSpace: "; + logCondSpace( exp->toCondSpace ); + cerr << endl; + cerr << " toValsList: "; + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) + cerr << " " << *to; + cerr << endl; +} +#endif + + +void FsmGraph::findTransExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ) +{ + PairIter transCond( destState->outList.head, + srcState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + if ( transCond.userState == RangeOverlap ) { + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + CondSpace *srcCS = transCond.s2Tel.trans->condSpace; + expansion->toCondSpace = srcCS; + + long numTargVals = (1 << srcCS->condSet.length()); + for ( long targVals = 0; targVals < numTargVals; targVals++ ) + expansion->toValsList.append( targVals ); + + #ifdef COLM_LOG_CONDS + logNewExpansion( expansion ); + #endif + expansionList.append( expansion ); + } + } +} + +void FsmGraph::findCondExpInTrans( ExpansionList &expansionList, FsmState *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long fromVals, LongVect &toValsList ) +{ + FsmTrans searchTrans; + searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (lowKey - keyOps->minKey); + searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (highKey - keyOps->minKey); + searchTrans.prev = searchTrans.next = 0; + + PairIter pairIter( state->outList.head, &searchTrans ); + for ( ; !pairIter.end(); pairIter++ ) { + if ( pairIter.userState == 
RangeOverlap ) { + Expansion *expansion = new Expansion( lowKey, highKey ); + expansion->fromTrans = new FsmTrans(*pairIter.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = pairIter.s1Tel.trans->toState; + expansion->fromCondSpace = fromCondSpace; + expansion->fromVals = fromVals; + expansion->toCondSpace = toCondSpace; + expansion->toValsList = toValsList; + + expansionList.append( expansion ); + #ifdef COLM_LOG_CONDS + logNewExpansion( expansion ); + #endif + } + } +} + +void FsmGraph::findCondExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ) +{ + PairIter condCond( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !condCond.end(); condCond++ ) { + if ( condCond.userState == RangeOverlap ) { + /* Loop over all existing condVals. */ + CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet; + long destLen = destCS.length(); + + /* Find the items in src cond set that are not in dest + * cond set. These are the items that we must expand. */ + CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet; + for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ ) + srcOnlyCS.remove( *dcsi ); + long srcOnlyLen = srcOnlyCS.length(); + + if ( srcOnlyCS.length() > 0 ) { + #ifdef COLM_LOG_CONDS + cerr << "there are " << srcOnlyCS.length() << " item(s) that are " + "only in the srcCS" << endl; + #endif + + CondSet mergedCS = destCS; + mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet ); + + CondSpace *fromCondSpace = addCondSpace( destCS ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + + /* Loop all values in the dest space. */ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + /* Loop all new values. 
*/ + LongVect expandToVals; + for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) { + long targVals = basicVals; + for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) { + if ( soVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + } + } + expandToVals.append( targVals ); + } + + findCondExpInTrans( expansionList, destState, + condCond.s1Tel.lowKey, condCond.s1Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + } + } + } +} + +void FsmGraph::doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) { + long targVals = *to; + + /* We will use the copy of the transition that was made when the + * expansion was created. It will get used multiple times. Each + * time we must set up the keys, everything else is constant + * and already prepared. 
*/ + FsmTrans *srcTrans = exp->fromTrans; + + srcTrans->lowKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + srcTrans->highKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + + TransList srcList; + srcList.append( srcTrans ); + outTransCopy( md, destState, srcList.head ); + srcList.abandon(); + } + } +} + + +void FsmGraph::doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + Removal removal; + if ( exp->fromCondSpace == 0 ) { + removal.lowKey = exp->lowKey; + removal.highKey = exp->highKey; + } + else { + removal.lowKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + removal.highKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + } + removal.next = 0; + + TransList destList; + PairIter pairIter( destState->outList.head, &removal ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case RangeInS1: { + FsmTrans *destTrans = pairIter.s1Tel.trans; + destTrans->lowKey = pairIter.s1Tel.lowKey; + destTrans->highKey = pairIter.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: + break; + case RangeOverlap: { + FsmTrans *trans = pairIter.s1Tel.trans; + detachTrans( trans->fromState, trans->toState, trans ); + delete trans; + break; + } + case BreakS1: { + pairIter.s1Tel.trans = dupTrans( destState, + pairIter.s1Tel.trans ); + break; + } + case BreakS2: + break; + } + } + destState->outList.transfer( destList ); + } +} + +void FsmGraph::mergeStateConds( FsmState *destState, FsmState *srcState ) +{ + StateCondList destList; + PairIter pairIter( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case RangeInS1: { + StateCond 
*destCond = pairIter.s1Tel.trans; + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case RangeInS2: { + StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); + newCond->lowKey = pairIter.s2Tel.lowKey; + newCond->highKey = pairIter.s2Tel.highKey; + destList.append( newCond ); + break; + } + case RangeOverlap: { + StateCond *destCond = pairIter.s1Tel.trans; + StateCond *srcCond = pairIter.s2Tel.trans; + CondSet mergedCondSet; + mergedCondSet.insert( destCond->condSpace->condSet ); + mergedCondSet.insert( srcCond->condSpace->condSet ); + destCond->condSpace = addCondSpace( mergedCondSet ); + + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case BreakS1: + pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); + break; + + case BreakS2: + break; + } + } + destState->stateCondList.transfer( destList ); +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the source state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. 
*/ +void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + if ( !hasOutData( destState ) ) + mergeStates( md, destState, srcState ); + else { + FsmState *ssMutable = addState(); + mergeStates( md, ssMutable, srcState ); + transferOutData( ssMutable, destState ); + + for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ ) + embedCondition( md, ssMutable, *cond ); + + mergeStates( md, destState, ssMutable ); + } +} + +void FsmGraph::mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( md, destState, srcStates[s] ); +} + +void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + ExpansionList expList1; + ExpansionList expList2; + + findTransExpansions( expList1, destState, srcState ); + findCondExpansions( expList1, destState, srcState ); + findTransExpansions( expList2, srcState, destState ); + findCondExpansions( expList2, srcState, destState ); + + mergeStateConds( destState, srcState ); + + outTransCopy( md, destState, srcState->outList.head ); + + doExpand( md, destState, expList1 ); + doExpand( md, destState, expList2 ); + + doRemove( md, destState, expList1 ); + doRemove( md, destState, expList2 ); + + expList1.empty(); + expList2.empty(); + + /* Get its bits and final state status. */ + destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); + + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->outCondSet.insert( srcState->outCondSet ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + } +} + +void FsmGraph::fillInStates( MergeData &md ) +{ + /* Merge any states that are awaiting merging. This will likely cause + * other states to be added to the stfill list. */ + FsmState *state = md.stfillHead; + while ( state != 0 ) { + StateSet *stateSet = &state->stateDictEl->stateSet; + mergeStates( md, state, stateSet->data, stateSet->length() ); + state = state->alg.next; + } + + /* Delete the state sets of all states that are on the fill list. */ + state = md.stfillHead; + while ( state != 0 ) { + /* Delete and reset the state set. */ + delete state->stateDictEl; + state->stateDictEl = 0; + + /* Next state in the stfill list. */ + state = state->alg.next; + } + + /* StateDict will still have its ptrs/size set but all of its elements + * will be deleted so we don't need to clean it up. 
*/ +} + +void FsmGraph::findEmbedExpansions( ExpansionList &expansionList, + FsmState *destState, Action *condAction ) +{ + StateCondList destList; + PairIter transCond( destState->outList.head, + destState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + switch ( transCond.userState ) { + case RangeInS1: { + if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { + assert( transCond.s1Tel.highKey <= keyOps->maxKey ); + + /* Make a new state cond. */ + StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + newStateCond->condSpace = addCondSpace( CondSet( condAction ) ); + destList.append( newStateCond ); + + /* Create the expansion. */ + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + expansion->toCondSpace = newStateCond->condSpace; + expansion->toValsList.append( 1 ); + #ifdef COLM_LOG_CONDS + logNewExpansion( expansion ); + #endif + expansionList.append( expansion ); + } + break; + } + case RangeInS2: { + /* Enhance state cond and find the expansion. */ + StateCond *stateCond = transCond.s2Tel.trans; + stateCond->lowKey = transCond.s2Tel.lowKey; + stateCond->highKey = transCond.s2Tel.highKey; + + CondSet &destCS = stateCond->condSpace->condSet; + long destLen = destCS.length(); + CondSpace *fromCondSpace = stateCond->condSpace; + + CondSet mergedCS = destCS; + mergedCS.insert( condAction ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + stateCond->condSpace = toCondSpace; + destList.append( stateCond ); + + /* Loop all values in the dest space. 
*/ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + long targVals = basicVals; + Action **cim = mergedCS.find( condAction ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + + LongVect expandToVals( targVals ); + findCondExpInTrans( expansionList, destState, + transCond.s2Tel.lowKey, transCond.s2Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + break; + } + + + case RangeOverlap: + case BreakS1: + case BreakS2: + assert( false ); + break; + } + } + + destState->stateCondList.transfer( destList ); +} + +void FsmGraph::embedCondition( FsmState *state, Action *condAction ) +{ + MergeData md; + ExpansionList expList; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Worker. */ + embedCondition( md, state, condAction ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +void FsmGraph::embedCondition( MergeData &md, FsmState *state, Action *condAction ) +{ + ExpansionList expList; + + findEmbedExpansions( expList, state, condAction ); + doExpand( md, state, expList ); + doRemove( md, state, expList ); + expList.empty(); +} diff --git a/colm/fsmgraph.h b/colm/fsmgraph.h new file mode 100644 index 00000000..9c002077 --- /dev/null +++ b/colm/fsmgraph.h @@ -0,0 +1,1392 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMGRAPH_H +#define _FSMGRAPH_H + +#include +#include "common.h" +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "avlmap.h" + +/* Flags that control merging. */ +#define SB_GRAPH1 0x01 +#define SB_GRAPH2 0x02 +#define SB_BOTH 0x03 +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ONLIST 0x10 + +struct FsmTrans; +struct FsmState; +struct FsmGraph; +struct Action; +struct TokenDef; +struct NameInst; + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + FsmState *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +extern KeyOps *keyOps; + +/* Transition Action Element. Pairs an int key with an Action pointer. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. 
*/ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd > ActionSet; +typedef CmpSTable< Action*, CmpOrd > CmpActionSet; + +/* Lm Transition Action Element. Pairs an int key with a TokenDef pointer. */ +typedef SBstMapEl< int, TokenDef* > LmActionTableEl; + +/* Lm Transition Action Table. */ +struct LmActionTable + : public SBstMap< int, TokenDef*, CmpOrd > +{ + void setAction( int ordering, TokenDef *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). Orders by key + * first and by value second. */ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). Orders by key + * first and by value second. */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for LmActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. 
*/ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of point of transfer from Error action table to transitions and + * eofActionTable. */ + int transferPoint; + + int getKey() const { return ordering; } +}; + +struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). Orders by + * ordering, then by action pointer, then by transfer point. */ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Describe a priority, shared among PriorEls. + * Has a key and a priority value. */ +struct PriorDesc +{ + int key; + int priority; +}; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. 
*/ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table. A set of PriorEl compared with PriorElCmp. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguishing state data. Compares + * the descriptor pointers first and the orderings second. */ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguishing state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * priority tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fusing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd > TransFuncListCompare; + +/* Transition class that implements actions and priorities. 
*/ +struct FsmTrans +{ + FsmTrans() : fromState(0), toState(0) {} + FsmTrans( const FsmTrans &other ) : /* Copies the key range, action table and priority table; from/to states start out null. */ + lowKey(other.lowKey), + highKey(other.highKey), + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable) + { + assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); + } + + Key lowKey, highKey; + FsmState *fromState; + FsmState *toState; + + /* Pointers for the out list. */ + FsmTrans *prev, *next; + + /* Pointers for the in-list. */ + FsmTrans *ilprev, *ilnext; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; /* Not copied by the copy constructor, which asserts that it is empty in both objects. */ +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct TransInList +{ + TransInList() : head(0) { } + + FsmTrans *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. Both start at the list head. */ + Iter( const TransInList &il ) : ptr(il.head) { } + Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } + + /* Elements remain (lte), or at the end (end). */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator FsmTrans*() const { return ptr; } + FsmTrans &operator *() const { return *ptr; } + FsmTrans *operator->() const { return ptr; } + + /* Increment, decrement along the in-list pointers (ilnext, ilprev). */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + FsmTrans *ptr; + }; +}; + +typedef DList TransList; + +/* Set of states, list of states. */ +typedef BstSet StateSet; +typedef DList StateList; + +/* An element in a state dict. 
*/ +struct StateDictEl +: + public AvlTreeEl +{ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + FsmState *targState; /* Not initialized by the constructor. */ +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable > StateDict; + +/* Data needed for a merge operation. */ +struct MergeData +{ + MergeData() + : stfillHead(0), stfillTail(0) { } + + StateDict stateDict; + + FsmState *stfillHead; /* Head and tail pointers, null on construction; presumably the list that fillListAppend() appends to. */ + FsmState *stfillTail; + + void fillListAppend( FsmState *state ); +}; + +struct TransEl +{ + /* Constructors. Only the three-argument form sets value. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, FsmTrans *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + FsmTrans *value; +}; + +struct CmpKey +{ + static int compare( const Key key1, const Key key2 ) + { + if ( key1 < key2 ) + return -1; + else if ( key1 > key2 ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +typedef BstSet KeySet; + +struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transition stored in a state. Specifies the target. */ +typedef Vector EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( FsmState *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + FsmState *targ; + bool leaving; +}; +typedef Vector EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet LmItemSet; + +/* Conditions. 
*/ +typedef BstSet< Action*, CmpOrd > CondSet; +typedef CmpTable< Action*, CmpOrd > CmpCondSet; + +struct CondSpace + : public AvlTreeEl +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + CondSet condSet; + Key baseKey; + long condSpaceId; +}; + +typedef Vector CondSpaceVect; + +typedef AvlTree CondSpaceMap; + +struct StateCond +{ + StateCond( Key lowKey, Key highKey ) : /* Sets only the key range; condSpace, prev and next are left unset. */ + lowKey(lowKey), highKey(highKey) {} + + Key lowKey; + Key highKey; + CondSpace *condSpace; + + StateCond *prev, *next; +}; + +typedef DList StateCondList; +typedef Vector LongVect; + +struct Expansion +{ + Expansion( Key lowKey, Key highKey ) : /* fromVals and the list pointers are not initialized here. */ + lowKey(lowKey), highKey(highKey), + fromTrans(0), fromCondSpace(0), + toCondSpace(0) {} + + ~Expansion() /* Deletes fromTrans when it is set. */ + { + if ( fromTrans != 0 ) + delete fromTrans; + } + + Key lowKey; + Key highKey; + + FsmTrans *fromTrans; + CondSpace *fromCondSpace; + long fromVals; + + CondSpace *toCondSpace; + LongVect toValsList; + + Expansion *prev, *next; +}; + +typedef DList ExpansionList; + +struct Removal +{ + Key lowKey; + Key highKey; + + Removal *next; +}; + +struct CondData +{ + CondData() : nextCondKey(0) {} + + /* Condition info. */ + Key nextCondKey; + + CondSpaceMap condSpaceMap; +}; + +extern CondData *condData; + +/* State class that implements actions and priorities. */ +struct FsmState +{ + FsmState(); + FsmState(const FsmState &other); + ~FsmState(); + + /* Is the state final? Tests the SB_ISFINAL bit of stateBits. */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + /* Out transition list. */ + TransList outList; + + /* In transition Lists. */ + TransInList inList; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Condition info. */ + StateCondList stateCondList; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. 
*/ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + FsmState *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + FsmState *next; + + /* Identification for printing and stable minimization. */ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + FsmState *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + FsmState *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by when they run. The + * toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is the same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. */ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. 
*/ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + ActionSet outCondSet; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; + + FsmState *eofTarget; +}; + +template struct NextTrans +{ + Key lowKey, highKey; + ListItem *trans; + ListItem *next; + + void load() { /* Cache the successor and key range of trans; next is null when trans is null. */ + if ( trans == 0 ) + next = 0; + else { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( ListItem *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + + +/* Encodes the different states that are meaningful to the user of the + * iterator. */ +enum PairIterUserState +{ + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 +}; + +template struct PairIter +{ + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + PairIter( ListItem1 *list1, ListItem2 *list2 ); + + /* Query iterator. Both increment forms call findNext(). */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. */ + ListItem1 *list1; + ListItem2 *list2; + IterState itState; + PairIterUserState userState; + + NextTrans s1Tel; + NextTrans s2Tel; + Key bottomLow, bottomHigh; + ListItem1 *bottomTrans1; + ListItem2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. 
*/ +template PairIter::PairIter( + ListItem1 *list1, ListItem2 *list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Return and re-entry for the co-routine iterators. Records the iterator + * state, returns, and defines the label that the next call jumps to. This + * should ALWAYS be used inside of a block. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: backIn = true + +/* Return and re-entry for the co-routine iterators. Same as CO_RETURN but + * also sets userState. This should ALWAYS be used inside of a block. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: backIn = true + +/* Advance to the next range. When this returns, userState says what kind of + * range was found and s1Tel/s2Tel hold it, unless there are no more, in + * which case end() returns true. */ +template void PairIter::findNext() +{ + /* This variable is used in dummy statements that follow the entry + * goto labels. The compiler needs some statement to follow the label. */ + bool backIn; + + /* Jump into the iterator routine based on the iterator state. */ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans == 0 ) { + /* We are at the end of state1's ranges. 
Process the rest of + * state2's ranges. */ + while ( s2Tel.trans != 0 ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans == 0 ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( s1Tel.trans != 0 ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is a back key being in front of a + * front key. */ + else if ( s1Tel.highKey < s2Tel.lowKey ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.lowKey ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. */ + else if ( s1Tel.lowKey < s2Tel.lowKey ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + s1Tel.highKey.decrement(); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( s2Tel.lowKey < s1Tel.lowKey ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. 
*/ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + s2Tel.highKey.decrement(); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( s1Tel.highKey < s2Tel.highKey ) { + /* Range from state2 goes longer than the range from state1. We + * must break the range from state2 into an evenly overlapping + * segment. */ + bottomLow = s1Tel.highKey; + bottomLow.increment(); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. */ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.highKey ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. */ + bottomLow = s2Tel.highKey; + bottomLow.increment(); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. 
*/ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. Both ranges are consumed. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd > CmpEpsilonTrans; + +/* Compare class for the Approximate minimization. */ +class ApproxCompare +{ +public: + ApproxCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the regular partitioning of a partition minimization. */ +class PartitionCompare +{ +public: + PartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. */ +class MarkCompare +{ +public: + MarkCompare() { } + bool shouldMark( MarkIndex &markIndex, const FsmState *pState1, + const FsmState *pState2 ); +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, FsmState* > EntryMapEl; +typedef BstMap< int, FsmState* > EntryMap; +typedef Vector EntryMapBase; + +/* Graph class that implements actions and priorities. */ +struct FsmGraph +{ + /* Constructors/Destructors. 
*/ + FsmGraph( ); + FsmGraph( const FsmGraph &graph ); + ~FsmGraph(); + + /* The list of states. */ + StateList stateList; + StateList misfitList; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + FsmState *startState; + + /* Error state, possibly created only when the final machine has been + * created and the XML machine is about to be written. No transitions + * point to this state. */ + FsmState *errState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit Accounting. Are misfits put on a separate list. */ + bool misfitAccounting; + + bool lmRequiresErrorState; + NameInst *rootName; + NameInst **nameIndex; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transtions. */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. */ + void transferErrorActions( FsmState *state, int transferPoint ); + void setErrorAction( FsmState *state, int ordering, Action *action ); + void setErrorActions( FsmState *state, const ActionTable &other ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( FsmState *state ); + + /* Similar to setErrorAction, instead gives a state to go to on error. */ + void setErrorTarget( FsmState *state, FsmState *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, TokenDef *lmPart ); + + /* Set conditions. 
*/ + CondSpace *addCondSpace( const CondSet &condSet ); + + void findEmbedExpansions( ExpansionList &expansionList, + FsmState *destState, Action *condAction ); + void embedCondition( MergeData &md, FsmState *state, Action *condAction ); + void embedCondition( FsmState *state, Action *condAction ); + + void startFsmCondition( Action *condAction ); + void allTransCondition( Action *condAction ); + void leaveFsmCondition( Action *condAction ); + + /* Set error actions to execute. */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. */ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. */ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. 
*/ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm to so they won't affcet minimization + * of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Misfit Accounting. Are misfits put on a separate list. */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and Unset a state as final. */ + void setFinState( FsmState *state ); + void unsetFinState( FsmState *state ); + + void setStartState( FsmState *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. */ + void setEntry( int id, FsmState *state ); + void changeEntry( int id, FsmState *to, FsmState *from ); + void unsetEntry( int id, FsmState *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + void shadowReadWriteStates( MergeData &md ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + + /* Attach with a new transition. 
*/ + FsmTrans *attachNewTrans( FsmState *from, FsmState *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that already in an out list. */ + void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a state from the graph. */ + void detachState( FsmState *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will dropin to a free spot. */ + FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans ); + + /* In crossing, two transitions both go to real states. */ + FsmTrans *fsmAttachStates( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. */ + FsmTrans *mergeTrans( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare deterimne relative priorities of two transition tables. */ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + /* Cross a src transition with one that is already occupying a spot. 
*/ + FsmTrans *crossTransitions( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ); + + void doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ); + void doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ); + void findCondExpInTrans( ExpansionList &expansionList, FsmState *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long destVals, LongVect &toValsList ); + void findTransExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ); + void findCondExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ); + void mergeStateConds( FsmState *destState, FsmState *srcState ); + + /* Merge a set of states into newState. */ + void mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ); + void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ); + void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. */ + void fillInStates( MergeData &md ); + + /* + * Transition Comparison. + */ + + /* Compare transition data. Either of the pointers may be null. */ + static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target partitions. Either pointer may be null. */ + static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static inline bool shouldMarkPtr( MarkIndex &markIndex, + FsmTrans *trans1, FsmTrans *trans2 ); + + /* + * Callbacks. 
+ */ + + /* Compare priority and function table of transitions. */ + static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Add the properties of srcTrans into destTrans. */ + void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare states on data stored in the states. */ + static int compareStateData( const FsmState *state1, const FsmState *state2 ); + + /* Out transition data. */ + void clearOutData( FsmState *state ); + bool hasOutData( FsmState *state ); + void transferOutData( FsmState *destState, FsmState *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + FsmState *addState(); + + /* + * Building basic machines + */ + + void concatFsm( Key c ); + void concatFsm( Key *str, int len ); + void concatFsmCI( Key *str, int len ); + void orFsm( Key *set, int len ); + void rangeFsm( Key low, Key high ); + void rangeStarFsm( Key low, Key high ); + void emptyFsm( ); + void lambdaFsm( ); + + /* + * Fsm operators. + */ + + void starOp( ); + void repeatOp( int times ); + void optionalRepeatOp( int times ); + void concatOp( FsmGraph *other ); + void unionOp( FsmGraph *other ); + void intersectOp( FsmGraph *other ); + void subtractOp( FsmGraph *other ); + void epsilonOp(); + void joinOp( int startId, int finalId, FsmGraph **others, int numOthers ); + void globOp( FsmGraph **others, int numOthers ); + void deterministicEntry(); + + /* + * Operator workers + */ + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * identity of the fsm. */ + void isolateStartState(); + + /* Workers for resolving epsilon transitions. */ + bool inEptVect( EptVect *eptVect, FsmState *targ ); + void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ); + void resolveEpsilonTrans( MergeData &md ); + + /* Workers for concatenation and union.
*/ + void doConcat( FsmGraph *other, StateSet *fromStates, bool optional ); + void doOr( FsmGraph *other ); + + /* + * Final states + */ + + /* Unset any final states that are no longer to be final + * due to final bits. */ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmGraph *other ); + + /* Ordering states. */ + void depthFirstOrdering( FsmState *state ); + void depthFirstOrdering(); + void sortStatesByFinal(); + + /* Set sequential state numbers starting at 0. */ + void setStateNumbers( int base ); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states that can reach state. Used by removeDeadEndStates to + * mark the states that have paths to the final states. */ + void markReachableFromHereReverse( FsmState *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( FsmState *state ); + void markReachableFromHereStopFinal( FsmState *state ); + + /* Removes states that do not lead to a final state and are + * thus wasted silicon. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( FsmState *state ); + bool anyErrorRange( FsmState *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning.
*/ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. Uses n^2 space (lookout) and average + * n^2 time. Worst case n^3 time, but that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. */ + bool minimizeRound( ); + + /* Given an initial partitioning of states, split partitions that have out trans + * to differing partitions. */ + int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. */ + int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * a lot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. */ + bool markRound( MarkIndex &markIndex ); + + /* Move the in trans into src into dest. */ + void inTransMove(FsmState *dest, FsmState *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates(FsmState *dest, FsmState *src); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class.
*/ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions that go to the same state and have the same + * transition data. */ + void compressTransitions(); + + /* Returns true if there is a transition (either explicit or by a gap) to + * the error state. */ + bool checkErrTrans( FsmState *state, FsmTrans *trans ); + bool checkErrTransFinish( FsmState *state ); + bool hasErrorTrans(); +}; + + +#endif /* _FSMGRAPH_H */ diff --git a/colm/fsmmin.cpp b/colm/fsmmin.cpp new file mode 100644 index 00000000..901659f5 --- /dev/null +++ b/colm/fsmmin.cpp @@ -0,0 +1,732 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include "mergesort.h" + +int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. */ + MergeSort mergeSort; + PartitionCompare partCompare; + + /* For each partition. */ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare.
*/ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + FsmState *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmGraph::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSort mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting.
*/ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splitting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. */ +int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSort mergeSort; + PartitionCompare partCompare; + + /* The lists of unsplittable (partList) and splittable partitions. + * Only partitions in the splittable list are checked for needing splitting.
*/ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If the state has at least one out transition then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. */ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot.
*/ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + FsmState *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. */ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. */ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transitions into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { + MinPartition *fromPart = trans->fromState->alg.partition; + if ( !
fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may be splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmGraph::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSort mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition.
*/ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmGraph::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + FsmState *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +bool FsmGraph::markRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. Take note if any pair gets marked. */ + FsmState *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair?
*/ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} + + +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minimal FSM + * possible. + */ +void FsmGraph::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round. markRound reports a bool, so keep + * the flag a bool as well. */ + bool modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} + +bool FsmGraph::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSort mergeSort; + ApproxCompare approxCompare; + + /* Fill up an array of pointers to the states. */ + FsmState **statePtrs = new FsmState*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort The list. */ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates.
*/ + /* Fusing detaches the fused state from stateList, so the list length + * shrinks while we walk. Capture the number of pointers in the array + * up front so the walk covers the whole sorted array instead of + * stopping early. */ + int numStates = stateList.length(); + FsmState **pLast = statePtrs; + FsmState **pState = statePtrs + 1; + for ( int i = 1; i < numStates; i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, they + * must also be fused into pLast. */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} + +/** + * \brief Minimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimal FSM possible. + */ +void FsmGraph::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void FsmGraph::removeUnreachableStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked.
*/ + FsmState *state = stateList.head; + while ( state ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +bool FsmGraph::outListCovers( FsmState *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( keyOps->minKey < trans->lowKey ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. */ + Key lowKey = trans->lowKey; + lowKey.decrement(); + if ( trans->prev->highKey < lowKey ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( trans->highKey < keyOps->maxKey ) + return false; + + return true; +} + +/* Remove states that do not lead to a final state. Works by recursively traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmGraph::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + FsmState **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around.
This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= SB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + FsmState *state = stateList.head; + while ( state != 0 ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. */ +void FsmGraph::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + FsmState *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is detached from the graph and deleted. */ +void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Src is a duplicate of dest. Move the transitions into src over to dest. */ + inTransMove( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + FsmState *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encountered that belongs to the equivalence class.
All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of its equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primary state of its equivalence class: Assume q + * is not the primary state of its equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* An unmarked pair is equivalent: fuse p into q and stop + * searching, since p no longer exists. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmGraph::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + FsmState *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the state into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + FsmState *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fused. The state needs to be on + * the main list for the detach from the graph to work.
Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transferred the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + + +/* Merge neighboring transitions that go to the same state and have the same + * transition data. */ +void FsmGraph::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + Key nextLow = next->lowKey; + nextLow.decrement(); + if ( trans->highKey == nextLow && trans->toState == next->toState && + CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) + { + trans->highKey = next->highKey; + st->outList.detach( next ); + detachTrans( next->fromState, next->toState, next ); + delete next; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp new file mode 100644 index 00000000..9edb584b --- /dev/null +++ b/colm/fsmrun.cpp @@ -0,0 +1,890 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "config.h" +#include "fsmrun.h" +#include "redfsm.h" +#include "parsedata.h" +#include "parsetree.h" +#include "pdarun.h" +#include "colm.h" + +using std::cerr; +using std::endl; + +exit_object endp; + +void operator<<( ostream &out, exit_object & ) +{ + out << endl; + exit(1); +} + +FsmRun::FsmRun( FsmTables *tables ) : + tables(tables), + parser(0), + position(0) +{ +} + +FsmRun::~FsmRun() +{ +// RunBuf *rb = runBuf; +// while ( rb != 0 ) { +// RunBuf *next = rb->next; +// delete rb; +// rb = next; +// } +} + +void FsmRun::undoStreamPush( long length ) +{ + long remainder = pe - p; + memmove( runBuf->buf, runBuf->buf + length, remainder ); + pe -= length; +} + +/* Push data in front of the unread input. The unread part of the current run + * buffer is handed back to the input stream and a new run buffer holding data + * becomes the current one. Only the case where p is strictly inside the + * current buffer is implemented; the other two cases assert. */ +void FsmRun::streamPush( const char *data, long length ) +{ + #ifdef COLM_LOG_PARSE + cerr << "readying fake push" << endl; + #endif + + if ( p == runBuf->buf ) { + cerr << "case 1" << endl; + assert(false); + } + else if ( p == (runBuf->buf + runBuf->length) ) { + cerr << "case 2" << endl; + assert(false); + } + else { + /* This is the normal path, so only trace it when parse logging is + * enabled, like the other trace output in this file. */ + #ifdef COLM_LOG_PARSE + cerr << "case 3" << endl; + #endif + + /* Send back the second half of the current run buffer. */ + RunBuf *dup = new RunBuf; + memcpy( dup, runBuf, sizeof(RunBuf) ); + + /* Need to fix the offset. */ + dup->length = pe - runBuf->buf; + dup->offset = p - runBuf->buf; + + /* Send it back. */ + inputStream->pushBack( dup ); + + /* Since the second half is gone the current buffer now ends at p. */ + pe = p; + runBuf->length = p - runBuf->buf; + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the stream.
*/ + assert( length < FSM_BUFSIZE ); + RunBuf *newBuf = new RunBuf; + newBuf->next = runBuf; + newBuf->offset = 0; + newBuf->length = length; + memcpy( newBuf->buf, data, length ); + + p = newBuf->buf; + pe = newBuf->buf + newBuf->length; + runBuf = newBuf; + } +} + +/* Should only be sending back whole tokens/ignores, therefore the send back + * should never cross a buffer boundary. Either we slide back p, or we move to + * a previous buffer and slide back p. */ +void FsmRun::sendBackText( const char *data, long length ) +{ + #ifdef COLM_LOG_PARSE + cerr << "push back of " << length << " characters" << endl; + #endif + + if ( length == 0 ) + return; + + if ( p == runBuf->buf ) { + #ifdef COLM_LOG_PARSE + cerr << "pushing back runbuf" << endl; + #endif + + /* Move to the next run buffer. */ + RunBuf *back = runBuf; + runBuf = runBuf->next; + + /* Flush out the input buffer. */ + back->length = pe-p; + back->offset = 0; + inputStream->pushBack( back ); + + /* Set p and pe. */ + assert( runBuf != 0 ); + p = pe = runBuf->buf + runBuf->length; + } + + /* If there is data in the current buffer then the whole send back + * should be in this buffer. */ + assert( (p - runBuf->buf) >= length ); + + /* slide p back. */ + p -= length; + + #ifdef COLM_LOG_PARSE + if ( memcmp( data, p, length ) != 0 ) + cerr << "mismatch of pushed back text" << endl; + #endif + + assert( memcmp( data, p, length ) == 0 ); + + position -= length; + + /* We are adjusting p so this must be reset. 
*/ + tokstart = 0; +} + +void FsmRun::queueBack( Kid *input ) +{ + Alg *alg = input->tree->alg; + + if ( alg->flags & AF_GROUP_MEM ) { + #ifdef COLM_LOG_PARSE + LangElInfo *lelInfo = parser->tables->gbl->lelInfo; + cerr << "queuing back: " << lelInfo[input->tree->id].name << endl; + #endif + + if ( parser->queue == 0 ) + parser->queue = parser->queueLast = input; + else { + parser->queueLast->next = input; + parser->queueLast = input; + } + } + else { + /* If there are queued items send them back starting at the tail + * (newest). */ + if ( parser->queue != 0 ) { + /* Reverse the list. */ + Kid *kid = parser->queue, *last = 0; + while ( kid != 0 ) { + Kid *next = kid->next; + kid->next = last; + last = kid; + kid = next; + } + + /* Send them back. */ + while ( last != 0 ) { + Kid *next = last->next; + sendBack( last ); + last = next; + } + + parser->queue = 0; + } + + /* Now that the queue is flushed, can send back the original item. */ + sendBack( input ); + } +} + +void FsmRun::sendBackIgnore( Kid *ignore ) +{ + /* Ignore tokens are queued in reverse order. */ + while ( tree_is_ignore( parser->prg, ignore ) ) { + #ifdef COLM_LOG_PARSE + LangElInfo *lelInfo = parser->tables->gbl->lelInfo; + cerr << "sending back: " << lelInfo[ignore->tree->id].name; + if ( ignore->tree->alg != 0 && ignore->tree->alg->flags & AF_ARTIFICIAL ) + cerr << " (artificial)"; + cerr << endl; + #endif + + Head *head = ignore->tree->tokdata; + bool artificial = ignore->tree->alg != 0 && + ignore->tree->alg->flags & AF_ARTIFICIAL; + + if ( head != 0 && !artificial ) + sendBackText( string_data( head ), head->length ); + + /* Check for reverse code. */ + Alg *alg = ignore->tree->alg; + if ( alg != 0 && alg->flags & AF_HAS_RCODE ) { + Execution execution( parser->prg, parser->reverseCode, + parser, 0, 0, 0 ); + + /* Do the reverse exeuction. 
*/ + execution.rexecute( parser->root, 0, parser->allReverseCode ); + alg->flags &= ~AF_HAS_RCODE; + } + + ignore = ignore->next; + } +} + +void FsmRun::sendBack( Kid *input ) +{ + #ifdef COLM_LOG_PARSE + LangElInfo *lelInfo = parser->tables->gbl->lelInfo; + cerr << "sending back: " << lelInfo[input->tree->id].name; + if ( input->tree->alg->flags & AF_ARTIFICIAL ) + cerr << " (artificial)"; + cerr << endl; + #endif + + Alg *alg = input->tree->alg; + if ( alg->flags & AF_NAMED ) { + /* Send back anything that is in the buffer. */ + inputStream->pushBack( p, pe-p ); + p = pe = runBuf->buf; + + /* Send the named lang el back first, then send back any leading + * whitespace. */ + inputStream->pushBackNamed(); + } + + if ( !(alg->flags & AF_ARTIFICIAL) ) { + /* Push back the token data. */ + sendBackText( string_data( input->tree->tokdata ), + string_length( input->tree->tokdata ) ); + } + + /* Check for reverse code. */ + if ( alg->flags & AF_HAS_RCODE ) { + Execution execution( parser->prg, parser->reverseCode, + parser, 0, 0, 0 ); + + /* Do the reverse execution. */ + execution.rexecute( parser->root, 0, parser->allReverseCode ); + alg->flags &= ~AF_HAS_RCODE; + } + + /* Always push back the ignore text. */ + sendBackIgnore( tree_ignore( parser->prg, input->tree ) ); + + /* If eof was just sent back remember that it needs to be sent again. */ + if ( input->tree->id == parser->tables->gbl->eofId ) + eofSent = false; + + /* If the item is the most recently bound tree then remove it from the + * bindings array and drop the reference the binding held. */ + Tree *lastBound = parser->bindings.top(); + if ( lastBound == input->tree ) { + parser->bindings.pop(); + tree_downref( parser->prg, input->tree ); + } + + /* Downref the tree that was sent back and free the kid. 
*/ + tree_downref( parser->prg, input->tree ); + parser->prg->kidPool.free( input ); +} + +void FsmRun::sendEOF( ) +{ + #ifdef COLM_LOG_PARSE + cerr << "token: _EOF" << endl; + #endif + + Kid *input = parser->prg->kidPool.allocate(); + input->tree = parser->prg->treePool.allocate(); + input->tree->alg = parser->prg->algPool.allocate(); + + input->tree->refs = 1; + input->tree->id = parser->tables->gbl->eofId; + + bool ctxDepParsing = parser->prg->ctxDepParsing; + long frameId = parser->tables->gbl->regionInfo[region].eofFrameId; + if ( ctxDepParsing && frameId >= 0 ) { + #ifdef COLM_LOG_PARSE + cerr << "HAVE PRE_EOF BLOCK" << endl; + #endif + + Code *code = parser->tables->gbl->frameInfo[frameId].code; + + /* Execute the translation. */ + Execution execution( parser->prg, parser->reverseCode, + parser, code, 0, 0 ); + execution.execute( parser->root ); + + set_AF_GROUP_MEM(); + + sendQueuedTokens(); + } + + parser->send( input ); + + if ( parser->errCount > 0 ) { + parser->parse_error( parser->tables->gbl->eofId, input->tree ) << + "parse error" << endp; + } + + tokstart = 0; + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; +} + +void FsmRun::sendQueuedTokens() +{ + while ( parser->queue != 0 ) { + /* Pull an item to send off the queue. */ + Kid *send = parser->queue; + parser->queue = parser->queue->next; + + /* Must clear next, since the parsing algorithm uses it. 
*/ + send->next = 0; + if ( send->tree->alg->flags & AF_IGNORE ) { + #ifdef COLM_LOG_PARSE + cerr << "ignoring queued item: " << + parser->tables->gbl->lelInfo[send->tree->id].name << endl; + #endif + + parser->ignore( send->tree ); + parser->prg->kidPool.free( send ); + } + else { + #ifdef COLM_LOG_PARSE + cerr << "sending queue item: " << + parser->tables->gbl->lelInfo[send->tree->id].name << endl; + #endif + sendLangEl( send ); + } + } +} + +void FsmRun::sendToken( long id ) +{ + #ifdef COLM_LOG_PARSE + cerr << "token: " << parser->tables->gbl->lelInfo[id].name << endl; + #endif + + bool ctxDepParsing = parser->prg->ctxDepParsing; + LangElInfo *lelInfo = parser->tables->gbl->lelInfo; + + /* Copy the token data. */ + long length = p-tokstart; + Head *tokdata = string_alloc_const( parser->prg, tokstart, length ); + + if ( ctxDepParsing && lelInfo[id].frameId >= 0 ) { + translateLangEl( id, tokdata, false, 0 ); + sendQueuedTokens(); + } + else { + makeToken( id, tokdata, false, 0 ); + assert( parser->queue == 0 ); + } + + memset( mark_leave, 0, sizeof(mark_leave) ); +} + +void FsmRun::sendNamedLangEl() +{ + /* All three set by getLangEl. */ + long bindId; + char *data; + long length; + + KlangEl *klangEl = inputStream->getLangEl( bindId, data, length ); + if ( klangEl->termDup != 0 ) + klangEl = klangEl->termDup; + + #ifdef COLM_LOG_PARSE + cerr << "named langEl: " << parser->tables->gbl->lelInfo[klangEl->id].name << endl; + #endif + + /* Copy the token data. */ + Head *tokdata = 0; + if ( data != 0 ) + tokdata = string_alloc_new( parser->prg, data, length ); + + makeToken( klangEl->id, tokdata, true, bindId ); +} + +void FsmRun::set_AF_GROUP_MEM() +{ + /* Set AF_GROUP_MEM now. 
*/ + long sendCount = 0; + Kid *queued = parser->queue; + while ( queued != 0 ) { + if ( !(queued->tree->alg->flags & AF_IGNORE) ) { + if ( sendCount > 0 ) + queued->tree->alg->flags |= AF_GROUP_MEM; + sendCount += 1; + } + queued = queued->next; + } +} + +/* + * Implemented: + * -shorten the match (possibly to zero length) + * -change the token to a new identifier + * -change global state (it can, but it isn't reverted during backtracking). + * + * Not implemented: + * -invoke failure (and hence the backtracker) + */ + +void FsmRun::translateLangEl( int id, Head *tokdata, bool namedLangEl, int bindId ) +{ + #ifdef COLM_LOG_PARSE + cerr << "translating: " << + parser->tables->gbl->lelInfo[id].name << endl; + #endif + + Code *code = parser->tables->gbl->frameInfo[ + parser->tables->gbl->lelInfo[id].frameId].code; + + p = tokstart; + + /* Execute the translation. */ + Execution execution( parser->prg, parser->reverseCode, + parser, code, 0, tokdata ); + execution.execute( parser->root ); + + string_free( parser->prg, tokdata ); + + set_AF_GROUP_MEM(); +} + +void FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ) +{ + /* Make the token object. */ + long objectLength = parser->tables->gbl->lelInfo[id].objectLength; + Kid *attrs = alloc_attrs( parser->prg, objectLength ); + + Kid *input = 0; + input = parser->prg->kidPool.allocate(); + input->tree = parser->prg->treePool.allocate(); + input->tree->alg = parser->prg->algPool.allocate(); + + if ( namedLangEl ) + input->tree->alg->flags |= AF_NAMED; + + input->tree->refs = 1; + input->tree->id = id; + input->tree->tokdata = tokdata; + + /* Only the attributes are attached as children here. Ignores get + * added later. */ + input->tree->child = attrs; + + /* Set attributes for the labelled components. 
*/ + for ( int i = 0; i < 32; i++ ) { + if ( mark_leave[i] != 0 ) { + Head *data = string_alloc_new( parser->prg, + mark_enter[i], mark_leave[i] - mark_enter[i] ); + set_attr( input->tree, i, construct_string( parser->prg, data ) ); + tree_upref( get_attr( input->tree, i ) ); + } + } + + /* If the item is bound then store it in the bindings array. */ + if ( bindId > 0 ) { + parser->bindings.push( input->tree ); + tree_upref( input->tree ); + } + + sendLangEl( input ); +} + +/* Send back the accumulated ignore tokens. */ +void PdaRun::sendBackIgnore() +{ + Kid *ignore = extractIgnore(); + fsmRun->sendBackIgnore( ignore ); + while ( ignore != 0 ) { + Kid *next = ignore->next; + tree_downref( prg, ignore->tree ); + prg->kidPool.free( ignore ); + ignore = next; + } +} + +Kid *PdaRun::extractIgnore() +{ + Kid *ignore = accumIgnore; + accumIgnore = 0; + return ignore; +} + +void PdaRun::send( Kid *input ) +{ + long length = string_length( input->tree->tokdata ); + //input->tree->pos = fsmRun->position; + fsmRun->position += length; + + /* Pull the ignore tokens out and store in the token. */ + Kid *ignore = extractIgnore(); + if ( ignore != 0 ) { + Kid *child = input->tree->child; + input->tree->child = ignore; + while ( ignore->next != 0 ) + ignore = ignore->next; + ignore->next = child; + } + + /* Pull the reverse code out and store in the token. */ + bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); + if ( hasrcode ) + input->tree->alg->flags |= AF_HAS_RCODE; + + parseToken( input ); +} + +void FsmRun::sendLangEl( Kid *input ) +{ + long id = input->tree->id; + + /* Send the token to the parser. */ + parser->send( input ); + + /* Check the result. */ + if ( parser->errCount > 0 ) { + /* Error occured in the top-level parser. */ + parser->parse_error(id, input->tree) << "parse error" << endp; + } + else { + /* Set the current state from the next region. 
*/ + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; + + if ( parser->isParserStopFinished() ) { + #ifdef COLM_LOG_PARSE + cerr << "stopping the parse" << endl; + #endif + cs = tables->errorState; + parser->stopParsing = true; + } + } + + /* Reset tokstart. */ + tokstart = 0; + + #ifdef COLM_LOG_PARSE + cerr << "new token region: " << + parser->tables->gbl->regionInfo[region].name << endl; + #endif +} + +void PdaRun::ignore( Tree *tree ) +{ + /* Add the ignore string to the head of the ignore list. */ + Kid *ignore = prg->kidPool.allocate(); + ignore->tree = tree; + + /* Pull the reverse code out and store in the token. */ + bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); + if ( hasrcode ) { + if ( tree->alg == 0 ) + tree->alg = prg->algPool.allocate(); + tree->alg->flags |= AF_HAS_RCODE; + } + + /* Prepend it to the list of ignore tokens. */ + ignore->next = accumIgnore; + accumIgnore = ignore; +} + +void FsmRun::sendIgnore( long id ) +{ + int length = p-tokstart; + + #ifdef COLM_LOG_PARSE + cerr << "ignoring: " << parser->tables->gbl->lelInfo[id].name << endl; + #endif + + /* Make the ignore string. */ + Head *ignoreStr = string_alloc_const( parser->prg, tokstart, length ); + + Tree *tree = parser->prg->treePool.allocate(); + tree->refs = 1; + tree->id = id; + tree->tokdata = ignoreStr; + + /* Send it to the parser. */ + parser->ignore( tree ); + + /* Prepare for more scanning. */ + tokstart = 0; + position += length; + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; + + memset( mark_leave, 0, sizeof(mark_leave) ); +} + +void FsmRun::emitToken( KlangEl *token ) +{ + if ( token->ignore ) + sendIgnore( token->id ); + else + sendToken( token->id ); +} + +/* Load up a token, starting from tokstart if it is set. If not set then + * start it at p. */ +Head *FsmRun::extractToken( long length ) +{ + /* How much do we have already? Tokstart may or may not be set. 
*/ + long have = 0; + if ( tokstart != 0 ) + have = p - tokstart; + else + tokstart = p; + + /* The generated token length has been stuffed into tokdata. */ + if ( tokstart + length > pe ) { + /* There is not enough data in the buffer to generate the token. + * Shift data over and fill the buffer. */ + if ( have > 0 ) { + /* There is data that needs to be shifted over. */ + memmove( runBuf->buf, tokstart, have ); + tokend -= (tokstart - runBuf->buf); + tokstart = runBuf->buf; + } + p = pe = runBuf->buf + have; + peof = 0; + + long space = runBuf->buf + FSM_BUFSIZE - pe; + + if ( space == 0 ) + cerr << "OUT OF BUFFER SPACE" << endp; + + long len = inputStream->getData( p, space ); + pe = p + len; + } + + if ( tokstart + length > pe ) + cerr << "NOT ENOUGH DATA TO FETCH TOKEN" << endp; + + Head *tokdata = string_alloc_const( parser->prg, tokstart, length ); + p = tokstart + length; + tokstart = 0; + + return tokdata; +} + +void FsmRun::attachInputStream( InputStream *in ) +{ + /* Run buffers need to stick around because + * token strings point into them. */ + runBuf = new RunBuf; + runBuf->next = 0; + + inputStream = in; + p = pe = runBuf->buf; + peof = 0; + eofSent = false; + position = 0; +} + +long PdaRun::run() +{ + /* PDA must be init first to set next region. */ + init(); + return fsmRun->run( this ); +} + +long PdaRun::undoParse( Tree *tree, CodeVect *rev ) +{ + /* PDA must be init first to set next region. 
*/ + init(); + Kid *top = prg->kidPool.allocate(); + top->next = stackTop; + top->tree = tree; + stackTop = top; + numRetry += 1; + allReverseCode.transfer( *rev ); + + parseToken( 0 ); + + assert( stackTop->next == 0 ); + + prg->algPool.free( stackTop->tree->alg ); + prg->treePool.free( stackTop->tree ); + prg->kidPool.free( stackTop ); + return 0; +} + +long FsmRun::run( PdaRun *destParser ) +{ + long space, prevState = cs; + + PdaRun *prevParser = parser; + parser = destParser; + + act = 0; + tokstart = 0; + tokend = 0; + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; + memset( mark_leave, 0, sizeof(mark_leave) ); + + /* Start with the EOF test. The pattern and replacement input sources can + * be EOF from the start. */ + + while ( true ) { + /* Check for eof. */ + if ( p == pe && inputStream->isEOF() ) { + if ( tokstart != 0 ) { + /* If a token has been started, but not finished + * this is an error. */ + cs = tables->errorState; + } + else { + eofSent = true; + sendEOF(); /* eofSent is cleared again if the EOF token gets sent back. */ + if ( !eofSent ) + continue; + break; + } + } + + if ( p == pe ) { + /* We don't have any data. What is next in the input stream? */ + if ( inputStream->isLangEl() ) + sendNamedLangEl( ); + else { + space = runBuf->buf + FSM_BUFSIZE - pe; + + if ( space == 0 ) + cerr << "OUT OF BUFFER SPACE" << endp; + + int len = inputStream->getData( p, space ); + pe = p + len; + if ( inputStream->needFlush() ) + peof = pe; + } + } + + execute(); + + /* Fall through here either when the input buffer has been exhausted + * or the scanner is in an error state. Otherwise we must continue. */ + + if ( cs == tables->errorState && parser->stopParsing ) { + #ifdef COLM_LOG_PARSE + cerr << "scanner has been stopped" << endl; + #endif + goto done; + } + + /* First thing check for error. */ + if ( cs == tables->errorState ) { + /* If a token was started, but not finished (tokstart != 0) then + * restore p to the beginning of that token. 
*/ + if ( tokstart != 0 ) + p = tokstart; + + /* Check for a default token in the region. If one is there + * then send it and continue with the processing loop. */ + if ( parser->tables->gbl->regionInfo[region].defaultToken >= 0 ) { + tokstart = tokend = p; + sendToken( parser->tables->gbl->regionInfo[region].defaultToken ); + continue; + } + + if ( parser->getNextRegion( 1 ) != 0 ) { + #ifdef COLM_LOG_PARSE + cerr << "scanner failed, trying next region" << endl; + #endif + + /* May have accumulated ignore tokens from a previous region. + * Need to rescan them since we won't be sending tokens from + * this region. */ + parser->sendBackIgnore(); + + parser->nextRegionInd += 1; + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; + #ifdef COLM_LOG_PARSE + cerr << "new token region: " << + parser->tables->gbl->regionInfo[region].name << endl; + #endif + continue; + } + + if ( parser->numRetry > 0 ) { + /* Invoke the parser's error handling. */ + #ifdef COLM_LOG_PARSE + cerr << "invoking parse error from the scanner" << endl; + #endif + + parser->sendBackIgnore(); + parser->parseToken( 0 ); + + if ( parser->errCount > 0 ) { + /* Error occurred in the top-level parser. */ + cerr << "PARSE ERROR" << endp; + } + else { + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; + #ifdef COLM_LOG_PARSE + cerr << "new token region: " << + parser->tables->gbl->regionInfo[region].name << endl; + #endif + continue; + } + } + + /* Machine failed before finding a token. */ + cerr << "SCANNER ERROR" << endp; + } + + space = runBuf->buf + FSM_BUFSIZE - pe; + if ( space == 0 ) { + /* Create a new run buf. */ + RunBuf *buf = new RunBuf; + buf->next = runBuf; + runBuf = buf; + + /* If partway through a token then preserve the prefix. */ + long have = 0; + + if ( tokstart == 0 ) { + /* No prefix, the previous buffer was filled. */ + runBuf->next->length = FSM_BUFSIZE; + } + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - tokstart; + memcpy( runBuf->buf, tokstart, have ); + + /* Compute the length of the previous buffer. */ + runBuf->next->length = FSM_BUFSIZE - have; + + /* Compute tokstart and tokend. */ + tokend = runBuf->buf + (tokend - tokstart); + tokstart = runBuf->buf; + } + p = pe = runBuf->buf + have; + peof = 0; + } + } + +done: + parser = prevParser; + cs = prevState; + return 0; +} diff --git a/colm/fsmrun.h b/colm/fsmrun.h new file mode 100644 index 00000000..43f8cc8c --- /dev/null +++ b/colm/fsmrun.h @@ -0,0 +1,132 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMRUN_H +#define _FSMRUN_H + +#include "astring.h" +#include "pdarun.h" +#include "input.h" + +#define FSM_BUFSIZE 8192 +//#define FSM_BUFSIZE 8 + +struct GenAction; +struct KlangEl; +struct PdaRun; +struct ParseData; +struct Kid; +struct Pattern; +struct PatternItem; +struct Replacement; +struct ReplItem; + +struct FsmTables +{ + long *actions; + long *keyOffsets; + char *transKeys; + long *singleLengths; + long *rangeLengths; + long *indexOffsets; + long *transTargsWI; + long *transActionsWI; + long *toStateActions; + long *fromStateActions; + long *eofActions; + long *eofTargs; + long *entryByRegion; + + long numStates; + long numActions; + long numTransKeys; + long numSingleLengths; + long numRangeLengths; + long numIndexOffsets; + long numTransTargsWI; + long numTransActionsWI; + long numRegions; + + long startState; + long firstFinal; + long errorState; + + GenAction **actionSwitch; + long numActionSwitch; +}; + +struct RunBuf +{ + char buf[FSM_BUFSIZE]; + long length; + long offset; + RunBuf *next; +}; + +struct FsmRun +{ + FsmRun( FsmTables *tables ); + ~FsmRun(); + + void set_AF_GROUP_MEM(); + + void sendLangEl( Kid *input ); + void makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ); + void translateLangEl( int id, Head *tokdata, bool namedLangEl, int bindId ); + void sendNamedLangEl(); + void sendEOF(); + void sendIgnore( long id ); + void sendQueuedTokens(); + void sendToken( long id ); + + void sendBackIgnore( Kid *ignore ); + void sendBack( Kid *input ); + void queueBack( Kid *input ); + void sendBackText( const char *data, long length ); + void emitToken( KlangEl *token ); + void execAction( GenAction *action ); + + long run( PdaRun *parser ); + void attachInputStream( InputStream *in ); + void streamPush( const char 
*data, long length ); + void undoStreamPush( long length ); + + Head *extractToken( long len ); + + void execute(); + + FsmTables *tables; + PdaRun *parser; + InputStream *inputStream; + + /* FsmRun State. */ + int region, cs, act; + char *tokstart, *tokend; + char *p, *pe, *peof; + bool eofSent; + RunBuf *runBuf; + bool gotoResume; + long position; + char *mark_enter[32]; + char *mark_leave[32]; +}; + +#endif diff --git a/colm/fsmstate.cpp b/colm/fsmstate.cpp new file mode 100644 index 00000000..d9df91ad --- /dev/null +++ b/colm/fsmstate.cpp @@ -0,0 +1,467 @@ +/* + * Copyright 2002 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "fsmgraph.h" + +#include +using namespace std; + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. */ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. 
*/ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * larger number selects the row of the index, so the order of the + * arguments does not matter. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states is marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has no out transitions or in transitions, no + * out transition data and no number. */ +FsmState::FsmState() +: + /* No out or in transitions. */ + outList(), + inList(), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* Conditions. */ + stateCondList(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSet(), + errActionTable(), + eofActionTable(), + + eofTarget(0) +{ +} + +/* Copy the state data. The out transitions are duplicated but still point + * at the original targets; correcting them is left up to the FsmGraph copy + * constructor. */ +FsmState::FsmState(const FsmState &other) +: + /* All lists are cleared. They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inList(), + + /* Duplicate the entry id set and epsilon transitions. 
These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* Copy in the elements of the conditions. */ + stateCondList( other.stateCondList ), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSet(other.outCondSet), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable), + + eofTarget(0) /* Not copied from other; starts out null. */ +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + /* Duplicate and store the original target in the transition. This will + * be corrected once all the states have been created. */ + FsmTrans *newTrans = new FsmTrans(*trans); + newTrans->toState = trans->toState; + outList.append( newTrans ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +FsmState::~FsmState() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Compare two states using pointers to the states. With the approximate + * compare the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. 
*/ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} + +/* Compare class for the sort that does the intial partition of compaction. */ +int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the condition pairs. 
*/ + PairIter condPair( state1->stateCondList.head, state2->stateCondList.head ); + for ( ; !condPair.end(); condPair++ ) { + switch ( condPair.userState ) { + case RangeInS1: + return 1; + case RangeInS2: + return -1; + + case RangeOverlap: { + CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace; + CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace; + if ( condSpace1 < condSpace2 ) + return -1; + else if ( condSpace1 > condSpace2 ) + return 1; + break; + } + case BreakS1: + case BreakS2: + break; + } + } + + /* Use a pair iterator to test the transition pairs. */ + PairIter outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + PairIter outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::comparePartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Decide whether a pair of states should be marked. Walks the paired out + * transitions and returns true as soon as shouldMarkPtr reports true for + * one of the transition pairs. Returns false otherwise. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1, + const FsmState *state2 ) +{ + /* Use a pair iterator to get the transition pairs. */ + PairIter outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangeInS2: + if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangeOverlap: + if ( FsmGraph::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return false; +} + +/* + * Transition Comparison. + */ + +/* Compare target partitions. Both pointers may be null, but when trans1 is + * set trans2 is dereferenced and so must be set as well. */ +int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both targets are set. 
*/ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. */ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + else if ( trans1->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} + + +bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1, + FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. If the target pair is marked, then + * the pair we are considering gets marked. 
*/ + return markIndex.isPairMarked( trans1->toState->alg.stateNum, + trans2->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} + + diff --git a/colm/input.cpp b/colm/input.cpp new file mode 100644 index 00000000..d9a3c7e0 --- /dev/null +++ b/colm/input.cpp @@ -0,0 +1,144 @@ +/* + * Copyright 2007, 2008 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "input.h" +#include "colm.h" +#include "fsmrun.h" +#include +#include + +using std::cerr; +using std::endl; + +/* + * String + */ + +int InputStreamString::getData( char *dest, int length ) +{ + int available = data.length() - offset; + + if ( available < length ) + length = available; + + memcpy( dest, data.data+offset, length ); + offset += length; + + if ( offset == data.length() ) + eof = true; + + return length; +} + +void InputStreamString::pushBack( char *data, long len ) +{ + assert( len <= offset ); + offset -= len; +} + +/* + * File + */ + +int InputStreamFile::isEOF() +{ + return queue == 0 && feof( file ); +} + +int InputStreamFile::needFlush() +{ + return queue == 0 && feof( file ); +} + +int InputStreamFile::getData( char *dest, int length ) +{ + /* If there is any data in queue, read from that first. 
*/ + if ( queue != 0 ) { + long avail = queue->length - queue->offset; + if ( length >= avail ) { + memcpy( dest, &queue->buf[queue->offset], avail ); + RunBuf *del = queue; + queue = queue->next; + delete del; + return avail; + } + else { + memcpy( dest, &queue->buf[queue->offset], length ); + queue->offset += length; + return length; + } + } + else { + return fread( dest, 1, length, file ); + } +} + +void InputStreamFile::pushBack( RunBuf *runBuf ) +{ + runBuf->next = queue; + queue = runBuf; +} + +/* + * FD + */ + +int InputStreamFD::isEOF() +{ + return queue == 0 && eof; +} + +int InputStreamFD::needFlush() +{ + return queue == 0 && eof; +} + +void InputStreamFD::pushBack( RunBuf *runBuf ) +{ + runBuf->next = queue; + queue = runBuf; +} + +int InputStreamFD::getData( char *dest, int length ) +{ + /* If there is any data in queue, read from that first. */ + if ( queue != 0 ) { + long avail = queue->length - queue->offset; + if ( length >= avail ) { + memcpy( dest, &queue->buf[queue->offset], avail ); + RunBuf *del = queue; + queue = queue->next; + delete del; + return avail; + } + else { + memcpy( dest, &queue->buf[queue->offset], length ); + queue->offset += length; + return length; + } + } + else { + long got = read( fd, dest, length ); + if ( got == 0 ) + eof = true; + return got; + } +} diff --git a/colm/input.h b/colm/input.h new file mode 100644 index 00000000..e8e6655b --- /dev/null +++ b/colm/input.h @@ -0,0 +1,145 @@ +/* + * Copyright 2007, 2008 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _INPUT_H +#define _INPUT_H + +#include "astring.h" + +struct KlangEl; +struct Pattern; +struct PatternItem; +struct Replacement; +struct ReplItem; +struct RunBuf; + +struct InputStream +{ + virtual ~InputStream() {} + + /* Basic functions. */ + virtual int getData( char *dest, int length ) = 0; + virtual int isEOF() = 0; + virtual int needFlush() = 0; + + virtual void pushBack( char *data, long len ) + { assert(false); } + virtual void pushBack( RunBuf *runBuf ) + { assert(false); } + + /* Named language elements for patterns and replacements. */ + virtual int isLangEl() { return false; } + virtual KlangEl *getLangEl( long &bindId, char *&data, long &length ) + { assert( false ); return 0; } + virtual void pushBackNamed() + { assert( false ); } +}; + +struct InputStreamString : public InputStream +{ + InputStreamString( const String &data ) + : data(data), offset(0), eof(false) {} + + int getData( char *dest, int length ); + int isEOF() { return eof; } + int needFlush() { return eof; } + void pushBack( char *data, long len ); + + String data; + int offset; + bool eof; +}; + +struct InputStreamFile : public InputStream +{ + InputStreamFile( FILE *file ) + : file(file), queue(0) {} + + int getData( char *dest, int length ); + int isEOF(); + int needFlush(); + + void pushBack( RunBuf *runBuf ); + + FILE *file; + RunBuf *queue; +}; + +struct InputStreamFD : public InputStream +{ + InputStreamFD( long fd ) + : fd(fd), eof(false), queue(0) {} + + int isEOF(); + int needFlush(); + int getData( char *dest, int length ); + + void pushBack( RunBuf *runBuf ); + + long fd; + bool eof; + RunBuf *queue; +}; + +struct InputStreamPattern : public InputStream +{ + InputStreamPattern( Pattern *pattern ); + + 
int isLangEl(); + int getData( char *dest, int length ); + KlangEl *getLangEl( long &bindId, char *&data, long &length ); + int isEOF(); + int needFlush(); + void pushBack( char *data, long len ); + void pushBackNamed(); + + void backup(); + int shouldFlush(); + + Pattern *pattern; + PatternItem *patItem; + int offset; + bool flush; +}; + +struct InputStreamRepl : public InputStream +{ + InputStreamRepl( Replacement *replacement ); + + int isLangEl(); + int getData( char *dest, int length ); + KlangEl *getLangEl( long &bindId, char *&data, long &length ); + int isEOF(); + int needFlush(); + void pushBack( char *data, long len ); + void pushBackNamed(); + + void backup(); + int shouldFlush(); + + Replacement *replacement; + ReplItem *replItem; + int offset; + bool flush; +}; + +#endif /* _INPUT_H */ + diff --git a/colm/list.cpp b/colm/list.cpp new file mode 100644 index 00000000..5d0fc7f1 --- /dev/null +++ b/colm/list.cpp @@ -0,0 +1,105 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pdarun.h" + +void List::addAfter(ListEl *prev_el, ListEl *new_el) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->prev = prev_el; + + /* Set forward pointers. 
*/ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->next = head; + head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->next = prev_el->next; + prev_el->next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->next == 0) { + /* There is no next element. Set the tail pointer. */ + tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->next->prev = new_el; + } + + /* Update list length. */ + listLen++; +} + +void List::addBefore(ListEl *next_el, ListEl *new_el) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->next = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next element. We are inserting at the tail. */ + new_el->prev = tail; + tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->prev = next_el->prev; + next_el->prev = new_el; + } + + /* Set forward pointers. */ + if (new_el->prev == 0) { + /* There is no previous element. Set the head pointer. */ + head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->prev->next = new_el; + } + + listLen++; +} + +ListEl *List::detach(ListEl *el) +{ + /* Set forward pointers to skip over el. */ + if (el->prev == 0) + head = el->next; + else + el->prev->next = el->next; + + /* Set reverse pointers to skip over el. */ + if (el->next == 0) + tail = el->prev; + else + el->next->prev = el->prev; + + /* Update List length and return element we detached. */ + listLen--; + return el; +} + diff --git a/colm/lmparse.kh b/colm/lmparse.kh new file mode 100644 index 00000000..598edf84 --- /dev/null +++ b/colm/lmparse.kh @@ -0,0 +1,106 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef RLPARSE_H +#define RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + +struct Parser +{ + %%{ + parser Parser; + + # Use a class for tokens. + token uses class Token; + + # Atoms. + token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt, + TK_Hex, KW_Nil, KW_True, KW_False; + + # General tokens. + token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, + TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql, + TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow, + TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose, + TK_Dash, TK_ReChar; + + # Defining things. + token KW_Rl, KW_Def, KW_Lex, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, + KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Global, + KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref; + + # Language. + token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In, + KW_Break, KW_PrintXML, KW_Print, KW_Require; + + # Patterns. 
+ token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, + KW_MakeTree, KW_TypeId; + + token KW_Include, KW_Preeof; + + }%% + + %% write instance_data; + + void init(); + int parseLangEl( int type, const Token *token ); + + Parser( const char *fileName, const char *sectionName, const InputLoc &sectionLoc ) + : sectionName(sectionName) + { + pd = new ParseData( fileName, sectionName, sectionLoc, std::cout ); + } + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, JoinOrLm *joinOrLm, bool isInstance ); + void addProduction( InputLoc &loc, const String &name, + ProdElList *prodElList, bool commit, CodeBlock *redBlock ); + + /* Report an error encountered by the parser. */ + ostream &parse_error( int tokId, Token &token ); + + ParseData *pd; + + /* The name of the root section, this does not change during an include. */ + const char *sectionName; + + NameRef nameRef; + NameRefList nameRefList; + + KlangElVect langElVect; + + PatternItemList *patternItemList; + ReplItemList *replItemList; + RegionVect regionStack; + NamespaceVect namespaceStack; + + String curDefineId; + ProdElList *curProdElList; +}; + +%% write token_defs; + +#endif diff --git a/colm/lmparse.kl b/colm/lmparse.kl new file mode 100644 index 00000000..ef04b711 --- /dev/null +++ b/colm/lmparse.kl @@ -0,0 +1,2013 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "config.h" +#include "lmparse.h" +#include "colm.h" + +using std::cout; +using std::cerr; +using std::endl; + +ParserDict parserDict; + +%%{ + +parser Parser; + +include "lmparse.kh"; + +start: root_item_list + final { + #ifdef COLM_LOG_COMPILE + cerr << "parsing complete" << endl; + #endif + + pd->rootCodeBlock = new CodeBlock( $1->stmtList ); + }; + +nonterm root_item_list uses lang_stmt_list; + +root_item_list: root_item_list root_item + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement. */ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +root_item_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm root_item uses statement; + +root_item: literal_def commit final { $$->stmt = 0; }; +root_item: rl_def commit final { $$->stmt = 0; }; +root_item: token_def commit final { $$->stmt = 0; }; +root_item: cfl_def commit final { $$->stmt = 0; }; +root_item: region_def commit final { $$->stmt = 0; }; +root_item: namespace_def commit final { $$->stmt = 0; }; +root_item: function_def commit final { $$->stmt = 0; }; +root_item: generic_def commit final { $$->stmt = 0; }; +root_item: iter_def commit final { $$->stmt = 0; }; +root_item: global_def commit final { $$->stmt = $1->stmt; }; +root_item: statement commit final { $$->stmt = $1->stmt; }; +root_item: pre_eof commit final { $$->stmt = 0; }; + +nonterm block_open +{ + ObjectDef *localFrame; +}; + +block_open: '{' + final { + /* Init the object representing the local frame. */ + $$->localFrame = new ObjectDef( ObjectDef::FrameType, + "local", new ObjFieldMap(), new ObjMethodMap(), pd->nextObjectId++ ); + pd->curLocalFrame = $$->localFrame; + }; + +block_close: '}' + final { + /* Pop the cur local frame, back to the root. 
*/ + pd->curLocalFrame = pd->rootLocalFrame; + }; + + +iter_def: + KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + Function *newFunction = new Function( 0, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, true ); + pd->functionList.append( newFunction ); + }; + +function_def: + type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + Function *newFunction = new Function( $1->typeRef, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, false ); + pd->functionList.append( newFunction ); + }; + +nonterm opt_param_list uses param_list; + +opt_param_list: param_list + final { + $$->paramList = $1->paramList; + }; + +opt_param_list: + final { + $$->paramList = new ParameterList; + }; + +nonterm param_list +{ + ParameterList *paramList; +}; + +param_list: param_list ',' param_var_def + final { + $$->paramList = $1->paramList; + $$->paramList->append( $3->objField ); + }; + +param_list: param_var_def + final { + /* Create the map and insert the first item. */ + $$->paramList = new ParameterList; + $$->paramList->append( $1->objField ); + }; + +nonterm param_var_def uses var_def; + +param_var_def: var_def + final { + $$->objField = $1->objField; + }; +param_var_def: reference_type_ref TK_Word + final { + /* Return an object field object. The user of this nonterminal must + * load it into the approrpriate map and do error checking. 
*/ + $$->objField = new ObjField( $2->loc, $1->typeRef, $2->data ); + }; + +nonterm reference_type_ref uses type_ref; + +reference_type_ref: KW_Ref region_qual TK_Word + final { + $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data ); + $$->typeRef->isRef = true; + }; + +generic_def: KW_Map TK_Word '[' type_ref type_ref ']' + final { + Namespace *nspace = namespaceStack.top(); + + KlangEl *langEl = getKlangEl( pd, nspace, $2->data, KlangEl::NonTerm ); + + /* Check that the element wasn't previously defined as something else. */ + if ( langEl->type != KlangEl::Unknown && langEl->type != KlangEl::NonTerm ) { + error($1->loc) << "'" << curDefineId << + "' already defined as something else" << endp; + } + + GenericType *generic = new GenericType( $2->data, GEN_MAP, + pd->nextGenericId++, langEl, $5->typeRef ); + generic->keyTypeArg = $4->typeRef; + + /* Add one empty production. */ + ProdElList *emptyList = new ProdElList; + addProduction( $1->loc, $2->data, emptyList, false, 0 ); + + nspace->genericList.append( generic ); + langEl->generic = generic; + }; + +generic_def: KW_List TK_Word '[' type_ref ']' + final { + /* Get the language element. */ + Namespace *nspace = namespaceStack.top(); + KlangEl *langEl = getKlangEl( pd, nspace, $2->data, KlangEl::NonTerm ); + + /* Check that the element wasn't previously defined as something else. */ + if ( langEl->type != KlangEl::Unknown && langEl->type != KlangEl::NonTerm ) { + error($1->loc) << "'" << curDefineId << + "' already defined as something else" << endp; + } + + GenericType *generic = new GenericType( $2->data, GEN_LIST, + pd->nextGenericId++, langEl, $4->typeRef ); + + /* Add one empty production. 
*/ + ProdElList *emptyList = new ProdElList; + addProduction( $1->loc, $2->data, emptyList, false, 0 ); + + nspace->genericList.append( generic ); + langEl->generic = generic; + }; + +generic_def: KW_Vector TK_Word '[' type_ref ']' + final { + GenericType *generic = new GenericType( $2->data, GEN_VECTOR, + pd->nextGenericId++, 0, $4->typeRef ); + + Namespace *nspace = namespaceStack.top(); + nspace->genericList.append( generic ); + }; + +nonterm global_def uses statement; + +global_def: KW_Global var_def opt_def_init + final { + $$->stmt = 0; + ObjectDef *globalObj = pd->globalObjectDef; + + if ( globalObj->objFieldMap->find( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + globalObj->objFieldMap->insert( $2->objField->name, $2->objField ); + + if ( $3->expr != 0 ) { + LangVarRef *varRef = new LangVarRef( $2->objField->loc, + new QualItemVect, $2->objField->name ); + + $$->stmt = new LangStmt( $2->objField->loc, + $3->assignType, varRef, $3->expr ); + } + }; + +cfl_def: KW_Def cfl_id obj_var_list properties_list cfl_prod_list + final { + /* Get the language element. */ + KlangEl *langEl = getKlangEl( pd, namespaceStack.top(), + curDefineId, KlangEl::NonTerm ); + + /* Check that the element wasn't previously defined as something else. */ + if ( langEl->type != KlangEl::Unknown && langEl->type != KlangEl::NonTerm ) { + error($1->loc) << "'" << curDefineId << + "' already defined as something else" << endp; + } + + /* Make a new object definition. */ + ObjectDef *objectDef = new ObjectDef( ObjectDef::UserType, curDefineId, + pd->objFieldMap, new ObjMethodMap(), pd->nextObjectId++ ); + + langEl->objectDef = objectDef; + }; + +cfl_id: TK_Word + final { + curDefineId = $1->data; + }; + +cfl_prod_list: cfl_prod_list '|' define_prod; +cfl_prod_list: define_prod; + +properties_list: properties_list property; +properties_list: ; + +property: + KW_ReduceFirst + final { + /* Get the language element. 
*/ + KlangEl *prodName = getKlangEl( pd, namespaceStack.top(), + curDefineId, KlangEl::NonTerm ); + prodName->reduceFirst = true; + }; + +define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code + final { + addProduction( $1->loc, curDefineId, curProdElList, $4->commit, $5->codeBlock ); + }; + +obj_var_list: obj_var_list var_def + final { + if ( pd->objFieldMap->find( $2->objField->name ) != 0 ) + error() << "object field renamed" << endp; + + pd->objFieldMap->insert( $2->objField->name, $2->objField ); + }; + +obj_var_list: + final { + pd->objFieldMap = new ObjFieldMap; + }; + + +nonterm type_ref +{ + TypeRef *typeRef; +}; + +type_ref: basic_type_ref + final { + $$->typeRef = $1->typeRef; + }; + +nonterm basic_type_ref uses type_ref; + +basic_type_ref: region_qual TK_Word opt_repeat + final { + $$->typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); + $$->typeRef->isRepeat = $3->repeat; + $$->typeRef->isOpt = $3->opt; + }; + +basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat + final { + $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data ); + $$->typeRef->isRepeat = $4->repeat; + $$->typeRef->isOpt = $4->opt; + $$->typeRef->isPtr = true; + }; + +nonterm var_def +{ + InputLoc loc; + ObjField *objField; +}; + +var_def: type_ref TK_Word + final { + /* Return an object field object. The user of this nonterminal must + * load it into the approrpriate map and do error checking. */ + $$->objField = new ObjField( $2->loc, $1->typeRef, $2->data ); + }; + +region_def: + region_head '{' root_item_list '}' + final { + /* Pop the top of the stack. */ + regionStack.pop(); + }; + +region_head: + KW_Lex TK_Word + final { + /* Make the new token region. 
*/ + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data, + pd->regionList.length(), regionStack.top() ); + regionStack.top()->childRegions.append( tokenRegion ); + pd->regionList.append( tokenRegion ); + JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + String scannerName( $2->data.length() + 2, "<%s>", $2->data.data ); + addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true ); + regionStack.push( tokenRegion ); + }; + +namespace_def: + namespace_head '{' root_item_list '}' + final{ + /* Pop the top of the stack. */ + namespaceStack.pop(); + }; + +namespace_head: + KW_Namespace TK_Word + final { + /* Make the new namespace. */ + Namespace *nspace = new Namespace( InputLoc(), $2->data, + pd->namespaceList.length(), namespaceStack.top() ); + namespaceStack.top()->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + }; + +pattern_list: pattern_list pattern; +pattern_list: init_pattern_list pattern; + +init_pattern_list: + final { + patternItemList = new PatternItemList; + }; + +pattern: '"' litpat_el_list '"'; +pattern: '[' pattern_el_list ']'; + +litpat_el_list: litpat_el_list litpat_el; +litpat_el_list: ; + +litpat_el: TK_LitPat + final { + PatternItem *patternItem = new PatternItem( $1->data, + PatternItem::InputText ); + patternItemList->append( patternItem ); + }; + +litpat_el: '[' pattern_el_list ']'; + +pattern_el_list: pattern_el_list pattern_el; +pattern_el_list: ; + +pattern_el: opt_label pattern_el_type_or_lit + final { + /* Store the variable reference in the pattern itemm. 
*/ + $2->patternItem->varRef = $1->varRef; + + if ( $1->varRef != 0 ) { + if ( pd->curLocalFrame->objFieldMap->find( $1->varRef->name ) != 0 ) { + error( $1->varRef->loc ) << "variable " << $1->varRef->name << + " redeclared" << endp; + } + + TypeRef *typeRef = new TypeRef( InputLoc(), $2->patternItem->factor ); + ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->objFieldMap->insert( $1->varRef->name, objField ); + } + }; + +pattern_el: '"' litpat_el_list '"'; +pattern_el: '?' TK_Word + final { + /* FIXME: Implement */ + assert(false); + }; + +nonterm pattern_el_type_or_lit +{ + PatternItem *patternItem; +}; + +pattern_el_type_or_lit: region_qual TK_Word opt_repeat + final { + PdaFactor *factor = new PdaFactor( $2->loc, false, $1->nspaceQual, + $2->data, 0, $3->opt, $3->repeat ); + $$->patternItem = new PatternItem( factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +pattern_el_type_or_lit: region_qual TK_Literal opt_repeat + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); + PdaFactor *factor = new PdaFactor( $2->loc, false, $1->nspaceQual, + literal, 0, $3->opt, $3->repeat ); + $$->patternItem = new PatternItem( factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +nonterm opt_label +{ + /* Variable reference. 
*/ + LangVarRef *varRef; +}; + +opt_label: TK_Word ':' + final { + $$->varRef = new LangVarRef( $1->loc, new QualItemVect, $1->data ); + }; +opt_label: + final { + $$->varRef = 0; + }; + +repl_list: repl_list replacement; +repl_list: init_repl_list replacement; + +init_repl_list: + final { + replItemList = new ReplItemList; + }; + +replacement: '"' litrepl_el_list '"'; +replacement: '[' repl_el_list ']'; + +litrepl_el_list: litrepl_el_list litrepl_el; +litrepl_el_list: ; + +litrepl_el: TK_LitPat + final { + ReplItem *replItem = new ReplItem( ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +litrepl_el: '[' repl_el_list ']'; + +repl_el_list: repl_el_list repl_el; +repl_el_list: ; + +repl_el: var_ref + final { + ReplItem *replItem = new ReplItem( ReplItem::VarRefType, $1->varRef ); + replItemList->append( replItem ); + }; + +repl_el: region_qual TK_Literal + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); + PdaFactor *factor = new PdaFactor( $2->loc, false, $1->nspaceQual, + literal, 0, false, false ); + ReplItem *replItem = new ReplItem( ReplItem::FactorType, factor ); + replItemList->append( replItem ); + }; + +repl_el: '"' litrepl_el_list '"'; + +prod_el_list: + prod_el_list prod_el + final { + curProdElList->append( $2->factor ); + }; + +prod_el_list: + final { curProdElList = new ProdElList; }; + +nonterm prod_el +{ + PdaFactor *factor; +}; + +prod_el: + opt_commit region_qual TK_Word opt_repeat + final { + $$->factor = new PdaFactor( $3->loc, $1->commit, + $2->nspaceQual, $3->data, 0, $4->opt, $4->repeat ); + }; + +prod_el: + opt_commit region_qual TK_Literal opt_repeat + final { + /* Create a new factor node going to a concat literal. */ + PdaLiteral *literal = new PdaLiteral( $3->loc, *$3 ); + $$->factor = new PdaFactor( $3->loc, $1->commit, $2->nspaceQual, + literal, 0, $4->opt, $4->repeat ); + }; + +nonterm opt_repeat +{ + bool opt; + bool repeat; +}; + +opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; }; +opt_repeat: '*' final { $$->opt = false; $$->repeat = true; }; +opt_repeat: final { $$->opt = false; $$->repeat = false; }; + +nonterm region_qual +{ + NamespaceQual *nspaceQual; +}; + +region_qual: region_qual TK_Word TK_DoubleColon + final { + $$->nspaceQual = $1->nspaceQual; + $$->nspaceQual->qualNames.append( $2->data ); + }; + +region_qual: + final { + $$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + }; + +literal_def: KW_Literal literal_list; + +literal_list: literal_list ',' literal_item; +literal_list: literal_item; + +literal_item: TK_Literal + final { + /* Create a name for the literal. */ + String name( 32, "_literal_%.4x", pd->nextTokenId ); + + bool insideRegion = regionStack.top() != pd->rootRegion; + if ( !insideRegion ) { + /* Make a new token region just for the token. */ + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data, + pd->regionList.length(), regionStack.top() ); + regionStack.top()->childRegions.append( tokenRegion ); + pd->regionList.append( tokenRegion ); + JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + String scannerName( name.length() + 2, "<%s>", name.data ); + addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true ); + regionStack.push( tokenRegion ); + } + + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, $1->data, $1->loc ); + + /* Look for the production's associated region. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + if ( ldel != 0 ) + error( $1->loc ) << "literal already defined in this namespace" << endp; + else { + /* Create a token for the literal. 
*/ + KlangEl *newLangEl = getKlangEl( pd, nspace, name, KlangEl::Term ); + assert( newLangEl != 0 ); + assert( newLangEl->type == KlangEl::Term ); + newLangEl->lit = $1->data; + newLangEl->isLiteral = true; + + Join *join = new Join( new Expression( new Term( new FactorWithAug( + new FactorWithRep( $1->loc, new FactorWithNeg( $1->loc, new Factor( + new Literal( $1->loc, $1->data, + Literal::LitString ) ) ) ) ) ) ) ); + + TokenDef *tokenDef = new TokenDef( join, newLangEl, $1->loc, + pd->nextTokenId, nspace, region ); + newLangEl->tokenDef = tokenDef; + + region->tokenDefList.append( tokenDef ); + ldel = nspace->literalDict.insert( interp, newLangEl ); + pd->nextTokenId += 1; + } + + if ( !insideRegion ) { + /* Leave the region just for this token. */ + regionStack.pop(); + } + }; + +token_def: + token_or_ignore token_def_name obj_var_list + '/' opt_rl_join '/' opt_translate + final { + bool ignore = $1->ignore; + String name = $2->name; + Join *join = $5->join; + CodeBlock *transBlock = $7->transBlock; + + /* Check the region if this is for an ignore. */ + if ( ignore && !pd->insideRegion ) + error($1->loc) << "ignore tokens can only appear inside scanners" << endp; + + /* Check the name if this is a token. */ + if ( !ignore && name == 0 ) + error($1->loc) << "tokens must have a name" << endp; + + /* Give a default name to ignores. */ + if ( name == 0 ) + name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); + + /* Create the token. 
*/ + KlangEl *tokEl = getKlangEl( pd, namespaceStack.top(), name, KlangEl::Term ); + if ( tokEl->type != KlangEl::Unknown && tokEl->type != KlangEl::Term ) + error($1->loc) << "'" << name << "' already defined as something else" << endp; + + tokEl->type = KlangEl::Term; + tokEl->ignore = ignore; + tokEl->transBlock = transBlock; + + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + TokenDef *tokenDef = new TokenDef( join, tokEl, $1->loc, + pd->nextTokenId++, nspace, region ); + region->tokenDefList.append( tokenDef ); + tokEl->tokenDef = tokenDef; + + /* Create the object def for the token. */ + ObjectDef *objectDef = new ObjectDef( ObjectDef::UserType, name, + pd->objFieldMap, new ObjMethodMap(), pd->nextObjectId++ ); + + tokEl->objectDef = objectDef; + + /* This is created and pushed in the name. */ + if ( !pd->insideRegion ) { + /* Leave the region that we made just for this token. */ + regionStack.pop(); + } + + if ( join != 0 ) { + /* Create a regular language definition so the token can be used to + * make other tokens */ + JoinOrLm *joinOrLm = new JoinOrLm( join ); + addRegularDef( $1->loc, namespaceStack.top(), name, joinOrLm, false ); + } + }; + +nonterm token_or_ignore +{ + InputLoc loc; + bool ignore; +}; + +token_or_ignore: KW_Token + final { $$->loc = $1->loc; $$->ignore = false; }; + +token_or_ignore: KW_Ignore + final { $$->loc = $1->loc; $$->ignore = true; }; + +nonterm class token_def_name +{ + String name; +}; + +token_def_name: + opt_name + final { + String name = $1->name; + + $$->name = name; + pd->insideRegion = regionStack.top() != pd->rootRegion; + pd->tokenDefName = name; + + if ( !pd->insideRegion ) { + /* If not inside a region, make one for the token. 
*/ + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name, + pd->regionList.length(), regionStack.top() ); + regionStack.top()->childRegions.append( tokenRegion ); + pd->regionList.append( tokenRegion ); + JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + String scannerName( name.length() + 2, "<%s>", name.data ); + addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true ); + regionStack.push( tokenRegion ); + } + + /* Reset the lable id counter. */ + pd->nextLabelId = 0; + }; + +nonterm class opt_name +{ + String name; +}; + +opt_name: TK_Word final { $$->name = $1->data; }; +opt_name: ; + +nonterm opt_translate +{ + CodeBlock *transBlock; +}; + +opt_translate: + block_open lang_stmt_list block_close + final { + $$->transBlock = new CodeBlock( $2->stmtList ); + $$->transBlock->localFrame = $1->localFrame; + }; + +opt_translate: + final { + $$->transBlock = 0; + }; + +pre_eof: + KW_Preeof block_open lang_stmt_list block_close + final { + bool insideRegion = regionStack.top() != pd->rootRegion; + if ( !insideRegion ) + error($1->loc) << "preeof must be used inside an existing region" << endl; + + CodeBlock *codeBlock = new CodeBlock( $3->stmtList ); + codeBlock->localFrame = $2->localFrame; + + TokenRegion *region = regionStack.top(); + region->preEofBlock = codeBlock; + }; + +rl_def: + KW_Rl machine_name '/' rl_join '/' + final { + /* Generic creation of machine for instantiation and assignment. */ + JoinOrLm *joinOrLm = new JoinOrLm( $4->join ); + addRegularDef( $2->loc, namespaceStack.top(), $2->data, joinOrLm, false ); + }; + +type class token_data +{ + InputLoc loc; + String data; +}; + +nonterm machine_name uses token_data; + +machine_name: + TK_Word + final { + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. 
*/ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Reduce statements +# + +nonterm opt_reduce_code +{ + CodeBlock *codeBlock; +}; + +opt_reduce_code: + final { $$->codeBlock = 0; }; + +opt_reduce_code: + start_reduce lang_stmt_list block_close + final { + $$->codeBlock = new CodeBlock( $2->stmtList ); + $$->codeBlock->localFrame = $1->localFrame; + }; + +nonterm start_reduce uses block_open; + +start_reduce: + block_open + final { + $$->localFrame = $1->localFrame; + + /* Get the language element. */ + KlangEl *prodName = getKlangEl( pd, namespaceStack.top(), + curDefineId, KlangEl::NonTerm ); + + /* References to the reduce item. */ + pd->addProdRedObjectVar( pd->curLocalFrame, prodName ); + pd->addProdRHSVars( pd->curLocalFrame, curProdElList ); + }; + +nonterm lang_stmt_list +{ + StmtList *stmtList; +}; + +lang_stmt_list: rec_stmt_list opt_require_stmt + final { + $$->stmtList = $1->stmtList; + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +nonterm rec_stmt_list uses lang_stmt_list; + +rec_stmt_list: rec_stmt_list statement + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement was generated. 
*/ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +rec_stmt_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm opt_def_init +{ + LangExpr *expr; + LangStmt::Type assignType; +}; + +opt_def_init: '=' code_expr + final { + $$->expr = $2->expr; + $$->assignType = LangStmt::AssignType; + }; +opt_def_init: + final { + $$->expr = 0; + }; + +nonterm statement +{ + LangStmt *stmt; +}; + +statement: var_def opt_def_init + final { + /* By default no statement here. Maybe will add an initialization. */ + $$->stmt = 0; + + /* Check for redeclaration. */ + if ( pd->curLocalFrame->objFieldMap->find( $1->objField->name ) != 0 ) { + error( $1->objField->loc ) << "variable " << $1->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. */ + pd->curLocalFrame->objFieldMap->insert( $1->objField->name, $1->objField ); + + if ( $2->expr != 0 ) { + LangVarRef *varRef = new LangVarRef( $1->objField->loc, + new QualItemVect, $1->objField->name ); + + $$->stmt = new LangStmt( $1->objField->loc, + $2->assignType, varRef, $2->expr ); + } + }; +statement: var_ref '=' code_expr + final { + $$->stmt = new LangStmt( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); + }; +statement: KW_Print '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintType, $3->exprVect ); + }; +statement: KW_PrintXML '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); + }; +statement: code_expr + final { + $$->stmt = new LangStmt( InputLoc(), LangStmt::ExprType, $1->expr ); + }; +statement: KW_If code_expr block_or_single elsif_list + final { + $$->stmt = new LangStmt( LangStmt::IfType, $2->expr, $3->stmtList, $4->stmtList ); + }; +statement: KW_Reject + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::RejectType ); + }; +statement: KW_While code_expr block_or_single + final { + $$->stmt = new LangStmt( LangStmt::WhileType, $2->expr, $3->stmtList ); + }; 
+statement: KW_For TK_Word ':' type_ref KW_In iter_call block_or_single + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::ForIterType, + $2->data, $4->typeRef, $6->langTerm, $7->stmtList ); + }; +statement: KW_Return code_expr + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::ReturnType, $2->expr ); + }; +statement: KW_Break + final { + $$->stmt = new LangStmt( LangStmt::BreakType ); + }; +statement: KW_Yield var_ref + final { + $$->stmt = new LangStmt( LangStmt::YieldType, $2->varRef ); + }; + +nonterm opt_require_stmt uses statement; + +opt_require_stmt: + require_pattern lang_stmt_list + final { + $$->stmt = new LangStmt( LangStmt::IfType, $1->expr, $2->stmtList, 0 ); + }; +opt_require_stmt: + final { + $$->stmt = 0; + }; + +nonterm require_pattern uses code_expr; + +require_pattern: + KW_Require var_ref pattern_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = new Pattern( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = new LangExpr( + new LangTerm( LangTerm::MatchType, $2->varRef, pattern ) ); + }; + +nonterm block_or_single uses lang_stmt_list; + +block_or_single: '{' lang_stmt_list '}' + final { + $$->stmtList = $2->stmtList; + }; +block_or_single: statement + final { + $$->stmtList = new StmtList; + + /* A declaration without an initializer produces no statement (null). + * Skip it, as rec_stmt_list does, so the list never holds a null entry. */ + if ( $1->stmt != 0 ) + $$->stmtList->append( $1->stmt ); + }; + +nonterm iter_call +{ + LangTerm *langTerm; +}; + +iter_call: var_ref '(' opt_code_expr_list ')' + final { + $$->langTerm = new LangTerm( $1->varRef, $3->exprVect ); + }; +iter_call: TK_Word + final { + $$->langTerm = new LangTerm( LangTerm::VarRefType, + new LangVarRef( $1->loc, new QualItemVect, $1->data ) ); + }; + +nonterm elsif_list +{ + StmtList *stmtList; +}; + +elsif_list: + elsif_clause elsif_list + final { + /* Put any of the following elseif part, an else, or null into the elsePart. 
*/ + $1->stmt->elsePart = $2->stmtList; + + /* Make a statement list with jsut the elseif clause in it. It will go into + * some else part. */ + $$->stmtList = new StmtList; + $$->stmtList->append( $1->stmt ); + }; +elsif_list: + optional_else + final { + $$->stmtList = $1->stmtList; + }; + +nonterm elsif_clause +{ + LangStmt *stmt; +}; + +elsif_clause: + KW_Elsif code_expr block_or_single + final { + $$->stmt = new LangStmt( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); + }; + +nonterm optional_else +{ + StmtList *stmtList; +}; + +optional_else: + KW_Else block_or_single + final { + $$->stmtList = $2->stmtList; + }; + +optional_else: + final { + $$->stmtList = 0; + }; + +nonterm code_expr_list +{ + ExprVect *exprVect; +}; + +code_expr_list: code_expr_list ',' code_expr + final { + $$->exprVect = $1->exprVect; + $$->exprVect->append( $3->expr ); + }; +code_expr_list: code_expr + final { + $$->exprVect = new ExprVect; + $$->exprVect->append( $1->expr ); + }; + +nonterm opt_code_expr_list uses code_expr_list; + +opt_code_expr_list: code_expr_list + final { + $$->exprVect = $1->exprVect; + }; + +opt_code_expr_list: + final { + $$->exprVect = 0; + }; + +# +# Type list +# + +nonterm type_list +{ + TypeRefVect *typeRefVect; +}; + +type_list: type_list ',' type_ref + final { + $$->typeRefVect = $1->typeRefVect; + $$->typeRefVect->append( $3->typeRef ); + }; +type_list: type_ref + final { + $$->typeRefVect = new TypeRefVect; + $$->typeRefVect->append( $1->typeRef ); + }; + +nonterm opt_type_list uses type_list; + +opt_type_list: type_list + final { + $$->typeRefVect = $1->typeRefVect; + }; + +opt_type_list: + final { + $$->typeRefVect = 0; + }; + + +# +# Variable reference +# + +nonterm var_ref +{ + LangVarRef *varRef; +}; + +var_ref: qual TK_Word + final { + $$->varRef = new LangVarRef( $2->loc, $1->qual, $2->data ); + }; + +nonterm qual +{ + QualItemVect *qual; +}; + +qual: qual TK_Word '.' 
+ final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); + }; +qual: qual TK_Word TK_RightArrow + final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); + }; +qual: + final { + $$->qual = new QualItemVect; + }; + +# +# Code expression +# + +nonterm code_expr +{ + LangExpr *expr; +}; + +code_expr: code_expr TK_AmpAmp code_relational + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); + }; + +code_expr: code_expr TK_BarBar code_relational + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); + }; + +code_expr: code_relational + final { + $$->expr = $1->expr; + }; + +nonterm code_relational uses code_expr; + +code_relational: code_relational TK_DoubleEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); + }; + +code_relational: code_relational TK_NotEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_NotEql, $3->expr ); + }; + +code_relational: code_relational '<' code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '<', $3->expr ); + }; + +code_relational: code_relational '>' code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '>', $3->expr ); + }; + +code_relational: code_relational TK_LessEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LessEql, $3->expr ); + }; + +code_relational: code_relational TK_GrtrEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); + }; + + +code_relational: code_additive + final { + $$->expr = $1->expr; + }; + +nonterm code_additive uses code_expr; + +code_additive: code_additive '+' code_multiplicitive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '+', $3->expr ); + }; + +code_additive: code_additive '-' code_multiplicitive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '-', $3->expr ); + }; + 
+code_additive: code_multiplicitive + final { + $$->expr = $1->expr; + }; + +nonterm code_multiplicitive uses code_expr; + +code_multiplicitive: code_multiplicitive '*' code_unary + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '*', $3->expr ); + }; + +code_multiplicitive: code_unary + final { + $$->expr = $1->expr; + }; + +nonterm code_unary uses code_expr; +code_unary: '!' code_factor + final { + $$->expr = new LangExpr( $1->loc, '!', $2->expr ); + }; +code_unary: code_factor + final { + $$->expr = $1->expr; + }; + + + +nonterm code_factor uses code_expr; + +code_factor: TK_Number + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::NumberType, $1->data ) ); + }; +code_factor: TK_Literal + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::StringType, $1->data ) ); + }; +code_factor: var_ref '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->varRef, $3->exprVect ) ); + }; +code_factor: var_ref + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::VarRefType, $1->varRef ) ); + }; +code_factor: KW_Match var_ref pattern_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = new Pattern( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = new LangExpr( new LangTerm( LangTerm::MatchType, $2->varRef, pattern ) ); + }; +code_factor: KW_New code_factor + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::NewType, $2->expr ) ); + }; +code_factor: + KW_Construct type_ref opt_field_init repl_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Replacement *replacement = new Replacement( nspace, region, replItemList, + pd->nextPatReplId++ ); + pd->replList.append( replacement ); + $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ConstructType, + $2->typeRef, $3->fieldInitVect, replacement ) ); + }; +code_factor: 
KW_Parse type_ref '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::ParseType, $2->typeRef ) ); + $$->expr->term->args = $4->exprVect; + }; +code_factor: KW_ParseStop type_ref '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::ParseStopType, $2->typeRef ) ); + $$->expr->term->args = $4->exprVect; + }; +code_factor: KW_TypeId type_ref + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::TypeIdType, $2->typeRef ) ); + }; +code_factor: type_ref KW_In var_ref + final { + $$->expr = new LangExpr( new LangTerm( $2->loc, + LangTerm::SearchType, $1->typeRef, $3->varRef ) ); + }; +code_factor: KW_Nil + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::NilType ) ); + }; +code_factor: KW_True + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::TrueType ) ); + }; +code_factor: KW_False + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::FalseType ) ); + }; +code_factor: '(' code_expr ')' + final { + $$->expr = $2->expr; + }; +code_factor: KW_MakeTree '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::MakeTreeType, $3->exprVect ) ); + }; +code_factor: KW_MakeToken '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::MakeTokenType, $3->exprVect ) ); + }; +code_factor: KW_Deref code_expr + final { + $$->expr = new LangExpr( $1->loc, OP_Deref, $2->expr ); + }; + +nonterm opt_field_init uses field_init_list; + +opt_field_init: '(' opt_field_init_list ')' + final { + $$->fieldInitVect = $2->fieldInitVect; + }; +opt_field_init: + final { + $$->fieldInitVect = 0; + }; + +nonterm opt_field_init_list uses field_init_list; + +opt_field_init_list: field_init_list + final { + $$->fieldInitVect = $1->fieldInitVect; + }; +opt_field_init_list: + final { + $$->fieldInitVect = 0; + }; + +nonterm field_init_list +{ + FieldInitVect 
*fieldInitVect; +}; + +field_init_list: field_init_list ',' field_init + final { + $$->fieldInitVect = $1->fieldInitVect; + $$->fieldInitVect->append( $3->fieldInit ); + }; +field_init_list: field_init + final { + $$->fieldInitVect = new FieldInitVect; + $$->fieldInitVect->append( $1->fieldInit ); + }; + +nonterm field_init +{ + FieldInit *fieldInit; +}; + +field_init: TK_Word ':' code_expr + final { + $$->fieldInit = new FieldInit( $1->loc, $1->data, $3->expr ); + }; + +# +# Regular Expressions +# + +nonterm opt_rl_join uses rl_join; + +opt_rl_join: rl_join + final { + $$->join = $1->join; + }; + +opt_rl_join: + final { + $$->join = 0; + }; + +nonterm rl_join +{ + Join *join; +}; + +rl_join: + rl_join ',' rl_expr + final { + /* Append the expression to the list and return it. */ + $1->join->exprList.append( $3->expression ); + $$->join = $1->join; + }; +rl_join: + rl_expr + final { + $$->join = new Join( $1->expression ); + }; + +nonterm rl_expr +{ + Expression *expression; +}; + +rl_expr: + rl_expr '|' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::OrType ); + }; +rl_expr: + rl_expr '&' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::IntersectType ); + }; +# This priority specification overrides the innermost parsing strategy which +# results ordered choice interpretation of the grammar. 
+rl_expr: + rl_expr '-' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::SubtractType ); + }; +rl_expr: + rl_expr TK_DashDash rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::StrongSubtractType ); + }; +rl_expr: + rl_term_short final { + $$->expression = new Expression( $1->term ); + }; + +nonterm rl_term_short +{ + Term *term; +}; + +shortest rl_term_short; + +rl_term_short: rl_term + final { $$->term = $1->term; }; + +nonterm rl_term +{ + Term *term; +}; + +rl_term: + rl_term factor_with_label final { + $$->term = new Term( $1->term, $2->factorWithAug ); + }; +rl_term: + rl_term '.' factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug ); + }; +rl_term: + rl_term TK_ColonGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); + }; +rl_term: + rl_term TK_ColonGtGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); + }; +rl_term: + rl_term TK_LtColon factor_with_label final { + $$->term = new Term( $1->term, + $3->factorWithAug, Term::LeftType ); + }; +rl_term: + factor_with_label final { + $$->term = new Term( $1->factorWithAug ); + }; + +nonterm factor_with_label +{ + FactorWithAug *factorWithAug; +}; + +factor_with_label: + factor_with_ep final { + $$->factorWithAug = $1->factorWithAug; + }; + +factor_with_label: + TK_Word ':' factor_with_label final { + $$->factorWithAug = $3->factorWithAug; + + if ( pd->objFieldMap->find( $1->data ) != 0 ) + error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp; + + /* Create the object field. */ + NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + TypeRef *typeRef = new TypeRef( $1->loc, qual, "str" ); + ObjField *objField = new ObjField( $1->loc, typeRef, $1->data ); + + /* Insert it into the map. 
*/ + pd->objFieldMap->insert( $1->data, objField ); + + /* Create the enter and leaving actions that will mark the substring. */ + Action *enter = new Action( MarkEnter, objField ); + Action *leave = new Action( MarkLeave, objField ); + pd->actionList.append( enter ); + pd->actionList.append( leave ); + + /* Add entering and leaving actions. */ + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) ); + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) ); + }; + +nonterm factor_with_ep +{ + FactorWithAug *factorWithAug; +}; + +factor_with_ep: + factor_with_aug final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_aug +{ + FactorWithAug *factorWithAug; +}; + +factor_with_aug: + factor_with_rep final { + $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); + }; + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm factor_with_rep +{ + FactorWithRep *factorWithRep; +}; + +factor_with_rep: + factor_with_rep '*' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarType ); + }; +factor_with_rep: + factor_with_rep TK_StarStar final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + }; +factor_with_rep: + factor_with_rep '?' 
final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + }; +factor_with_rep: + factor_with_rep '+' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::ExactType ); + }; +factor_with_rep: + factor_with_rep '{' ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, $4->rep, FactorWithRep::MaxType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::MinType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, $5->rep, FactorWithRep::RangeType ); + }; +factor_with_rep: + factor_with_neg final { + $$->factorWithRep = new FactorWithRep( + $1->factorWithNeg->loc, $1->factorWithNeg ); + }; + +nonterm factor_rep_num +{ + int rep; +}; + +factor_rep_num: + TK_UInt final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + int rep = strtol( $1->data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + // Repetition too large. Recover by returing repetition 1. */ + error($1->loc) << "repetition number " << $1->data << " overflows" << endl; + $$->rep = 1; + } + else { + // Cannot be negative, so no overflow. + $$->rep = rep; + } + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm factor_with_neg +{ + FactorWithNeg *factorWithNeg; +}; + +factor_with_neg: + '!' 
factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::NegateType ); + }; +factor_with_neg: + '^' factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::CharNegateType ); + }; +factor_with_neg: + rl_factor final { + $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor ); + }; + +nonterm rl_factor +{ + Factor *factor; +}; + +rl_factor: + TK_Literal final { + /* Create a new factor node going to a concat literal. */ + $$->factor = new Factor( new Literal( $1->loc, $1->data, Literal::LitString ) ); + }; +rl_factor: + alphabet_num final { + /* Create a new factor node going to a literal number. */ + $$->factor = new Factor( new Literal( $1->loc, + $1->data, Literal::Number ) ); + }; +rl_factor: + TK_Word final { + /* Find the named graph. */ + Namespace *nspace = namespaceStack.top(); + GraphDictEl *gdNode = nspace->graphDict.find( $1->data ); + if ( gdNode == 0 ) { + /* Recover by returning null as the factor node. */ + error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; + $$->factor = 0; + } + else if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + error($1->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + $$->factor = new Factor( $1->loc, gdNode->value ); + } + }; +rl_factor: + TK_SqOpen regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to an OR expression. */ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +rl_factor: + TK_SqOpenNeg regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to a negated OR expression. 
*/ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +rl_factor: + range_lit TK_DotDot range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = new Factor( new Range( $1->literal, $3->literal ) ); + }; +rl_factor: + '(' rl_join ')' final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = new Factor( $2->join ); + }; + +nonterm range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +range_lit: + TK_Literal final { + /* Range literals must have only one char. We restrict this in the parse tree. */ + $$->literal = new Literal( $1->loc, $1->data, Literal::LitString ); + }; +range_lit: + alphabet_num final { + /* Create a new literal number. */ + $$->literal = new Literal( $1->loc, $1->data, Literal::Number ); + }; + +nonterm alphabet_num uses token_data; + +# Any form of a number that can be used as a basic machine. +alphabet_num: + TK_UInt final { + $$->loc = $1->loc; + $$->data = $1->data; + }; +alphabet_num: + '-' TK_UInt final { + /* Prepend the minus sign that was matched so the number text stays negative. */ + $$->loc = $1->loc; + $$->data = '-'; + $$->data += $2->data; + }; +alphabet_num: + TK_Hex final { + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Regular Expressions. +# + + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. +nonterm regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +regular_expr_or_data: + regular_expr_or_data regular_expr_or_char final { + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. 
*/ + if ( $2->reOrItem->type == ReOrItem::Data && + $1->reOrBlock->type == ReOrBlock::RecurseItem && + $1->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $1->reOrBlock->item->data += $2->reOrItem->data; + delete $2->reOrItem; + $$->reOrBlock = $1->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); + } + }; +regular_expr_or_data: + final { + $$->reOrBlock = new ReOrBlock(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +regular_expr_or_char: + TK_ReChar final { + $$->reOrItem = new ReOrItem( $1->loc, $1->data ); + }; +regular_expr_or_char: + TK_ReChar TK_Dash TK_ReChar final { + $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); + }; + +# A local state reference. Cannot have :: prefix. +local_state_ref: + no_name_sep state_ref_names; + +# Clear the name ref structure. +no_name_sep: + final { + nameRef.empty(); + }; + +# A qualified state reference. +state_ref: opt_name_sep state_ref_names; + +# Optional leading name separator. +opt_name_sep: + TK_NameSep + final { + /* Insert an initial null pointer val to indicate the existence of the + * initial name seperator. */ + nameRef.setAs( 0 ); + }; +opt_name_sep: + final { + nameRef.empty(); + }; + +# List of names separated by :: +state_ref_names: + state_ref_names TK_NameSep TK_Word + final { + nameRef.append( $3->data ); + }; +state_ref_names: + TK_Word + final { + nameRef.append( $1->data ); + }; + +nonterm opt_commit +{ + bool commit; +}; + +opt_commit: final { $$->commit = false; }; +opt_commit: KW_Commit final { $$->commit = true; }; + +# +# Grammar Finished +# + + write types; + write data; +}%% + +void Parser::init() +{ + + /* Set up the root namespace. 
*/ + const char *rootNamespaceName = "___ROOT_NAMESPACE"; + Namespace *rootNamespace = new Namespace( InputLoc(), + rootNamespaceName, pd->namespaceList.length(), 0 ); + pd->namespaceList.append( rootNamespace ); + namespaceStack.push( rootNamespace ); + pd->rootNamespace = rootNamespace; + + /* Set up the root token region. */ + const char *rootRegionName = "___ROOT_REGION"; + TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName, + pd->regionList.length(), 0 ); + pd->regionList.append( rootRegion ); + JoinOrLm *joinOrLm = new JoinOrLm( rootRegion ); + addRegularDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm, true ); + regionStack.push( rootRegion ); + pd->rootRegion = rootRegion; + + /* Set up the global object. */ + String global = "global"; + ObjFieldMap *objFieldMap = new ObjFieldMap; + pd->globalObjectDef = new ObjectDef( ObjectDef::UserType, global, + objFieldMap, new ObjMethodMap(), pd->nextObjectId++ ); + + /* The eofTokenRegion defaults to the root region. */ + pd->eofTokenRegion = rootRegion; + + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + pd->initGraphDict(); + + pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType, + "local", new ObjFieldMap(), new ObjMethodMap(), + pd->nextObjectId++ ); + pd->curLocalFrame = pd->rootLocalFrame; + + %% write init; +} + +int Parser::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 0 : -1; +} + +void Parser::addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, JoinOrLm *joinOrLm, bool isInstance ) +{ + GraphDictEl *newEl = nspace->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. 
*/ + newEl->value = new VarDef( name, joinOrLm ); + newEl->isInstance = isInstance; + newEl->loc = loc; + + /* It it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +void Parser::addProduction( InputLoc &loc, const String &name, + ProdElList *prodElList, bool commit, CodeBlock *redBlock ) +{ + /* Get the language element. */ + KlangEl *prodName = getKlangEl( pd, namespaceStack.top(), + name, KlangEl::NonTerm ); + + /* Check that the element wasn't previously defined as something else. */ + if ( prodName->type != KlangEl::Unknown + && prodName->type != KlangEl::NonTerm ) + { + error(loc) << "'" << name << "' already defined as something else" << endp; + } + else { + Namespace *nspace = namespaceStack.top(); + + prodName->type = KlangEl::NonTerm; + Definition *newDef = new Definition( loc, prodName, + prodElList, commit, redBlock, + pd->prodList.length(), Definition::Production ); + + prodName->defList.append( newDef ); + pd->prodList.append( newDef ); + + /* If the token has the same name as the region it is in, then also + * insert it into the symbol map for the parent region. */ + if ( strcmp( name, nspace->name ) == 0 ) { + /* Insert the name into the top of the region stack after popping the + * region just created. We need it in the parent. */ + nspace->parentNamespace->symbolMap.insert( name, prodName ); + } + } +} + +ostream &Parser::parse_error( int tokId, Token &token ) +{ + /* Maintain the error count. 
*/ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << Parser_lelNames[tokId] << "\""; + else + cerr << Parser_lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + +int Parser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + + if ( toklen > 0 ) + token.data.setAs( tokstart, toklen ); + + token.loc = loc; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} diff --git a/colm/lmscan.h b/colm/lmscan.h new file mode 100644 index 00000000..06ad6deb --- /dev/null +++ b/colm/lmscan.h @@ -0,0 +1,131 @@ +/* + * Copyright 2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include +#include +#include + +#include "colm.h" +#include "lmparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" +#include "buffer.h" + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +extern char *Parser_lelNames[]; + +/* One entry in the stack of include file/section pairs currently being + * processed. The stack is used to detect a recursive include structure. */ +struct IncludeStackItem +{ + IncludeStackItem( char *fileName, char *sectionName ) + : fileName(fileName), sectionName(sectionName) {} + + char *fileName; + char *sectionName; +}; + +typedef Vector IncludeStack; + +struct Scanner +{ + Scanner( const char *fileName, istream &input, ostream &output, + Parser *inclToParser, char *inclSectionTarg, + int includeDepth ) + : + fileName(fileName), input(input), output(output), + inclToParser(inclToParser), + inclSectionTarg(inclSectionTarg), + includeDepth(includeDepth), + line(1), column(1), lastnl(0), + parserExistsError(false), + whitespaceOn(true) + { + /* Create and initialize the parser for this scanner. */ + parser = new Parser( fileName, "machine", InputLoc() ); + parser->init(); + } + + bool recursiveInclude( char *inclFileName, char *inclSectionName ); + + #if 0 + char *prepareFileName( char *fileName, int len ) + { + bool caseInsensitive; + Token tokenFnStr, tokenRes; + tokenFnStr.data = fileName; + tokenFnStr.length = len; + tokenFnStr.prepareLitString( tokenRes, caseInsensitive ); + return tokenRes.data; + } + #endif + + void sectionParseInit(); + void token( int type, char *start, char *end ); + void token( int type, char c ); + void token( int type ); + void updateCol(); + void endSection(); + void do_scan(); + ostream &scan_error(); + + const char *fileName; + 
istream &input; + ostream &output; + Parser *inclToParser; + char *inclSectionTarg; + int includeDepth; + + int cs; + int line; + char *word, *lit; + int word_len, lit_len; + InputLoc sectionLoc; + char *ts, *te; + int column; + char *lastnl; + + /* Set by machine statements, these persist from section to section + * allowing for unnamed sections. */ + Parser *parser; + IncludeStack includeStack; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists. */ + bool parserExistsError; + + /* This is for inline code. By default it is on. It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; + + Buffer litBuf; +}; + +#endif /* _RLSCAN_H */ diff --git a/colm/lmscan.rl b/colm/lmscan.rl new file mode 100644 index 00000000..6bbc5d00 --- /dev/null +++ b/colm/lmscan.rl @@ -0,0 +1,545 @@ +/* + * Copyright 2006-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "colm.h" +#include "lmscan.h" +#include "lmparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +//#define PRINT_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void Scanner::sectionParseInit() +{ + %% write init; +} + +ostream &Scanner::scan_error() +{ + /* Maintain the error count. */ + gblErrorCount += 1; + cerr << fileName << ":" << line << ":" << column << ": "; + return cerr; +} + +bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName ) +{ + for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { + if ( strcmp( si->fileName, inclFileName ) == 0 && + strcmp( si->sectionName, inclSectionName ) == 0 ) + { + return true; + } + } + return false; +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + //cerr << "adding " << te - from << " to column" << endl; + column += te - from; + lastnl = 0; +} + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + +%%{ + machine section_parse; + import "lmparse.h"; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { scan_error() << "bad machine statement" << endl; } + action incl_err { scan_error() << "bad include statement" << endl; } + action write_err { scan_error() << "bad write statement" << endl; } + + action handle_include + { + #if 0 + char *inclSectionName = word; + char *inclFileName = 0; + + /* Implement defaults for the input file and section name. 
*/ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) + inclFileName = prepareFileName( lit, lit_len ); + else + inclFileName = fileName; + + /* Check for a recursive include structure. Add the current file/section + * name then check if what we are including is already in the stack. */ + includeStack.append( IncludeStackItem( fileName, parser->sectionName ) ); + + if ( recursiveInclude( inclFileName, inclSectionName ) ) + scan_error() << "include: this is a recursive include operation" << endl; + else { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inclFileName ); + if ( ! inFile->is_open() ) { + scan_error() << "include: could not open " << + inclFileName << " for reading" << endl; + } + + Scanner scanner( inclFileName, *inFile, output, parser, + inclSectionName, includeDepth+1 ); + scanner.do_scan( ); + delete inFile; + } + + /* Remove the last element (len-1) */ + includeStack.remove( -1 ); + #endif + } + + include_target = + TK_Literal >clear_words @store_lit; + + include_stmt = + ( KW_Include include_target ';' ) @handle_include + <>err incl_err <>eof incl_err; + + action handle_token + { + InputLoc loc; + + #ifdef PRINT_TOKENS + cerr << "scanner:" << line << ":" << column << + ": sending token to the parser " << Parser_lelNames[*p]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, type, tokdata, toklen ); + } + + # Catch everything else. 
+ everything_else = ^( KW_Include ) @handle_token; + + main := ( + include_stmt | + everything_else + )*; +}%% + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + int *p = &type; + int *pe = &type + 1; + int *eof = 0; + + if ( start != 0 ) { + toklen = end-start; + tokdata = new char[toklen+1]; + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + /* Probably use: token( -1 ); */ +} + +%%{ + machine rlscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, commetns, and other common things. + ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + # These literal forms are common to C-like host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + or_literal := |* + # Escape sequences in OR expressions. + '\\0' => { token( TK_ReChar, '\0' ); }; + '\\a' => { token( TK_ReChar, '\a' ); }; + '\\b' => { token( TK_ReChar, '\b' ); }; + '\\t' => { token( TK_ReChar, '\t' ); }; + '\\n' => { token( TK_ReChar, '\n' ); }; + '\\v' => { token( TK_ReChar, '\v' ); }; + '\\f' => { token( TK_ReChar, '\f' ); }; + '\\r' => { token( TK_ReChar, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( TK_ReChar, ts+1, te ); }; + + # Range dash in an OR expression. + '-' => { token( TK_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( TK_SqClose ); fret; }; + + EOF => { + scan_error() << "unterminated OR literal" << endl; + }; + + # Characters in an OR expression. 
+ [^\]] => { token( TK_ReChar, ts, te ); }; + + *|; + + regular_type := |* + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + # Numbers + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, ts, te ); }; + + '[' => { token( TK_SqOpen ); fcall or_literal; }; + '[^' => { token( TK_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( '/'); fret; }; + + # Ignore. + pound_comment => { updateCol(); }; + + '..' => { token( TK_DotDot ); }; + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + + ':>' => { token( TK_ColonGt ); }; + ':>>' => { token( TK_ColonGtGt ); }; + '<:' => { token( TK_LtColon ); }; + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. + NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; + + literal_pattern := |* + '\\' 'a' { litBuf.append( '\a' ); }; + '\\' 'b' { litBuf.append( '\b' ); }; + '\\' 't' { litBuf.append( '\t' ); }; + '\\' 'n' { litBuf.append( '\n' ); }; + '\\' 'v' { litBuf.append( '\v' ); }; + '\\' 'f' { litBuf.append( '\f' ); }; + '\\' 'r' { litBuf.append( '\r' ); }; + + '\\' any { + litBuf.append( ts[1] ); + }; + '"' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '"' ); + fret; + }; + NL => { + if ( litBuf.length > 0 ) { + litBuf.append( '\n' ); + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '"' ); + fret; + }; + '[' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '[' ); + fcall main; + }; + any => { + litBuf.append( *ts ); + }; + *|; + + # Parser definitions. 
+ main := |* + 'lex' => { token( KW_Lex ); }; + 'commit' => { token( KW_Commit ); }; + 'token' => { token( KW_Token ); }; + 'literal' => { token( KW_Literal ); }; + 'rl' => { token( KW_Rl ); }; + 'def' => { token( KW_Def ); }; + 'ignore' => { token( KW_Ignore ); }; + 'construct' => { token( KW_Construct ); }; + 'new' => { token( KW_New ); }; + 'print' => { token( KW_Print ); }; + 'if' => { token( KW_If ); }; + 'reject' => { token( KW_Reject ); }; + 'while' => { token( KW_While ); }; + 'else' => { token( KW_Else ); }; + 'elsif' => { token( KW_Elsif ); }; + 'match' => { token( KW_Match ); }; + 'for' => { token( KW_For ); }; + 'iter' => { token( KW_Iter ); }; + 'print_xml' => { token( KW_PrintXML ); }; + 'namespace' => { token( KW_Namespace ); }; + 'lex' => { token( KW_Lex ); }; + 'map' => { token( KW_Map ); }; + 'list' => { token( KW_List ); }; + 'vector' => { token( KW_Vector ); }; + 'return' => { token( KW_Return ); }; + 'break' => { token( KW_Break ); }; + 'yield' => { token( KW_Yield ); }; + 'typeid' => { token( KW_TypeId ); }; + 'make_token' => { token( KW_MakeToken ); }; + 'make_tree' => { token( KW_MakeTree ); }; + 'reducefirst' => { token( KW_ReduceFirst ); }; + 'for' => { token( KW_For ); }; + 'in' => { token( KW_In ); }; + 'nil' => { token( KW_Nil ); }; + 'true' => { token( KW_True ); }; + 'false' => { token( KW_False ); }; + 'parse' => { token( KW_Parse ); }; + 'parse_stop' => { token( KW_ParseStop ); }; + 'global' => { token( KW_Global ); }; + 'ptr' => { token( KW_Ptr ); }; + 'ref' => { token( KW_Ref ); }; + 'deref' => { token( KW_Deref ); }; + 'require' => { token( KW_Require ); }; + 'preeof' => { token( KW_Preeof ); }; + + # Identifiers. 
+ ident => { token( TK_Word, ts, te ); } ; + + number => { token( TK_Number, ts, te ); }; + + '/' => { + token( '/' ); + fcall regular_type; + }; + + "~" [^\n]* NL => { + token( '"' ); + token( TK_LitPat, ts+1, te ); + token( '"' ); + }; + + s_literal => { + token( TK_Literal, ts, te ); + }; + + '"' => { + token( '"' ); + litBuf.clear(); + fcall literal_pattern; + }; + '[' => { + token( '[' ); + fcall main; + }; + + ']' => { + token( ']' ); + if ( top > 0 ) + fret; + }; + + # Ignore. + pound_comment => { updateCol(); }; + + '=>' => { token( TK_DoubleArrow ); }; + '==' => { token( TK_DoubleEql ); }; + '!=' => { token( TK_NotEql ); }; + '::' => { token( TK_DoubleColon ); }; + '<=' => { token( TK_LessEql ); }; + '>=' => { token( TK_GrtrEql ); }; + '->' => { token( TK_RightArrow ); }; + '&&' => { token( TK_AmpAmp ); }; + '||' => { token( TK_BarBar ); }; + + ('+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' ) => { token( *ts ); }; + + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. + NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; +}%% + +%% write data; + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + const char last_char = 0; + int cs, act, have = 0; + int top, stack[32]; + bool execute = true; + + sectionParseInit(); + %% write init; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. 
*/ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = last_char, len = 1; + execute = false; + } + + char *pe = p + len; + char *eof = 0; + %% write exec; + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + scan_error() << "scanner error" << endl; + exit(1); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + delete[] buf; + + InputLoc loc; + loc.fileName = ""; + loc.line = line; + loc.col = 1; + parser->token( loc, Parser_tk_eof, 0, 0 ); +} + +void scan( char *fileName, istream &input, ostream &output ) +{ + Scanner scanner( fileName, input, output, 0, 0, 0 ); +} diff --git a/colm/main.cpp b/colm/main.cpp new file mode 100644 index 00000000..2e78efd7 --- /dev/null +++ b/colm/main.cpp @@ -0,0 +1,357 @@ +/* + * Copyright 2001-2007 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "colm.h" +#include "lmscan.h" +#include "pcheck.h" +#include "vector.h" +#include "version.h" +#include "common.h" +#include "parsedata.h" +#include "vector.h" +#include "version.h" +#include "fsmcodegen.h" +#include "dotgen.h" + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +/* Graphviz dot file generation. */ +bool graphvizDone = false; + +bool printPrintables = false; + +using std::ostream; +using std::istream; +using std::ifstream; +using std::ofstream; +using std::ios; +using std::cout; +using std::cerr; +using std::cin; +using std::endl; + +/* Target language and output style. */ +char defExtension[] = ".cpp"; + +/* Io globals. */ +istream *inStream = 0; +ostream *outStream = 0; +const char *inputFileName = 0; +const char *outputFileName = 0; + +bool generateGraphviz = false; +bool branchPointInfo = false; +bool addUniqueEmptyProductions = false; + +/* Print version information. */ +void version(); + +/* Total error count. */ +int gblErrorCount = 0; + +/* Print the opening to an error in the input, then return the error ostream. */ +ostream &error( const InputLoc &loc ) +{ + /* Keep the error count. */ + gblErrorCount += 1; + + cerr << "error: " << inputFileName << ":" << + loc.line << ":" << loc.col << ": "; + return cerr; +} + +/* Print the opening to a program error, then return the error stream. */ +ostream &error() +{ + gblErrorCount += 1; + cerr << "error: " PROGNAME ": "; + return cerr; +} + + +/* Print the opening to a warning, then return the error ostream. 
*/ +ostream &warning( ) +{ + cerr << "warning: " << inputFileName << ": "; + return cerr; +} + +/* Print the opening to a warning in the input, then return the error ostream. */ +ostream &warning( const InputLoc &loc ) +{ + assert( inputFileName != 0 ); + cerr << "warning: " << inputFileName << ":" << + loc.line << ":" << loc.col << ": "; + return cerr; +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +void escapeLineDirectivePath( std::ostream &out, char *path ); +void scan( char *fileName, istream &input ); + +bool printStatistics = false; + +/* Print a summary of the options. */ +void usage() +{ + cout << +"usage: colm [options] file\n" +"general:\n" +" -h, -H, -?, --help print this usage and exit\n" +" -v, --version print version information and exit\n" +" -o write output to \n" +" -i show conflict information\n" + ; +} + +/* Print version information. */ +void version() +{ + cout << "Colm version " VERSION << " " PUBDATE << endl << + "Copyright (c) 2007, 2008 by Adrian Thurston" << endl; +} + +/* Invoked by the parser when the root element is opened. */ +void openOutput( ) +{ + /* If the output format is code and no output file name is given, then + * make a default. */ + if ( outputFileName == 0 ) { + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + outputFileName = fileNameFromStem( inputFileName, ".h" ); + else { + const char *defExtension = ".cpp"; + outputFileName = fileNameFromStem( inputFileName, defExtension ); + } + } + + #ifdef COLM_LOG_COMPILE + cerr << "opening output file: " << outputFileName << endl; + #endif + + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( outputFileName != 0 && strcmp( inputFileName, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + if ( outputFileName != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( outputFileName ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void compileOutput( const char *argv0 ) +{ + /* Find the location of us. */ + char *location = strdup( argv0 ); + char *last = location + strlen(location) - 1; + while ( true ) { + if ( last == location ) { + last[0] = '.'; + last[1] = 0; + break; + } + if ( *last == '/' ) { + last[0] = 0; + break; + } + last -= 1; + } + + char *exec = fileNameFromStem( outputFileName, ".bin" ); + + int length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec); + char command[length]; + sprintf( command, + "g++ -Wall -Wwrite-strings" + " -I%s/../aapl" + " -I%s/../colm" + " -I%s/../common" + " -g" + " -o %s" + " %s" + " %s/../colm/runtime.a", + location, location, location, exec, outputFileName, location ); + #ifdef COLM_LOG_COMPILE + cout << "compiling: " << outputFileName << endl; + #endif + int res = system( command ); + if ( res != 0 ) + cout << "there was a problem compiling the output" << endl; +} + +void process_args( int argc, const char **argv ) +{ + ParamCheck pc( "io:S:M:vHh?-:s", argc, argv ); + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + case 'i': + branchPointInfo = true; + break; + /* Output. 
*/ + case 'o': + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = pc.parameterArg; + } + break; + + /* Version and help. */ + case 'v': + version(); + exit(0); + case 'H': case 'h': case '?': + usage(); + exit(0); + case 's': + printStatistics = true; + break; + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + } + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, const char **argv) +{ + process_args( argc, argv ); + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Open the input file for reading. */ + istream *inStream; + if ( inputFileName != 0 ) { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inputFileName ); + inStream = inFile; + if ( ! 
inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endl; + } + else { + inputFileName = ""; + inStream = &cin; + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + Scanner scanner( inputFileName, *inStream, cout, 0, 0, 0 ); + scanner.do_scan(); + + /* Parsing complete, check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + /* Initiate a compile following a parse. */ + scanner.parser->pd->semanticAnalysis(); + + if ( outStream != 0 ) + delete outStream; + + compileOutput( argv[0] ); + + return 0; +} diff --git a/colm/map.cpp b/colm/map.cpp new file mode 100644 index 00000000..6327b301 --- /dev/null +++ b/colm/map.cpp @@ -0,0 +1,806 @@ +/* + * Copyright 2008 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pdarun.h" + +void Map::listAbandon() +{ + head = tail = 0; +} + +void Map::listAddBefore( MapEl *next_el, MapEl *new_el ) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->next = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next elememnt. We are inserting at the tail. 
*/ + new_el->prev = tail; + tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->prev = next_el->prev; + next_el->prev = new_el; + } + + /* Set forward pointers. */ + if (new_el->prev == 0) { + /* There is no previous element. Set the head pointer.*/ + head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->prev->next = new_el; + } +} + +/* Link new_el into the list immediately after prev_el. A null prev_el means + * new_el is inserted at the head. */ +void Map::listAddAfter( MapEl *prev_el, MapEl *new_el ) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->prev = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->next = head; + head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->next = prev_el->next; + prev_el->next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->next == 0) { + /* There is no next element. Set the tail pointer. */ + tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->next->prev = new_el; + } +} + +/* Unlink el from the list, moving head and tail when el was at either end, + * and return it. */ +MapEl *Map::listDetach(MapEl *el) +{ + /* Set forward pointers to skip over el. */ + if (el->prev == 0) + head = el->next; + else + el->prev->next = el->next; + + /* Set reverse pointers to skip over el. */ + if (el->next == 0) + tail = el->prev; + else + el->next->prev = el->prev; + + /* Return the element we detached. Its own prev and next pointers are + * left as they were. */ + return el; +} + + +/* Recursive worker for tree copying. */ +MapEl *Map::copyBranch( Program *p, MapEl *el, Kid *oldNextDown, Kid *&newNextDown ) +{ + /* Duplicate element. The new element comes from the program's MapEl pool. + * NOTE(review): no constructor is invoked here, so confirm what state + * allocate() leaves the left and right pointers in. */ + MapEl *newEl = p->mapElPool.allocate(); + + if ( (Kid*)el == oldNextDown ) + newNextDown = (Kid*)newEl; + + /* If the left tree is there, copy it. 
*/ + /* NOTE(review): newEl, not el, is tested and recursed on here and below, and + * el is only compared against oldNextDown in this function. Confirm that + * p->mapElPool.allocate() yields a populated copy; otherwise no subtree, key + * or value is copied. */ + if ( newEl->left ) { + newEl->left = copyBranch( p, newEl->left, oldNextDown, newNextDown ); + newEl->left->parent = newEl; + } + + listAddAfter( tail, newEl ); + + /* If the right tree is there, copy it. */ + if ( newEl->right ) { + newEl->right = copyBranch( p, newEl->right, oldNextDown, newNextDown ); + newEl->right->parent = newEl; + } + + return newEl; +} + +/* Once an insertion position is found, attach an element to the tree, link it + * into the ordered list and rebalance. parentEl is null when the tree is empty; + * lastLess equals parentEl when the final step of the search went left. */ +void Map::attachRebal( MapEl *element, MapEl *parentEl, MapEl *lastLess ) +{ + /* Increment the number of elements in the tree. */ + treeSize += 1; + + /* Set element's parent. */ + element->parent = parentEl; + + /* New element always starts as a leaf with height 1. */ + element->left = 0; + element->right = 0; + element->height = 1; + + /* Are we inserting in the tree somewhere? */ + if ( parentEl != 0 ) { + /* We have a parent so we are somewhere in the tree. If the parent + * equals lastLess, then the last traversal in the insertion went + * left, otherwise it went right. */ + if ( lastLess == parentEl ) { + parentEl->left = element; + + listAddBefore( parentEl, element ); + } + else { + parentEl->right = element; + + listAddAfter( parentEl, element ); + } + } + else { + /* No parent element so we are inserting the root. */ + root = element; + + listAddAfter( tail, element ); + } + + /* Recalculate the heights. */ + recalcHeights(parentEl); + + /* Find the first unbalance. */ + MapEl *ub = findFirstUnbalGP(element); + + /* Rebalance. */ + if ( ub != 0 ) + { + /* We assert that after this single rotation the + * tree is now properly balanced. */ + rebalance(ub); + } +} + +/** + * \brief Insert an existing element into the tree. + * + * If the insert succeeds and lastFound is given then it is set to the element + * inserted. If the insert fails then lastFound is set to the existing element in + * the tree that has the same key as element. If the element's avl pointers are + * already in use then undefined behaviour results. 
+ * + * \returns The element inserted upon success, null upon failure. + */ +MapEl *Map::insert( MapEl *element, MapEl **lastFound ) +{ + long keyRelation; + MapEl *curEl = root, *parentEl = 0; + MapEl *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Attach underneath the leaf and rebalance. */ + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = compare( element->getKey(), + curEl->getKey() ); + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->left; + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->right; + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +/** + * \brief Insert a new element into the tree with given key. + * + * If the key is not already in the tree then a new element is allocated from + * p->mapElPool, given the key and a null tree, and the insert succeeds. If + * lastFound is given then it is set to the element inserted. If the insert + * fails then lastFound is set to the existing element in the tree that has the + * same key. + * + * \returns The new element upon success, null upon failure. + */ +MapEl *Map::insert( Program *p, Tree *key, MapEl **lastFound ) +{ + long keyRelation; + MapEl *curEl = root, *parentEl = 0; + MapEl *lastLess = 0; + + while (true) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Create the new element, attach it underneath the leaf + * and rebalance. */ + MapEl *element = p->mapElPool.allocate(); + element->key = key; + element->tree = 0; + attachRebal( element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = compare( key, curEl->getKey() ); + + /* Do we go left? 
*/ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->left; + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->right; + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +/** + * \brief Find a element in the tree with the given key. + * + * \returns The element if key exists, null if the key does not exist. + */ +MapEl *Map::find( Tree *key ) const +{ + MapEl *curEl = root; + long keyRelation; + + while (curEl) { + keyRelation = compare( key, curEl->getKey() ); + + /* Do we go left? */ + if ( keyRelation < 0 ) + curEl = curEl->left; + /* Do we go right? */ + else if ( keyRelation > 0 ) + curEl = curEl->right; + /* We have hit the target. */ + else { + return curEl; + } + } + return 0; +} + + +/** + * \brief Find a element, then detach it from the tree. + * + * The element is not deleted. + * + * \returns The element detached if the key is found, othewise returns null. + */ +MapEl *Map::detach( Tree *key ) +{ + MapEl *element = find( key ); + if ( element ) { + detach(element); + } + + return element; +} + +/** + * \brief Find, detach and delete a element from the tree. + * + * \returns True if the element was found and deleted, false otherwise. + */ +bool Map::remove( Tree *key ) +{ + /* Assume not found. */ + bool retVal = false; + + /* Look for the key. */ + MapEl *element = find( key ); + if ( element != 0 ) { + /* If found, detach the element and delete. */ + detach( element ); + delete element; + retVal = true; + } + + return retVal; +} + +/** + * \brief Detach and delete a element from the tree. + * + * If the element is not in the tree then undefined behaviour results. + */ +void Map::remove(MapEl *element) +{ + /* Detach and delete. */ + detach(element); + delete element; +} + +/** + * \brief Detach a element from the tree. + * + * If the element is not in the tree then undefined behaviour results. 
+ *
+ * \returns The element given.
+ */
+MapEl *Map::detach(MapEl *element)
+{
+    MapEl *replacement, *fixfrom;
+    long lheight, rheight;
+
+    /* Remove the element from the ordered list. */
+    listDetach( element );
+
+    /* Update treeSize. */
+    treeSize--;
+
+    /* Find a replacement element. */
+    if (element->right)
+    {
+        /* Find the leftmost element of the right subtree. */
+        replacement = element->right;
+        while (replacement->left)
+            replacement = replacement->left;
+
+        /* If replacing the element with its child then we need to start
+         * fixing at the replacement, otherwise we start fixing at the
+         * parent of the replacement. */
+        if (replacement->parent == element)
+            fixfrom = replacement;
+        else
+            fixfrom = replacement->parent;
+
+        /* Unhook the replacement (its right child fills the gap), then put
+         * it in the detached element's position. */
+        removeEl(replacement, replacement->right);
+        replaceEl(element, replacement);
+    }
+    else if (element->left)
+    {
+        /* Find the rightmost element of the left subtree. */
+        replacement = element->left;
+        while (replacement->right)
+            replacement = replacement->right;
+
+        /* If replacing the element with its child then we need to start
+         * fixing at the replacement, otherwise we start fixing at the
+         * parent of the replacement. */
+        if (replacement->parent == element)
+            fixfrom = replacement;
+        else
+            fixfrom = replacement->parent;
+
+        /* Unhook the replacement (its left child fills the gap), then put
+         * it in the detached element's position. */
+        removeEl(replacement, replacement->left);
+        replaceEl(element, replacement);
+    }
+    else
+    {
+        /* We need to start fixing at the parent of the element. */
+        fixfrom = element->parent;
+
+        /* The element we are deleting is a leaf element. */
+        removeEl(element, 0);
+    }
+
+    /* If fixfrom is null it means we just deleted
+     * the root of the tree. */
+    if ( fixfrom == 0 )
+        return element;
+
+    /* Fix the heights after the deletion. */
+    recalcHeights(fixfrom);
+
+    /* Fix every unbalanced element going up in the tree. */
+    MapEl *ub = findFirstUnbalEl(fixfrom);
+    while ( ub )
+    {
+        /* Find the element to rebalance by moving down from the first unbalanced
+         * element 2 levels in the direction of the greatest heights. On the
+         * second move down, the heights may be equal ( but not on the first ).
+         * In which case go in the direction of the first move. */
+        lheight = ub->left ? ub->left->height : 0;
+        rheight = ub->right ? ub->right->height : 0;
+        assert( lheight != rheight );
+        if (rheight > lheight)
+        {
+            ub = ub->right;
+            lheight = ub->left ?
+                    ub->left->height : 0;
+            rheight = ub->right ?
+                    ub->right->height : 0;
+            if (rheight > lheight)
+                ub = ub->right;
+            else if (rheight < lheight)
+                ub = ub->left;
+            else
+                ub = ub->right;
+        }
+        else
+        {
+            ub = ub->left;
+            lheight = ub->left ?
+                    ub->left->height : 0;
+            rheight = ub->right ?
+                    ub->right->height : 0;
+            if (rheight > lheight)
+                ub = ub->right;
+            else if (rheight < lheight)
+                ub = ub->left;
+            else
+                ub = ub->left;
+        }
+
+
+        /* rebalance returns the great-grandparent of the element passed in,
+         * i.e. the parent of the subtree that was just rotated. We must
+         * continue upward from there rebalancing. */
+        fixfrom = rebalance(ub);
+
+        /* Find the next unbalanced element. */
+        ub = findFirstUnbalEl(fixfrom);
+    }
+
+    return element;
+}
+
+
+/* Delete every element in the tree and reset the map to empty. Does nothing
+ * when the tree has no root. */
+void Map::empty()
+{
+    if ( root ) {
+        /* Recursively delete from the tree structure. */
+        deleteChildrenOf(root);
+        delete root;
+        root = 0;
+        treeSize = 0;
+
+        /* The list links lived inside the elements just deleted, so the
+         * list is abandoned, not walked. */
+        listAbandon();
+    }
+}
+
+/* Recursively delete all the children of a element. The element itself is
+ * left for the caller to delete; both of its child pointers are null on
+ * return. */
+void Map::deleteChildrenOf( MapEl *element )
+{
+    /* Recurse left. */
+    if (element->left) {
+        deleteChildrenOf(element->left);
+
+        /* Delete left element. */
+        delete element->left;
+        element->left = 0;
+    }
+
+    /* Recurse right. */
+    if (element->right) {
+        deleteChildrenOf(element->right);
+
+        /* Delete right element. This used to clear element->left a second
+         * time, which left element->right pointing at freed memory. */
+        delete element->right;
+        element->right = 0;
+    }
+}
+
+/* rebalance from a element whose grandparent is unbalanced.
Only
+ * call on a element that has a grandparent. */
+MapEl *Map::rebalance(MapEl *n)
+{
+    MapEl *par = n->parent;         /* Parent (non-null). */
+    MapEl *grand = par->parent;     /* Grandparent (non-null). */
+    MapEl *great = grand->parent;   /* Great-grandparent (may be null). */
+
+    /* Which way does each of the two links from grand down to n go? */
+    const bool parIsRight = ( grand->right == par );
+    const bool nIsRight = ( par->right == n );
+
+    /* After the rotation b is the subtree root with a on its left and c on
+     * its right. t1..t4 are the four subtrees hanging below, in left to
+     * right order: t1 and t2 go under a, t3 and t4 go under c. */
+    MapEl *a, *b, *c;
+    MapEl *t1, *t2, *t3, *t4;
+
+    if ( parIsRight && nIsRight ) {
+        /* grand -> right par -> right n. */
+        a = grand;
+        b = par;
+        c = n;
+        t1 = grand->left;
+        t2 = par->left;
+        t3 = n->left;
+        t4 = n->right;
+    }
+    else if ( parIsRight ) {
+        /* grand -> right par -> left n. */
+        a = grand;
+        b = n;
+        c = par;
+        t1 = grand->left;
+        t2 = n->left;
+        t3 = n->right;
+        t4 = par->right;
+    }
+    else if ( nIsRight ) {
+        /* grand -> left par -> right n. */
+        a = par;
+        b = n;
+        c = grand;
+        t1 = par->left;
+        t2 = n->left;
+        t3 = n->right;
+        t4 = grand->right;
+    }
+    else {
+        /* grand -> left par -> left n. */
+        a = n;
+        b = par;
+        c = grand;
+        t1 = n->left;
+        t2 = n->right;
+        t3 = par->right;
+        t4 = grand->right;
+    }
+
+    /*
+     * Perform the rotation.
+     */
+
+    /* Hook b in where grand used to hang. */
+    if ( great == 0 )
+        root = b;
+    else if ( great->left == grand )
+        great->left = b;
+    else
+        great->right = b;
+    b->parent = great;
+
+    /* a and c become the children of b. */
+    b->left = a;
+    a->parent = b;
+    b->right = c;
+    c->parent = b;
+
+    /* t1 and t2 go under a. */
+    a->left = t1;
+    if ( t1 != 0 )
+        t1->parent = a;
+    a->right = t2;
+    if ( t2 != 0 )
+        t2->parent = a;
+
+    /* t3 and t4 go under c. */
+    c->left = t3;
+    if ( t3 != 0 )
+        t3->parent = c;
+    c->right = t4;
+    if ( t4 != 0 )
+        t4->parent = c;
+
+    /* The heights of a, c and b are recomputed by hand, bottom up. The
+     * great-grandparent is then handed to recalcHeights() to fix the rest of
+     * the way up; recalcHeights() stops at the first height that does not
+     * change. */
+    long lheight = t1 != 0 ? t1->height : 0;
+    long rheight = t2 != 0 ? t2->height : 0;
+    a->height = ( lheight > rheight ? lheight : rheight ) + 1;
+
+    lheight = t3 != 0 ? t3->height : 0;
+    rheight = t4 != 0 ? t4->height : 0;
+    c->height = ( lheight > rheight ? lheight : rheight ) + 1;
+
+    b->height = ( a->height > c->height ? a->height : c->height ) + 1;
+
+    recalcHeights(great);
+    return great;
+}
+
+/* Recompute the height of element and then of each ancestor in turn,
+ * stopping at the first one whose height comes out unchanged. */
+void Map::recalcHeights(MapEl *element)
+{
+    for ( MapEl *el = element; el != 0; el = el->parent ) {
+        long lheight = el->left ? el->left->height : 0;
+        long rheight = el->right ? el->right->height : 0;
+        long newHeight = ( lheight > rheight ? lheight : rheight ) + 1;
+
+        /* No change here means no change in any ancestor either. */
+        if ( newHeight == el->height )
+            break;
+
+        el->height = newHeight;
+    }
+}
+
+/* Starting at element and moving up, find the first element whose
+ * grandparent is unbalanced. Returns null if element has no grandparent or
+ * no unbalanced grandparent is found. */
+MapEl *Map::findFirstUnbalGP(MapEl *element)
+{
+    /* Nothing to do without a grandparent. */
+    if ( element == 0 || element->parent == 0 ||
+            element->parent->parent == 0 )
+        return 0;
+
+    /* Slide the (element, grandparent) pair up the tree in step. */
+    for ( MapEl *gp = element->parent->parent; gp != 0;
+            gp = gp->parent, element = element->parent )
+    {
+        long lheight = gp->left ? gp->left->height : 0;
+        long rheight = gp->right ? gp->right->height : 0;
+        long balanceProp = lheight - rheight;
+
+        if ( balanceProp < -1 || balanceProp > 1 )
+            return element;
+    }
+
+    return 0;
+}
+
+
+/* Finds the first element that is unbalanced.
*/
+MapEl *Map::findFirstUnbalEl(MapEl *element)
+{
+    /* Walk from element toward the root; a null element falls straight
+     * through to the final return. */
+    for ( ; element != 0; element = element->parent ) {
+        long lheight = element->left ? element->left->height : 0;
+        long rheight = element->right ? element->right->height : 0;
+        long balanceProp = lheight - rheight;
+
+        if ( balanceProp < -1 || balanceProp > 1 )
+            return element;
+    }
+
+    return 0;
+}
+
+/* Put replacement, which must not currently be linked into the tree, in the
+ * position held by element. It takes over element's children, parent link
+ * and height. */
+void Map::replaceEl(MapEl *element, MapEl *replacement)
+{
+    /* Snapshot the links first; they are read from element only once. */
+    MapEl *oldParent = element->parent;
+    MapEl *oldLeft = element->left;
+    MapEl *oldRight = element->right;
+
+    /* Adopt the children. */
+    replacement->left = oldLeft;
+    if ( oldLeft != 0 )
+        oldLeft->parent = replacement;
+
+    replacement->right = oldRight;
+    if ( oldRight != 0 )
+        oldRight->parent = replacement;
+
+    /* Hook into the parent, or become the root if there is none. */
+    replacement->parent = oldParent;
+    if ( oldParent == 0 )
+        root = replacement;
+    else if ( oldParent->left == element )
+        oldParent->left = replacement;
+    else
+        oldParent->right = replacement;
+
+    replacement->height = element->height;
+}
+
+/* Unlink element from the tree and put filler in its place. Filler should
+ * be null or a child of element. */
+void Map::removeEl(MapEl *element, MapEl *filler)
+{
+    MapEl *parent = element->parent;
+
+    if ( parent == 0 )
+        root = filler;
+    else if ( parent->left == element )
+        parent->left = filler;
+    else
+        parent->right = filler;
+
+    if ( filler != 0 )
+        filler->parent = parent;
+}
+
+
diff --git a/colm/parsedata.cpp b/colm/parsedata.cpp
new file mode 100644
index 00000000..54b55b23
--- /dev/null
+++ b/colm/parsedata.cpp
@@ -0,0 +1,1830 @@
+/*
+ *  Copyright 2001-2006 Adrian Thurston
+ */
+
+/*  This file is part of Colm.
+ *
+ *  Colm is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Colm is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Colm; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* NOTE(review): the six system header names below are missing from this copy
+ * of the patch (the angle-bracketed names appear to have been stripped). The
+ * code in this file visibly relies on errno/ERANGE, strtoul/strtoll, memset,
+ * strcmp, assert and the iostream/sstream facilities; restore the exact names
+ * from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "colm.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "fsmrun.h"
+
+using namespace std;
+using std::ostringstream;
+
+char machineMain[] = "main";
+
+/* Clean up and minimize a machine after an operation. Work is only done for
+ * the last operation in a sequence (lastInSeq true). */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
+{
+    if ( lastInSeq ) {
+        /* First clean up the graph. FsmGraph operations may leave these
+         * lying around. There should be no dead end states. The subtract
+         * intersection operators are the only places where they may be
+         * created and those operators clean them up. */
+        fsm->removeUnreachableStates();
+        fsm->minimizePartition2();
+    }
+}
+
+/* Count the transitions in the fsm by walking the state list and summing
+ * the length of each state's out list. */
+int countTransitions( FsmGraph *fsm )
+{
+    int numTrans = 0;
+    FsmState *state = fsm->stateList.head;
+    while ( state != 0 ) {
+        numTrans += state->outList.length();
+        state = state->next;
+    }
+    return numTrans;
+}
+
+/* Make an fsm key from a hexadecimal literal.
+ *
+ * str: the literal text, parsed with strtoul in base 16.
+ * loc: source location used when reporting an overflow.
+ * pd:  unused here.
+ *
+ * If the value does not fit in the alphabet type an error is reported and,
+ * when the alphabet type is narrower than unsigned long, the value is
+ * replaced by 1 << (bits in the alphabet type). For a signed alphabet type
+ * narrower than unsigned long, a value with the top alphabet bit set is
+ * sign-extended before conversion to Key. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
+{
+    /* Reset errno so we can check for overflow or underflow. */
+    errno = 0;
+    unsigned int size = keyOps->alphType->size;
+    bool unusedBits = size < sizeof(unsigned long);
+
+    unsigned long ul = strtoul( str, 0, 16 );
+
+    if ( errno == ERANGE || ( unusedBits && ( ul >> (size * 8) ) != 0 ) ) {
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+
+        /* The shift is done in unsigned long and only when it is narrower
+         * than the type. The old int shift was undefined for alphabet types
+         * of four bytes or more. When the alphabet type fills unsigned long,
+         * ul keeps the value strtoul returned. */
+        if ( unusedBits )
+            ul = 1UL << (size * 8);
+    }
+
+    /* Sign-extend into every bit above the alphabet type. The mask is
+     * built from ~0UL; the old 0xffffffff constant only reached bit 31, so
+     * with a 64-bit long a negative key came out as a large positive one. */
+    if ( unusedBits && keyOps->alphType->isSigned && ( ul >> (size * 8 - 1) ) != 0 )
+        ul |= ~0UL << (size * 8);
+
+    return Key( (long)ul );
+}
+
+/* Make an fsm key from a decimal literal.
+ *
+ * str: the literal text, parsed with strtoll in base 10.
+ * loc: source location used when reporting a range error.
+ * pd:  unused here.
+ *
+ * A value outside the alphabet type's [minVal, maxVal] is reported and
+ * clamped to the bound it violates. */
+Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
+{
+    /* First reset errno so we can check for overflow or underflow. */
+    errno = 0;
+    long long minVal = keyOps->alphType->minVal;
+    long long maxVal = keyOps->alphType->maxVal;
+
+    long long ll = strtoll( str, 0, 10 );
+
+    /* Check for underflow. */
+    if ( ( errno == ERANGE && ll < 0 ) || ll < minVal ) {
+        error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+        ll = minVal;
+    }
+    /* Check for overflow. */
+    else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+        ll = maxVal;
+    }
+
+    if ( keyOps->alphType->isSigned )
+        return Key( (long)ll );
+    else
+        return Key( (unsigned long)ll );
+}
+
+/* Make an fsm key in int format (what the fsm graph uses) from an alphabet
+ * number returned by the parser. Validates that the number doesn't overflow
+ * the alphabet type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
+{
+    /* A leading "0x" selects hex, anything else is treated as decimal. */
+    if ( str[0] == '0' && str[1] == 'x' )
+        return makeFsmKeyHex( str, loc, pd );
+    else
+        return makeFsmKeyDec( str, loc, pd );
+}
+
+/* Make an fsm int format (what the fsm graph uses) from a single character.
+ * Performs proper conversion depending on signed/unsigned property of the
+ * alphabet. */
+Key makeFsmKeyChar( char c, ParseData *pd )
+{
+    if ( keyOps->isSigned ) {
+        /* Copy from a char type. */
+        return Key( c );
+    }
+    else {
+        /* Copy from an unsigned byte type. */
+        return Key( (unsigned char)c );
+    }
+}
+
+/* Make an fsm key array in int format (what the fsm graph uses) from a string
+ * of characters. Performs proper conversion depending on signed/unsigned
+ * property of the alphabet. result must have room for len keys. */
+void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
+{
+    if ( keyOps->isSigned ) {
+        /* Copy from a char star type. */
+        char *src = data;
+        for ( int i = 0; i < len; i++ )
+            result[i] = Key(src[i]);
+    }
+    else {
+        /* Copy from an unsigned byte ptr type. */
+        unsigned char *src = (unsigned char*) data;
+        for ( int i = 0; i < len; i++ )
+            result[i] = Key(src[i]);
+    }
+}
+
+/* Insert key into result and, when caseInsensitive is set, also insert the
+ * opposite-case form of a lower or upper case key. */
+static void insertKeyCaseFold( KeySet &result, Key key, bool caseInsensitive )
+{
+    result.insert( key );
+    if ( caseInsensitive ) {
+        if ( key.isLower() )
+            result.insert( key.toUpper() );
+        else if ( key.isUpper() )
+            result.insert( key.toLower() );
+    }
+}
+
+/* Like makeFsmKeyArray except the result has only unique keys. The ordering
+ * will be changed. With caseInsensitive set, the other-case form of every
+ * letter is added as well. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+        bool caseInsensitive, ParseData *pd )
+{
+    if ( keyOps->isSigned ) {
+        /* Copy from a char star type. */
+        char *src = data;
+        for ( int si = 0; si < len; si++ )
+            insertKeyCaseFold( result, Key( src[si] ), caseInsensitive );
+    }
+    else {
+        /* Copy from an unsigned byte ptr type. */
+        unsigned char *src = (unsigned char*) data;
+        for ( int si = 0; si < len; si++ )
+            insertKeyCaseFold( result, Key( src[si] ), caseInsensitive );
+    }
+}
+
+/* Make a new machine with rangeFsm over the alphabet's minKey..maxKey. The
+ * caller owns the returned graph. */
+FsmGraph *dotFsm( ParseData *pd )
+{
+    FsmGraph *retFsm = new FsmGraph();
+    retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
+    return retFsm;
+}
+
+/* Make a new machine with rangeStarFsm over the alphabet's minKey..maxKey.
+ * The caller owns the returned graph. */
+FsmGraph *dotStarFsm( ParseData *pd )
+{
+    FsmGraph *retFsm = new FsmGraph();
+    retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+    return retFsm;
+}
+
+/* Make a builtin type.
Depends on the signed nature of the alphabet type. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
+{
+    /* FsmGraph created to return. Stays null if builtin matches no case. */
+    FsmGraph *retFsm = 0;
+    bool isSigned = keyOps->isSigned;
+
+    switch ( builtin ) {
+    case BT_Any: {
+        /* All characters. */
+        retFsm = dotFsm( pd );
+        break;
+    }
+    case BT_Ascii: {
+        /* Ascii characters 0 to 127. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( 0, 127 );
+        break;
+    }
+    case BT_Extend: {
+        /* Ascii extended characters. This is the full byte range. Dependent
+         * on signed, vs no signed.
+         * NOTE(review): the original comment also said a one-byte alphabet
+         * should just use the dot fsm, but no such branch exists. */
+        if ( isSigned ) {
+            retFsm = new FsmGraph();
+            retFsm->rangeFsm( -128, 127 );
+        }
+        else {
+            retFsm = new FsmGraph();
+            retFsm->rangeFsm( 0, 255 );
+        }
+        break;
+    }
+    case BT_Alpha: {
+        /* Alpha [A-Za-z]. */
+        FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph();
+        upper->rangeFsm( 'A', 'Z' );
+        lower->rangeFsm( 'a', 'z' );
+        upper->unionOp( lower );
+        upper->minimizePartition2();
+        retFsm = upper;
+        break;
+    }
+    case BT_Digit: {
+        /* Digits [0-9]. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( '0', '9' );
+        break;
+    }
+    case BT_Alnum: {
+        /* Alpha numerics [0-9A-Za-z]. */
+        FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph();
+        FsmGraph *upper = new FsmGraph();
+        digit->rangeFsm( '0', '9' );
+        upper->rangeFsm( 'A', 'Z' );
+        lower->rangeFsm( 'a', 'z' );
+        digit->unionOp( upper );
+        digit->unionOp( lower );
+        digit->minimizePartition2();
+        retFsm = digit;
+        break;
+    }
+    case BT_Lower: {
+        /* Lower case characters. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( 'a', 'z' );
+        break;
+    }
+    case BT_Upper: {
+        /* Upper case characters. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( 'A', 'Z' );
+        break;
+    }
+    case BT_Cntrl: {
+        /* Control characters: 0 to 31, plus 127. */
+        FsmGraph *cntrl = new FsmGraph();
+        FsmGraph *highChar = new FsmGraph();
+        cntrl->rangeFsm( 0, 31 );
+        highChar->concatFsm( 127 );
+        cntrl->unionOp( highChar );
+        cntrl->minimizePartition2();
+        retFsm = cntrl;
+        break;
+    }
+    case BT_Graph: {
+        /* Graphical ascii characters [!-~]. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( '!', '~' );
+        break;
+    }
+    case BT_Print: {
+        /* Printable characters. Same as graph except includes space. */
+        retFsm = new FsmGraph();
+        retFsm->rangeFsm( ' ', '~' );
+        break;
+    }
+    case BT_Punct: {
+        /* Punctuation: the four ascii runs between the alphanumerics. */
+        FsmGraph *range1 = new FsmGraph();
+        FsmGraph *range2 = new FsmGraph();
+        FsmGraph *range3 = new FsmGraph();
+        FsmGraph *range4 = new FsmGraph();
+        range1->rangeFsm( '!', '/' );
+        range2->rangeFsm( ':', '@' );
+        range3->rangeFsm( '[', '`' );
+        range4->rangeFsm( '{', '~' );
+        range1->unionOp( range2 );
+        range1->unionOp( range3 );
+        range1->unionOp( range4 );
+        range1->minimizePartition2();
+        retFsm = range1;
+        break;
+    }
+    case BT_Space: {
+        /* Whitespace: [\t\v\f\n\r ]. */
+        FsmGraph *cntrl = new FsmGraph();
+        FsmGraph *space = new FsmGraph();
+        cntrl->rangeFsm( '\t', '\r' );
+        space->concatFsm( ' ' );
+        cntrl->unionOp( space );
+        cntrl->minimizePartition2();
+        retFsm = cntrl;
+        break;
+    }
+    case BT_Xdigit: {
+        /* Hex digits [0-9A-Fa-f]. */
+        FsmGraph *digit = new FsmGraph();
+        FsmGraph *upper = new FsmGraph();
+        FsmGraph *lower = new FsmGraph();
+        digit->rangeFsm( '0', '9' );
+        upper->rangeFsm( 'A', 'F' );
+        lower->rangeFsm( 'a', 'f' );
+        digit->unionOp( upper );
+        digit->unionOp( lower );
+        digit->minimizePartition2();
+        retFsm = digit;
+        break;
+    }
+    case BT_Lambda: {
+        retFsm = new FsmGraph();
+        retFsm->lambdaFsm();
+        break;
+    }
+    case BT_Empty: {
+        retFsm = new FsmGraph();
+        retFsm->emptyFsm();
+        break;
+    }}
+
+    return retFsm;
+}
+
+/* Check if this name inst or any name inst below is referenced. */
+bool NameInst::anyRefsRec()
+{
+    if ( numRefs > 0 )
+        return true;
+
+    /* Recurse on children until true. */
+    for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
+        if ( (*ch)->anyRefsRec() )
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * ParseData
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. The section name also seeds parserName. */
+ParseData::ParseData( const String &fileName, const String &sectionName,
+        const InputLoc &sectionLoc, ostream &out )
+:
+    nextPriorKey(0),
+    nextLocalErrKey(1), /* 0 is reserved for global error actions. */
+    nextNameId(0),
+    alphTypeSet(false),
+    getKeyExpr(0),
+    accessExpr(0),
+    curStateExpr(0),
+    lowerNum(0),
+    upperNum(0),
+    fileName(fileName),
+    sectionName(sectionName),
+    sectionLoc(sectionLoc),
+    errorCount(0),
+    curActionOrd(0),
+    curPriorOrd(0),
+    nextEpsilonResolvedLink(0),
+    nextTokenId(1),
+    rootCodeBlock(0),
+    parserName(sectionName),
+    out(out),
+    access(0),
+    tokenStruct(0),
+    rootKlangEl(0),
+    eofKlangEl(0),
+    errorKlangEl(0),
+    defaultCharKlangEl(0),
+    rootRegion(0),
+    defaultRegion(0),
+    firstNonTermId(0),
+    prodIdIndex(0),
+    nextPatReplId(0),
+    nextGenericId(1),
+    nextFuncId(0),
+    loopCleanup(0),
+    nextObjectId(1), /* 0 is reserved for no object. */
+    nextFrameId(0),
+    nextParserId(0),
+    nextLabelId(0)
+{
+}
+
+/* Clean up the data collected during a parse. */
+ParseData::~ParseData()
+{
+    /* Delete all the nodes in the action list. Will cause all the
+     * string data that represents the actions to be deallocated. */
+    actionList.empty();
+}
+
+/* Create a name instantiation under the current name scope and return it.
+ * It is always appended to the scope's child vector; it is also entered in
+ * the scope's name map when data is non-null. */
+NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
+{
+    /* Create the name instantiation object and insert it. */
+    NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
+    curNameInst->childVect.append( newNameInst );
+    if ( data != 0 )
+        curNameInst->children.insertMulti( data, newNameInst );
+    return newNameInst;
+}
+
+/* Start a walk of the name tree at rootName, positioned on its first child. */
+void ParseData::initNameWalk( NameInst *rootName )
+{
+    curNameInst = rootName;
+    curNameChild = 0;
+}
+
+/* Goes into the next child scope. The number of the child is already set up.
+ * We need this for the synchronous name tree and parse tree walk to work
+ * properly. It is reset on entry into a scope and advanced on popping of a
+ * scope. A call to enterNameScope should be accompanied by a corresponding
+ * popNameScope. Returns the state to hand back to popNameScope. */
+NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
+{
+    /* Save off the current data. */
+    NameFrame retFrame;
+    retFrame.prevNameInst = curNameInst;
+    retFrame.prevNameChild = curNameChild;
+    retFrame.prevLocalScope = localNameScope;
+
+    /* Enter into the new name scope. */
+    for ( int i = 0; i < numScopes; i++ ) {
+        curNameInst = curNameInst->childVect[curNameChild];
+        curNameChild = 0;
+    }
+
+    if ( isLocal )
+        localNameScope = curNameInst;
+
+    return retFrame;
+}
+
+/* Return from a child scope to a parent. The parent info must be specified as
+ * an argument and is obtained from the corresponding call to enterNameScope.
+ * The child position is advanced past the scope just left. */
+void ParseData::popNameScope( const NameFrame &frame )
+{
+    /* Pop the name scope. */
+    curNameInst = frame.prevNameInst;
+    curNameChild = frame.prevNameChild+1;
+    localNameScope = frame.prevLocalScope;
+}
+
+/* Like popNameScope, but the child position is restored unchanged, not
+ * advanced. */
+void ParseData::resetNameScope( const NameFrame &frame )
+{
+    curNameInst = frame.prevNameInst;
+    curNameChild = frame.prevNameChild;
+    localNameScope = frame.prevLocalScope;
+}
+
+
+/* Record one use of every name referenced from the current scope and unset
+ * the graph entry of any name whose uses have reached its reference count. */
+void ParseData::unsetObsoleteEntries( FsmGraph *graph )
+{
+    /* Loop the reference names and increment the usage. Names that are no
+     * longer needed will be unset in graph. */
+    for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
+        /* Get the name. */
+        NameInst *name = *ref;
+        name->numUses += 1;
+
+        /* If the name is no longer needed unset its corresponding entry. */
+        if ( name->numUses == name->numRefs ) {
+            assert( graph->entryPoints.find( name->id ) != 0 );
+            graph->unsetEntry( name->id );
+        }
+    }
+}
+
+/* Collect every name instance called data that can be reached from refFrom.
+ * The search is breadth-first over the child vectors. When recLabelsOnly is
+ * set, only children that are labels are descended into. */
+NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
+{
+    /* Queue needed for breadth-first search, load it with the start node. */
+    NameInstList nameQueue;
+    nameQueue.append( refFrom );
+
+    NameSet result;
+    while ( nameQueue.length() > 0 ) {
+        /* Pull the next from location off the queue. */
+        NameInst *from = nameQueue.detachFirst();
+
+        /* Look for the name. */
+        NameMapEl *low, *high;
+        if ( from->children.findMulti( data, low, high ) ) {
+            /* Record all instances of the name. */
+            for ( ; low <= high; low++ )
+                result.insert( low->value );
+        }
+
+        /* Whether or not the name was found here, continue the breadth-first
+         * search by queueing the children. */
+        for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
+            if ( !recLabelsOnly || (*name)->isLabel )
+                nameQueue.append( *name );
+        }
+    }
+
+    /* Queue exhausted. The result holds every match found, possibly none. */
+    return result;
+}
+
+/* Resolve nameRef from component namePos onward, searching from refFrom, and
+ * add every name instance the full reference can denote to result. */
+void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
+        const NameRef &nameRef, int namePos )
+{
+    /* Look for the name in the owning scope of the factor with aug. */
+    NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
+
+    /* If there are more parts to the name then continue on. */
+    if ( ++namePos < nameRef.length() ) {
+        /* There are more components to the name, search using all the part
+         * results as the base. */
+        for ( NameSet::Iter name = partResult; name.lte(); name++ )
+            resolveFrom( result, *name, nameRef, namePos );
+    }
+    else {
+        /* This is the last component, append the part results to the final
+         * results. */
+        result.insert( partResult );
+    }
+}
+
+/* Write out a token's data. */
+ostream &operator<<( ostream &out, const Token &token )
+{
+    out << token.data;
+    return out;
+}
+
+/* Write out a name reference, components separated by "::". A null first
+ * component is written as a leading "::". */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+    int pos = 0;
+    if ( nameRef[pos] == 0 ) {
+        out << "::";
+        pos += 1;
+    }
+    out << nameRef[pos++];
+    for ( ; pos < nameRef.length(); pos++ )
+        out << "::" << nameRef[pos];
+    return out;
+}
+
+/* Write out the qualified name of a name instance: each ancestor below the
+ * root, then the instance itself, every part preceded by "::".
+ * NOTE(review): the fallback for a null name is an empty literal in this
+ * copy. Confirm against upstream that no placeholder text was lost. */
+ostream &operator<<( ostream &out, const NameInst &nameInst )
+{
+    /* Count the number fully qualified name parts. */
+    int numParents = 0;
+    NameInst *curParent = nameInst.parent;
+    while ( curParent != 0 ) {
+        numParents += 1;
+        curParent = curParent->parent;
+    }
+
+    /* Make an array and fill it in, outermost ancestor first. */
+    curParent = nameInst.parent;
+    NameInst **parents = new NameInst*[numParents];
+    for ( int p = numParents-1; p >= 0; p-- ) {
+        parents[p] = curParent;
+        curParent = curParent->parent;
+    }
+
+    /* Write the parents out, skip the root. */
+    for ( int p = 1; p < numParents; p++ )
+        out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "" );
+
+    /* Write the name and cleanup. */
+    out << "::" << ( nameInst.name != 0 ? nameInst.name : "" );
+    delete[] parents;
+    return out;
+}
+
+/* Orders name instances by source location: line first, then column. */
+struct CmpNameInstLoc
+{
+    static int compare( const NameInst *ni1, const NameInst *ni2 )
+    {
+        if ( ni1->loc.line < ni2->loc.line )
+            return -1;
+        else if ( ni1->loc.line > ni2->loc.line )
+            return 1;
+        else if ( ni1->loc.col < ni2->loc.col )
+            return -1;
+        else if ( ni1->loc.col > ni2->loc.col )
+            return 1;
+        return 0;
+    }
+};
+
+/* Report each resolved name as an error line, in source order. Note that
+ * the set's storage is sorted in place. */
+void errorStateLabels( const NameSet &resolved )
+{
+    /* The template arguments were missing from this copy of the patch. They
+     * are restored from the comparator defined just above. */
+    MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
+    mergeSort.sort( resolved.data, resolved.length() );
+    for ( NameSet::Iter res = resolved; res.lte(); res++ )
+        error((*res)->loc) << "  -> " << **res << endl;
+}
+
+
+/* Add a reference to every direct child of rootName. */
+void ParseData::referenceRegions( NameInst *rootName )
+{
+    for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) {
+        /* Inc the reference in the name. This will cause the entry point to
+         * survive to the end of the graph generating walk. */
+        (*inst)->numRefs += 1;
+    }
+}
+
+/* Walk a name tree starting at from and fill the name index. */
+void ParseData::fillNameIndex( NameInst **nameIndex, NameInst *from )
+{
+    /* Fill the value for from in the name index. */
+    nameIndex[from->id] = from;
+
+    /* Recurse on the implicit final state and then all children. */
+    if ( from->final != 0 )
+        fillNameIndex( nameIndex, from->final );
+    for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
+        fillNameIndex( nameIndex, *name );
+}
+
+/* Build an array mapping name id to name instance for the tree at rootName.
+ * The array has nextNameId+1 slots, zero filled before the walk, so the last
+ * slot acts as a null terminator. The caller owns the array. */
+NameInst **ParseData::makeNameIndex( NameInst *rootName )
+{
+    /* The number of nodes in the tree can now be given by nextNameId. Put a
+     * null pointer on the end of the list to terminate it. */
+    NameInst **nameIndex = new NameInst*[nextNameId+1];
+    memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) );
+    fillNameIndex( nameIndex, rootName );
+    return nameIndex;
+}
+
+/* Wrap a builtin machine in the expression/join/var-def chain and register
+ * it under name in the root namespace's graph dictionary. */
+void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
+{
+    Expression *expression = new Expression( builtin );
+    Join *join = new Join( expression );
+    JoinOrLm *joinOrLm = new JoinOrLm( join );
+    VarDef *varDef = new VarDef( name, joinOrLm );
+    GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
+    rootNamespace->graphDict.insert( graphDictEl );
+}
+
+/* Initialize the graph dict with builtin types.
*/ +void ParseData::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( char *s1, char *s2 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 != 0 && + strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( char *s1 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void ParseData::initKeyOps( ) +{ + /* Signedness and bounds. */ + HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. 
*/ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } + + /* Condition keys start just above the largest alphabet key. */ thisCondData.nextCondKey = thisKeyOps.maxKey; + thisCondData.nextCondKey.increment(); +} + +/* Print nameInst (name, id and reference count) to cerr, indented by level, then its children one level deeper. */ void ParseData::printNameInst( NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + cerr << " "; + cerr << (nameInst->name != 0 ? nameInst->name : "") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( *name, level+1 ); +} + +/* Remove duplicates of unique actions from an action table. The first occurrence of each action value is kept. */ +void ParseData::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. The index r only advances when nothing was removed (presumably vremove shifts the later entries down; confirm against the table implementation). */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates on the transition, + * to-state, from-state and eof action tables and so should be called once all actions have been + * transferred to their final resting place. */ +void ParseData::removeActionDups( FsmGraph *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +/* Create an action with the given name and inline list, located at line 1, column 1, append it to actionList and return it. */ Action *ParseData::newAction( const String &name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + + Action *action = new Action( loc, name, inlineList ); + actionList.append( action ); + return action; +} + +/* When at least one region exists, create the three longest-match helper actions (initact, tokstart, tokend) and reserve an ordering number for each. */ void ParseData::initLongestMatchData() +{ + if ( regionList.length() > 0 ) { + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "tokstart", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "tokend", il3 ); + setTokEnd->isLmAction = true; + + /* The actions will also need an ordering: ahead of all user action + * embeddings. */ + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +/* Final clean-up applied to a graph once its construction walk is done: entry points, error actions, duplicate actions, unreachable states, then minimization. */ void ParseData::finishGraphBuild( FsmGraph *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer global error actions. 
*/ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. */ + graph->clearAllPriorities(); + + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + graph->minimizePartition2(); + graph->compressTransitions(); +} + +/* Dump the name tree to cerr. Only the children of rootName are printed, not the root itself. */ void ParseData::printNameTree( NameInst *rootName ) +{ + /* Print the name instance map. */ + cerr << "name tree:" << endl; + for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) + printNameInst( *name, 0 ); +} + +/* Dump a name index to cerr, one entry per line. */ void ParseData::printNameIndex( NameInst **nameIndex ) +{ + /* The name index is terminated with a null pointer. */ + cerr << "name index:" << endl; + for ( int ni = 0; nameIndex[ni]; ni++ ) { + cerr << ni << ": "; + char *name = nameIndex[ni]->name; + cerr << ( name != 0 ? name : "" ) << endl; + } +} + +/* Build the name tree for a single join. Name ids restart at zero and the new root (id 0) is returned. */ +NameInst *ParseData::makeJoinNameTree( Join *join ) +{ + /* Create the root name. */ + nextNameId = 0; + NameInst *rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); + + /* Make the name tree. */ + initNameWalk( rootName ); + join->makeNameTree( this ); + + return rootName; +} + + +/* Build the name tree for every entry of instanceList. Name ids restart at zero and the new root (id 0) is returned. */ +NameInst *ParseData::makeNameTree() +{ + /* Create the root name. 
*/ + nextNameId = 0; + NameInst *rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); + + /* First make the name tree. */ + initNameWalk( rootName ); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + + return rootName; +} + + +/* Build a finished graph for a single join: name tree, name index, name resolution, graph walk, then finishGraphBuild. The name tree and index are attached to the returned graph. */ FsmGraph *ParseData::makeJoin( Join *join ) +{ + /* Build the name tree and supporting data structures. */ + NameInst *rootName = makeJoinNameTree( join ); + NameInst **nameIndex = makeNameIndex( rootName ); + + /* Resolve name references in the tree. */ + initNameWalk( rootName ); + join->resolveNameRefs( this ); + + /* Restart the name walk ahead of building the graph. */ + initNameWalk( rootName ); + + /* Build the graph from a walk of the parse tree. */ + FsmGraph *newGraph = join->walk( this ); + + /* Wrap up the construction. */ + finishGraphBuild( newGraph ); + + newGraph->rootName = rootName; + newGraph->nameIndex = nameIndex; + + return newGraph; +} + +/* Build one finished graph per entry of instanceList and combine them into a single graph, which is returned with the name tree and index attached. */ FsmGraph *ParseData::makeAllRegions() +{ + /* Build the name tree and supporting data structures. */ + NameInst *rootName = makeNameTree( ); + NameInst **nameIndex = makeNameIndex( rootName ); + + /* Resolve name references in the tree. */ + initNameWalk( rootName ); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) + glel->value->resolveNameRefs( this ); + + /* Resolve the implicit name references to the nfa instantiations. */ + referenceRegions( rootName ); + + int numGraphs = 0; + FsmGraph **graphs = new FsmGraph*[instanceList.length()]; + + /* Make all the instantiations, we know that main exists in this list. */ + initNameWalk( rootName ); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Build the graph from a walk of the parse tree. */ + FsmGraph *newGraph = glel->value->walk( this ); + + /* Wrap up the construction. */ + finishGraphBuild( newGraph ); + + /* Save off the new graph. 
*/ + graphs[numGraphs++] = newGraph; + } + + /* NOTE: If putting in minimization here we need to include eofTarget + * into the minimization algorithm. It is currently set by the longest + * match operator and not considered anywhere else. */ + + /* Add all the other graphs into the first. This reads graphs[0] unconditionally, so instanceList must not be empty here. The temporary array is freed; the combined graph lives on in all. */ + FsmGraph *all = graphs[0]; + all->globOp( graphs+1, numGraphs-1 ); + delete[] graphs; + + /* Go through all the token regions and check for lmRequiresErrorState. */ + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + if ( reg->lmSwitchHandlesError ) + all->lmRequiresErrorState = true; + } + + all->rootName = rootName; + all->nameIndex = nameIndex; + + return all; +} + +/* Walk an action's inline list, recursing into the actions of longest-match tokens and into child lists. As written the walk records nothing: the only statement that would (anyCall) is commented out. */ void ParseData::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + // action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + TokenRegion *lm = item->tokenRegion; + for ( TokenDefList::Iter lmi = lm->tokenDefList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + TokenDef *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + +void ParseData::analyzeGraph( FsmGraph *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. 
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + /* Count the references made from each of the per-state action tables. */ for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + /* Count uses as a condition. */ for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) + (*sci)->numCondRefs += 1; + } + } +} + +/* Build the scanner graph: from the single join when one is given, otherwise from all regions. Returns null if any errors were counted; otherwise counts action references, adds an error state when needed and numbers the states. */ FsmGraph *ParseData::makeFsmGraph( Join *join ) +{ + /* Make the graph, do minimization. */ + FsmGraph *fsmGraph = join != 0 ? makeJoin( join ) : makeAllRegions(); + + /* If any errors have occurred in the input file then give back no graph. */ + if ( gblErrorCount > 0 ) + return 0; + + analyzeGraph( fsmGraph ); + + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) + fsmGraph->errState = fsmGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + fsmGraph->depthFirstOrdering(); + fsmGraph->sortStatesByFinal(); + fsmGraph->setStateNumbers( 0 ); + + return fsmGraph; +} + +/* Create the fallback namespace, region and single-character token that are used when no other scanner applies. */ void ParseData::createDefaultScanner() +{ + InputLoc loc; + + const char *name = "___DEFAULT_SCANNER"; + + /* Create the default namespace. 
*/ + defaultNamespace = new Namespace( InputLoc(), name, + namespaceList.length(), 0 ); + namespaceList.append( defaultNamespace ); + + /* Create a scanner which will be used when no other scanner can be + * figured out. It returns single characters. */ + defaultRegion = new TokenRegion( InputLoc(), name, + regionList.length(), 0 ); + regionList.append( defaultRegion ); + JoinOrLm *joinOrLm = new JoinOrLm( defaultRegion ); + + /* Insert the machine definition into the graph dictionary. The name is expected to be unused, hence the assert on the insert result. */ + GraphDictEl *newEl = rootNamespace->graphDict.insert( name ); + assert( newEl != 0 ); + newEl->value = new VarDef( name, joinOrLm ); + newEl->isInstance = true; + instanceList.append( newEl ); + + /* Now create the one and only token -> "" / any / */ + name = "___DEFAULT_SCANNER_CHR"; + defaultCharKlangEl = getKlangEl( this, defaultNamespace, + name, KlangEl::Term ); + assert( defaultCharKlangEl != 0 ); + assert( defaultCharKlangEl->type == KlangEl::Term ); + + Join *join = new Join( new Expression( BT_Any ) ); + + /* NOTE(review): the language element above is created in defaultNamespace while the token definition is given rootNamespace; confirm that this is intended. */ TokenDef *tokenDef = new TokenDef( join, defaultCharKlangEl, loc, + nextTokenId++, rootNamespace, defaultRegion ); + defaultRegion->tokenDefList.append( tokenDef ); + defaultCharKlangEl->tokenDef = tokenDef; +} + +/* Resolve a literal factor: interpret the literal text, look it up in the literal dictionary of the qualifying namespace and link the factor to the token found. */ void ParseData::resolveLiteralFactor( PdaFactor *fact ) +{ + /* Interpret escape sequences and remove quotes. */ + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, fact->literal->token.data, + fact->literal->token.loc ); + + //cerr << "resolving literal: " << fact->literal->token << endl; + + /* Look for the production's associated region. 
*/ + Namespace *nspace = fact->nspaceQual->getQual( this ); + + if ( nspace == 0 ) + error(fact->loc) << "do not have region for resolving literal" << endp; + + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + if ( ldel == 0 ) + cerr << "could not resolve literal: " << fact->literal->token << endp; + + TokenDef *tokenDef = ldel->value->tokenDef; + fact->langEl = tokenDef->token; +} + +void ParseData::resolveReferenceFactor( PdaFactor *fact ) +{ + /* Look for the production's associated region. */ + Namespace *nspace = fact->nspaceQual->getQual( this ); + + if ( nspace == 0 ) + error(fact->loc) << "do not have namespace for resolving reference" << endp; + + fact->nspace = nspace; + + /* Look up the language element in the region. */ + KlangEl *langEl = getKlangEl( this, nspace, fact->refName, KlangEl::Unknown ); + + if ( fact->opt ) { + /* If the factor is an opt, create the opt element and link the factor + * to it. */ + String optName( 32, "_opt_%s", fact->refName.data ); + + SymbolMapEl *inDict = nspace->symbolMap.find( optName ); + if ( inDict != 0 ) { + fact->langEl = inDict->value; + } + else { + KlangEl *prodName = getKlangEl( this, nspace, optName, KlangEl::NonTerm ); + prodName->type = KlangEl::NonTerm; + prodName->isOpt = true; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat. */ + PdaFactor *factor1 = new PdaFactor( InputLoc(), false, fact->nspaceQual, + fact->refName, 0, false, false ); + prodElList1->append( factor1 ); + + Definition *newDef1 = new Definition( InputLoc(), + prodName, prodElList1, false, 0, + prodList.length(), Definition::Production ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat. 
*/ + ProdElList *prodElList2 = new ProdElList; + + Definition *newDef2 = new Definition( InputLoc(), + prodName, prodElList2, false, 0, + prodList.length(), Definition::Production ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + fact->langEl = prodName; + } + } + else if ( fact->repeat ) { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. An element already present in the symbol map under the generated name is reused. */ + String repeatName( 32, "_repeat_%s", fact->refName.data ); + + SymbolMapEl *inDict = nspace->symbolMap.find( repeatName ); + if ( inDict != 0 ) { + fact->langEl = inDict->value; + } + else { + KlangEl *prodName = getKlangEl( this, nspace, + repeatName, KlangEl::NonTerm ); + prodName->type = KlangEl::NonTerm; + prodName->isRepeat = true; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat: the referenced element followed by the repeat element itself. */ + PdaFactor *factor1 = new PdaFactor( InputLoc(), false, fact->nspaceQual, + fact->refName, 0, false, false ); + PdaFactor *factor2 = new PdaFactor( InputLoc(), false, fact->nspaceQual, + repeatName, 0, false, false ); + + prodElList1->append( factor1 ); + prodElList1->append( factor2 ); + + Definition *newDef1 = new Definition( InputLoc(), + prodName, prodElList1, false, 0, + prodList.length(), Definition::Production ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat: an empty element list. */ + ProdElList *prodElList2 = new ProdElList; + + Definition *newDef2 = new Definition( InputLoc(), + prodName, prodElList2, false, 0, + prodList.length(), Definition::Production ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + fact->langEl = prodName; + } + } + else { + /* The factor is not a repeat. Link to the language element. 
*/ + fact->langEl = langEl; + } +} + +/* Dispatch on the factor type to the literal or the reference resolver. */ void ParseData::resolveFactor( PdaFactor *fact ) +{ + switch ( fact->type ) { + case PdaFactor::LiteralType: + resolveLiteralFactor( fact ); + break; + case PdaFactor::ReferenceType: + resolveReferenceFactor( fact ); + break; + } +} + +/* Resolve every factor of every production in prodList. */ void ParseData::resolveProductionEls() +{ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ ) + resolveFactor( fact ); + } +} + +/* Return the direct child namespace with the given name, or null if there is none. */ Namespace *Namespace::findNamespace( const String &name ) +{ + for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { + if ( strcmp( name, (*c)->name ) == 0 ) + return *c; + } + return 0; +} + +/* Search from a previously resolved qualification. (name 1+ in a qual list). Returns null as soon as one part has no matching child; qualPart is advanced as parts are consumed. */ +Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) +{ + /* While there are still parts in the qualification. */ + while ( qualPart.lte() ) { + Namespace *child = from->findNamespace( *qualPart ); + if ( child == 0 ) + return 0; + + from = child; + qualPart.increment(); + } + + return from; +} + +/* Resolve this qualification to a namespace and cache the result. Returns null when it cannot be resolved; a null result is not remembered, so the search is repeated on the next call. */ Namespace *NamespaceQual::getQual( ParseData *pd ) +{ + /* Do the search only once (when it succeeded). */ + if ( cachedNspaceQual != 0 ) + return cachedNspaceQual; + + if ( qualNames.length() == 0 ) { + /* No qualification, use the region the qualification was + * declared in. */ + cachedNspaceQual = declInNspace; + } + else if ( strcmp( qualNames[0], "root" ) == 0 ) { + /* First item is "root." Start the downward search from there. */ + StringVect::Iter qualPart = qualNames; + qualPart.increment(); + cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); + return cachedNspaceQual; + } + else { + /* Have a qualification. Move upwards through the declared + * regions looking for the first part. */ + StringVect::Iter qualPart = qualNames; + Namespace *parentNamespace = declInNspace; + while ( parentNamespace != 0 ) { + /* Search for the first part underneath the current parent. 
*/ + Namespace *child = parentNamespace->findNamespace( *qualPart ); + + if ( child != 0 ) { + /* Found the first part. Start going below the result. The outcome is final even when the remaining parts fail to resolve: no further parents are tried. */ + qualPart.increment(); + cachedNspaceQual = searchFrom( child, qualPart ); + return cachedNspaceQual; + } + + /* Not found, move up to the parent. */ + parentNamespace = parentNamespace->parentNamespace; + } + + /* Failed to find the place to start from. */ + cachedNspaceQual = 0; + } + + return cachedNspaceQual; +} + +/* Resolve the factors that appear in patterns; text items need no work. */ void ParseData::resolvePatternEls() +{ + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + for ( PatternItemList::Iter item = *pat->list; item.lte(); item++ ) { + switch ( item->type ) { + case PatternItem::FactorType: + /* Use pdaFactor reference resolving. */ + resolveFactor( item->factor ); + break; + case PatternItem::InputText: + /* Nothing to do here. */ + break; + } + } + } +} + +/* Resolve the factors that appear in replacements; text and variable-reference items need no work. */ void ParseData::resolveReplacementEls() +{ + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + for ( ReplItemList::Iter item = *repl->list; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: + /* Use pdaFactor reference resolving. 
*/ + resolveFactor( item->factor ); + break; + case ReplItem::InputText: + case ReplItem::VarRefType: + /* Nothing to do here. */ break; + } + } + } +} + +/* Give every region that has no token definitions a single default token matching any character. */ void ParseData::initEmptyScanners() +{ + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + if ( reg->tokenDefList.length() == 0 ) { + InputLoc loc; + String name( reg->name.length() + 16, "__%s_DEF_PAT", reg->name.data ); + + KlangEl *lel = getKlangEl( this, rootNamespace, + name.data, KlangEl::Term ); + assert( lel != 0 ); + assert( lel->type == KlangEl::Term ); + + Join *join = new Join( new Expression( BT_Any ) ); + + TokenDef *tokenDef = new TokenDef( join, lel, loc, nextTokenId++, + rootNamespace, reg ); + reg->tokenDefList.append( tokenDef ); + lel->tokenDef = tokenDef; + } + } +} + + +/* + * Pattern + */ + +/* Start reading at the first item of the pattern. */ InputStreamPattern::InputStreamPattern( Pattern *pattern ) +: + pattern(pattern), + patItem(pattern->list->head), + offset(0), + flush(false) +{} + +/* True when the current item is a factor (a language element) rather than text. */ int InputStreamPattern::isLangEl() +{ + return patItem != 0 && patItem->type == PatternItem::FactorType; +} + +/* True at the end of the pattern or when the current item is a factor. */ int InputStreamPattern::shouldFlush() +{ + return patItem == 0 || patItem->type == PatternItem::FactorType; +} + +/* Return the language element of the current factor item and advance past it. bindId receives the item's bind id; data and length are always cleared. */ KlangEl *InputStreamPattern::getLangEl( long &bindId, char *&data, long &length ) +{ + KlangEl *klangEl = patItem->factor->langEl; + bindId = patItem->bindId; + data = 0; + length = 0; + + patItem = patItem->next; + offset = 0; + flush = false; + return klangEl; +} + + +/* Copy up to length bytes of the current text item into dest and return the number copied. Never reads across an item boundary; moves to the next item once the text is used up. */ int InputStreamPattern::getData( char *dest, int length ) +{ + assert ( patItem->type == PatternItem::InputText ); + int available = patItem->data.length() - offset; + + if ( available < length ) + length = available; + + memcpy( dest, patItem->data.data+offset, length ); + offset += length; + + if ( offset == patItem->data.length() ) { + /* Read up to the end of the data. Advance the + * pattern item. */ + patItem = patItem->next; + offset = 0; + flush = shouldFlush(); + } + else { + /* There is more data in this buffer. Don't flush. 
*/ + flush = false; + } + return length; +} + +/* True once every item of the pattern has been consumed. */ int InputStreamPattern::isEOF() +{ + return patItem == 0; +} + +int InputStreamPattern::needFlush() +{ + return flush; +} + + +/* Step back one item. From the end-of-pattern position (null item) this moves to the tail of the list. */ void InputStreamPattern::backup() +{ + if ( patItem == 0 ) + patItem = pattern->list->tail; + else + patItem = patItem->prev; +} + +/* Return length bytes to the stream, stepping back across item boundaries as needed. The asserts check the bytes handed back against the item text. */ void InputStreamPattern::pushBack( char *data, long length ) +{ + if ( length == 0 ) + return; + + /* While pushing back past the current pattern item start. */ + while ( length > offset ) { + length -= offset; + /* patItem is null at the end-of-pattern position, where offset is zero; the guard keeps the check from touching it. */ if ( offset > 0 ) + assert( memcmp( patItem->data, data-length, offset ) == 0 ); + backup(); + offset = patItem->data.length(); + } + + offset -= length; + assert( memcmp( &patItem->data[offset], data, length ) == 0 ); +} + +/* Step back over one whole item and position at the end of its data. */ void InputStreamPattern::pushBackNamed() +{ + backup(); + offset = patItem->data.length(); +} + + +/* + * Replacement + */ + +/* Start reading at the first item of the replacement. */ InputStreamRepl::InputStreamRepl( Replacement *replacement ) +: + replacement(replacement), + replItem(replacement->list->head), + offset(0), + flush(false) +{} + +/* True when the current item is a variable reference or a factor rather than text. */ int InputStreamRepl::isLangEl() +{ + return replItem != 0 && + ( replItem->type == ReplItem::VarRefType || replItem->type == ReplItem::FactorType ); +} + +/* True at the end of the replacement or when the current item is a variable reference or a factor. */ int InputStreamRepl::shouldFlush() +{ + return replItem == 0 || + ( replItem->type == ReplItem::VarRefType || replItem->type == ReplItem::FactorType ); +} + +/* Return the language element of the current item and advance past it. */ KlangEl *InputStreamRepl::getLangEl( long &bindId, char *&data, long &length ) +{ + KlangEl *klangEl = replItem->type == ReplItem::VarRefType ? 
+ replItem->langEl : replItem->factor->langEl; + bindId = replItem->bindId; + + data = 0; + length = 0; + + if ( replItem->type == ReplItem::FactorType ) { + if ( replItem->factor->literal != 0 ) { + bool unusedCI; + prepareLitString( replItem->data, unusedCI, + replItem->factor->literal->token.data, + replItem->factor->literal->token.loc ); + + data = replItem->data; + length = replItem->data.length(); + } + } + + replItem = replItem->next; + offset = 0; + flush = false; + return klangEl; +} + +int InputStreamRepl::getData( char *dest, int length ) +{ + assert ( replItem->type == ReplItem::InputText ); + int available = replItem->data.length() - offset; + + if ( available < length ) + length = available; + + memcpy( dest, replItem->data.data+offset, length ); + offset += length; + + if ( offset == replItem->data.length() ) { + /* Read up to the end of the data. Advance the + * replacement item. */ + replItem = replItem->next; + offset = 0; + flush = shouldFlush(); + } + else { + /* There is more data in this buffer. Don't flush. */ + flush = false; + } + return length; +} + +int InputStreamRepl::isEOF() +{ + return replItem == 0; +} + +int InputStreamRepl::needFlush() +{ + return flush; +} + +void InputStreamRepl::backup() +{ + if ( replItem == 0 ) + replItem = replacement->list->tail; + else + replItem = replItem->prev; +} + +void InputStreamRepl::pushBack( char *data, long length ) +{ + if ( length == 0 ) + return; + + /* While pushing back past the current pattern item start. 
*/ + while ( length > offset ) { + length -= offset; + assert( memcmp( replItem->data, data-length, offset ) == 0 ); + backup(); + offset = replItem->data.length(); + } + + offset -= length; + assert( memcmp( &replItem->data[offset], data, length ) == 0 ); +} + +void InputStreamRepl::pushBackNamed() +{ + backup(); + offset = replItem->data.length(); +} + + +void ParseData::makePatternParsers() +{ + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + /* We assume the reduction action compilation phase was run before + * pattern parsing and it decorated the pattern with the target type. */ + assert( pat->langEl != 0 ); + if ( pat->langEl->type != KlangEl::NonTerm ) + error(pat->loc) << "pattern type is not a non-terminal" << endp; + + /* Make a parser for the language element. */ + makeParser( pat->langEl ); + } + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + /* We assume the reduction action compilation phase was run before + * replacement parsing decorated the replacement with the target type. */ + assert( repl->langEl != 0 ); + + /* Make a parser for the language element. 
*/ + makeParser( repl->langEl ); + } +} + +/* Run a parser over every replacement and then every pattern, keeping the PdaRun on each, and finally call fillInPatterns. All runs share one FsmRun and one Program. */ void ParseData::parsePatterns() +{ + FsmRun fsmRun( runtimeData->fsmTables ); + Program program( false, runtimeData ); + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + //cerr << "parsing replacement: " << repl->data << endl; + InputStreamRepl in( repl ); + fsmRun.attachInputStream( &in ); + + repl->pdaRun = new PdaRun( 0, &program, repl->langEl->pdaTables, &fsmRun, 0 ); + repl->pdaRun->run(); + + //#ifdef COLM_LOG_COMPILE + //xml_print_list( runtimeData, repl->pdaRun->stackTop, 0 ); + //#endif + } + + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + //cerr << "parsing pattern: " << pat->data << endl; + InputStreamPattern in( pat ); + fsmRun.attachInputStream( &in ); + + pat->pdaRun = new PdaRun( 0, &program, pat->langEl->pdaTables, &fsmRun, 0 ); + pat->pdaRun->run(); + + //#ifdef COLM_LOG_COMPILE + //xml_print_list( runtimeData, pat->pdaRun->stackTop, 0 ); + //#endif + } + + fillInPatterns( &program ); +} + +/* Reject a grammar in which any state other than the one reached over the root definition has a transition on the EOF element. */ void ParseData::verifyParseStopGrammar( KlangEl *langEl ) +{ + PdaGraph *pdaGraph = langEl->pdaGraph; + + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. */ + PdaState *overStart = pdaGraph->followFsm( + pdaGraph->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on its own. It cannot depend on + * seeing EOF. */ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == eofKlangEl->id ) { + /* This needs a better error message. Appears to be voodoo. */ + error() << "grammar is not usable with parse_stop" << endp; + } + } + } +} + +/* For each language element that names another element to use, copy that element's object definition. */ void ParseData::resolveUses() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->objectDefUses != 0 ) { + /* Look for the production's associated region. 
*/ + Namespace *nspace = lel->objectDefUsesQual->getQual( this ); + + if ( nspace == 0 ) + error() << "do not have namespace for resolving reference" << endp; + + /* Look up the language element in the region. */ + KlangEl *langEl = getKlangEl( this, nspace, lel->objectDefUses, KlangEl::Unknown ); + lel->objectDef = langEl->objectDef; + } + } +} + +/* Top-level driver: resolve names, build the scanner, compile bytecode, build the parsers and runtime data, parse patterns and replacements, then write the output. */ void ParseData::semanticAnalysis() +{ + beginProcessing(); + initKeyOps(); + + /* Resolve uses statements. */ + resolveUses(); + + /* Init the longest match data and create the default scanner which will + * return single characters for us when we have no other scanner. */ + initLongestMatchData(); + createDefaultScanner(); + + /* This needs to happen before the scanner is built. */ + resolveProductionEls(); + + /* Resolve pattern and replacement elements. */ + resolvePatternEls(); + resolveReplacementEls(); + + /* Fill any empty scanners with a default token. */ + initEmptyScanners(); + + FsmGraph *fsmGraph = makeScanner(); + + #ifdef COLM_LOG_COMPILE + printNameTree( fsmGraph->rootName ); + printNameIndex( fsmGraph->nameIndex ); + #endif + + prepGrammar(); + + /* Compile bytecode. */ + compileByteCode(); + + /* Make the reduced fsm. */ + RedFsmBuild reduce( sectionName, this, fsmGraph ); + RedFsm *redFsm = reduce.reduceMachine(); + + /* Build the parsers used for patterns and replacements. */ + makePatternParsers(); + + /* Make parsers that we need: one for each language element that was given a parser id. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) { + makeParser( lel ); + + if ( lel->parseStop ) + verifyParseStopGrammar( lel ); + } + } + + /* Make the scanner tables. */ + fsmTables = redFsm->makeFsmTables(); + + /* Now that all parsers are built, make the global runtimeData. */ + makeRuntimeData(); + + /* + * All compilation is now complete. + */ + + /* Parse patterns and replacements. */ + parsePatterns(); + + /* + * Write output. 
+ */ + + openOutput(); + + FsmCodeGen *fsmGen = new FsmCodeGen("", sectionName, + *outStream, redFsm, fsmTables ); + + PdaCodeGen *pdaGen = new PdaCodeGen( outputFileName, "parser", this, *outStream ); + + pdaGen->writeFirst(); + fsmGen->writeCode(); + + /* Write the tables of each parser that was made. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) + pdaGen->writeParserData( lel->parserId, lel->pdaTables ); + } + + /* Write the runtime data. */ + pdaGen->writeRuntimeData( runtimeData ); + + /* The two code generators are not deleted in this function. */ outStream->flush(); +} diff --git a/colm/parsedata.h b/colm/parsedata.h new file mode 100644 index 00000000..801956f0 --- /dev/null +++ b/colm/parsedata.h @@ -0,0 +1,904 @@ +/* + * Copyright 2001-2006 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSEDATA_H +#define _PARSEDATA_H + +#include +#include +#include "bstset.h" +#include "colm.h" +#include "avlmap.h" +#include "avlset.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "dlistmel.h" +#include "fsmgraph.h" +#include "compare.h" +#include "vector.h" +#include "common.h" +#include "parsetree.h" +#include "astring.h" +#include "pdagraph.h" +#include "compare.h" +#include "pdarun.h" +#include "bytecode.h" + +using std::ostream; + +/* Forwards. 
*/ +struct RedFsm; +struct KlangEl; +struct ParseData; +struct PdaCodeGen; +struct FsmCodeGen; + +/* Action kinds held in the low two bits of a code built by makeReduceCode. */ #define SHIFT_CODE 0x1 +#define REDUCE_CODE 0x2 +#define SHIFT_REDUCE_CODE 0x3 + +/* Pack a reduction number and its action kind into one value: the low two bits hold REDUCE_CODE or SHIFT_REDUCE_CODE and the reduction number sits in the bits above them. */ inline long makeReduceCode( long reduction, bool isShiftReduce ) +{ + return ( isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE ) | + ( reduction << 2 ); +} + +struct PdaFactor; +struct ProdElList; +struct PdaLiteral; +struct Definition; + +/* Two independent sets of list links, so that a Definition can sit in two lists at once. */ struct DefListEl { Definition *prev, *next; }; +struct LelDefListEl { Definition *prev, *next; }; +typedef Vector< KlangEl* > KlangElVect; +typedef Vector< PdaFactor* > FactorVect; + +typedef AvlMap StringMap; +typedef AvlMapEl StringMapEl; + +/* A single production of a language element: the element it defines (prodName), its list of factors (prodElList), an optional reduction block and a numeric id. */ +struct Definition +: + public DefListEl, public LelDefListEl +{ + enum Type { Production }; + + Definition( const InputLoc &loc, KlangEl *prodName, ProdElList *prodElList, + bool prodCommit, CodeBlock *redBlock, int prodId, Type type ) : + loc(loc), prodName(prodName), prodElList(prodElList), + prodCommit(prodCommit), redBlock(redBlock), prodId(prodId), + type(type), fsm(0), fsmLength(0), uniqueEmptyLeader(0), + isLeftRec(false), localFrame(0), lhsField(0) {} + + InputLoc loc; + KlangEl *prodName; + ProdElList *prodElList; + bool prodCommit; + + CodeBlock *redBlock; + + int prodId; + Type type; + + PdaGraph *fsm; + int fsmLength; + String data; + LongSet reducesTo; + + KlangEl *uniqueEmptyLeader; + + ProdIdSet nonTermFirstSet; + AlphSet firstSet; + bool isLeftRec; + + ObjectDef *localFrame; + ObjField *lhsField; +}; + +/* Three-way comparison of definitions by production id. */ struct CmpDefById +{ + static int compare( Definition *d1, Definition *d2 ) + { + if ( d1->prodId < d2->prodId ) + return -1; + else if ( d1->prodId > d2->prodId ) + return 1; + else + return 0; + } +}; + + +/* Map dotItems to productions. */ +typedef BstMap< int, Definition*, CmpOrd > DotItemIndex; +typedef BstMapEl< int, Definition*> DotItemIndexEl; + +/* A vector of production vectors. Each non terminal can have many productions. 
 */
+typedef DListMel DefList;
+typedef DListMel LelDefList; /* NOTE(review): the template arguments of these list/vector typedefs look stripped in this copy -- confirm against the real header. */
+
+/* A set of machines made during a closure round. */
+typedef Vector< PdaGraph* > Machines;
+
+/* List of language elements. */
+typedef DList LelList;
+
+typedef Vector< TokenDef* > TokenDefVect;
+
+struct UniqueType;
+
+typedef Vector KlangElVect;
+
+/* A language element class. Can be a nonTerm or a term.
+ * (Type additionally has an Unknown value.) The constructor and destructor
+ * are only declared here. */
+struct KlangEl : public DListEl
+{
+	enum Type { Unknown, Term, NonTerm };
+
+	KlangEl( Namespace *nspace, const String &name, Type type );
+	~KlangEl();
+
+	/* The region the language element was defined in. */
+	Namespace *nspace;
+
+	String name;
+	String lit;
+
+	String fullName;
+	String fullLit;
+
+	Type type;
+	long id;
+	bool isUserTerm;
+	bool isContext;
+	String displayString;
+	long numAppearances;
+	bool commit;
+	bool ignore;
+	bool reduceFirst;
+	bool isLiteral;
+	bool isRepeat;
+	bool isOpt;
+	bool parseStop;
+
+	/* Productions from the language element if it is a non-terminal. */
+	LelDefList defList;
+
+	TokenDef *tokenDef;
+	Definition *rootDef;
+	KlangEl *termDup;
+
+	PdaGraph *pdaGraph;
+	PdaTables *pdaTables;
+
+	CodeBlock *transBlock;
+
+	ObjectDef *objectDef;
+	NamespaceQual *objectDefUsesQual;
+	String objectDefUses;
+
+	long thisSize;
+	long ofiOffset;
+
+	GenericType *generic;
+
+
+	long parserId;
+};
+/* A factor node: either a literal or a reference (see Type). It carries its
+ * own prev/next pointers, which none of the constructors initialise. */
+struct PdaFactor
+{
+	/* Language elements a factor node can be. */
+	enum Type {
+		LiteralType,
+		ReferenceType
+	};
+
+	/* Construct with a literal fsm. */
+	PdaFactor( const InputLoc &loc, bool commit, NamespaceQual *nspaceQual,
+			PdaLiteral *literal, int priorVal, bool opt, bool repeat ) :
+		loc(loc), commit(commit), nspaceQual(nspaceQual),
+		literal(literal), langEl(0), priorVal(priorVal), opt(opt), repeat(repeat),
+		nspace(0), type(LiteralType), objField(0) {}
+
+	/* Construct with a reference to a var def. */
+	PdaFactor( const InputLoc &loc, bool commit, NamespaceQual *nspaceQual,
+			const String &refName, int priorVal, bool opt, bool repeat ) :
+		loc(loc), commit(commit), nspaceQual(nspaceQual), refName(refName),
+		literal(0), langEl(0), priorVal(priorVal), opt(opt), repeat(repeat),
+		nspace(0), type(ReferenceType), objField(0) {}
+	/* Construct a reference-typed factor directly from a language element;
+	 * every other option is off or null. */
+	PdaFactor( const InputLoc &loc, KlangEl *langEl ) :
+		loc(loc), commit(false), nspaceQual(0), literal(0), langEl(langEl),
+		priorVal(0), opt(false), repeat(false), nspace(0), type(ReferenceType), objField(0) {}
+	/* Default: a literal-typed factor with all pointers null; loc is left to
+	 * its own default construction. */
+	PdaFactor() :
+		commit(false), nspaceQual(0),
+		literal(0), langEl(0), priorVal(0), opt(false), repeat(false),
+		nspace(0), type(LiteralType), objField(0) {}
+
+	InputLoc loc;
+	bool commit;
+	NamespaceQual *nspaceQual;
+	String refName;
+	PdaLiteral *literal;
+	KlangEl *langEl;
+	int priorVal;
+	bool opt;
+	bool repeat;
+	Namespace *nspace;
+	Type type;
+	ObjField *objField;
+
+	PdaFactor *prev, *next;
+};
+/* List of production elements. walk() is only declared here. */
+struct ProdElList : public DList
+{
+	PdaGraph *walk( ParseData *pd );
+};
+
+/* Some literal machine. Can be a number or literal string.
+ * The constructor stores loc and token and starts value at zero. */
+struct PdaLiteral
+{
+	PdaLiteral( const InputLoc &loc, const Token &token )
+		: loc(loc), token(token), value(0) { }
+
+	InputLoc loc;
+	Token token;
+	long value;
+};
+
+/* Forwards. */
+using std::ostream;
+
+/* Nodes in the tree that use this action. */
+typedef Vector ActionRefs;
+
+/* Element in list of actions. Contains the string for the code to execute.
*/ +struct Action +: + public DListEl, + public AvlTreeEl +{ +public: + + Action( const InputLoc &loc, const String &name, InlineList *inlineList ) + : + loc(loc), + name(name), + objField(0), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false) + { + } + + Action( MarkType markType, ObjField *objField ) + : + name("mark"), + markType(markType), + objField(objField), + inlineList(new InlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false) + { + } + + /* Key for action dictionary. */ + const String &getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + String name; + + MarkType markType; + ObjField *objField; + + InlineList *inlineList; + int actionId; + + void actionName( ostream &out ) + { + if ( name != 0 ) + out << name; + else + out << loc.line << ":" << loc.col; + } + + /* Places in the input text that reference the action. */ + ActionRefs actionRefs; + + /* Number of references in the final machine. */ + bool numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + bool anyCall; + + bool isLmAction; +}; + +/* A list of actions. */ +typedef DList ActionList; +typedef AvlTree ActionDict; + +struct VarDef; +struct Join; +struct Expression; +struct Term; +struct FactorWithAug; +struct FactorWithLabel; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Literal; +struct Range; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct TokenRegion; + +/* Priority name dictionary. */ +typedef AvlMapEl PriorDictEl; +typedef AvlMap PriorDict; + +/* Local error name dictionary. 
*/ +typedef AvlMapEl LocalErrDictEl; +typedef AvlMap LocalErrDict; + +/* Tree of instantiated names. */ +typedef BstMapEl NameMapEl; +typedef BstMap NameMap; +typedef Vector NameVect; +typedef BstSet NameSet; + +/* Node in the tree of instantiated names. */ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, const String &name, + int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qulified names. */ + NameInst *parent; + + String name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList NameInstList; + +/* Stack frame used in walking the name tree. */ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +/* Class to collect information about the machine during the + * parse of input. */ +struct ParseData +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. 
*/ + ParseData( const String &fileName, const String §ionName, + const InputLoc §ionLoc, ostream &out ); + ~ParseData(); + + /* + * Setting up the graph dict. + */ + + void compileLiteralTokens(); + void initEmptyScanners(); + void initUniqueTypes(); + + /* Initialize a graph dict with the basic fsms. */ + void initGraphDict(); + void createBuiltin( const char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel ); + NameInst *makeJoinNameTree( Join *join ); + NameInst *makeNameTree( ); + void fillNameIndex( NameInst **nameIndex, NameInst *from ); + NameInst **makeNameIndex( NameInst *rootName ); + + void printNameTree( NameInst *rootName ); + void printNameIndex( NameInst **nameIndex ); + + /* Increments the usage count on entry names. Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmGraph *graph ); + + /* Resove name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ); + void referenceRegions( NameInst *root ); + + /* Set the alphabet type. If type types are not valid returns false. */ + bool setAlphType( char *s1, char *s2 ); + bool setAlphType( char *s1 ); + + /* Unique actions. */ + void removeDups( ActionTable &actionTable ); + void removeActionDups( FsmGraph *graph ); + + /* Dumping the name instantiation tree. */ + void printNameInst( NameInst *nameInst, int level ); + + /* Make the graph from a graph dict node. Does minimization. 
*/ + void finishGraphBuild( FsmGraph *graph ); + FsmGraph *makeJoin( Join *join ); + FsmGraph *makeAllRegions(); + FsmGraph *makeFsmGraph( Join *join ); + FsmGraph *makeScanner() + { return makeFsmGraph(0); } + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmGraph *graph ); + + void initKeyOps(); + + /* + * Data collected during the parse. + */ + + /* The list of instances. */ + GraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* List of actions. Will be pasted into a switch statement. */ + ActionList actionList; + + /* The id of the next priority name and label. */ + int nextPriorKey, nextLocalErrKey, nextNameId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. */ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *userAlphType; + bool alphTypeSet; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + InlineList *curStateExpr; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + /* The name of the file the fsm is from, and the spec name. */ + String fileName; + String sectionName; + InputLoc sectionLoc; + + /* Number of errors encountered parsing the fsm spec. */ + int errorCount; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + /* Root of the name tree. */ + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. 
*/ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void initNameWalk( NameInst *rootName ); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + /* Counter for assigning ids to longest match items. */ + int nextTokenId; + + /* List of all longest match parse tree items. */ + RegionList regionList; + + NamespaceList namespaceList; + + Action *newAction( const String &name, InlineList *inlineList ); + + Action *setTokStart; + int setTokStartOrd; + + Action *initActId; + int initActIdOrd; + + Action *setTokEnd; + int setTokEndOrd; + + CodeBlock *rootCodeBlock; + + void beginProcessing() + { + ::condData = &thisCondData; + ::keyOps = &thisKeyOps; + } + + CondData thisCondData; + KeyOps thisKeyOps; + + /* CONTEXT FREE */ + void wrapNonTerminals(); + void makeDefinitionNames(); + void noUndefindKlangEls(); + void makeKlangElIds(); + void makeKlangElNames(); + + /* Parser generation. 
*/ + void advanceReductions( PdaGraph *pdaGraph ); + void sortActions( PdaGraph *pdaGraph ); + void addDupTerms( PdaGraph *pdaGraph ); + void linkExpansions( PdaGraph *pdaGraph ); + void lalr1FollowEpsilonOp( PdaGraph *pdaGraph ); + + void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId ); + + void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ); + void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ); + + void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior ); + void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ); + + void lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl ); + + void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, + PdaTrans *expandFrom, Definition *prod ); + void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ); + void lr0CloseAllStates( PdaGraph *pdaGraph ); + + void lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl ); + + void reduceActions( PdaGraph *pdaGraph ); + + bool makeNonTermFirstSetProd( Definition *prod, PdaState *state ); + void makeNonTermFirstSets(); + + bool makeFirstSetProd( Definition *prod, PdaState *state ); + void makeFirstSets(); + + void trySetTime( PdaTrans *trans, long code, long &time ); + void addRegion( PdaState *tabState, long pdaKey ); + PdaState *followProd( PdaState *tabState, PdaState *prodState ); + void findFollow( AlphSet &result, PdaState *overTab, + PdaState *overSrc, Definition *parentDef ); + void pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl ); + void pdaOrderFollow( KlangEl *rootEl, PdaState *tabState, + PdaTrans *tabTrans, PdaTrans *srcTrans, + Definition *parentDef, Definition *definition, long &time ); + void pdaOrderProd( KlangEl *rootEl, PdaState *tabState, + PdaState *srcState, Definition *parentDef, long &time ); + void analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl ); + + void makeProdFsms(); + void insertUniqueEmptyProductions(); 
+ void printNonTermFirstSets(); + void printFirstSets(); + + void resolveLiteralFactor( PdaFactor *fact ); + void resolveReferenceFactor( PdaFactor *fact ); + void resolveFactor( PdaFactor *fact ); + void resolveProductionEls(); + void resolvePatternEls(); + void resolveReplacementEls(); + + void addMatchText( ObjectDef *frame, KlangEl *lel ); + void addMatchLength( ObjectDef *frame, KlangEl *lel ); + void addTransTokVar( ObjectDef *frame, KlangEl *lel ); + void addProdRHSLoads( Definition *prod, long pos ); + void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ); + void addProdRedObjectVar( ObjectDef *localFrame, KlangEl *langEl ); + void addProdObjects(); + + void prepGrammar(); + + void makePatternParsers(); + void parsePatterns(); + + void makeParser( KlangEl *rootEl ); + PdaGraph *makePdaGraph( KlangEl *rootEl ); + PdaTables *makePdaTables( PdaGraph *pdaGraph ); + + void fillInPatterns( Program *prg ); + void makeRuntimeData(); + + /* Generate and write out the fsm. */ + void generateGraphviz(); + + void verifyParseStopGrammar( KlangEl *langEl ); + + void initFieldInstructions( ObjField *el ); + void initLocalInstructions( ObjField *el ); + void initLocalRefInstructions( ObjField *el ); + + void initMapFunctions( GenericType *gen ); + void initListField( GenericType *gen, const char *name, int offset ); + void initListFields( GenericType *gen ); + void initListFunctions( GenericType *gen ); + void initVectorFunctions( GenericType *gen ); + + void addStdin(); + void addStdout(); + void addStderr(); + void initGlobalFunctions(); + void makeDefaultIterators(); + void addLengthField( ObjectDef *objDef, Code getLength ); + ObjectDef *findObject( const String &name ); + void initAllLanguageObjects(); + void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); + void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); + void resolveElementOf( ObjectDef *obj ); + void makeFuncVisible( Function *func, 
bool isUserIter ); + void compileFunction( Function *func ); + void compileUserIter( Function *func ); + void compilePreEof( TokenRegion *region ); + void compileRootBlock(); + void compileTranslateBlock( KlangEl *langEl ); + void findLocalTrees( CharSet &trees ); + void compileReductionCode( Definition *prod ); + void resolveGenericTypes(); + void compileByteCode(); + + void resolveUses(); + void createDefaultScanner(); + void semanticAnalysis(); + + /* + * Data collected during the parse. + */ + + /* Dictionary of graphs. Both instances and non-instances go here. */ + LelList langEls; + + /* The list of instances. */ + DefList prodList; + + /* Dumping. */ + DotItemIndex dotItemIndex; + + /* The name of the file the fsm is from, and the spec name. */ + // EXISTS IN RL: char *fileName; + String parserName; + ostream &out; + // EXISTS IN RL: InputLoc sectionLoc; + + /* How to access the instance data. */ + String access; + + /* The name of the token structure. */ + String tokenStruct; + + GenericType *anyList; + GenericType *anyMap; + GenericType *anyVector; + + KlangEl *ptrKlangEl; + KlangEl *boolKlangEl; + KlangEl *intKlangEl; + KlangEl *strKlangEl; + KlangEl *streamKlangEl; + KlangEl *anyKlangEl; + KlangEl *rootKlangEl; + KlangEl *eofKlangEl; + KlangEl *errorKlangEl; + KlangEl *defaultCharKlangEl; + + TokenRegion *rootRegion; + TokenRegion *defaultRegion; + TokenRegion *eofTokenRegion; + + Namespace *defaultNamespace; + Namespace *rootNamespace; + + int nextSymbolId; + int firstNonTermId; + + KlangEl **langElIndex; + PdaState *actionDestState; + DefSetSet prodSetSet; + + Definition **prodIdIndex; + AlphSet literalSet; + + PatternList patternList; + ReplList replList; + + ObjectDef *globalObjectDef; + + VectorTypeIdMap vectorTypeIdMap; + ObjectDef *curLocalFrame; + + UniqueType *findUniqueType( int typeId ); + UniqueType *findUniqueType( int typeId, KlangEl *langEl ); + UniqueType *findUniqueType( int typeId, IterDef *iterDef ); + + UniqueType *uniqueTypeNil; + 
UniqueType *uniqueTypePtr; + UniqueType *uniqueTypeBool; + UniqueType *uniqueTypeInt; + UniqueType *uniqueTypeStr; + UniqueType *uniqueTypeStream; + UniqueType *uniqueTypeAny; + + UniqueTypeMap uniqeTypeMap; + + void initStrObject(); + void initStreamObject(); + void initIntObject(); + void initTokenObjects(); + + ObjectDef *intObj; + ObjectDef *strObj; + ObjectDef *streamObj; + ObjectDef *tokenObj; + + FsmTables *fsmTables; + RuntimeData *runtimeData; + + int nextPatReplId; + int nextGenericId; + + FunctionList functionList; + int nextFuncId; + + enum CompileContext { + CompileTranslation, + CompileReduction, + CompileFunction, + CompileRoot + }; + + CompileContext compileContext; + LongVect returnJumps; + LongVect breakJumps; + Function *curFunction; + + /* Loops fill this in for return statements to use. */ + CodeVect *loopCleanup; + + ObjField *makeDataEl(); + ObjField *makePosEl(); + + IterDef *findIterDef( IterDef::Type type, GenericType *generic ); + IterDef *findIterDef( IterDef::Type type, Function *func ); + IterDef *findIterDef( IterDef::Type type ); + IterDefSet iterDefSet; + + enum GeneratesType { GenToken, GenIgnore, GenCfl }; + + int nextObjectId; + GeneratesType generatesType; + bool generatesIgnore; + bool insideRegion; + String tokenDefName; + + StringMap literalStrings; + + long nextFrameId; + long nextParserId; + + ObjectDef *rootLocalFrame; + + long nextLabelId; + ObjFieldMap *objFieldMap; +}; + +void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true ); +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyChar( char c, ParseData *pd ); +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ); +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, ParseData *pd ); +FsmGraph *makeBuiltin( BuiltinMachine builtin, ParseData *pd 
); +FsmGraph *dotFsm( ParseData *pd ); +FsmGraph *dotStarFsm( ParseData *pd ); + +void errorStateLabels( const NameSet &locations ); + +struct Parser; + +typedef AvlMap ParserDict; +typedef AvlMapEl ParserDictEl; + +KlangEl *getKlangEl( ParseData *pd, Namespace *nspace, + const String &data, KlangEl::Type defType ); + +#endif /* _PARSEDATA_H */ diff --git a/colm/parsetree.cpp b/colm/parsetree.cpp new file mode 100644 index 00000000..86284867 --- /dev/null +++ b/colm/parsetree.cpp @@ -0,0 +1,2172 @@ +/* + * Copyright 2001-2006 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +/* Parsing. */ +#include "lmparse.h" +#include "parsetree.h" + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); +ostream &operator<<( ostream &out, const Token &token ); + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. 
Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain a + * literal string with \0 */ +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ) +{ + result.setAs( String::Fresh(), srcString.length() ); + caseInsensitive = false; + + char *src = srcString.data + 1; + char *end = srcString.data + srcString.length() - 1; + + while ( *end != '\'' && *end != '\"' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + error( loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + + char *dest = result.data; + int len = 0; + while ( src != end ) { + if ( *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[len++] = '\0'; break; + case 'a': dest[len++] = '\a'; break; + case 'b': dest[len++] = '\b'; break; + case 't': dest[len++] = '\t'; break; + case 'n': dest[len++] = '\n'; break; + case 'v': dest[len++] = '\v'; break; + case 'f': dest[len++] = '\f'; break; + case 'r': dest[len++] = '\r'; break; + case '\n': break; + default: dest[len++] = src[1]; break; + } + src += 2; + } + else { + dest[len++] = *src++; + } + } + + result.chop( len ); +} + +int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 ) +{ + if ( ut1.typeId < ut2.typeId ) + return -1; + else if ( ut1.typeId > ut2.typeId ) + return 1; + else if ( ut1.typeId == TYPE_TREE || + ut1.typeId == TYPE_PTR || + ut1.typeId == TYPE_REF ) + { + if ( ut1.langEl < ut2.langEl ) + return -1; + else if ( ut1.langEl > ut2.langEl ) + return 1; + } + else if ( ut1.typeId == TYPE_ITER ) { + if ( ut1.iterDef < ut2.iterDef ) + return -1; + else if ( ut1.iterDef > ut2.iterDef ) + return 1; + } + else { + /* Fail on anything unimplemented. */ + assert( false ); + } + + return 0; +} + + +FsmGraph *VarDef::walk( ParseData *pd ) +{ + /* We enter into a new name scope. 
*/ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmGraph *rtnVal = joinOrLm->walk( pd ); + + /* Do the tranfer of local error actions. */ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transisions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 ) + rtnVal->epsilonOp(); + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) +{ + /* The variable definition enters a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + if ( joinOrLm->type == JoinOrLm::LongestMatchType ) + pd->curNameInst->isLongestMatch = true; + + /* Recurse. */ + joinOrLm->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void VarDef::resolveNameRefs( ParseData *pd ) +{ + /* Entering into a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse. */ + joinOrLm->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + +InputLoc TokenDef::getLoc() +{ + return action != 0 ? 
action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *TokenRegion::newAction( ParseData *pd, const InputLoc &loc, + const String &name, InlineList *inlineList ) +{ + Action *action = new Action( loc, name, inlineList ); + pd->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void TokenRegion::makeActions( ParseData *pd ) +{ + /* Make actions that set the action id. */ + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last character. */ + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "imm%i", lmi->longestMatchId ); + lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). 
*/ + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "lagh%i", lmi->longestMatchId ); + lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + + /* Create the error action. */ + InlineList *il6 = new InlineList; + il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newAction( pd, loc, "lagsel", il6 ); +} + +void TokenRegion::findName( ParseData *pd ) +{ + NameInst *nameInst = pd->curNameInst; + while ( nameInst->name == 0 ) { + nameInst = nameInst->parent; + /* Since every machine must must have a name, we should always find a + * name for the longest match. */ + assert( nameInst != 0 ); + } + name = nameInst->name; +} + +void TokenRegion::makeNameTree( ParseData *pd ) +{ + /* Create an anonymous scope for the longest match. Will be used for + * restarting machine after matching a token. 
*/ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Save off the name inst into the token region. This is only legal for + * token regions because they are only ever referenced once (near the root + * of the name tree). They cannot have more than one corresponding name + * inst. */ + assert( regionNameInst == 0 ); + regionNameInst = pd->curNameInst; + + /* Recurse into all parts of the longest match operator. */ + for ( TokenDefList::Iter td = tokenDefList; td.lte(); td++ ) { + /* Watch out for patternless tokens. */ + if ( td->join != 0 ) + td->join->makeNameTree( pd ); + } + + /* Traverse the name tree upwards to find a name for this lm. */ + findName( pd ); + + /* Also make the longest match's actions at this point. */ + makeActions( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void TokenRegion::resolveNameRefs( ParseData *pd ) +{ + /* The longest match gets its own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Take an action reference for each longest match item and recurse. */ + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* Watch out for patternless tokens. */ + if ( lmi->join != 0 ) + lmi->join->resolveNameRefs( pd ); + } + + /* The name scope ends, pop the name instantiation. 
*/ + pd->popNameScope( nameFrame ); +} + +void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans ) +{ + FsmState *fromState = trans->fromState; + graph->detachTrans( fromState, trans->toState, trans ); + graph->attachTrans( fromState, graph->startState, trans ); +} + +void TokenRegion::runLongestMatch( ParseData *pd, FsmGraph *graph ) +{ + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( 0 ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* Transfer the first item of non-empty lmAction tables to the item sets + * of the states that follow. Exclude states that have no transitions out. + * This must happen on a separate pass so that on each iteration of the + * next pass we have the item set entries from all lmAction tables. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. */ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + } + } + } + } + + /* The lmItem sets are now filled, telling us which longest match rules + * can succeed in which states. First determine if we need to make sure + * act is defaulted to zero. 
*/ + int maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* The actions executed on starting to match a token. */ + graph->isolateStartState(); + graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + if ( maxItemSetLength > 1 ) { + /* The longest match action switch may be called when tokens are + * matched, in which case act must be initialized, there must be a + * case to handle the error, and the generated machine will require an + * error state. */ + lmSwitchHandlesError = true; + graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); + } + + /* The place to store transitions to restart. It maybe possible for the + * restarting to affect the searching through the graph that follows. For + * now take the safe route and save the list of transitions to restart + * until after all searching is done. */ + Vector restartTrans; + + /* Set actions that do immediate token recognition, set the longest match part + * id and set the token ending. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. 
*/ + trans->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartTrans.append( trans ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also Find the highest item set + * length reachable from here (excluding at transtions to + * final states). */ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->setActId ); + } + } + } + } + } + + /* Now that all graph searching is done it certainly safe set the + * restarting. It may be safe above, however this must be verified. */ + for ( Vector::Iter pt = restartTrans; pt.lte(); pt++ ) + restart( graph, *pt ); + + int lmErrActionOrd = pd->curActionOrd++; + + /* Embed the error for recognizing a char. 
*/ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { + if ( st->isFinState() ) { + /* On error execute the actOnNext action, which knows that + * the last character of the token was one back and restart. + * The same action and target are installed for eof. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actOnNext, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actOnNext ); + st->eofTarget = graph->startState; + } + else { + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actLagBehind, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actLagBehind ); + st->eofTarget = graph->startState; + } + } + else if ( st->lmItemSet.length() > 1 ) { + /* Need to use the select. Take note of which items the select + * is needed for so only the necessary actions are included. */ + for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { + if ( *plmi != 0 ) + (*plmi)->inLmSelect = true; + } + /* On error, execute the action select and go to the start state. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &lmActSelect, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); + st->eofTarget = graph->startState; + } + } + + /* Finally, the start state should be made final. */ + graph->setFinState( graph->startState ); +} +/* For every state with a non-empty out action table, pass that table to + * setErrorActions for the same state. */ +void TokenRegion::transferScannerLeavingActions( FsmGraph *graph ) +{ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->outActionTable.length() > 0 ) + graph->setErrorActions( st, st->outActionTable ); + } +} +/* Build the machine for the region: walk the pattern of each token, union + * the resulting machines and run the longest match construction on them. */ +FsmGraph *TokenRegion::walk( ParseData *pd ) +{ + /* The longest match has its own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Make each part of the longest match. 
*/ + int numParts = 0; + FsmGraph **parts = new FsmGraph*[tokenDefList.length()]; + for ( TokenDefList::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* Watch out for patternless tokens. */ + if ( lmi->join != 0 ) { + /* Create the machine and embed the setting of the longest match id. */ + parts[numParts] = lmi->join->walk( pd ); + parts[numParts]->longMatchAction( pd->curActionOrd++, lmi ); + + /* Look for tokens that accept the zero length-word. The first one found + * will be used as the default token. */ + if ( defaultTokenDef == 0 && parts[numParts]->startState->isFinState() ) + defaultTokenDef = lmi; + + numParts += 1; + } + } + FsmGraph *retFsm = parts[0]; + + if ( defaultTokenDef != 0 && defaultTokenDef->token->ignore ) + error() << "ignore token cannot be a scanner's zero-length token" << endp; + + /* The region is empty. Return the empty set. */ + if ( numParts == 0 ) { + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Before we union the patterns we need to deal with leaving actions. They + * are transfered to error transitions out of the final states (like local + * error actions) and to eof actions. In the scanner we need to forbid + * on_last for any final state that has an leaving action. */ + for ( int i = 0; i < numParts; i++ ) + transferScannerLeavingActions( parts[i] ); + + /* Union machines one and up with machine zero. */ + FsmGraph *retFsm = parts[0]; + for ( int i = 1; i < numParts; i++ ) { + retFsm->unionOp( parts[i] ); + afterOpMinimize( retFsm ); + } + + runLongestMatch( pd, retFsm ); + delete[] parts; + } + + /* Pop the name scope. 
*/ + pd->popNameScope( nameFrame ); + + return retFsm; +} + +/* Walk whichever of the join or the token region this node holds and return + * the resulting machine. Returns 0 if type matches neither case. */ +FsmGraph *JoinOrLm::walk( ParseData *pd ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case JoinType: + rtnVal = join->walk( pd ); + break; + case LongestMatchType: + rtnVal = tokenRegion->walk( pd ); + break; + } + return rtnVal; +} + +/* Forward the name tree construction to the join or the token region. */ +void JoinOrLm::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + tokenRegion->makeNameTree( pd ); + break; + } +} + +/* Forward the name reference resolution to the join or the token region. */ +void JoinOrLm::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + tokenRegion->resolveNameRefs( pd ); + break; + } +} + + +/* Construct with a location and the first expression. */ +Join::Join( const InputLoc &loc, Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Construct with the first expression only. There is no location argument + * to copy from, so the member is value-initialized instead of being + * initialized from itself. */ +Join::Join( Expression *expr ) +: + loc() +{ + exprList.append( expr ); +} + +/* Walk a join node. More than one expression means a real join, otherwise + * the single expression is walked directly. */ +FsmGraph *Join::walk( ParseData *pd ) +{ + if ( exprList.length() > 1 ) + return walkJoin( pd ); + else + return exprList.head->walk( pd ); +} + +/* There is a list of expressions to join. */ +FsmGraph *Join::walkJoin( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Evaluate the machines. */ + FsmGraph **fsms = new FsmGraph*[exprList.length()]; + ExprList::Iter expr = exprList; + for ( int e = 0; e < exprList.length(); e++, expr++ ) + fsms[e] = expr->walk( pd ); + + /* Get the start and final names. Final is + * guaranteed to exist, start is not. */ + NameInst *startName = pd->curNameInst->start; + NameInst *finalName = pd->curNameInst->final; + + int startId = -1; + if ( startName != 0 ) { + /* Take note that there was an implicit link to the start machine. 
*/ + pd->localNameScope->referencedNames.append( startName ); + startId = startName->id; + } + + /* A final id of -1 indicates there is no epsilon that references the + * final state, therefore do not create one or set an entry point to it. */ + int finalId = -1; + if ( finalName->numRefs > 0 ) + finalId = finalName->id; + + /* Join machines 1 and up onto machine 0. */ + FsmGraph *retFsm = fsms[0]; + retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 ); + + /* We can now unset entry points that are no longer used. */ + pd->unsetObsoleteEntries( retFsm ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + + delete[] fsms; + return retFsm; +} +/* Build the name tree below this node. A list of more than one expression + * gets an anonymous scope with an implicit "final" name; a single + * expression is recursed into without adding a scope. */ +void Join::makeNameTree( ParseData *pd ) +{ + if ( exprList.length() > 1 ) { + /* Create the new anonymous scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Join scopes need an implicit "final" target. */ + pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", + pd->nextNameId++, false ); + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; + } + else { + /* Recurse into the single expression. */ + exprList.head->makeNameTree( pd ); + } +} + +/* Resolve name references below this node. For a real join the "start" + * label of the join scope is looked up and recorded first. */ +void Join::resolveNameRefs( ParseData *pd ) +{ + /* Branch on whether or not there is to be a join. */ + if ( exprList.length() > 1 ) { + /* The join enters a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* The join scope must contain a start label. */ + NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); + if ( resolved.length() > 0 ) { + /* Take the first. */ + pd->curNameInst->start = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. 
*/ + error(loc) << "multiple start labels" << endl; + errorStateLabels( resolved ); + } + } + + /* Make sure there is a start label. */ + if ( pd->curNameInst->start != 0 ) { + /* There is an implicit reference to start name. */ + pd->curNameInst->start->numRefs += 1; + } + else { + /* No start label. Complain and carry on without one, nothing + * is added in its place. */ + error(loc) << "no start label" << endl; + } + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); + } + else { + /* Recurse into the single expression. */ + exprList.head->resolveNameRefs( pd ); + } +} + +/* Clean up after an expression node. The binary operator types delete both + * the expression and the term, a term node deletes only the term and a + * builtin node deletes nothing. */ +Expression::~Expression() +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + delete expression; + delete term; + break; + case TermType: + delete term; + break; + case BuiltinType: + break; + } +} + +/* Evaluate a single expression node. lastInSeq is only handed on to + * afterOpMinimize after each operator. */ +FsmGraph *Expression::walk( ParseData *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case OrType: { + /* Evaluate the expression. Note the explicit false argument, + * the other operator types do not pass one. */ + rtnVal = expression->walk( pd, false ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform union. */ + rtnVal->unionOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case IntersectType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform intersection. */ + rtnVal->intersectOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case SubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform subtraction. 
*/ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + + /* Evaluate the term and pad it on both sides with the machines + * made by dotStarFsm. */ + FsmGraph *rhs = dotStarFsm( pd ); + FsmGraph *termFsm = term->walk( pd ); + FsmGraph *trailAnyStar = dotStarFsm( pd ); + rhs->concatOp( termFsm ); + rhs->concatOp( trailAnyStar ); + + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case TermType: { + /* Return result of the term. */ + rtnVal = term->walk( pd ); + break; + } + case BuiltinType: { + /* Make the builtin machine. */ + rtnVal = makeBuiltin( builtin, pd ); + break; + } + } + + return rtnVal; +} +/* Build the name tree below this node: the binary operator types recurse + * into the expression and then the term, a term node into the term only. */ +void Expression::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->makeNameTree( pd ); + term->makeNameTree( pd ); + break; + case TermType: + term->makeNameTree( pd ); + break; + case BuiltinType: + break; + } +} +/* Resolve name references below this node, visiting the children in the + * same order as makeNameTree does. */ +void Expression::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->resolveNameRefs( pd ); + term->resolveNameRefs( pd ); + break; + case TermType: + term->resolveNameRefs( pd ); + break; + case BuiltinType: + break; + } +} + +/* Clean up after a term node. The concatenation types delete both the term + * and the factor with augmentation, otherwise only the latter. */ +Term::~Term() +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + delete term; + delete factorWithAug; + break; + case FactorWithAugType: + delete factorWithAug; + break; + } +} + +/* Evaluate a term node. lastInSeq is only handed on to afterOpMinimize + * after each concatenation. */ +FsmGraph *Term::walk( ParseData *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd, false ); + /* Evaluate the FactorWithAug. 
*/ + FsmGraph *rhs = factorWithAug->walk( pd ); + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightStartType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority whereas the right gets the higher start priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The start transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightFinishType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority whereas the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case LeftType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. 
The left machine gets the + * higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. Since + * startTransPrior might unnecessarily increase the number of + * states during the state machine construction process (due to + * isolation), we use allTransPrior instead, which has the same + * effect. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case FactorWithAugType: { + /* Nothing to concatenate, pass the factor's machine up. */ + rtnVal = factorWithAug->walk( pd ); + break; + } + } + return rtnVal; +} +/* Build the name tree below this node: the concatenation types recurse into + * the term and then the factor with augmentation, otherwise only the + * factor with augmentation is visited. */ +void Term::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->makeNameTree( pd ); + factorWithAug->makeNameTree( pd ); + break; + case FactorWithAugType: + factorWithAug->makeNameTree( pd ); + break; + } +} +/* Resolve name references below this node, visiting the children in the + * same order as makeNameTree does. */ +void Term::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->resolveNameRefs( pd ); + factorWithAug->resolveNameRefs( pd ); + break; + case FactorWithAugType: + factorWithAug->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with augmentation node. */ +FactorWithAug::~FactorWithAug() +{ + delete factorWithRep; + + /* Walk the vector of parser actions, deleting function names. + * NOTE: no code follows this comment, nothing is freed for the + * actions here. */ + + /* Clean up priority descriptors. */ + if ( priorDescs != 0 ) + delete[] priorDescs; +} +/* Embed every entry of the actions vector into the graph. actionOrd[i] is + * the ordering used for actions[i]; the embedding call is chosen by the + * type of the entry. */ +void FactorWithAug::assignActions( ParseData *pd, FsmGraph *graph, int *actionOrd ) +{ + /* Assign actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + /* Transition actions. 
*/ + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransAction( actionOrd[i], actions[i].action ); + break; + case at_finish: + graph->finishFsmAction( actionOrd[i], actions[i].action ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + + /* Global error actions. These make the same calls as the local + * error actions further down, with 0 in place of the local error + * key. Only the start variant is followed by afterOpMinimize. */ + case at_start_gbl_error: + graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); + afterOpMinimize( graph ); + break; + case at_all_gbl_error: + graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_final_gbl_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_start_gbl_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_final_gbl_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_middle_gbl_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + + /* Local error actions. The localErrKey of the action is passed + * as the third argument. */ + case at_start_local_error: + graph->startErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + afterOpMinimize( graph ); + break; + case at_all_local_error: + graph->allErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_final_local_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_start_local_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_final_local_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_middle_local_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + + /* EOF actions. 
*/ + case at_start_eof: + graph->startEOFAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_eof: + graph->allEOFAction( actionOrd[i], actions[i].action ); + break; + case at_final_eof: + graph->finalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_eof: + graph->notStartEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_eof: + graph->notFinalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_middle_eof: + graph->middleEOFAction( actionOrd[i], actions[i].action ); + break; + + /* To State Actions. Only the start variant is followed by + * afterOpMinimize. */ + case at_start_to_state: + graph->startToStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_to_state: + graph->allToStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_to_state: + graph->finalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_to_state: + graph->notStartToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_to_state: + graph->notFinalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_to_state: + graph->middleToStateAction( actionOrd[i], actions[i].action ); + break; + + /* From State Actions. Only the start variant is followed by + * afterOpMinimize. */ + case at_start_from_state: + graph->startFromStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_from_state: + graph->allFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_from_state: + graph->finalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_from_state: + graph->notStartFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_from_state: + graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_from_state: + graph->middleFromStateAction( actionOrd[i], actions[i].action ); + break; + + /* Remaining cases, prevented by the parser. 
*/ + default: + assert( false ); + break; + } + } +} +/* Embed every entry of priorityAugs into the graph. priorOrd[i] is the + * ordering and priorDescs[i] the descriptor used for entry i. Unlike + * assignActions, an unexpected type is skipped without an assertion. */ +void FactorWithAug::assignPriorities( FsmGraph *graph, int *priorOrd ) +{ + /* Assign priorities. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) { + switch ( priorityAugs[i].type ) { + case at_start: + graph->startFsmPrior( priorOrd[i], &priorDescs[i]); + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_finish: + graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_leave: + graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + + default: + /* The parser prevents this case. */ + break; + } + } +} +/* Embed every entry of the conditions vector into the graph. Only the + * start, all and leave types are handled, any other type is skipped. */ +void FactorWithAug::assignConditions( FsmGraph *graph ) +{ + for ( int i = 0; i < conditions.length(); i++ ) { + switch ( conditions[i].type ) { + /* Transition conditions. */ + case at_start: + graph->startFsmCondition( conditions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransCondition( conditions[i].action ); + break; + case at_leave: + graph->leaveFsmCondition( conditions[i].action ); + break; + default: + break; + } + } +} + + +/* Evaluate a factor with augmentation node. */ +FsmGraph *FactorWithAug::walk( ParseData *pd ) +{ + /* Enter into the scopes created for the labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Make the array of action orderings. It stays null when there are + * no actions. */ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. 
*/ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start || + actions[i].type == at_start_gbl_error || + actions[i].type == at_start_local_error || + actions[i].type == at_start_to_state || + actions[i].type == at_start_from_state || + actions[i].type == at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Evaluate the factor with repetition. */ + FsmGraph *rtnVal = factorWithRep->walk( pd ); + + /* Compute the remaining action orderings. The test is the exact + * complement of the one above, so every entry gets an ordering and the + * non-start actions are numbered after the walk of the factor. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start && + actions[i].type != at_start_gbl_error && + actions[i].type != at_start_local_error && + actions[i].type != at_start_to_state && + actions[i].type != at_start_from_state && + actions[i].type != at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + assignConditions( rtnVal ); + + assignActions( pd, rtnVal , actionOrd ); + + /* Make the array of priority orderings. Orderings are local to this walk + * of the factor with augmentation. */ + int *priorOrd = 0; + if ( priorityAugs.length() > 0 ) + priorOrd = new int[priorityAugs.length()]; + + /* Walk all priorities, assigning the priority ordering. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) + priorOrd[i] = pd->curPriorOrd++; + + /* If the priority descriptors have not been made, make them now. Make + * priority descriptors for each priority assignment that will be passed to + * the fsm. Used to keep track of the key, value and used bit. */ + if ( priorDescs == 0 && priorityAugs.length() > 0 ) { + priorDescs = new PriorDesc[priorityAugs.length()]; + for ( int i = 0; i < priorityAugs.length(); i++ ) { + /* Init the prior descriptor for the priority setting. */ + priorDescs[i].key = priorityAugs[i].priorKey; + priorDescs[i].priority = priorityAugs[i].priorValue; + } + } + + /* Assign priorities into the machine. */ + assignPriorities( rtnVal, priorOrd ); + + /* Assign epsilon transitions. 
*/ + for ( int e = 0; e < epsilonLinks.length(); e++ ) { + /* Get the name, which may not exist. If it doesn't then silently + * ignore it because an error has already been reported. The + * resolved links are consumed in order through the running index + * kept in the parse data. */ + NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; + if ( epTarg != 0 ) { + /* Make the epsilon transitions. */ + rtnVal->epsilonTrans( epTarg->id ); + + /* Note that we have made a link to the name. */ + pd->localNameScope->referencedNames.append( epTarg ); + } + } + + /* Set entry points for labels. */ + if ( labels.length() > 0 ) { + /* Pop the names. */ + pd->resetNameScope( nameFrame ); + + /* Make labels that are referenced into entry points. */ + for ( int i = 0; i < labels.length(); i++ ) { + pd->enterNameScope( false, 1 ); + + /* Will always be found. */ + NameInst *name = pd->curNameInst; + + /* If the name is referenced then set the entry point. */ + if ( name->numRefs > 0 ) + rtnVal->setEntry( name->id, rtnVal->startState ); + } + + pd->popNameScope( nameFrame ); + } + + if ( priorOrd != 0 ) + delete[] priorOrd; + if ( actionOrd != 0 ) + delete[] actionOrd; + return rtnVal; +} +/* Build the name tree below this node: add one name instance per label, + * recurse into the factor with repetition and restore the previous current + * name instance. */ +void FactorWithAug::makeNameTree( ParseData *pd ) +{ + /* Add the labels to the tree of instantiated names. Each label + * makes a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + for ( int i = 0; i < labels.length(); i++ ) + pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); + + /* Recurse, then pop the names. */ + factorWithRep->makeNameTree( pd ); + pd->curNameInst = prevNameInst; +} + +/* Resolve name references below this node, then resolve the target of each + * epsilon link and append the result to pd->epsilonResolvedLinks. */ +void FactorWithAug::resolveNameRefs( ParseData *pd ) +{ + /* Enter into the name scope created by any labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Recurse first. IMPORTANT: we must do the exact same traversal as when + * the tree is constructed. */ + factorWithRep->resolveNameRefs( pd ); + + /* Resolve epsilon transitions. */ + for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { + /* Get the link. 
*/ + EpsilonLink &link = epsilonLinks[ep]; + NameInst *resolvedName = 0; + + if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) { + /* Epsilon drawn to an implicit final state. An implicit final is + * only available in join operations. */ + resolvedName = pd->localNameScope->final; + } + else { + /* Do a search for the name. */ + NameSet resolved; + pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); + if ( resolved.length() > 0 ) { + /* Take the first one. */ + resolvedName = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(link.loc) << "state reference " << link.target << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + /* This is tricky, we stuff resolved epsilon transitions into one long + * vector in the parse data structure. Since the name resolution and + * graph generation both do identical walks of the parse tree we + * should always find the link resolutions in the right place. A null + * entry is appended as well when the name did not resolve. */ + pd->epsilonResolvedLinks.append( resolvedName ); + + if ( resolvedName != 0 ) { + /* Found the name, bump up the reference count on it. */ + resolvedName->numRefs += 1; + } + else { + /* Complain, no recovery action, the epsilon op will ignore any + * epsilon transitions whose names did not resolve. */ + error(link.loc) << "could not resolve label " << link.target << endl; + } + } + + if ( labels.length() > 0 ) + pd->popNameScope( nameFrame ); +} + + +/* Clean up after a factor with repetition node. The repetition types delete + * the nested factor with repetition, a FactorWithNegType node deletes its + * factor with negation. */ +FactorWithRep::~FactorWithRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorWithRep; + break; + case FactorWithNegType: + delete factorWithNeg; + break; + } +} + +/* Evaluate a factor with repetition node. 
*/ +FsmGraph *FactorWithRep::walk( ParseData *pd ) +{ + /* Each case evaluates the child machine, applies the repetition operator + * selected by type and hands the resulting graph back to the caller. */ + FsmGraph *retFsm = 0; + + switch ( type ) { + case StarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case StarStarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* Leaving gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case OptionalType: { + /* Make the null fsm. */ + FsmGraph *nu = new FsmGraph(); + nu->lambdaFsm( ); + + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + + /* Perform the question operator. */ + retFsm->unionOp( nu ); + afterOpMinimize( retFsm ); + break; + } + case PlusType: { + /* Evaluate the FactorWithRep. 
*/ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying plus operator to a machine that " + "accepts zero length word" << endl; + } + + /* Need a duplicate for the star end. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* The start func orders need to be shifted before doing the star. */ + pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + break; + } + case ExactType: { + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MaxType: { + /* Get an int from the repetition amount. */ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. 
*/ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MinType: { + /* Evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the repetition + * and the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Acts just like a star op on the machine to return. */ + retFsm->starOp( ); + afterOpMinimize( retFsm ); + } + else { + /* Take a duplicate for the plus. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + /* Tack on the kleene star. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + break; + } + case RangeType: { + /* Check for bogus range. Compare the bounds directly rather than + * subtracting them so the test itself cannot overflow. */ + if ( upperRep < lowerRep ) { + error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. */ + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. This + * defeats the purpose so give a warning. 
*/ + warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Now need to evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing both kinds + * of repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Do optional repetition on the second half. */ + dup->optionalRepeatOp( upperRep - lowerRep ); + afterOpMinimize( dup ); + + /* Tack on the duplicate machine. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + } + break; + } + case FactorWithNegType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factorWithNeg->walk( pd ); + break; + }} + return retFsm; +} + +/* Forward name tree construction to the child node selected by type. */ +void FactorWithRep::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->makeNameTree( pd ); + break; + case FactorWithNegType: + factorWithNeg->makeNameTree( pd ); + break; + } +} + +/* Forward name reference resolution to the child node selected by type. */ +void FactorWithRep::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->resolveNameRefs( pd ); + break; + case FactorWithNegType: + factorWithNeg->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with negation node. */ +FactorWithNeg::~FactorWithNeg() +{ + switch ( type ) { + case NegateType: + case CharNegateType: + delete factorWithNeg; + break; + case FactorType: + delete factor; + break; + } +} + +/* Evaluate a factor with negation node. */ +FsmGraph *FactorWithNeg::walk( ParseData *pd ) +{ + FsmGraph *retFsm = 0; + + switch ( type ) { + case NegateType: { + /* Evaluate the factorWithNeg. */ + FsmGraph *toNegate = factorWithNeg->walk( pd ); + + /* Negation is subtract from dot-star. */ + retFsm = dotStarFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case CharNegateType: { + /* Evaluate the factorWithNeg. */ + FsmGraph *toNegate = factorWithNeg->walk( pd ); + + /* CharNegation is subtract from dot. */ + retFsm = dotFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case FactorType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factor->walk( pd ); + break; + }} + return retFsm; +} + +/* Forward name tree construction to the child node selected by type. */ +void FactorWithNeg::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->makeNameTree( pd ); + break; + case FactorType: + factor->makeNameTree( pd ); + break; + } +} + +/* Forward name reference resolution to the child node selected by type. */ +void FactorWithNeg::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->resolveNameRefs( pd ); + break; + case FactorType: + factor->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor node. */ +Factor::~Factor() +{ + switch ( type ) { + case LiteralType: + delete literal; + break; + case RangeType: + delete range; + break; + case OrExprType: + delete reItem; + break; + case RegExprType: + delete regExp; + break; + case ReferenceType: + /* The referenced varDef is not deleted here. */ + break; + case ParenType: + delete join; + break; + } +} + +/* Evaluate a factor node. */ +FsmGraph *Factor::walk( ParseData *pd ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case LiteralType: + rtnVal = literal->walk( pd ); + break; + case RangeType: + rtnVal = range->walk( pd ); + break; + case OrExprType: + rtnVal = reItem->walk( pd, 0 ); + break; + case RegExprType: + rtnVal = regExp->walk( pd, 0 ); + break; + case ReferenceType: + rtnVal = varDef->walk( pd ); + break; + case ParenType: + rtnVal = join->walk( pd ); + break; + } + + return rtnVal; +} + +/* Only references and parenthesized joins take part in the name tree. */ +void Factor::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->makeNameTree( loc, pd ); + break; + case ParenType: + join->makeNameTree( pd ); + break; + } +} + +/* Only references and parenthesized joins have name references to resolve. */ +void Factor::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->resolveNameRefs( pd ); + break; + case ParenType: + join->resolveNameRefs( pd ); + break; + } +} + +/* Clean up a range object. 
Must delete the two literals. */ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +bool Range::verifyRangeFsm( FsmGraph *rangeEnd ) +{ + /* Must have two states. */ + if ( rangeEnd->stateList.length() != 2 ) + return false; + /* The start state cannot be final. */ + if ( rangeEnd->startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( rangeEnd->finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( rangeEnd->startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + FsmTrans *startTrans = rangeEnd->startState->outList.head; + if ( startTrans->lowKey != startTrans->highKey ) + return false; + return true; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmGraph *Range::walk( ParseData *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmGraph *lowerFsm = lowerLit->walk( pd ); + if ( !verifyRangeFsm( lowerFsm ) ) { + error(lowerLit->loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. */ + FsmGraph *upperFsm = upperLit->walk( pd ); + if ( !verifyRangeFsm( upperFsm ) ) { + error(upperLit->loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. */ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. 
*/ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeFsm( lowKey, highKey ); + return retFsm; +} + +/* Evaluate a literal object. */ +FsmGraph *Literal::walk( ParseData *pd ) +{ + /* FsmGraph to return, is the alphabet signed. */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( literal.data, loc, pd ); + /* Make the new machine. */ + rtnVal = new FsmGraph(); + rtnVal->concatFsm( fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. */ + String interp; + bool caseInsensitive; + prepareLitString( interp, caseInsensitive, literal, loc ); + Key *arr = new Key[interp.length()]; + makeFsmKeyArray( arr, interp.data, interp.length(), pd ); + + /* Make the new machine. */ + rtnVal = new FsmGraph(); + if ( caseInsensitive ) + rtnVal->concatFsmCI( arr, interp.length() ); + else + rtnVal->concatFsm( arr, interp.length() ); + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExp; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. */ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. */ + FsmGraph *fsm1 = regExp->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->concatOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + /* FIXME: Return something here. */ + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. 
*/ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *ReItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[data.length()]; + makeFsmKeyArray( arr, data.data, data.length(), pd ); + + /* Make the concat fsm. */ + rtnVal = new FsmGraph(); + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal->concatFsmCI( arr, data.length() ); + else + rtnVal->concatFsm( arr, data.length() ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = dotFsm( pd ); + break; + } + case OrBlock: { + /* Get the or block and minimize it. Walking an empty or block yields + * a null pointer, so substitute the null machine in that case. */ + rtnVal = orBlock->walk( pd, rootRegex ); + if ( rtnVal == 0 ) { + rtnVal = new FsmGraph(); + rtnVal->lambdaFsm(); + } + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block and minimize it. Walking an empty or block yields + * a null pointer, in which case there is nothing to subtract. */ + FsmGraph *fsm = orBlock->walk( pd, rootRegex ); + if ( fsm != 0 ) + fsm->minimizePartition2(); + + /* Make a dot fsm and subtract from it. */ + rtnVal = dotFsm( pd ); + if ( fsm != 0 ) + rtnVal->subtractOp( fsm ); + rtnVal->minimizePartition2(); + break; + } + } + + /* If the item is followed by a star, then apply the star op. */ + if ( star ) { + if ( rtnVal->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + rtnVal->starOp(); + rtnVal->minimizePartition2(); + } + return rtnVal; +} + +/* Clean up after an or block of a regular expression. */ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. 
*/ +FsmGraph *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* Returns a null pointer for an empty block; callers must check. */ + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. */ + FsmGraph *fsm1 = orBlock->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->unionOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Evaluate an or block item of a regular expression. */ +FsmGraph *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The return value, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + switch ( type ) { + case Data: { + /* Make the or machine. */ + rtnVal = new FsmGraph(); + + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet; + makeFsmUniqueKeyArray( keySet, data.data, data.length(), + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal->orFsm( keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(loc) << "lower end of range is greater than upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = new FsmGraph(); + rtnVal->rangeFsm( lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + /* Add the lower case counterpart of any upper case letters that + * fall inside the range. */ + if ( lowKey <= 'Z' && 'A' <= highKey ) { + Key otherLow = lowKey < 'A' ? Key('A') : lowKey; + Key otherHigh = 'Z' < highKey ? 
Key('Z') : highKey; + + otherLow = 'a' + ( otherLow - 'A' ); + otherHigh = 'a' + ( otherHigh - 'A' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + + /* Add the upper case counterpart of any lower case letters inside + * the range. This is tested independently of the case above because + * a range such as Z-a overlaps both sets of letters. */ + if ( lowKey <= 'z' && 'a' <= highKey ) { + Key otherLow = lowKey < 'a' ? Key('a') : lowKey; + Key otherHigh = 'z' < highKey ? Key('z') : highKey; + + otherLow = 'A' + ( otherLow - 'a' ); + otherHigh = 'A' + ( otherHigh - 'a' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/colm/parsetree.h b/colm/parsetree.h new file mode 100644 index 00000000..aceefea1 --- /dev/null +++ b/colm/parsetree.h @@ -0,0 +1,1605 @@ +/* + * Copyright 2001-2006 Adrian Thurston + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include +#include +#include "colm.h" +#include "avlmap.h" +#include "bstmap.h" +#include "bstset.h" +#include "vector.h" +#include "dlist.h" +#include "astring.h" +#include "bytecode.h" +#include "avlbasic.h" + +/* Operators that are represented with single symbol characters. 
*/ +#define OP_DoubleEql 'e' +#define OP_NotEql 'q' +#define OP_LessEql 'l' +#define OP_GrtrEql 'g' +#define OP_LogicalAnd 'a' +#define OP_LogicalOr 'o' +#define OP_Deref 'd' + +struct NameInst; +struct FsmGraph; +struct RedFsm; +struct FsmTables; +struct FsmRun; +struct ObjectDef; +struct ElementOf; +struct UniqueType; +struct ObjField; +struct TransBlock; +struct CodeBlock; + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + +typedef BstSet CharSet; /* NOTE(review): BstSet is given template arguments elsewhere in this header (see RegionSet); the bare use here looks like the arguments were stripped -- confirm against the original file. */ + + +struct ParseData; +struct TypeRef; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. */ + +struct Term; +struct FactorWithAug; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Expression; +struct Join; +struct JoinOrLm; +struct TokenRegion; +struct Namespace; +struct TokenDef; +struct TokenDefList; +struct Range; +struct KlangEl; + +/* Type of augmentation. Describes locations in the machine. Each group after + * the first lists its six members in the same order as StateAugType. */ +enum AugType +{ + /* Transition actions/priorities. */ + at_start, + at_all, + at_finish, + at_leave, + + /* Global error actions. */ + at_start_gbl_error, + at_all_gbl_error, + at_final_gbl_error, + at_not_start_gbl_error, + at_not_final_gbl_error, + at_middle_gbl_error, + + /* Local error actions. */ + at_start_local_error, + at_all_local_error, + at_final_local_error, + at_not_start_local_error, + at_not_final_local_error, + at_middle_local_error, + + /* To State Action embedding. */ + at_start_to_state, + at_all_to_state, + at_final_to_state, + at_not_start_to_state, + at_not_final_to_state, + at_middle_to_state, + + /* From State Action embedding. */ + at_start_from_state, + at_all_from_state, + at_final_from_state, + at_not_start_from_state, + at_not_final_from_state, + at_middle_from_state, + + /* EOF Action embedding. 
*/ + at_start_eof, + at_all_eof, + at_final_eof, + at_not_start_eof, + at_not_final_eof, + at_middle_eof +}; + +/* IMPORTANT: These must follow the same order as the state augs in AugType + * since we will be using this to compose AugType. */ +enum StateAugType +{ + sat_start = 0, + sat_all, + sat_final, + sat_not_start, + sat_not_final, + sat_middle +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. NOTE(review): the three Vector typedefs below + * carry no template arguments and are presumably incomplete -- confirm + * against the original file. */ +typedef Vector NameRef; +typedef Vector NameRefList; +typedef Vector NameTargList; + +/* Structure for storing location of epsilon transitions. */ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef &target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef target; /* Copied from the constructor argument. */ +}; + +struct Label /* A location, a string and an object field pointer stored together. */ +{ + Label( const InputLoc &loc, const String &data, ObjField *objField ) + : loc(loc), data(data), objField(objField) { } + + InputLoc loc; + String data; + ObjField *objField; +}; + +/* Structure represents an action assigned to some FactorWithAug node. The + * factor with aug will keep an array of these. */ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + +struct Token /* Text paired with an input location. */ +{ + String data; + InputLoc loc; +}; + +/* Declared here and defined elsewhere. The first two parameters are non-const + * references, presumably outputs (the interpreted literal and a + * case-insensitivity flag) -- confirm against the definition. */ +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ); + +std::ostream &operator<<(std::ostream &out, const Token &token ); + +typedef AvlMap< String, KlangEl*, CmpStr > LiteralDict; +typedef AvlMapEl< String, KlangEl* > LiteralDictEl; + +/* Store the value and type of a priority augmentation. 
*/ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct VarDef +{ + VarDef( const String &name, JoinOrLm *joinOrLm ) + : name(name), joinOrLm(joinOrLm) { } + + /* Parse tree traversal. */ + FsmGraph *walk( ParseData *pd ); + void makeNameTree( const InputLoc &loc, ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + String name; + JoinOrLm *joinOrLm; +}; + +typedef Vector StringVect; +typedef CmpTable CmpStrVect; + +struct NamespaceQual +{ + /* Note that declInRegion is accepted but not stored. */ + NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) : + cachedNspaceQual(0), declInNspace(declInNspace) {} + + Namespace *cachedNspaceQual; + Namespace *declInNspace; + + StringVect qualNames; + + Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); + Namespace *getQual( ParseData *pd ); +}; + +struct TokenDef +{ + /* Every pointer member is given a defined value; those without a constructor argument start out null. */ + TokenDef( Join *join, KlangEl *token, InputLoc &semiLoc, + int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion ) + : + join(join), action(0), token(token), semiLoc(semiLoc), setActId(0), actOnLast(0), + actOnNext(0), actLagBehind(0), longestMatchId(longestMatchId), inLmSelect(false), + nspace(nspace), tokenRegion(tokenRegion), prev(0), next(0) {} + + InputLoc getLoc(); + + Join *join; + Action *action; + KlangEl *token; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + Namespace *nspace; + TokenRegion *tokenRegion; + + TokenDef *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct TokenDefList : DList {}; + +/* Symbol Map. 
*/ +typedef AvlMap< String, KlangEl*, CmpStr > SymbolMap; +typedef AvlMapEl< String, KlangEl* > SymbolMapEl; + +typedef Vector RegionVect; + +struct TokenRegion +{ + /* Construct with a list of joins. Pointer members without a constructor argument start out null. */ + TokenRegion( const InputLoc &loc, const String &name, int id, + TokenRegion *parentRegion ) : + loc(loc), name(name), id(id), lmActSelect(0), + lmSwitchHandlesError(false), regionNameInst(0), + parentRegion(parentRegion), defaultTokenDef(0), next(0), prev(0), + preEofBlock(0) { } + + /* Tree traversal. */ + FsmGraph *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + void runLongestMatch( ParseData *pd, FsmGraph *graph ); + void transferScannerLeavingActions( FsmGraph *graph ); + Action *newAction( ParseData *pd, const InputLoc &loc, const String &name, + InlineList *inlineList ); + void makeActions( ParseData *pd ); + void findName( ParseData *pd ); + void restart( FsmGraph *graph, FsmTrans *trans ); + + InputLoc loc; + TokenDefList tokenDefList; + String name; + int id; + + Action *lmActSelect; + bool lmSwitchHandlesError; + + /* This gets saved off during the name walk. Can save it off because token + * regions are referenced once only. 
*/ + NameInst *regionNameInst; + + TokenRegion *parentRegion; + RegionVect childRegions; + + TokenDef *defaultTokenDef; + + TokenRegion *next, *prev; + + CodeBlock *preEofBlock; +}; + +typedef DList RegionList; +typedef BstSet< TokenRegion*, CmpOrd > RegionSet; + +typedef Vector NamespaceVect; + +struct GenericType + : public DListEl +{ + GenericType( const String &name, long typeId, long id, + KlangEl *langEl, TypeRef *typeArg ) + : + name(name), typeId(typeId), id(id), langEl(langEl), + typeArg(typeArg), keyTypeArg(0), + utArg(0), keyUT(0), + objDef(0) + {} + + const String &getKey() const + { return name; } + + String name; + long typeId; + long id; + KlangEl *langEl; + TypeRef *typeArg; + TypeRef *keyTypeArg; + UniqueType *utArg; + UniqueType *keyUT; + + ObjectDef *objDef; +}; + +typedef DList GenericList; + +struct UserIter; +typedef AvlMap UserIterMap; +typedef AvlMapEl UserIterMapEl; + +/* Graph dictionary. */ +struct GraphDictEl +: + public AvlTreeEl, + public DListEl +{ + GraphDictEl( const String &key ) + : key(key), value(0), isInstance(false) { } + GraphDictEl( const String &key, VarDef *value ) + : key(key), value(value), isInstance(false) { } + + const String &getKey() { return key; } + + String key; + VarDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree GraphDict; +typedef DList GraphList; + +struct Namespace +{ + /* Construct with a list of joins. The list links start out null. */ + Namespace( const InputLoc &loc, const String &name, int id, + Namespace *parentNamespace ) : + loc(loc), name(name), id(id), + parentNamespace(parentNamespace), next(0), prev(0) { } + + /* Tree traversal. */ + Namespace *findNamespace( const String &name ); + + InputLoc loc; + String name; + int id; + + /* Literal patterns and the dictionary mapping literals to the underlying + * tokens. */ + LiteralDict literalDict; + + /* Dictionary of symbols within the region. 
*/ + SymbolMap symbolMap; + GenericList genericList; + + /* Dictionary of graphs. Both instances and non-instances go here. */ + GraphDict graphDict; + + Namespace *parentNamespace; + NamespaceVect childNamespaces; + + Namespace *next, *prev; +}; + +typedef DList NamespaceList; +typedef BstSet< Namespace*, CmpOrd > NamespaceSet; + +/* List of Expressions. */ +typedef DList ExprList; + +struct JoinOrLm +{ + enum Type { + JoinType, + LongestMatchType + }; + + /* Only the pointer matching type is set; the other one is null. */ + JoinOrLm( Join *join ) : + join(join), tokenRegion(0), type(JoinType) {} + JoinOrLm( TokenRegion *tokenRegion ) : + join(0), tokenRegion(tokenRegion), type(LongestMatchType) {} + + FsmGraph *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Join *join; + TokenRegion *tokenRegion; + Type type; +}; + +/* + * Join + */ +struct Join +{ + /* Construct with the first expression. */ + Join( Expression *expr ); + Join( const InputLoc &loc, Expression *expr ); + + /* Tree traversal. */ + FsmGraph *walk( ParseData *pd ); + FsmGraph *walkJoin( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Data. */ + InputLoc loc; + ExprList exprList; +}; + +/* + * Expression + */ +struct Expression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + /* Construct with an expression on the left and a term on the right. There + * is no builtin argument here, so the member is given a fixed value + * instead of being initialized from itself. Presumably it is only + * consulted for BuiltinType nodes -- confirm against Expression::walk. */ + Expression( Expression *expression, Term *term, Type type ) : + expression(expression), term(term), + builtin(BT_Any), type(type), prev(this), next(this) { } + + /* Construct with only a term. As above, builtin gets a fixed value. */ + Expression( Term *term ) : + expression(0), term(term), builtin(BT_Any), + type(TermType) , prev(this), next(this) { } + + /* Construct with a builtin type. */ + Expression( BuiltinMachine builtin ) : + expression(0), term(0), builtin(builtin), + type(BuiltinType), prev(this), next(this) { } + + ~Expression(); + + /* Tree traversal. 
*/ + FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Node data. */ + Expression *expression; + Term *term; + BuiltinMachine builtin; + Type type; + + Expression *prev, *next; +}; + +/* + * Term + */ +struct Term +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorWithAugType + }; + + Term( Term *term, FactorWithAug *factorWithAug ) : + term(term), factorWithAug(factorWithAug), type(ConcatType) { } + + Term( Term *term, FactorWithAug *factorWithAug, Type type ) : + term(term), factorWithAug(factorWithAug), type(type) { } + + Term( FactorWithAug *factorWithAug ) : + term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } + + ~Term(); + + FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Term *term; + FactorWithAug *factorWithAug; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct FactorWithAug +{ + FactorWithAug( FactorWithRep *factorWithRep ) : + priorDescs(0), factorWithRep(factorWithRep) { } + ~FactorWithAug(); + + /* Tree traversal. */ + FsmGraph *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + void assignActions( ParseData *pd, FsmGraph *graph, int *actionOrd ); + void assignPriorities( FsmGraph *graph, int *priorOrd ); + + void assignConditions( FsmGraph *graph ); + + /* Actions and priorities assigned to the factor node. */ + Vector actions; + Vector priorityAugs; + PriorDesc *priorDescs; + Vector