diff options
Diffstat (limited to 'rts/gmp')
524 files changed, 90586 insertions, 0 deletions
diff --git a/rts/gmp/.gdbinit b/rts/gmp/.gdbinit new file mode 100644 index 0000000000..843c109e89 --- /dev/null +++ b/rts/gmp/.gdbinit @@ -0,0 +1,34 @@ +# Copyright (C) 1999 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +define pz +set __gmpz_dump ($) +end + +define pq +set __gmpz_dump ($->_mp_num) +echo / +set __gmpz_dump ($->_mp_den) +end + +define pf +set __gmpf_dump ($) +end + diff --git a/rts/gmp/AUTHORS b/rts/gmp/AUTHORS new file mode 100644 index 0000000000..1fa057af6c --- /dev/null +++ b/rts/gmp/AUTHORS @@ -0,0 +1,12 @@ +Authors of GNU MP (in chronological order) +Torbjörn Granlund +John Amanatides +Paul Zimmermann +Ken Weber +Bennet Yee +Andreas Schwab +Robert Harley +Linus Nordberg +Kent Boortz +Kevin Ryde +Guillaume Hanrot diff --git a/rts/gmp/COPYING b/rts/gmp/COPYING new file mode 100644 index 0000000000..a6d7d0188a --- /dev/null +++ b/rts/gmp/COPYING @@ -0,0 +1,336 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/rts/gmp/COPYING.LIB b/rts/gmp/COPYING.LIB new file mode 100644 index 0000000000..c4792dd27a --- /dev/null +++ b/rts/gmp/COPYING.LIB @@ -0,0 +1,515 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. 
You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. 
+^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. +^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it +does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper +mail. + +You should also get your employer (if you work as a programmer) or +your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James +Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/rts/gmp/INSTALL b/rts/gmp/INSTALL new file mode 100644 index 0000000000..62faa1a2e3 --- /dev/null +++ b/rts/gmp/INSTALL @@ -0,0 +1,146 @@ + + INSTALLING GNU MP + ================= + + +These instructions are only for the impatient. Others should read the install +instructions in the manual, gmp.info. 
Use + + info -f ./gmp.info + +or in emacs + + C-u C-h i gmp.info + + +Here are some brief instructions on how to install GMP, and some examples to +help you get started using GMP. + +First, you need to compile, and optionally install, GMP. Since you're +impatient, try this: + + ./configure; make + +If that fails, or you care about the performance of GMP, you need to read the +full instructions in the chapter "Installing GMP", in the manual. + +Next, try some small test programs, for example the ones below. + +In GMP programs, all variables need to be initialized before they are +assigned, and cleared out before program flow leaves the scope in which they +were declared. Here is an example program that reads two numbers from the +command line, multiplies them, and prints the result to stdout. + + + #include <stdio.h> + #include <gmp.h> /* All GMP programs need to include gmp.h */ + + main (int argc, char **argv) + { + mpz_t a, b, p; + + if (argc != 3) + { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); } + + /* Initialize variables */ + mpz_init (a); + mpz_init (b); + mpz_init (p); + + /* Assign a and b from base 10 strings in argv */ + mpz_set_str (a, argv[1], 10); + mpz_set_str (b, argv[2], 10); + + /* Multiply a and b and put the result in p */ + mpz_mul (p, a, b); + + /* Print p in base 10 */ + mpz_out_str (stdout, 10, p); + fputc ('\n', stdout); + + /* Clear out variables */ + mpz_clear (a); + mpz_clear (b); + mpz_clear (p); + exit (0); + } + + +This might look tedious, with all the initializing and clearing. Fortunately +some of these operations can be combined, and other operations can often be +avoided. 
An experienced GMP user might write: + + + #include <stdio.h> + #include <gmp.h> + + main (int argc, char **argv) + { + mpz_t a, b, p; + + if (argc != 3) + { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); } + + /* Initialize and assign a and b from base 10 strings in argv */ + mpz_init_set_str (a, argv[1], 10); + mpz_init_set_str (b, argv[2], 10); + /* Initialize p */ + mpz_init (p); + + /* Multiply a and b and put the result in p */ + mpz_mul (p, a, b); + + /* Print p in base 10 */ + mpz_out_str (stdout, 10, p); + fputc ('\n', stdout); + + /* Since we're about to exit, no need to clear out variables */ + exit (0); + } + + +Now you have to compile your test program, and link it with the GMP library. +Assuming your working directory is still the gmp source directory, and your +source file is called example.c, enter: + + gcc -g -I. example.c .libs/libgmp.a + +After installing, the command becomes: "gcc -g example.c -lgmp". Also, GMP is +libtool based so you can use that to link if you want. + +Now try to run the example: + + ./a.out 98365871231256752134 319378318340103345227 + 31415926535897932384618573336104570964418 + +The functions used here all operate on signed integers, and have names +starting with "mpz_". There are many more such functions than used in these +examples. See the chapter "Integer Functions" in the manual, for a complete +list. + +There are two other main classes of functions in GMP. They operate on +rational numbers and floating-point numbers, respectively. The chapters +"Rational Number Functions", and "Floating-point Functions" document these +classes. + +To run a set of tests, do "make check". This will take a while. + +To create the printable documentation from the texinfo source, type "make +gmp.dvi" or "make gmp.ps". This requires various "tex" commands. + +To install the library, do "make install" (then you can use -lgmp instead of +.libs/libgmp.a). 
+ +If you decide to use GMP, it is a good idea to at least read the chapter "GMP +Basics" in the manual. + +Some known build problems are noted in the "Installing GMP" chapter of +the manual. Please report other problems to bug-gmp@gnu.org. + + + +---------------- +Local variables: +mode: text +fill-column: 78 +End: diff --git a/rts/gmp/Makefile.am b/rts/gmp/Makefile.am new file mode 100644 index 0000000000..b73b805c6e --- /dev/null +++ b/rts/gmp/Makefile.am @@ -0,0 +1,197 @@ +## Process this file with automake to generate Makefile.in + + +# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software +# Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# make check +# +# It'd be good if "make check" first did a "make all" or whatever to +# build libgmp.la, but it's not clear how best to do this. Putting a +# "check:" target is overridden by automake, and a "check-local:" runs +# too late (due to depth-first subdirectory processing). For now it's +# necessary to do "make && make check". +# +# MPF_OBJECTS etc +# +# Libtool needs all the .lo files passed to it if it's going to build +# both a static and shared library. 
If a convenience library like +# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo +# objects rather than the non-PIC .o's. +# +# Unfortunately this leads to the big lists of objects below. Something +# like mpz/*.lo would probably work, but might risk missing something +# out or getting something extra. The source files for each .lo are +# listed in the Makefile.am's in the subdirectories. + + +# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning +# +# 1. No interfaces changed, only implementations (good): Increment REVISION. +# +# 2. Interfaces added, none removed (good): Increment CURRENT, increment +# AGE, set REVISION to 0. +# +# 3. Interfaces removed (BAD, breaks upward compatibility): Increment +# CURRENT, set AGE and REVISION to 0. +# +# Do this separately for libgmp and libmp, only do it just before a release. +# +# GMP -version-info +# release libgmp libmp +# 2.0.x - - +# 3.0 3:0:0 3:0:0 +# 3.0.1 3:1:0 3:0:0 +# 3.1 4:0:1 4:0:1 +# 3.1.1 4:1:1 4:1:1 +# +# +# Starting at 3:0:0 is a slight abuse of the versioning system, but it +# ensures we're past soname libgmp.so.2, which is what has been used on +# Debian GNU/Linux packages of gmp 2. Pretend gmp 2 was 2:0:0, so the +# interface changes for gmp 3 mean 3:0:0 is right. 
+ +LIBGMP_LT_CURRENT = 4 +LIBGMP_LT_REVISION = 1 +LIBGMP_LT_AGE = 1 + +LIBMP_LT_CURRENT = 4 +LIBMP_LT_REVISION = 1 +LIBMP_LT_AGE = 1 + + +AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr + +SUBDIRS = mpn mpz mpq mpf mpbsd mpfr tests demos tune + +include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION) +EXTRA_HEADERS = mp.h + +lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION) + +EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos + +DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@ + + +MPF_OBJECTS = mpf/init.lo mpf/init2.lo mpf/set.lo mpf/set_ui.lo mpf/set_si.lo \ + mpf/set_str.lo mpf/set_d.lo mpf/set_z.lo mpf/iset.lo mpf/iset_ui.lo \ + mpf/iset_si.lo mpf/iset_str.lo mpf/iset_d.lo mpf/clear.lo mpf/get_str.lo \ + mpf/dump.lo mpf/size.lo mpf/eq.lo mpf/reldiff.lo mpf/sqrt.lo mpf/random2.lo \ + mpf/inp_str.lo mpf/out_str.lo mpf/add.lo mpf/add_ui.lo mpf/sub.lo \ + mpf/sub_ui.lo mpf/ui_sub.lo mpf/mul.lo mpf/mul_ui.lo mpf/div.lo \ + mpf/div_ui.lo mpf/cmp.lo mpf/cmp_ui.lo mpf/cmp_si.lo mpf/mul_2exp.lo \ + mpf/div_2exp.lo mpf/abs.lo mpf/neg.lo mpf/set_q.lo mpf/get_d.lo \ + mpf/set_dfl_prec.lo mpf/set_prc.lo mpf/set_prc_raw.lo mpf/get_prc.lo \ + mpf/ui_div.lo mpf/sqrt_ui.lo mpf/floor.lo mpf/ceil.lo mpf/trunc.lo \ + mpf/pow_ui.lo mpf/urandomb.lo mpf/swap.lo +MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \ + mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \ + mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \ + mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \ + mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \ + mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \ + mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \ + mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \ + mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \ + mpz/fits_uint_p.lo 
mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \ + mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \ + mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \ + mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \ + mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \ + mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \ + mpz/lcm.lo mpz/legendre.lo \ + mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \ + mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \ + mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \ + mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo mpz/root.lo \ + mpz/rrandomb.lo \ + mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \ + mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \ + mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \ + mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \ + mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \ + mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \ + mpz/urandomm.lo mpz/xor.lo +MPQ_OBJECTS = mpq/add.lo mpq/canonicalize.lo mpq/clear.lo mpq/cmp.lo \ + mpq/cmp_ui.lo mpq/div.lo mpq/get_d.lo mpq/get_den.lo mpq/get_num.lo \ + mpq/init.lo mpq/inv.lo mpq/mul.lo mpq/neg.lo mpq/out_str.lo \ + mpq/set.lo mpq/set_den.lo \ + mpq/set_num.lo mpq/set_si.lo mpq/set_ui.lo mpq/sub.lo mpq/equal.lo \ + mpq/set_z.lo mpq/set_d.lo mpq/swap.lo +MPN_OBJECTS = @mpn_objs_in_libgmp@ + +MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \ + mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \ + mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \ + mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo + +# FIXME: Add mpfr/rnd_mode.lo when it's clean. 
+MPFR_OBJECTS = mpfr/add.lo mpfr/div_2exp.lo mpfr/neg.lo mpfr/set_dfl_prec.lo \ + mpfr/set_str_raw.lo mpfr/agm.lo mpfr/get_str.lo mpfr/print_raw.lo \ + mpfr/set_dfl_rnd.lo mpfr/sqrt.lo mpfr/clear.lo mpfr/init.lo \ + mpfr/set_f.lo mpfr/sub.lo mpfr/cmp.lo mpfr/mul.lo mpfr/round.lo \ + mpfr/set_prec.lo mpfr/cmp_ui.lo mpfr/mul_2exp.lo mpfr/set.lo mpfr/set_si.lo \ + mpfr/div.lo mpfr/mul_ui.lo mpfr/set_d.lo mpfr/pow.lo mpfr/out_str.lo \ + mpfr/pi.lo mpfr/set_z.lo mpfr/add_ulp.lo mpfr/log2.lo mpfr/random.lo \ + mpfr/log.lo mpfr/exp.lo mpfr/div_ui.lo mpfr/zeta.lo mpfr/karadiv.lo \ + mpfr/karasqrt.lo mpfr/print_rnd_mode.lo + + +if WANT_MPFR +MPFR_HEADERS_OPTION = mpfr/mpfr.h +MPFR_OBJECTS_OPTION = $(MPFR_OBJECTS) +MPFR_LIBADD_OPTION = -lm +endif +libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \ + mp_clz_tab.c mp_minv_tab.c \ + rand.c randclr.c randlc.c randlc2x.c randraw.c randsd.c \ + randsdui.c version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c +libgmp_la_DEPENDENCIES = \ + $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \ + $(MPFR_OBJECTS_OPTION) +libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION) +libgmp_la_LDFLAGS = \ + -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) + + +if WANT_MPBSD +MPBSD_HEADERS_OPTION = mp.h +MPBSD_LTLIBRARIES_OPTION = libmp.la +endif +libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \ + mp_minv_tab.c mp_set_fns.c stack-alloc.c +libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \ + mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \ + mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \ + mpz/sub.lo +libmp_la_LIBADD = $(libmp_la_DEPENDENCIES) +libmp_la_LDFLAGS = \ + -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) + + +info_TEXINFOS = gmp.texi + + +# Don't ship CVS directories or emacs backups. 
+dist-hook: + -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \ + | xargs rm -rf diff --git a/rts/gmp/Makefile.in b/rts/gmp/Makefile.in new file mode 100644 index 0000000000..e63383e7a7 --- /dev/null +++ b/rts/gmp/Makefile.in @@ -0,0 +1,932 @@ +# Makefile.in generated automatically by automake 1.4a from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = . 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_FLAG = +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : + +@SET_MAKE@ +build_alias = @build_alias@ +build_triplet = @build@ +host_alias = @host_alias@ +host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ +AMDEP = @AMDEP@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AWK = @AWK@ +CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@ +CC = @CC@ +CCAS = @CCAS@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +EXEEXT = @EXEEXT@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +M4 = @M4@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@ +STRIP = @STRIP@ +U = @U@ +VERSION = @VERSION@ +gmp_srclinks = @gmp_srclinks@ +install_sh = @install_sh@ +mpn_objects = @mpn_objects@ +mpn_objs_in_libgmp = @mpn_objs_in_libgmp@ + +# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software +# Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +# make check +# +# It'd be good if "make check" first did a "make all" or whatever to +# build libgmp.la, but it's not clear how best to do this. Putting a +# "check:" target is overridden by automake, and a "check-local:" runs +# too late (due to depth-first subdirectory processing). For now it's +# necessary to do "make && make check". +# +# MPF_OBJECTS etc +# +# Libtool needs all the .lo files passed to it if it's going to build +# both a static and shared library. If a convenience library like +# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo +# objects rather than the non-PIC .o's. +# +# Unfortunately this leads to the big lists of objects below. Something +# like mpz/*.lo would probably work, but might risk missing something +# out or getting something extra. The source files for each .lo are +# listed in the Makefile.am's in the subdirectories. + +# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning +# +# 1. No interfaces changed, only implementations (good): Increment REVISION. +# +# 2. Interfaces added, none removed (good): Increment CURRENT, increment +# AGE, set REVISION to 0. +# +# 3. Interfaces removed (BAD, breaks upward compatibility): Increment +# CURRENT, set AGE and REVISION to 0. +# +# Do this separately for libgmp and libmp, only do it just before a release. +# +# GMP -version-info +# release libgmp libmp +# 2.0.x - - +# 3.0 3:0:0 3:0:0 +# 3.0.1 3:1:0 3:0:0 +# 3.1 4:0:1 4:0:1 +# 3.1.1 4:1:1 4:1:1 +# +# +# Starting at 3:0:0 is a slight abuse of the versioning system, but it +# ensures we're past soname libgmp.so.2, which is what has been used on +# Debian GNU/Linux packages of gmp 2. 
Pretend gmp 2 was 2:0:0, so the +# interface changes for gmp 3 mean 3:0:0 is right. + + +LIBGMP_LT_CURRENT = 4 +LIBGMP_LT_REVISION = 1 +LIBGMP_LT_AGE = 1 + +LIBMP_LT_CURRENT = 4 +LIBMP_LT_REVISION = 1 +LIBMP_LT_AGE = 1 + +AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr + +SUBDIRS = mpn mpz + +include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION) +EXTRA_HEADERS = mp.h + +lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION) + +EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos + +DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@ + +MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \ + mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \ + mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \ + mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \ + mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \ + mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \ + mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \ + mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \ + mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \ + mpz/fits_uint_p.lo mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \ + mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \ + mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \ + mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \ + mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \ + mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \ + mpz/lcm.lo mpz/legendre.lo \ + mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \ + mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \ + mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \ + mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo 
mpz/root.lo \ + mpz/rrandomb.lo \ + mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \ + mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \ + mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \ + mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \ + mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \ + mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \ + mpz/urandomm.lo mpz/xor.lo + +MPN_OBJECTS = @mpn_objs_in_libgmp@ + +MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \ + mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \ + mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \ + mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo + + + +@WANT_MPFR_TRUE@MPFR_HEADERS_OPTION = @WANT_MPFR_TRUE@mpfr/mpfr.h +@WANT_MPFR_TRUE@MPFR_OBJECTS_OPTION = @WANT_MPFR_TRUE@$(MPFR_OBJECTS) +@WANT_MPFR_TRUE@MPFR_LIBADD_OPTION = @WANT_MPFR_TRUE@-lm +libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \ + mp_clz_tab.c mp_minv_tab.c \ + version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c + +libgmp_la_DEPENDENCIES = \ + $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \ + $(MPFR_OBJECTS_OPTION) + +libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION) +libgmp_la_LDFLAGS = \ + -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) + + +@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = @WANT_MPBSD_TRUE@mp.h +@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = @WANT_MPBSD_TRUE@libmp.la +libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \ + mp_minv_tab.c mp_set_fns.c stack-alloc.c + +libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \ + mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \ + mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \ + mpz/sub.lo + +libmp_la_LIBADD = 
$(libmp_la_DEPENDENCIES) +libmp_la_LDFLAGS = \ + -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) + + +info_TEXINFOS = gmp.texi +subdir = . +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(lib_LTLIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I. +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +ANSI2KNR = @ANSI2KNR@ +am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo memory$U.lo \ +mp_set_fns$U.lo mp_clz_tab$U.lo mp_minv_tab$U.lo rand$U.lo randclr$U.lo \ +randlc$U.lo randlc2x$U.lo randraw$U.lo randsd$U.lo randsdui$U.lo \ +version$U.lo stack-alloc$U.lo mp_bpl$U.lo extract-dbl$U.lo \ +insert-dbl$U.lo +libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS) +am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \ +mp_clz_tab$U.lo mp_minv_tab$U.lo mp_set_fns$U.lo stack-alloc$U.lo +libmp_la_OBJECTS = $(am_libmp_la_OBJECTS) +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CFLAGS = @CFLAGS@ +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(libgmp_la_SOURCES) $(libmp_la_SOURCES) +TEXI2DVI = texi2dvi +# INFO_DEPS = gmp.info +DVIS = gmp.dvi +TEXINFOS = gmp.texi +HEADERS = $(include_HEADERS) + +DIST_COMMON = README $(EXTRA_HEADERS) $(include_HEADERS) ./stamp-h.in \ +AUTHORS COPYING COPYING.LIB ChangeLog INSTALL Makefile.am Makefile.in \ +NEWS acconfig.h acinclude.m4 aclocal.m4 ansi2knr.1 ansi2knr.c \ +config.guess config.in config.sub configure configure.in depcomp \ +install-sh ltconfig ltmain.sh mdate-sh missing mkinstalldirs stamp-vti \ +texinfo.tex version.texi + + +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +GZIP_ENV = --best +depcomp = +SOURCES = $(libgmp_la_SOURCES) 
$(libmp_la_SOURCES) +OBJECTS = $(am_libgmp_la_OBJECTS) $(am_libmp_la_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .c .dvi .info .lo .o .obj .ps .texi .texinfo .txi +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ configure.in acinclude.m4 + cd $(srcdir) && $(ACLOCAL) + +config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck +$(srcdir)/configure: @MAINTAINER_MODE_TRUE@$(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) + cd $(srcdir) && $(AUTOCONF) + +config.h: stamp-h + @if test ! -f $@; then \ + rm -f stamp-h; \ + $(MAKE) stamp-h; \ + else :; fi +stamp-h: $(srcdir)/config.in $(top_builddir)/config.status + @rm -f stamp-h stamp-hT + @echo timestamp > stamp-hT 2> /dev/null + cd $(top_builddir) \ + && CONFIG_FILES= CONFIG_HEADERS=config.h:config.in \ + $(SHELL) ./config.status + @mv stamp-hT stamp-h +$(srcdir)/config.in: @MAINTAINER_MODE_TRUE@$(srcdir)/./stamp-h.in + @if test ! 
-f $@; then \ + rm -f $(srcdir)/./stamp-h.in; \ + $(MAKE) $(srcdir)/./stamp-h.in; \ + else :; fi +$(srcdir)/./stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4) acconfig.h + @rm -f $(srcdir)/./stamp-h.in $(srcdir)/./stamp-h.inT + @echo timestamp > $(srcdir)/./stamp-h.inT 2> /dev/null + cd $(top_srcdir) && $(AUTOHEADER) + @mv $(srcdir)/./stamp-h.inT $(srcdir)/./stamp-h.in + +mostlyclean-hdr: + +clean-hdr: + +distclean-hdr: + -rm -f config.h + +maintainer-clean-hdr: + +mostlyclean-libLTLIBRARIES: + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + +distclean-libLTLIBRARIES: + +maintainer-clean-libLTLIBRARIES: + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(libdir) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + echo " $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \ + $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \ + done + +mostlyclean-compile: + -rm -f *.o core *.core + -rm -f *.$(OBJEXT) + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + +maintainer-clean-libtool: + +mostlyclean-krextra: + +clean-krextra: + -rm -f ansi2knr + +distclean-krextra: + +maintainer-clean-krextra: +ansi2knr: ansi2knr.$(OBJEXT) + $(LINK) ansi2knr.$(OBJEXT) $(LIBS) +ansi2knr.$(OBJEXT): $(CONFIG_HEADER) + + +mostlyclean-kr: + -rm -f *_.c + +clean-kr: + +distclean-kr: + +maintainer-clean-kr: + +gmp.dll: libgmp.a + dllwrap -mno-cygwin --target=i386-unknown-mingw32 \ + --export-all --dllname gmp.dll 
--output-lib=libgmp_imp.a \ + -o gmp.dll libgmp.a + +libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libgmp_la_LDFLAGS) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS) + +libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES) + $(LINK) -rpath $(libdir) $(libmp_la_LDFLAGS) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS) +.c.o: + $(COMPILE) -c $< +.c.obj: + $(COMPILE) -c `cygpath -w $<` +.c.lo: + $(LTCOMPILE) -c -o $@ $< +assert_.c: assert.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > assert_.c +compat_.c: compat.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > compat_.c +errno_.c: errno.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > errno_.c +extract-dbl_.c: extract-dbl.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > extract-dbl_.c +insert-dbl_.c: insert-dbl.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/insert-dbl.c; then echo $(srcdir)/insert-dbl.c; else echo insert-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > insert-dbl_.c +memory_.c: memory.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > memory_.c +mp_bpl_.c: mp_bpl.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_bpl_.c +mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_clz_tab_.c +mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_minv_tab_.c +mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_set_fns_.c +rand_.c: rand.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > rand_.c +randclr_.c: randclr.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randclr_.c +randlc_.c: randlc.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc.c; then echo $(srcdir)/randlc.c; else echo randlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc_.c +randlc2x_.c: randlc2x.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc2x_.c +randraw_.c: randraw.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f 
$(srcdir)/randraw.c; then echo $(srcdir)/randraw.c; else echo randraw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randraw_.c +randsd_.c: randsd.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsd_.c +randsdui_.c: randsdui.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsdui_.c +stack-alloc_.c: stack-alloc.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stack-alloc.c; then echo $(srcdir)/stack-alloc.c; else echo stack-alloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > stack-alloc_.c +version_.c: version.c $(ANSI2KNR) + $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > version_.c +assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \ +errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \ +insert-dbl_.$(OBJEXT) insert-dbl_.lo memory_.$(OBJEXT) memory_.lo \ +mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \ +mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo mp_set_fns_.$(OBJEXT) \ +mp_set_fns_.lo rand_.$(OBJEXT) rand_.lo randclr_.$(OBJEXT) randclr_.lo \ +randlc_.$(OBJEXT) randlc_.lo randlc2x_.$(OBJEXT) randlc2x_.lo \ +randraw_.$(OBJEXT) randraw_.lo randsd_.$(OBJEXT) randsd_.lo \ +randsdui_.$(OBJEXT) randsdui_.lo stack-alloc_.$(OBJEXT) stack-alloc_.lo \ +version_.$(OBJEXT) version_.lo : $(ANSI2KNR) + +$(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@stamp-vti + @: + +$(srcdir)/stamp-vti: gmp.texi $(top_srcdir)/configure.in + @echo "@set UPDATED `$(SHELL) $(srcdir)/mdate-sh $(srcdir)/gmp.texi`" > vti.tmp + @echo "@set 
EDITION $(VERSION)" >> vti.tmp + @echo "@set VERSION $(VERSION)" >> vti.tmp + @cmp -s vti.tmp $(srcdir)/version.texi \ + || (echo "Updating $(srcdir)/version.texi"; \ + cp vti.tmp $(srcdir)/version.texi) + -@rm -f vti.tmp + @cp $(srcdir)/version.texi $@ + +mostlyclean-vti: + -rm -f vti.tmp + +clean-vti: + +distclean-vti: + +maintainer-clean-vti: + -@MAINTAINER_MODE_TRUE@rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi + +# gmp.info: gmp.texi version.texi +# gmp.dvi: gmp.texi version.texi + + +DVIPS = dvips + +.texi.info: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` + +.texi.dvi: + TEXINPUTS=$(srcdir):$$TEXINPUTS \ + MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< + +.texi: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` + +.texinfo.info: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` + +.texinfo: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` + +.texinfo.dvi: + TEXINPUTS=$(srcdir):$$TEXINPUTS \ + MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< + +.txi.info: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` + +.txi.dvi: + TEXINPUTS=$(srcdir):$$TEXINPUTS \ + MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< + +.txi: + @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] + cd $(srcdir) \ + && $(MAKEINFO) `echo $< | sed 's,.*/,,'` +.dvi.ps: + $(DVIPS) $< -o $@ + +install-info-am: $(INFO_DEPS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(infodir) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + d=$(srcdir); \ + for ifile in `CDPATH=: && cd $$d && echo $$file $$file-[0-9] $$file-[0-9][0-9]`; do \ + if test -f $$d/$$ifile; then \ + echo " $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile"; \ + $(INSTALL_DATA) $$d/$$ifile 
$(DESTDIR)$(infodir)/$$ifile; \ + else : ; fi; \ + done; \ + done + @$(POST_INSTALL) + @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \ + list='$(INFO_DEPS)'; \ + for file in $$list; do \ + echo " install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file";\ + install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file || :;\ + done; \ + else : ; fi + +uninstall-info: + $(PRE_UNINSTALL) + @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \ + list='$(INFO_DEPS)'; \ + for file in $$list; do \ + echo " install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file"; \ + install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file; \ + done; \ + else :; fi + @$(NORMAL_UNINSTALL) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + (if cd $(DESTDIR)$(infodir); then \ + echo " rm -f $$file $$file-[0-9] $$file-[0-9][0-9])"; \ + rm -f $$file $$file-[0-9] $$file-[0-9][0-9]; \ + else :; fi); \ + done + +dist-info: $(INFO_DEPS) + list='$(INFO_DEPS)'; \ + for base in $$list; do \ + d=$(srcdir); \ + for file in `CDPATH=: && cd $$d && eval echo $$base*`; do \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file; \ + done; \ + done + +mostlyclean-aminfo: + -rm -f gmp.aux gmp.cp gmp.cps gmp.dvi gmp.fn gmp.fns gmp.pgs gmp.ky \ + gmp.kys gmp.ps gmp.log gmp.pg gmp.toc gmp.tp gmp.tps gmp.vr \ + gmp.vrs gmp.op gmp.tr gmp.cv gmp.cn gmp.cm gmp.ov + +clean-aminfo: + +distclean-aminfo: + +maintainer-clean-aminfo: + cd $(srcdir) && for i in $(INFO_DEPS); do \ + rm -f $$i; \ + if test "`echo $$i-[0-9]*`" != "$$i-[0-9]*"; then \ + rm -f $$i-[0-9]*; \ + fi; \ + done + +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(includedir) + @list='$(include_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \ + f="`echo $$p | sed -e 
's|^.*/||'`"; \ + echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f"; \ + $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; for p in $$list; do \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " rm -f $(DESTDIR)$(includedir)/$$f"; \ + rm -f $(DESTDIR)$(includedir)/$$f; \ + done + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. + +all-recursive install-data-recursive install-exec-recursive \ +installdirs-recursive install-recursive uninstall-recursive \ +check-recursive installcheck-recursive info-recursive dvi-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ + rev="$$subdir $$rev"; \ + if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \ + done; \ + test "$$dot_seen" = "no" && rev=". 
$$rev"; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -f$$here/ID $$unique $(LISP) + +TAGS: tags-recursive $(HEADERS) $(SOURCES) config.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)config.in$$unique$(LISP)$$tags" \ + || etags $(ETAGS_ARGS) $$tags config.in $$unique $(LISP) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) + + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) + GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(AMTAR) xf - + chmod -R a-w $(distdir); chmod a+w $(distdir) + mkdir $(distdir)/=build + mkdir $(distdir)/=inst + chmod a-w $(distdir) + dc_install_base=`CDPATH=: && cd $(distdir)/=inst && pwd` \ + && cd $(distdir)/=build \ + && ../configure --srcdir=.. --prefix=$$dc_install_base \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && test `find $$dc_install_base -type f -print | wc -l` -le 1 \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && $(MAKE) $(AM_MAKEFLAGS) distclean \ + && rm -f $(distdir).tar.gz \ + && test `find . -type f -print | wc -l` -eq 0 + -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) + @banner="$(distdir).tar.gz is ready for distribution"; \ + dashes=`echo "$$banner" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + echo "$$dashes" +dist: distdir + -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) + $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz + -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) +dist-all: distdir + -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) + $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz + -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) +distdir: $(DISTFILES) + @if sed 15q $(srcdir)/NEWS | fgrep -e "$(VERSION)" > /dev/null; then :; else \ + echo "NEWS not updated; not releasing" 1>&2; \ + exit 1; \ + fi + -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) + mkdir $(distdir) + $(mkinstalldirs) $(distdir)/mpfr + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pR $$d/$$file $(distdir); \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + for subdir in $(SUBDIRS); do \ + if test "$$subdir" = .; then :; else \ + test -d $(distdir)/$$subdir \ + || mkdir $(distdir)/$$subdir \ + || exit 1; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-info + $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-hook +info-am: $(INFO_DEPS) +info: info-recursive +dvi-am: $(DVIS) +dvi: dvi-recursive +check-am: all-am +check: check-recursive +installcheck-am: +installcheck: installcheck-recursive +all-recursive-am: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +install-exec-am: install-libLTLIBRARIES +install-exec: install-exec-recursive + +install-data-am: install-info-am install-includeHEADERS +install-data: install-data-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-recursive +uninstall-am: uninstall-libLTLIBRARIES uninstall-info \ + uninstall-includeHEADERS +uninstall: uninstall-recursive +all-am: Makefile $(INFO_DEPS) $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) \ + config.h +all-redirect: all-recursive-am +install-strip: + $(MAKE) 
$(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install +installdirs: installdirs-recursive +installdirs-am: + $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(infodir) \ + $(DESTDIR)$(includedir) + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + -rm -f Makefile.in +mostlyclean-am: mostlyclean-hdr mostlyclean-libLTLIBRARIES \ + mostlyclean-compile mostlyclean-libtool \ + mostlyclean-krextra mostlyclean-kr mostlyclean-vti \ + mostlyclean-aminfo mostlyclean-tags mostlyclean-generic + +mostlyclean: mostlyclean-recursive + +clean-am: clean-hdr clean-libLTLIBRARIES clean-compile clean-libtool \ + clean-krextra clean-kr clean-vti clean-aminfo \ + clean-tags clean-generic mostlyclean-am + +clean: clean-recursive + +distclean-am: distclean-hdr distclean-libLTLIBRARIES distclean-compile \ + distclean-libtool distclean-krextra distclean-kr \ + distclean-vti distclean-aminfo distclean-tags \ + distclean-generic clean-am + -rm -f libtool + +distclean: distclean-recursive + -rm -f config.status + +maintainer-clean-am: maintainer-clean-hdr \ + maintainer-clean-libLTLIBRARIES \ + maintainer-clean-compile maintainer-clean-libtool \ + maintainer-clean-krextra maintainer-clean-kr \ + maintainer-clean-vti maintainer-clean-aminfo \ + maintainer-clean-tags maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-recursive + -rm -f config.status + +.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \ +mostlyclean-libLTLIBRARIES distclean-libLTLIBRARIES \ +clean-libLTLIBRARIES maintainer-clean-libLTLIBRARIES \ +uninstall-libLTLIBRARIES install-libLTLIBRARIES mostlyclean-compile \ +distclean-compile clean-compile maintainer-clean-compile \ +mostlyclean-libtool distclean-libtool clean-libtool \ +maintainer-clean-libtool mostlyclean-krextra distclean-krextra \ +clean-krextra maintainer-clean-krextra mostlyclean-kr distclean-kr \ +clean-kr maintainer-clean-kr mostlyclean-vti distclean-vti clean-vti \ +maintainer-clean-vti install-info-am uninstall-info mostlyclean-aminfo \ +distclean-aminfo clean-aminfo maintainer-clean-aminfo \ +uninstall-includeHEADERS install-includeHEADERS install-recursive \ +uninstall-recursive install-data-recursive uninstall-data-recursive \ +install-exec-recursive uninstall-exec-recursive installdirs-recursive \ +uninstalldirs-recursive all-recursive check-recursive \ +installcheck-recursive info-recursive dvi-recursive \ +mostlyclean-recursive distclean-recursive clean-recursive \ +maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ +distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ +dvi-am dvi check check-am installcheck-am installcheck all-recursive-am \ +install-exec-am install-exec install-data-am install-data install-am \ +install uninstall-am uninstall all-redirect all-am all install-strip \ +installdirs-am installdirs mostlyclean-generic distclean-generic \ +clean-generic maintainer-clean-generic clean mostlyclean distclean \ +maintainer-clean + + +# Don't ship CVS directories or emacs backups. +dist-hook: + -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \ + | xargs rm -rf + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/rts/gmp/NEWS b/rts/gmp/NEWS new file mode 100644 index 0000000000..3b549d59f3 --- /dev/null +++ b/rts/gmp/NEWS @@ -0,0 +1,136 @@ +Changes between MP version 3.1 and 3.1.1 + +* Bug fixes for division (rare), mpf_get_str, FFT, and miscellaneous minor + things. + +Changes between MP version 3.0 and 3.1 + +* Bug fixes. +* Improved `make check' running more tests. +* Tuned algorithm cutoff points for many machines. This will improve speed for + a lot of operations, in some cases by a large amount. +* Major speed improvements: Alpha 21264. +* Some speed improvements: Cray vector computers, AMD K6 and Athlon, Intel P5 + and Pentium Pro/II/III. +* The mpf_get_prec function now works as it did in GMP 2. +* New utilities for auto-tuning and speed measuring. +* Multiplication now optionally uses FFT for very large operands. (To enable + it, pass --enable-fft to configure.) +* Support for new systems: Solaris running on x86, FreeBSD 5, HP-UX 11, Cray + vector computers, Rhapsody, Nextstep/Openstep, MacOS. +* Support for shared libraries on 32-bit HPPA. +* New integer functions: mpz_mul_si, mpz_odd_p, mpz_even_p. +* New Kronecker symbol functions: mpz_kronecker_si, mpz_kronecker_ui, + mpz_si_kronecker, mpz_ui_kronecker. +* New rational functions: mpq_out_str, mpq_swap. +* New float functions: mpf_swap. +* New mpn functions: mpn_divexact_by3c, mpn_tdiv_qr. +* New EXPERIMENTAL function layer for accurate floating-point arithmetic, mpfr. + To try it, pass --enable-mpfr to configure. See the mpfr subdirectory for + more information; it is not documented in the main GMP manual. + +Changes between MP version 3.0 and 3.0.1 + +* Memory leaks in gmp_randinit and mpz_probab_prime_p fixed. +* Documentation for gmp_randinit fixed. Misc documentation errors fixed. + +Changes between MP version 2.0 and 3.0 + +* Source level compatibility with past releases (except mpn_gcd). +* Bug fixes. 
+* Much improved speed thanks to both host independent and host dependent + optimizations. +* Switch to autoconf/automake/libtool. +* Support for building libgmp as a shared library. +* Multiplication and squaring using 3-way Toom-Cook. +* Division using the Burnikel-Ziegler method. +* New functions computing binomial coefficients: mpz_bin_ui, mpz_bin_uiui. +* New function computing Fibonacci numbers: mpz_fib_ui. +* New random number generators: mpf_urandomb, mpz_rrandomb, mpz_urandomb, + mpz_urandomm, gmp_randclear, gmp_randinit, gmp_randinit_lc_2exp, gmp_randseed, + gmp_randseed_ui. +* New function for quickly extracting limbs: mpz_getlimbn. +* New functions performing integer size tests: mpz_fits_sint_p, + mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p, mpz_fits_ulong_p, + mpz_fits_ushort_p. +* New mpf functions: mpf_ceil, mpf_floor, mpf_pow_ui, mpf_trunc. +* New mpq function: mpq_set_d. +* New mpz functions: mpz_addmul_ui, mpz_cmpabs, mpz_cmpabs_ui, mpz_lcm, + mpz_nextprime, mpz_perfect_power_p, mpz_remove, mpz_root, mpz_swap, + mpz_tdiv_ui, mpz_tstbit, mpz_xor. +* New mpn function: mpn_divexact_by3. +* New CPU support: DEC Alpha 21264, AMD K6 and Athlon, HPPA 2.0 and 64, + Intel Pentium Pro and Pentium-II/III, Sparc 64, PowerPC 64. +* Almost 10 times faster mpz_invert and mpn_gcdext. +* The interface of mpn_gcd has changed. +* Better support for MIPS R4x000 and R5000 under Irix 6. +* Improved support for SPARCv8 and SPARCv9 processors. + +Changes between MP version 2.0 and 2.0.2 + +* Many bug fixes. + +Changes between MP version 1.3.2 and 2.0 + +* Division routines in the mpz class have changed. There are three classes of + functions that round the quotient to -infinity, 0, and +infinity, + respectively. The first class of functions has names that begin with + mpz_fdiv (f is short for floor), the second class' names begin with mpz_tdiv + (t is short for trunc), and the third class' names begin with mpz_cdiv (c is + short for ceil). 
+ + The old division routines beginning with mpz_m are similar to the new + mpz_fdiv, with the exception that some of the new functions return useful + values. + + The old function names can still be used. All the old function names will + now do floor division, not trunc division as some of them used to. This was + changed to make the functions more compatible with common mathematical + practice. + + The mpz_mod and mpz_mod_ui functions now compute the mathematical mod + function. I.e., the sign of the 2nd argument is ignored. + +* The mpq assignment functions do not canonicalize their results. A new + function, mpq_canonicalize, must be called by the user if the result is not + known to be canonical. +* The mpn functions are now documented. These functions are intended for + very time critical applications, or applications that need full control over + memory allocation. Note that the mpn interface is irregular and hard to + use. +* New functions for arbitrary precision floating point arithmetic. Names + begin with `mpf_'. Associated type mpf_t. +* New and improved mpz functions, including much faster GCD, fast exact + division (mpz_divexact), bit scan (mpz_scan0 and mpz_scan1), and number + theoretical functions like Jacobi (mpz_jacobi) and multiplicative inverse + (mpz_invert). +* New variable types (mpz_t and mpq_t) are available that make the syntax of + mpz and mpq calls nicer (no need for & before variables). The MP_INT and + MP_RAT types are still available for compatibility. +* Uses GNU configure. This makes it possible to choose target architecture + and CPU variant, and to compile into a separate object directory. +* Carefully optimized assembly for important inner loops. Support for DEC + Alpha, AMD 29000, HPPA 1.0 and 1.1, Intel Pentium and generic x86, Intel + i960, Motorola MC68000, MC68020, MC88100, and MC88110, Motorola/IBM + PowerPC, National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7, + SuperSPARC, generic SPARCv8, and DEC VAX. 
Some support also for ARM, + Clipper, IBM ROMP (RT), and Pyramid AP/XP. +* Faster. Thanks to the assembler code, new algorithms, and general tuning. + In particular, the speed on machines without GCC is improved. +* Support for machines without alloca. +* Now under the LGPL. + +INCOMPATIBILITIES BETWEEN GMP 1 AND GMP 2 + +* mpq assignment functions do not canonicalize their results. +* mpz division functions round differently. +* mpz mod functions now really compute mod. +* mpz_powm and mpz_powm_ui now really use mod for reduction. + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/README b/rts/gmp/README new file mode 100644 index 0000000000..177c97eb12 --- /dev/null +++ b/rts/gmp/README @@ -0,0 +1,84 @@ + + THE GNU MP LIBRARY + + +GNU MP is a library for arbitrary precision arithmetic, operating on signed +integers, rational numbers, and floating point numbers. It has a rich set of +functions, and the functions have a regular interface. + +GNU MP is designed to be as fast as possible, both for small operands and huge +operands. The speed is achieved by using fullwords as the basic arithmetic +type, by using fast algorithms, with carefully optimized assembly code for the +most common inner loops for lots of CPUs, and by a general emphasis on speed +(instead of simplicity or elegance). + +GNU MP is believed to be faster than any other similar library. Its advantage +increases with operand sizes for certain operations, since GNU MP in many +cases has asymptotically faster algorithms. + +GNU MP is free software and may be freely copied on the terms contained in the +files COPYING.LIB and COPYING (most of GNU MP is under the former, some under +the latter). + + + + OVERVIEW OF GNU MP + +There are five classes of functions in GNU MP. + + 1. Signed integer arithmetic functions (mpz). These functions are intended + to be easy to use, with their regular interface. The associated type is + `mpz_t'. + + 2. 
Rational arithmetic functions (mpq). For now, just a small set of + functions necessary for basic rational arithmetic. The associated type + is `mpq_t'. + + 3. Floating-point arithmetic functions (mpf). If the C type `double' + doesn't give enough precision for your application, declare your + variables as `mpf_t' instead, set the precision to any number desired, + and call the functions in the mpf class for the arithmetic operations. + + 4. Positive-integer, hard-to-use, very low overhead functions are in the + mpn class. No memory management is performed. The caller must ensure + enough space is available for the results. The set of functions is not + regular, nor is the calling interface. These functions accept input + arguments in the form of pairs consisting of a pointer to the least + significant word, and an integral size telling how many limbs (= words) + the pointer points to. + + Almost all calculations, in the entire package, are made by calling these + low-level functions. + + 5. Berkeley MP compatible functions. + + To use these functions, include the file "mp.h". You can test if you are + using the GNU version by testing if the symbol __GNU_MP__ is defined. + +For more information on how to use GNU MP, please refer to the documentation. +It is composed from the file gmp.texi, and can be displayed on the screen or +printed. How to do that, as well as how to build the library, is described in +the INSTALL file in this directory. + + + + REPORTING BUGS + +If you find a bug in the library, please make sure to tell us about it! + +You should first check the GNU MP web pages at http://www.swox.com/gmp/, +under "Status of the current release". There will be patches for all known +serious bugs there. + +Report bugs to bug-gmp@gnu.org. What information is needed in a good bug +report is described in the manual. The same address can be used for +suggesting modifications and enhancements. 
+ + + + +---------------- +Local variables: +mode: text +fill-column: 78 +End: diff --git a/rts/gmp/acconfig.h b/rts/gmp/acconfig.h new file mode 100644 index 0000000000..dfb1b0b039 --- /dev/null +++ b/rts/gmp/acconfig.h @@ -0,0 +1,92 @@ +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +@TOP@ + +/* Define if a limb is long long. */ +#undef _LONG_LONG_LIMB + +/* Define if we have native implementation of function. 
*/ +#undef HAVE_NATIVE_ +#undef HAVE_NATIVE_mpn_add +#undef HAVE_NATIVE_mpn_add_1 +#undef HAVE_NATIVE_mpn_add_n +#undef HAVE_NATIVE_mpn_add_nc +#undef HAVE_NATIVE_mpn_addmul_1 +#undef HAVE_NATIVE_mpn_addmul_1c +#undef HAVE_NATIVE_mpn_addsub_n +#undef HAVE_NATIVE_mpn_addsub_nc +#undef HAVE_NATIVE_mpn_and_n +#undef HAVE_NATIVE_mpn_andn_n +#undef HAVE_NATIVE_mpn_bdivmod +#undef HAVE_NATIVE_mpn_cmp +#undef HAVE_NATIVE_mpn_com_n +#undef HAVE_NATIVE_mpn_copyd +#undef HAVE_NATIVE_mpn_copyi +#undef HAVE_NATIVE_mpn_divexact_by3c +#undef HAVE_NATIVE_mpn_divrem +#undef HAVE_NATIVE_mpn_divrem_1 +#undef HAVE_NATIVE_mpn_divrem_1c +#undef HAVE_NATIVE_mpn_divrem_2 +#undef HAVE_NATIVE_mpn_divrem_newton +#undef HAVE_NATIVE_mpn_divrem_classic +#undef HAVE_NATIVE_mpn_dump +#undef HAVE_NATIVE_mpn_gcd +#undef HAVE_NATIVE_mpn_gcd_1 +#undef HAVE_NATIVE_mpn_gcdext +#undef HAVE_NATIVE_mpn_get_str +#undef HAVE_NATIVE_mpn_hamdist +#undef HAVE_NATIVE_mpn_invert_limb +#undef HAVE_NATIVE_mpn_ior_n +#undef HAVE_NATIVE_mpn_iorn_n +#undef HAVE_NATIVE_mpn_lshift +#undef HAVE_NATIVE_mpn_mod_1 +#undef HAVE_NATIVE_mpn_mod_1c +#undef HAVE_NATIVE_mpn_mul +#undef HAVE_NATIVE_mpn_mul_1 +#undef HAVE_NATIVE_mpn_mul_1c +#undef HAVE_NATIVE_mpn_mul_basecase +#undef HAVE_NATIVE_mpn_mul_n +#undef HAVE_NATIVE_mpn_nand_n +#undef HAVE_NATIVE_mpn_nior_n +#undef HAVE_NATIVE_mpn_perfect_square_p +#undef HAVE_NATIVE_mpn_popcount +#undef HAVE_NATIVE_mpn_preinv_mod_1 +#undef HAVE_NATIVE_mpn_random2 +#undef HAVE_NATIVE_mpn_random +#undef HAVE_NATIVE_mpn_rawrandom +#undef HAVE_NATIVE_mpn_rshift +#undef HAVE_NATIVE_mpn_scan0 +#undef HAVE_NATIVE_mpn_scan1 +#undef HAVE_NATIVE_mpn_set_str +#undef HAVE_NATIVE_mpn_sqrtrem +#undef HAVE_NATIVE_mpn_sqr_basecase +#undef HAVE_NATIVE_mpn_sub +#undef HAVE_NATIVE_mpn_sub_1 +#undef HAVE_NATIVE_mpn_sub_n +#undef HAVE_NATIVE_mpn_sub_nc +#undef HAVE_NATIVE_mpn_submul_1 +#undef HAVE_NATIVE_mpn_submul_1c +#undef HAVE_NATIVE_mpn_udiv_w_sdiv +#undef HAVE_NATIVE_mpn_umul_ppmm +#undef 
HAVE_NATIVE_mpn_udiv_qrnnd +#undef HAVE_NATIVE_mpn_xor_n +#undef HAVE_NATIVE_mpn_xnor_n diff --git a/rts/gmp/acinclude.m4 b/rts/gmp/acinclude.m4 new file mode 100644 index 0000000000..a02394a963 --- /dev/null +++ b/rts/gmp/acinclude.m4 @@ -0,0 +1,835 @@ +dnl GMP specific autoconf macros + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl GMP_HEADER_GETVAL(NAME,FILE) +dnl ---------------------------- +dnl Expand to the value of a "#define NAME" from the given FILE. +dnl The regexps here aren't very rugged, but are enough for gmp. +dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted. + +define(GMP_HEADER_GETVAL, +[patsubst(patsubst( +esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]), +[^.*$1[ ]+],[]), +[[ + ]*$],[])]) + + +dnl GMP_VERSION +dnl ----------- +dnl The gmp version number, extracted from the #defines in gmp.h. +dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if +dnl patchlevel > 0. 
+ +define(GMP_VERSION, +[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl +.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl +ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1, +[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])]) + + +dnl GMP_PROG_M4() +dnl ------------- +dnl +dnl Find a working m4, either in $PATH or likely locations, and setup $M4 +dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user +dnl choice and is accepted with no checks. GMP_PROG_M4 is like +dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if +dnl it's good enough. +dnl +dnl See mpn/asm-defs.m4 for details on the known bad m4s. + +AC_DEFUN(GMP_PROG_M4, +[AC_CACHE_CHECK([for suitable m4], + gmp_cv_prog_m4, +[if test -n "$M4"; then + gmp_cv_prog_m4="$M4" +else + cat >conftest.m4 <<\EOF +dnl must protect this against being expanded during autoconf m4! +[define(dollarhash,``$][#'')dnl +ifelse(dollarhash(x),1,`define(t1,Y)', +``bad: $][# not supported (SunOS /usr/bin/m4) +'')dnl +ifelse(eval(89),89,`define(t2,Y)', +`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4) +')dnl +ifelse(t1`'t2,YY,`good +')dnl] +EOF + echo "trying m4" 1>&AC_FD_CC + gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`" + echo "$gmp_tmp_val" 1>&AC_FD_CC + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="m4" + else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="$PATH:/usr/5bin" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ echo "trying $ac_dir/m4" 1>&AC_FD_CC + gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`" + echo "$gmp_tmp_val" 1>&AC_FD_CC + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="$ac_dir/m4" + break + fi + done + IFS="$ac_save_ifs" + if test -z "$gmp_cv_prog_m4"; then + AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).]) + fi + fi + rm -f conftest.m4 +fi]) +M4="$gmp_cv_prog_m4" +AC_SUBST(M4) +]) + + +dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC]) +dnl Find first working compiler in CC_LIST. +dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to +dnl produce 64-bit code. +dnl NOTE: If a compiler needs any special flags for producing 64-bit code, +dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}' +dnl is the name of the compiler. +dnl Set CC to the name of the first working compiler. +dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and +dnl CFLAGS64 to flags to use. +dnl This macro does not test if any of the compilers found is a GNU compiler. +dnl To do this, when you have finally made up your mind on which one to use, +dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will +dnl also make sure that your selection of CFLAGS is valid. +dnl +AC_DEFUN(GMP_PROG_CC_FIND, +[AC_BEFORE([$0], [CC_PROG_CPP]) +ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]") +ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]") + +CC32= +CC64= +for c in $gmp_cc_list; do + # Avoid cache hits. + unset CC + unset ac_cv_prog_CC + AC_CHECK_TOOL(CC, $c, $c) + if test -n "$CC"; then + eval c_flags=\$gmp_cflags_$c + GMP_PROG_CC_WORKS($CC, $c_flags, + gmp_prog_cc_works=yes, + gmp_prog_cc_works=no) + + if test "$gmp_prog_cc_works" != "yes"; then + continue + fi + + # Save first working compiler, whether 32- or 64-bit capable. 
+ if test -z "$CC32"; then + CC32="$CC" + fi + if test "$gmp_req_64bit_cc" = "yes"; then + eval c_flags=\$gmp_cflags64_$c + + # Verify that the compiler works in 64-bit mode as well. + # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link. + GMP_PROG_CC_WORKS($c, $c_flags, + gmp_prog_cc_works=yes, + gmp_prog_cc_works=no) + + if test "$gmp_prog_cc_works" = "yes"; then + GMP_CHECK_CC_64BIT($c, $c_flags) + if test "$gmp_cv_cc_64bit" = "yes"; then + test -z "$CC64" && CC64="$c" + test -z "$CFLAGS64" && CFLAGS64="$c_flags" + # We have CC64 so we're done. + break + fi + fi + else + # We have CC32, and we don't need a 64-bit compiler so we're done. + break + fi + fi +done +CC="$CC32" +])dnl + +dnl GMP_PROG_CC_SELECT +dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler. +dnl Cache the result as `ac_cv_prog_CC'. +AC_DEFUN(GMP_PROG_CC_SELECT, +[AC_BEFORE([$0], [CC_PROG_CPP]) +AC_PROG_CC_WORKS +AC_PROG_CC_GNU + +if test "$ac_cv_prog_gcc" = "yes"; then + GCC=yes +else + GCC= +fi + +# Set CFLAGS if not already set. +if test -z "$CFLAGS"; then + CFLAGS="-g" + if test "$GCC" = "yes"; then + CFLAGS="$CFLAGS -O2" + fi +fi + +AC_SUBST(CC) +AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC") +AC_PROVIDE([AC_PROG_CC]) +])dnl + +dnl GMP_CHECK_CC_64BIT(cc, cflags64) +dnl Find out if `CC' can produce 64-bit code. +dnl Requires NM to be set to nm for target. +dnl FIXME: Cache result. +AC_DEFUN(GMP_CHECK_CC_64BIT, +[ + gmp_tmp_CC_save="$CC" + CC="[$1]" + AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable]) + if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 + fi + gmp_tmp_CFLAGS_save="$CFLAGS" + CFLAGS="[$2]" + + case "$target" in + hppa2.0*-*-*) + # FIXME: If gcc is installed under another name than "gcc", we will + # test the wrong thing. + if test "$CC" != "gcc"; then + dnl Let compiler version A.10.32.30 or higher be ok. 
+ dnl Bad compiler output: + dnl ccom: HP92453-01 G.10.32.05 HP C Compiler + dnl Good compiler output: + dnl ccom: HP92453-01 A.10.32.30 HP C Compiler + echo >conftest.c + gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"` + rm conftest* + gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'` + gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'` + gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'` + gmp_cv_cc_64bit=no + test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \ + && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \ + && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \ + && gmp_cv_cc_64bit=yes + else # gcc + # FIXME: Compile a minimal file and determine if the resulting object + # file is an ELF file. If so, gcc can produce 64-bit code. + # Do we have file(1) for target? + gmp_cv_cc_64bit=no + fi + ;; + mips-sgi-irix6.*) + # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit + # arithmetic but not 64-bit pointers, so the general test for sizeof + # (void *) is not valid. + # Simply try to compile an empty main. If that succeeds return + # true. + AC_TRY_COMPILE( , , + gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no, + gmp_cv_cc_64bit=no) + ;; + *-*-*) + # Allocate an array of size sizeof (void *) and use nm to determine its + # size. We depend on the first declared variable being put at address 0. 
+ cat >conftest.c <<EOF +[char arr[sizeof (void *)]={0}; +char post=0;] +EOF + gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC" + if AC_TRY_EVAL(gmp_compile); then + changequote(<,>)dnl + gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "8"; then + gmp_cv_cc_64bit=yes + else + gmp_cv_cc_64bit=no + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + gmp_cv_cc_64bit=no + fi + rm -f conftest* + ;; + esac + + CC="$gmp_tmp_CC_save" + CFLAGS="$gmp_tmp_CFLAGS_save" + AC_MSG_RESULT($gmp_cv_cc_64bit) +])dnl + +dnl GMP_INIT([M4-DEF-FILE]) +dnl +AC_DEFUN(GMP_INIT, +[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]") +gmp_tmpconfigm4=cnfm4.tmp +gmp_tmpconfigm4i=cnfm4i.tmp +gmp_tmpconfigm4p=cnfm4p.tmp +test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4 +test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i +test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p +])dnl + +dnl GMP_FINISH +dnl ---------- +dnl Create config.m4 from its accumulated parts. +dnl +dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include +dnl of config.m4 is harmless. +dnl +dnl A separate ifdef on the angle bracket quoted part ensures the quoting +dnl style there is respected. The basic defines from gmp_tmpconfigm4 are +dnl fully quoted but are still put under an ifdef in case any have been +dnl redefined by one of the m4 include files. +dnl +dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't +dnl work, since it'd interpret parentheses and quotes in dnl comments, and +dnl having a whole file as a macro argument would overflow the string space +dnl on BSD m4. + +AC_DEFUN(GMP_FINISH, +[AC_REQUIRE([GMP_INIT]) +echo "creating $gmp_configm4" +echo ["dnl $gmp_configm4. 
Generated automatically by configure."] > $gmp_configm4 +if test -f $gmp_tmpconfigm4; then + echo ["changequote(<,>)dnl"] >> $gmp_configm4 + echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4 + cat $gmp_tmpconfigm4 >> $gmp_configm4 + echo [">)"] >> $gmp_configm4 + echo ["changequote(\`,')dnl"] >> $gmp_configm4 + rm $gmp_tmpconfigm4 +fi +echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4 +if test -f $gmp_tmpconfigm4i; then + cat $gmp_tmpconfigm4i >> $gmp_configm4 + rm $gmp_tmpconfigm4i +fi +if test -f $gmp_tmpconfigm4p; then + cat $gmp_tmpconfigm4p >> $gmp_configm4 + rm $gmp_tmpconfigm4p +fi +echo ["')"] >> $gmp_configm4 +echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4 +])dnl + +dnl GMP_INCLUDE(FILE) +AC_DEFUN(GMP_INCLUDE, +[AC_REQUIRE([GMP_INIT]) +echo ["include(\`$1')"] >> $gmp_tmpconfigm4i +])dnl + +dnl GMP_SINCLUDE(FILE) +AC_DEFUN(GMP_SINCLUDE, +[AC_REQUIRE([GMP_INIT]) +echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i +])dnl + +dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION]) +dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ] +dnl [ If LOCATION is `POST', the definition will appear after any ] +dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ] +dnl [ Mind the quoting! No shell variables will get expanded. ] +dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ] +dnl [ config.m4 uses `<' and '>' as quote characters for all defines. ] +AC_DEFUN(GMP_DEFINE, +[AC_REQUIRE([GMP_INIT]) +echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4) +])dnl + +dnl GMP_DEFINE_RAW(STRING, [, LOCATION]) +dnl [ Put STRING in temporary file. ] +dnl [ If LOCATION is `POST', the definition will appear after any ] +dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ] +dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. 
] +AC_DEFUN(GMP_DEFINE_RAW, +[AC_REQUIRE([GMP_INIT]) +echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4) +])dnl + +dnl GMP_CHECK_ASM_LABEL_SUFFIX +dnl Should a label have a colon or not? +AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX, +[AC_CACHE_CHECK([what assembly label suffix to use], + gmp_cv_check_asm_label_suffix, +[case "$target" in + *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;; + *) gmp_cv_check_asm_label_suffix=[":"] ;; +esac +]) +echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) +dnl Shamelessly borrowed from glibc. +AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE, +[AC_CACHE_CHECK([if symbols are prefixed by underscore], + gmp_cv_check_asm_underscore, +[cat > conftest.$ac_ext <<EOF +dnl This sometimes fails to find confdefs.h, for some reason. +dnl [#]line __oline__ "[$]0" +[#]line __oline__ "configure" +#include "confdefs.h" +int underscore_test() { +return; } +EOF +if AC_TRY_EVAL(ac_compile); then + if grep _underscore_test conftest* >/dev/null; then + gmp_cv_check_asm_underscore=yes + else + gmp_cv_check_asm_underscore=no + fi +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_underscore" = "yes"; then + GMP_DEFINE(GSYM_PREFIX, [_]) + ifelse([$1], , :, [$1]) +else + GMP_DEFINE(GSYM_PREFIX, []) + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) +dnl Is parameter to `.align' logarithmic? +dnl Requires NM to be set to nm for target. 
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG, +[AC_REQUIRE([GMP_CHECK_ASM_GLOBL]) +AC_REQUIRE([GMP_CHECK_ASM_DATA]) +AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([if .align assembly directive is logarithmic], + gmp_cv_check_asm_align_log, +[if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 +fi +cat > conftest.s <<EOF + $gmp_cv_check_asm_data + .align 4 + $gmp_cv_check_asm_globl foo + .byte 1 + .align 4 +foo$gmp_cv_check_asm_label_suffix + .byte 2 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + changequote(<,>) + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then + gmp_cv_check_asm_align_log=yes + else + gmp_cv_check_asm_align_log=no + fi +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC +fi +rm -f conftest* +]) +GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"]) +if test "$gmp_cv_check_asm_align_log" = "yes"; then + ifelse([$1], , :, [$1]) +else + ifelse([$2], , :, [$2]) +fi +])dnl + + +dnl GMP_CHECK_ASM_ALIGN_FILL_0x90 +dnl ----------------------------- +dnl Determine whether a ",0x90" suffix works on a .align directive. +dnl This is only meant for use on x86, where 0x90 is a "nop". +dnl +dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00. +dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when +dnl ",0x90" is given. +dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error. +dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect. +dnl +dnl Note that both solaris "as"s only care about ",0x90" if they actually +dnl have to use it to fill something, hence the .byte in the sample. It's +dnl only the second .align that provokes an error or warning. 
+dnl +dnl We prefer to suppress the warning from solaris 2.8 to stop anyone +dnl worrying something might be wrong. + +AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90, +[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text], + gmp_cv_check_asm_align_fill_0x90, +[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +cat > conftest.s <<EOF + $gmp_cv_check_asm_text + .align 4, 0x90 + .byte 0 + .align 4, 0x90 +EOF +gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`" +if test $? = 0; then + echo "$gmp_tmp_val" 1>&AC_FD_CC + if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then + echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC + gmp_cv_check_asm_align_fill_0x90=no + else + gmp_cv_check_asm_align_fill_0x90=yes + fi +else + echo "Non-zero exit code" 1>&AC_FD_CC + echo "$gmp_tmp_val" 1>&AC_FD_CC + gmp_cv_check_asm_align_fill_0x90=no +fi +rm -f conftest* +]) +GMP_DEFINE_RAW( +["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"]) +]) + + +dnl GMP_CHECK_ASM_TEXT +AC_DEFUN(GMP_CHECK_ASM_TEXT, +[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text, +[case "$target" in + *-*-aix*) + changequote({, }) + gmp_cv_check_asm_text={".csect .text[PR]"} + changequote([, ]) + ;; + *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;; + *) gmp_cv_check_asm_text=[".text"] ;; +esac +]) +echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_DATA +dnl Can we say `.data'? +AC_DEFUN(GMP_CHECK_ASM_DATA, +[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data, +[case "$target" in + *-*-aix*) + changequote({, }) + gmp_cv_check_asm_data={".csect .data[RW]"} + changequote([, ]) + ;; + *) gmp_cv_check_asm_data=[".data"] ;; +esac +]) +echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_GLOBL +dnl Can we say `.global'? 
+AC_DEFUN(GMP_CHECK_ASM_GLOBL,
+[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl,
+[# No assembler probe is run here: the directive is chosen from the target
+# triplet alone.  HP-UX targets get .export, every other target .globl.
+case "$target" in
+  *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;;
+  *) gmp_cv_check_asm_globl=[".globl"] ;;
+esac
+])
+# Record the result as the GLOBL definition in the temporary config.m4 part.
+echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_TYPE
+dnl Can we say `.type'?
+dnl
+dnl Try ".type sym,Xfunction" with X being each of @, # and % in turn and
+dnl keep the first form the assembler accepts.  TYPE is then defined in
+dnl config.m4 to emit that directive from its two arguments; if no form
+dnl assembles, TYPE becomes dnl so the directive is dropped.
+AC_DEFUN(GMP_CHECK_ASM_TYPE,
+[AC_CACHE_CHECK([how the .type assembly directive should be used],
+gmp_cv_check_asm_type,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+for gmp_tmp_prefix in @ \# %; do
+  echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+  if AC_TRY_EVAL(ac_assemble); then
+    gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]"
+    break
+  fi
+done
+# Remove the probe source and anything the assembler produced from it, as
+# the other assembler checks in this file do.
+rm -f conftest*
+if test -z "$gmp_cv_check_asm_type"; then
+  gmp_cv_check_asm_type="[dnl]"
+fi
+])
+echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_SIZE
+dnl Can we say `.size'?
+dnl
+dnl Assemble a single ".size sym,1" line.  If that works, SIZE is defined
+dnl in config.m4 to emit a .size directive built from its two arguments;
+dnl otherwise SIZE becomes dnl so the directive is dropped.
+AC_DEFUN(GMP_CHECK_ASM_SIZE,
+[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+echo ' .size sym,1' > conftest.s
+if AC_TRY_EVAL(ac_assemble); then
+  gmp_cv_check_asm_size="[.size \$][1,\$][2]"
+else
+  gmp_cv_check_asm_size="[dnl]"
+fi
+# Remove the probe source and anything the assembler produced from it.
+rm -f conftest*
+])
+echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_LSYM_PREFIX
+dnl What is the prefix for a local label?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX, +[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([what prefix to use for a local label], +gmp_cv_check_asm_lsym_prefix, +[if test -z "$NM"; then + echo; echo ["$0: fatal: need nm"] + exit 1 +fi +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +gmp_cv_check_asm_lsym_prefix="L" +for gmp_tmp_pre in L .L $ L$; do + cat > conftest.s <<EOF +dummy${gmp_cv_check_asm_label_suffix} +${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if AC_TRY_EVAL(ac_assemble); then + $NM conftest.o >/dev/null 2>&1 + gmp_rc=$? + if test "$gmp_rc" != "0"; then + echo "configure: $NM failure, using default" + break + fi + if $NM conftest.o | grep gurkmacka >/dev/null; then true; else + gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre" + break + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC + # Use default. + fi +done +rm -f conftest* +]) +echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4 +]) + +dnl GMP_CHECK_ASM_W32 +dnl How to [define] a 32-bit word. +dnl Requires NM to be set to nm for target. +AC_DEFUN(GMP_CHECK_ASM_W32, +[AC_REQUIRE([GMP_CHECK_ASM_DATA]) +AC_REQUIRE([GMP_CHECK_ASM_GLOBL]) +AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([how to [define] a 32-bit word], + gmp_cv_check_asm_w32, +[if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 +fi + +# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption +# that it's at 0x0. We'll have to declare another symbol before the +# .long/.word and look at the distance between the two symbols. The +# only problem is that the sed expression(s) barfs (on Solaris, for +# example) for the symbol with value 0. For now, HPUX uses .word. 
+ +case "$target" in + *-*-hpux*) + gmp_cv_check_asm_w32=".word" + ;; + *-*-*) + ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" + for gmp_tmp_op in .long .word; do + cat > conftest.s <<EOF + $gmp_cv_check_asm_data + $gmp_cv_check_asm_globl foo + $gmp_tmp_op 0 +foo${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if AC_TRY_EVAL(ac_assemble); then + changequote(<,>) + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "4"; then + gmp_cv_check_asm_w32="$gmp_tmp_op" + break + fi + fi + done + ;; +esac + +if test -z "$gmp_cv_check_asm_w32"; then + echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"] + exit 1 +fi +rm -f conftest* +]) +echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4 +]) + +dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]]) +dnl Can we assemble MMX insns? +AC_DEFUN(GMP_CHECK_ASM_MMX, +[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +AC_CACHE_CHECK([if the assembler knows about MMX instructions], + gmp_cv_check_asm_mmx, +[cat > conftest.s <<EOF + $gmp_cv_check_asm_text + por %mm0, %mm0 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_mmx=yes +else + gmp_cv_check_asm_mmx=no +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_mmx" = "yes"; then + ifelse([$1], , :, [$1]) +else + AC_MSG_WARN([+----------------------------------------------------------]) + AC_MSG_WARN([| WARNING WARNING WARNING]) + AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by]) + AC_MSG_WARN([| $CCAS $CFLAGS]) + AC_MSG_WARN([| Non-MMX replacements will be used.]) + AC_MSG_WARN([| This will be an inferior build.]) + AC_MSG_WARN([+----------------------------------------------------------]) + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]]) +AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL, 
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +AC_CACHE_CHECK([if the assembler takes cl with shldl], + gmp_cv_check_asm_shldl_cl, +[cat > conftest.s <<EOF + $gmp_cv_check_asm_text + shldl %cl, %eax, %ebx +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_shldl_cl=yes +else + gmp_cv_check_asm_shldl_cl=no +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then + ifelse([$1], , :, [$1]) +else + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS]) +dnl Check if CC can compile and link. Perform various target specific tests. +dnl FIXME: Require `$target'. +AC_DEFUN(GMP_PROG_CC_WORKS, +[AC_LANG_C dnl Note: Destructive. +CC="[$1]" +CFLAGS="[$2]" +AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS]) + +# Simple test for all targets. +AC_TRY_COMPILER([int main(){return(0);}], + tmp_works, tmp_cross) + +# Target specific tests. +if test "$tmp_works" = "yes"; then + case "$target" in + *-*-aix*) # Returning a funcptr. + AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){], + tmp_works=yes, tmp_works=no) + ;; + esac +fi + +if test "$tmp_works" = "yes"; then + [$3] +else + ifelse([$4], , :, [$4]) +fi + +AC_MSG_RESULT($tmp_works) +])dnl + + +dnl GMP_C_ANSI2KNR +dnl -------------- +dnl Setup to use ansi2knr if necessary. +dnl +dnl The test here is simply that if an ANSI style function works then +dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we +dnl don't need to worry here about anything badly broken. +dnl +dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake +dnl March 2000) it gives the wrong answer on a C++ compiler because its +dnl test requires that the compiler accept both ANSI and K&R, or otherwise +dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes +dnl AM_C_PROTOTYPES think it needs ansi2knr! 
GMP has no bare K&R so we +dnl only need ANSI or K&R to work, not both. + +AC_DEFUN(GMP_C_ANSI2KNR, +[AC_CACHE_CHECK([if ansi2knr should be used], + gmp_cv_c_ansi2knr, +[cat >conftest.c <<EOF +int main (int argc, char *argv[]) { return 0; } +EOF +if AC_TRY_EVAL(ac_compile); then + gmp_cv_c_ansi2knr=no +else + gmp_cv_c_ansi2knr=yes +fi +rm -f conftest.* +]) +if test $gmp_cv_c_ansi2knr = no; then + U= ANSI2KNR= +else + U=_ ANSI2KNR=./ansi2knr + # Ensure some checks needed by ansi2knr itself. + AC_HEADER_STDC + AC_CHECK_HEADERS(string.h) +fi +AC_SUBST(U) +AC_SUBST(ANSI2KNR) +]) + + +dnl Deal with bad synchronization of Autoconf with Libtool. +AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD]) +AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX]) diff --git a/rts/gmp/aclocal.m4 b/rts/gmp/aclocal.m4 new file mode 100644 index 0000000000..086c77915c --- /dev/null +++ b/rts/gmp/aclocal.m4 @@ -0,0 +1,1963 @@ +dnl aclocal.m4 generated automatically by aclocal 1.4a + +dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without +dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A +dnl PARTICULAR PURPOSE. + +dnl GMP specific autoconf macros + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl GMP_HEADER_GETVAL(NAME,FILE) +dnl ---------------------------- +dnl Expand to the value of a "#define NAME" from the given FILE. +dnl The regexps here aren't very rugged, but are enough for gmp. +dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted. + +define(GMP_HEADER_GETVAL, +[patsubst(patsubst( +esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]), +[^.*$1[ ]+],[]), +[[ + ]*$],[])]) + + +dnl GMP_VERSION +dnl ----------- +dnl The gmp version number, extracted from the #defines in gmp.h. +dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if +dnl patchlevel > 0. + +define(GMP_VERSION, +[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl +.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl +ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1, +[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])]) + + +dnl GMP_PROG_M4() +dnl ------------- +dnl +dnl Find a working m4, either in $PATH or likely locations, and setup $M4 +dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user +dnl choice and is accepted with no checks. GMP_PROG_M4 is like +dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if +dnl it's good enough. +dnl +dnl See mpn/asm-defs.m4 for details on the known bad m4s. 
+ +AC_DEFUN(GMP_PROG_M4, +[AC_CACHE_CHECK([for suitable m4], + gmp_cv_prog_m4, +[if test -n "$M4"; then + gmp_cv_prog_m4="$M4" +else + cat >conftest.m4 <<\EOF +dnl must protect this against being expanded during autoconf m4! +[define(dollarhash,``$][#'')dnl +ifelse(dollarhash(x),1,`define(t1,Y)', +``bad: $][# not supported (SunOS /usr/bin/m4) +'')dnl +ifelse(eval(89),89,`define(t2,Y)', +`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4) +')dnl +ifelse(t1`'t2,YY,`good +')dnl] +EOF + echo "trying m4" 1>&AC_FD_CC + gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`" + echo "$gmp_tmp_val" 1>&AC_FD_CC + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="m4" + else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="$PATH:/usr/5bin" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + echo "trying $ac_dir/m4" 1>&AC_FD_CC + gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`" + echo "$gmp_tmp_val" 1>&AC_FD_CC + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="$ac_dir/m4" + break + fi + done + IFS="$ac_save_ifs" + if test -z "$gmp_cv_prog_m4"; then + AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).]) + fi + fi + rm -f conftest.m4 +fi]) +M4="$gmp_cv_prog_m4" +AC_SUBST(M4) +]) + + +dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC]) +dnl Find first working compiler in CC_LIST. +dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to +dnl produce 64-bit code. +dnl NOTE: If a compiler needs any special flags for producing 64-bit code, +dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}' +dnl is the name of the compiler. +dnl Set CC to the name of the first working compiler. 
+dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and +dnl CFLAGS64 to flags to use. +dnl This macro does not test if any of the compilers found is a GNU compiler. +dnl To do this, when you have finally made up your mind on which one to use, +dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will +dnl also make sure that your selection of CFLAGS is valid. +dnl +AC_DEFUN(GMP_PROG_CC_FIND, +[AC_BEFORE([$0], [CC_PROG_CPP]) +ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]") +ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]") + +CC32= +CC64= +for c in $gmp_cc_list; do + # Avoid cache hits. + unset CC + unset ac_cv_prog_CC + AC_CHECK_TOOL(CC, $c, $c) + if test -n "$CC"; then + eval c_flags=\$gmp_cflags_$c + GMP_PROG_CC_WORKS($CC, $c_flags, + gmp_prog_cc_works=yes, + gmp_prog_cc_works=no) + + if test "$gmp_prog_cc_works" != "yes"; then + continue + fi + + # Save first working compiler, whether 32- or 64-bit capable. + if test -z "$CC32"; then + CC32="$CC" + fi + if test "$gmp_req_64bit_cc" = "yes"; then + eval c_flags=\$gmp_cflags64_$c + + # Verify that the compiler works in 64-bit mode as well. + # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link. + GMP_PROG_CC_WORKS($c, $c_flags, + gmp_prog_cc_works=yes, + gmp_prog_cc_works=no) + + if test "$gmp_prog_cc_works" = "yes"; then + GMP_CHECK_CC_64BIT($c, $c_flags) + if test "$gmp_cv_cc_64bit" = "yes"; then + test -z "$CC64" && CC64="$c" + test -z "$CFLAGS64" && CFLAGS64="$c_flags" + # We have CC64 so we're done. + break + fi + fi + else + # We have CC32, and we don't need a 64-bit compiler so we're done. + break + fi + fi +done +CC="$CC32" +])dnl + +dnl GMP_PROG_CC_SELECT +dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler. +dnl Cache the result as `ac_cv_prog_CC'. 
+AC_DEFUN(GMP_PROG_CC_SELECT, +[AC_BEFORE([$0], [CC_PROG_CPP]) +AC_PROG_CC_WORKS +AC_PROG_CC_GNU + +if test "$ac_cv_prog_gcc" = "yes"; then + GCC=yes +else + GCC= +fi + +# Set CFLAGS if not already set. +if test -z "$CFLAGS"; then + CFLAGS="-g" + if test "$GCC" = "yes"; then + CFLAGS="$CFLAGS -O2" + fi +fi + +AC_SUBST(CC) +AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC") +AC_PROVIDE([AC_PROG_CC]) +])dnl + +dnl GMP_CHECK_CC_64BIT(cc, cflags64) +dnl Find out if `CC' can produce 64-bit code. +dnl Requires NM to be set to nm for target. +dnl FIXME: Cache result. +AC_DEFUN(GMP_CHECK_CC_64BIT, +[ + gmp_tmp_CC_save="$CC" + CC="[$1]" + AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable]) + if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 + fi + gmp_tmp_CFLAGS_save="$CFLAGS" + CFLAGS="[$2]" + + case "$target" in + hppa2.0*-*-*) + # FIXME: If gcc is installed under another name than "gcc", we will + # test the wrong thing. + if test "$CC" != "gcc"; then + dnl Let compiler version A.10.32.30 or higher be ok. + dnl Bad compiler output: + dnl ccom: HP92453-01 G.10.32.05 HP C Compiler + dnl Good compiler output: + dnl ccom: HP92453-01 A.10.32.30 HP C Compiler + echo >conftest.c + gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"` + rm conftest* + gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'` + gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'` + gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'` + gmp_cv_cc_64bit=no + test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \ + && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \ + && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \ + && gmp_cv_cc_64bit=yes + else # gcc + # FIXME: Compile a minimal file and determine if the resulting object + # file is an ELF file. If so, gcc can produce 64-bit code. + # Do we have file(1) for target? 
+ gmp_cv_cc_64bit=no + fi + ;; + mips-sgi-irix6.*) + # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit + # arithmetic but not 64-bit pointers, so the general test for sizeof + # (void *) is not valid. + # Simply try to compile an empty main. If that succeeds return + # true. + AC_TRY_COMPILE( , , + gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no, + gmp_cv_cc_64bit=no) + ;; + *-*-*) + # Allocate an array of size sizeof (void *) and use nm to determine its + # size. We depend on the first declared variable being put at address 0. + cat >conftest.c <<EOF +[char arr[sizeof (void *)]={0}; +char post=0;] +EOF + gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC" + if AC_TRY_EVAL(gmp_compile); then + changequote(<,>)dnl + gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "8"; then + gmp_cv_cc_64bit=yes + else + gmp_cv_cc_64bit=no + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + gmp_cv_cc_64bit=no + fi + rm -f conftest* + ;; + esac + + CC="$gmp_tmp_CC_save" + CFLAGS="$gmp_tmp_CFLAGS_save" + AC_MSG_RESULT($gmp_cv_cc_64bit) +])dnl + +dnl GMP_INIT([M4-DEF-FILE]) +dnl +AC_DEFUN(GMP_INIT, +[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]") +gmp_tmpconfigm4=cnfm4.tmp +gmp_tmpconfigm4i=cnfm4i.tmp +gmp_tmpconfigm4p=cnfm4p.tmp +test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4 +test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i +test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p +])dnl + +dnl GMP_FINISH +dnl ---------- +dnl Create config.m4 from its accumulated parts. +dnl +dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include +dnl of config.m4 is harmless. +dnl +dnl A separate ifdef on the angle bracket quoted part ensures the quoting +dnl style there is respected. 
The basic defines from gmp_tmpconfigm4 are +dnl fully quoted but are still put under an ifdef in case any have been +dnl redefined by one of the m4 include files. +dnl +dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't +dnl work, since it'd interpret parentheses and quotes in dnl comments, and +dnl having a whole file as a macro argument would overflow the string space +dnl on BSD m4. + +AC_DEFUN(GMP_FINISH, +[AC_REQUIRE([GMP_INIT]) +echo "creating $gmp_configm4" +echo ["dnl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4 +if test -f $gmp_tmpconfigm4; then + echo ["changequote(<,>)dnl"] >> $gmp_configm4 + echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4 + cat $gmp_tmpconfigm4 >> $gmp_configm4 + echo [">)"] >> $gmp_configm4 + echo ["changequote(\`,')dnl"] >> $gmp_configm4 + rm $gmp_tmpconfigm4 +fi +echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4 +if test -f $gmp_tmpconfigm4i; then + cat $gmp_tmpconfigm4i >> $gmp_configm4 + rm $gmp_tmpconfigm4i +fi +if test -f $gmp_tmpconfigm4p; then + cat $gmp_tmpconfigm4p >> $gmp_configm4 + rm $gmp_tmpconfigm4p +fi +echo ["')"] >> $gmp_configm4 +echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4 +])dnl + +dnl GMP_INCLUDE(FILE) +AC_DEFUN(GMP_INCLUDE, +[AC_REQUIRE([GMP_INIT]) +echo ["include(\`$1')"] >> $gmp_tmpconfigm4i +])dnl + +dnl GMP_SINCLUDE(FILE) +AC_DEFUN(GMP_SINCLUDE, +[AC_REQUIRE([GMP_INIT]) +echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i +])dnl + +dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION]) +dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ] +dnl [ If LOCATION is `POST', the definition will appear after any ] +dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ] +dnl [ Mind the quoting! No shell variables will get expanded. ] +dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ] +dnl [ config.m4 uses `<' and '>' as quote characters for all defines. 
] +AC_DEFUN(GMP_DEFINE, +[AC_REQUIRE([GMP_INIT]) +echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4) +])dnl + +dnl GMP_DEFINE_RAW(STRING, [, LOCATION]) +dnl [ Put STRING in temporary file. ] +dnl [ If LOCATION is `POST', the definition will appear after any ] +dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ] +dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ] +AC_DEFUN(GMP_DEFINE_RAW, +[AC_REQUIRE([GMP_INIT]) +echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4) +])dnl + +dnl GMP_CHECK_ASM_LABEL_SUFFIX +dnl Should a label have a colon or not? +AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX, +[AC_CACHE_CHECK([what assembly label suffix to use], + gmp_cv_check_asm_label_suffix, +[case "$target" in + *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;; + *) gmp_cv_check_asm_label_suffix=[":"] ;; +esac +]) +echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) +dnl Shamelessly borrowed from glibc. +AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE, +[AC_CACHE_CHECK([if symbols are prefixed by underscore], + gmp_cv_check_asm_underscore, +[cat > conftest.$ac_ext <<EOF +dnl This sometimes fails to find confdefs.h, for some reason. 
+dnl [#]line __oline__ "[$]0" +[#]line __oline__ "configure" +#include "confdefs.h" +int underscore_test() { +return; } +EOF +if AC_TRY_EVAL(ac_compile); then + if grep _underscore_test conftest* >/dev/null; then + gmp_cv_check_asm_underscore=yes + else + gmp_cv_check_asm_underscore=no + fi +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_underscore" = "yes"; then + GMP_DEFINE(GSYM_PREFIX, [_]) + ifelse([$1], , :, [$1]) +else + GMP_DEFINE(GSYM_PREFIX, []) + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) +dnl Is parameter to `.align' logarithmic? +dnl Requires NM to be set to nm for target. +AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG, +[AC_REQUIRE([GMP_CHECK_ASM_GLOBL]) +AC_REQUIRE([GMP_CHECK_ASM_DATA]) +AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([if .align assembly directive is logarithmic], + gmp_cv_check_asm_align_log, +[if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 +fi +cat > conftest.s <<EOF + $gmp_cv_check_asm_data + .align 4 + $gmp_cv_check_asm_globl foo + .byte 1 + .align 4 +foo$gmp_cv_check_asm_label_suffix + .byte 2 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + changequote(<,>) + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then + gmp_cv_check_asm_align_log=yes + else + gmp_cv_check_asm_align_log=no + fi +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC +fi +rm -f conftest* +]) +GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"]) +if test "$gmp_cv_check_asm_align_log" = "yes"; then + ifelse([$1], , :, [$1]) +else + ifelse([$2], , :, [$2]) +fi +])dnl + + +dnl GMP_CHECK_ASM_ALIGN_FILL_0x90 +dnl 
----------------------------- +dnl Determine whether a ",0x90" suffix works on a .align directive. +dnl This is only meant for use on x86, where 0x90 is a "nop". +dnl +dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00. +dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when +dnl ",0x90" is given. +dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error. +dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect. +dnl +dnl Note that both solaris "as"s only care about ",0x90" if they actually +dnl have to use it to fill something, hence the .byte in the sample. It's +dnl only the second .align that provokes an error or warning. +dnl +dnl We prefer to suppress the warning from solaris 2.8 to stop anyone +dnl worrying something might be wrong. + +AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90, +[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text], + gmp_cv_check_asm_align_fill_0x90, +[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +cat > conftest.s <<EOF + $gmp_cv_check_asm_text + .align 4, 0x90 + .byte 0 + .align 4, 0x90 +EOF +gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`" +if test $? 
= 0; then + echo "$gmp_tmp_val" 1>&AC_FD_CC + if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then + echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC + gmp_cv_check_asm_align_fill_0x90=no + else + gmp_cv_check_asm_align_fill_0x90=yes + fi +else + echo "Non-zero exit code" 1>&AC_FD_CC + echo "$gmp_tmp_val" 1>&AC_FD_CC + gmp_cv_check_asm_align_fill_0x90=no +fi +rm -f conftest* +]) +GMP_DEFINE_RAW( +["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"]) +]) + + +dnl GMP_CHECK_ASM_TEXT +AC_DEFUN(GMP_CHECK_ASM_TEXT, +[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text, +[case "$target" in + *-*-aix*) + changequote({, }) + gmp_cv_check_asm_text={".csect .text[PR]"} + changequote([, ]) + ;; + *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;; + *) gmp_cv_check_asm_text=[".text"] ;; +esac +]) +echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_DATA +dnl Can we say `.data'? +AC_DEFUN(GMP_CHECK_ASM_DATA, +[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data, +[case "$target" in + *-*-aix*) + changequote({, }) + gmp_cv_check_asm_data={".csect .data[RW]"} + changequote([, ]) + ;; + *) gmp_cv_check_asm_data=[".data"] ;; +esac +]) +echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_GLOBL +dnl Can we say `.global'? +AC_DEFUN(GMP_CHECK_ASM_GLOBL, +[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl, +[case "$target" in + *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;; + *) gmp_cv_check_asm_globl=[".globl"] ;; +esac +]) +echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_TYPE +dnl Can we say `.type'? 
+AC_DEFUN(GMP_CHECK_ASM_TYPE, +[AC_CACHE_CHECK([how the .type assembly directive should be used], +gmp_cv_check_asm_type, +[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +for gmp_tmp_prefix in @ \# %; do + echo " .type sym,${gmp_tmp_prefix}function" > conftest.s + if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]" + break + fi +done +if test -z "$gmp_cv_check_asm_type"; then + gmp_cv_check_asm_type="[dnl]" +fi +]) +echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_SIZE +dnl Can we say `.size'? +AC_DEFUN(GMP_CHECK_ASM_SIZE, +[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size, +[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +echo ' .size sym,1' > conftest.s +if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_size="[.size \$][1,\$][2]" +else + gmp_cv_check_asm_size="[dnl]" +fi +]) +echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4 +])dnl + +dnl GMP_CHECK_ASM_LSYM_PREFIX +dnl What is the prefix for a local label? +dnl Requires NM to be set to nm for target. +AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX, +[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([what prefix to use for a local label], +gmp_cv_check_asm_lsym_prefix, +[if test -z "$NM"; then + echo; echo ["$0: fatal: need nm"] + exit 1 +fi +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +gmp_cv_check_asm_lsym_prefix="L" +for gmp_tmp_pre in L .L $ L$; do + cat > conftest.s <<EOF +dummy${gmp_cv_check_asm_label_suffix} +${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if AC_TRY_EVAL(ac_assemble); then + $NM conftest.o >/dev/null 2>&1 + gmp_rc=$? 
+ if test "$gmp_rc" != "0"; then + echo "configure: $NM failure, using default" + break + fi + if $NM conftest.o | grep gurkmacka >/dev/null; then true; else + gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre" + break + fi + else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC + # Use default. + fi +done +rm -f conftest* +]) +echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4 +]) + +dnl GMP_CHECK_ASM_W32 +dnl How to [define] a 32-bit word. +dnl Requires NM to be set to nm for target. +AC_DEFUN(GMP_CHECK_ASM_W32, +[AC_REQUIRE([GMP_CHECK_ASM_DATA]) +AC_REQUIRE([GMP_CHECK_ASM_GLOBL]) +AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX]) +AC_CACHE_CHECK([how to [define] a 32-bit word], + gmp_cv_check_asm_w32, +[if test -z "$NM"; then + echo; echo ["configure: $0: fatal: need nm"] + exit 1 +fi + +# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption +# that it's at 0x0. We'll have to declare another symbol before the +# .long/.word and look at the distance between the two symbols. The +# only problem is that the sed expression(s) barfs (on Solaris, for +# example) for the symbol with value 0. For now, HPUX uses .word. 
+ +case "$target" in + *-*-hpux*) + gmp_cv_check_asm_w32=".word" + ;; + *-*-*) + ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" + for gmp_tmp_op in .long .word; do + cat > conftest.s <<EOF + $gmp_cv_check_asm_data + $gmp_cv_check_asm_globl foo + $gmp_tmp_op 0 +foo${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if AC_TRY_EVAL(ac_assemble); then + changequote(<,>) + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + changequote([, ])dnl + if test "$gmp_tmp_val" = "4"; then + gmp_cv_check_asm_w32="$gmp_tmp_op" + break + fi + fi + done + ;; +esac + +if test -z "$gmp_cv_check_asm_w32"; then + echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"] + exit 1 +fi +rm -f conftest* +]) +echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4 +]) + +dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]]) +dnl Can we assemble MMX insns? +AC_DEFUN(GMP_CHECK_ASM_MMX, +[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +AC_CACHE_CHECK([if the assembler knows about MMX instructions], + gmp_cv_check_asm_mmx, +[cat > conftest.s <<EOF + $gmp_cv_check_asm_text + por %mm0, %mm0 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_mmx=yes +else + gmp_cv_check_asm_mmx=no +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_mmx" = "yes"; then + ifelse([$1], , :, [$1]) +else + AC_MSG_WARN([+----------------------------------------------------------]) + AC_MSG_WARN([| WARNING WARNING WARNING]) + AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by]) + AC_MSG_WARN([| $CCAS $CFLAGS]) + AC_MSG_WARN([| Non-MMX replacements will be used.]) + AC_MSG_WARN([| This will be an inferior build.]) + AC_MSG_WARN([+----------------------------------------------------------]) + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]]) +AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL, 
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT]) +AC_CACHE_CHECK([if the assembler takes cl with shldl], + gmp_cv_check_asm_shldl_cl, +[cat > conftest.s <<EOF + $gmp_cv_check_asm_text + shldl %cl, %eax, %ebx +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC" +if AC_TRY_EVAL(ac_assemble); then + gmp_cv_check_asm_shldl_cl=yes +else + gmp_cv_check_asm_shldl_cl=no +fi +rm -f conftest* +]) +if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then + ifelse([$1], , :, [$1]) +else + ifelse([$2], , :, [$2]) +fi +])dnl + +dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS]) +dnl Check if CC can compile and link. Perform various target specific tests. +dnl FIXME: Require `$target'. +AC_DEFUN(GMP_PROG_CC_WORKS, +[AC_LANG_C dnl Note: Destructive. +CC="[$1]" +CFLAGS="[$2]" +AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS]) + +# Simple test for all targets. +AC_TRY_COMPILER([int main(){return(0);}], + tmp_works, tmp_cross) + +# Target specific tests. +if test "$tmp_works" = "yes"; then + case "$target" in + *-*-aix*) # Returning a funcptr. + AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){], + tmp_works=yes, tmp_works=no) + ;; + esac +fi + +if test "$tmp_works" = "yes"; then + [$3] +else + ifelse([$4], , :, [$4]) +fi + +AC_MSG_RESULT($tmp_works) +])dnl + + +dnl GMP_C_ANSI2KNR +dnl -------------- +dnl Setup to use ansi2knr if necessary. +dnl +dnl The test here is simply that if an ANSI style function works then +dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we +dnl don't need to worry here about anything badly broken. +dnl +dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake +dnl March 2000) it gives the wrong answer on a C++ compiler because its +dnl test requires that the compiler accept both ANSI and K&R, or otherwise +dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes +dnl AM_C_PROTOTYPES think it needs ansi2knr! 
GMP has no bare K&R so we +dnl only need ANSI or K&R to work, not both. + +AC_DEFUN(GMP_C_ANSI2KNR, +[AC_CACHE_CHECK([if ansi2knr should be used], + gmp_cv_c_ansi2knr, +[cat >conftest.c <<EOF +int main (int argc, char *argv[]) { return 0; } +EOF +if AC_TRY_EVAL(ac_compile); then + gmp_cv_c_ansi2knr=no +else + gmp_cv_c_ansi2knr=yes +fi +rm -f conftest.* +]) +if test $gmp_cv_c_ansi2knr = no; then + U= ANSI2KNR= +else + U=_ ANSI2KNR=./ansi2knr + # Ensure some checks needed by ansi2knr itself. + AC_HEADER_STDC + AC_CHECK_HEADERS(string.h) +fi +AC_SUBST(U) +AC_SUBST(ANSI2KNR) +]) + + +dnl Deal with bad synchronization of Autoconf with Libtool. +AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD]) +AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX]) + + +# serial 1 + +AC_DEFUN(AM_C_PROTOTYPES, +[AC_REQUIRE([AM_PROG_CC_STDC]) +AC_REQUIRE([AC_PROG_CPP]) +AC_MSG_CHECKING([for function prototypes]) +if test "$am_cv_prog_cc_stdc" != no; then + AC_MSG_RESULT(yes) + AC_DEFINE(PROTOTYPES,1,[Define if compiler has function prototypes]) + U= ANSI2KNR= +else + AC_MSG_RESULT(no) + U=_ ANSI2KNR=./ansi2knr + # Ensure some checks needed by ansi2knr itself. + AC_HEADER_STDC + AC_CHECK_HEADERS(string.h) +fi +AC_SUBST(U)dnl +AC_SUBST(ANSI2KNR)dnl +]) + + +# serial 1 + +# @defmac AC_PROG_CC_STDC +# @maindex PROG_CC_STDC +# @ovindex CC +# If the C compiler in not in ANSI C mode by default, try to add an option +# to output variable @code{CC} to make it so. This macro tries various +# options that select ANSI C on some system or another. It considers the +# compiler to be in ANSI C mode if it handles function prototypes correctly. +# +# If you use this macro, you should check after calling it whether the C +# compiler has been set to accept ANSI C; if not, the shell variable +# @code{am_cv_prog_cc_stdc} is set to @samp{no}. If you wrote your source +# code in ANSI C, you can make an un-ANSIfied copy of it by using the +# program @code{ansi2knr}, which comes with Ghostscript. 
+# @end defmac + +AC_DEFUN(AM_PROG_CC_STDC, +[AC_REQUIRE([AC_PROG_CC]) +AC_BEFORE([$0], [AC_C_INLINE]) +AC_BEFORE([$0], [AC_C_CONST]) +dnl Force this before AC_PROG_CPP. Some cpp's, eg on HPUX, require +dnl a magic option to avoid problems with ANSI preprocessor commands +dnl like #elif. +dnl FIXME: can't do this because then AC_AIX won't work due to a +dnl circular dependency. +dnl AC_BEFORE([$0], [AC_PROG_CPP]) +AC_MSG_CHECKING(for ${CC-cc} option to accept ANSI C) +AC_CACHE_VAL(am_cv_prog_cc_stdc, +[am_cv_prog_cc_stdc=no +ac_save_CC="$CC" +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + AC_TRY_COMPILE( +[#include <stdarg.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +], [ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; +], +[am_cv_prog_cc_stdc="$ac_arg"; break]) +done +CC="$ac_save_CC" +]) +if test -z "$am_cv_prog_cc_stdc"; then + AC_MSG_RESULT([none needed]) +else + AC_MSG_RESULT($am_cv_prog_cc_stdc) +fi +case "x$am_cv_prog_cc_stdc" in + x|xno) ;; + *) CC="$CC $am_cv_prog_cc_stdc" ;; +esac +]) + +# Do all the work for Automake. 
This macro actually does too much -- +# some checks are only needed if your package does certain things. +# But this isn't really a big deal. + +# serial 1 + +dnl Usage: +dnl AM_INIT_AUTOMAKE(package,version, [no-define]) + +AC_DEFUN(AM_INIT_AUTOMAKE, +[AC_REQUIRE([AC_PROG_INSTALL]) +dnl We require 2.13 because we rely on SHELL being computed by configure. +AC_PREREQ([2.13]) +PACKAGE=[$1] +AC_SUBST(PACKAGE) +VERSION=[$2] +AC_SUBST(VERSION) +dnl test to see if srcdir already configured +if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" && + test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) +fi +ifelse([$3],, +AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) +AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])) +AC_REQUIRE([AM_SANITY_CHECK]) +AC_REQUIRE([AC_ARG_PROGRAM]) +AM_MISSING_PROG(ACLOCAL, aclocal) +AM_MISSING_PROG(AUTOCONF, autoconf) +AM_MISSING_PROG(AUTOMAKE, automake) +AM_MISSING_PROG(AUTOHEADER, autoheader) +AM_MISSING_PROG(MAKEINFO, makeinfo) +AM_MISSING_PROG(AMTAR, tar) +AM_MISSING_INSTALL_SH +dnl We need awk for the "check" target. The system "awk" is bad on +dnl some platforms. +AC_REQUIRE([AC_PROG_AWK]) +AC_REQUIRE([AC_PROG_MAKE_SET]) +AC_REQUIRE([AM_DEP_TRACK]) +AC_REQUIRE([AM_SET_DEPDIR]) +ifdef([AC_PROVIDE_AC_PROG_CC], [AM_DEPENDENCIES(CC)], [ + define([AC_PROG_CC], defn([AC_PROG_CC])[AM_DEPENDENCIES(CC)])]) +ifdef([AC_PROVIDE_AC_PROG_CXX], [AM_DEPENDENCIES(CXX)], [ + define([AC_PROG_CXX], defn([AC_PROG_CXX])[AM_DEPENDENCIES(CXX)])]) +]) + +# +# Check to make sure that the build environment is sane. +# + +AC_DEFUN(AM_SANITY_CHECK, +[AC_MSG_CHECKING([whether build environment is sane]) +# Just in case +sleep 1 +echo timestamp > conftestfile +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. 
Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` + if test "[$]*" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftestfile` + fi + if test "[$]*" != "X $srcdir/configure conftestfile" \ + && test "[$]*" != "X conftestfile $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken +alias in your environment]) + fi + + test "[$]2" = conftestfile + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +rm -f conftest* +AC_MSG_RESULT(yes)]) + +dnl AM_MISSING_PROG(NAME, PROGRAM) +AC_DEFUN(AM_MISSING_PROG, [ +AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +dnl Like AM_MISSING_PROG, but only looks for install-sh. +dnl AM_MISSING_INSTALL_SH() +AC_DEFUN(AM_MISSING_INSTALL_SH, [ +AC_REQUIRE([AM_MISSING_HAS_RUN]) +if test -z "$install_sh"; then + install_sh="$ac_aux_dir/install-sh" + test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh" + test -f "$install_sh" || install_sh="${am_missing_run}${ac_auxdir}/install-sh" + dnl FIXME: an evil hack: we remove the SHELL invocation from + dnl install_sh because automake adds it back in. Sigh. + install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`" +fi +AC_SUBST(install_sh)]) + +dnl AM_MISSING_HAS_RUN. +dnl Define MISSING if not defined so far and test if it supports --run. +dnl If it does, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], [ +test x"${MISSING+set}" = xset || \ + MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing" +dnl Use eval to expand $SHELL +if eval "$MISSING --run :"; then + am_missing_run="$MISSING --run " +else + am_missing_run= + am_backtick='`' + AC_MSG_WARN([${am_backtick}missing' script is too old or missing]) +fi +]) + +dnl See how the compiler implements dependency checking. +dnl Usage: +dnl AM_DEPENDENCIES(NAME) +dnl NAME is "CC", "CXX" or "OBJC". + +dnl We try a few techniques and use that to set a single cache variable. + +AC_DEFUN(AM_DEPENDENCIES,[ +AC_REQUIRE([AM_SET_DEPDIR]) +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS]) +ifelse([$1],CC,[ +AC_REQUIRE([AC_PROG_CC]) +AC_REQUIRE([AC_PROG_CPP]) +depcc="$CC" +depcpp="$CPP"],[$1],CXX,[ +AC_REQUIRE([AC_PROG_CXX]) +AC_REQUIRE([AC_PROG_CXXCPP]) +depcc="$CXX" +depcpp="$CXXCPP"],[$1],OBJC,[ +am_cv_OBJC_dependencies_compiler_type=gcc],[ +AC_REQUIRE([AC_PROG_][$1]) +depcc="$[$1]" +depcpp=""]) +AC_MSG_CHECKING([dependency style of $depcc]) +AC_CACHE_VAL(am_cv_[$1]_dependencies_compiler_type,[ +if test -z "$AMDEP"; then + echo '#include "conftest.h"' > conftest.c + echo 'int i;' > conftest.h + + am_cv_[$1]_dependencies_compiler_type=none + for depmode in `sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < "$am_depcomp"`; do + case "$depmode" in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + if depmode="$depmode" \ + source=conftest.c object=conftest.o \ + depfile=conftest.Po tmpdepfile=conftest.TPo \ + $SHELL $am_depcomp $depcc -c conftest.c 2>/dev/null && + grep conftest.h conftest.Po > /dev/null 2>&1; then + am_cv_[$1]_dependencies_compiler_type="$depmode" + break + fi + done + + rm -f conftest.* +else + am_cv_[$1]_dependencies_compiler_type=none +fi +]) +AC_MSG_RESULT($am_cv_[$1]_dependencies_compiler_type) 
+[$1]DEPMODE="depmode=$am_cv_[$1]_dependencies_compiler_type" +AC_SUBST([$1]DEPMODE) +]) + +dnl Choose a directory name for dependency files. +dnl This macro is AC_REQUIREd in AM_DEPENDENCIES + +AC_DEFUN(AM_SET_DEPDIR,[ +if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then + DEPDIR=.deps +else + DEPDIR=_deps +fi +AC_SUBST(DEPDIR) +]) + +AC_DEFUN(AM_DEP_TRACK,[ +AC_ARG_ENABLE(dependency-tracking, +[ --disable-dependency-tracking Speeds up one-time builds + --enable-dependency-tracking Do not reject slow dependency extractors]) +if test "x$enable_dependency_tracking" = xno; then + AMDEP="#" +else + am_depcomp="$ac_aux_dir/depcomp" + if test ! -f "$am_depcomp"; then + AMDEP="#" + else + AMDEP= + fi +fi +AC_SUBST(AMDEP) +if test -z "$AMDEP"; then + AMDEPBACKSLASH='\' +else + AMDEPBACKSLASH= +fi +pushdef([subst], defn([AC_SUBST])) +subst(AMDEPBACKSLASH) +popdef([subst]) +]) + +dnl Generate code to set up dependency tracking. +dnl This macro should only be invoked once -- use via AC_REQUIRE. +dnl Usage: +dnl AM_OUTPUT_DEPENDENCY_COMMANDS + +dnl +dnl This code is only required when automatic dependency tracking +dnl is enabled. FIXME. This creates each `.P' file that we will +dnl need in order to bootstrap the dependency handling code. +AC_DEFUN(AM_OUTPUT_DEPENDENCY_COMMANDS,[ +AC_OUTPUT_COMMANDS([ +test x"$AMDEP" != x"" || +for mf in $CONFIG_FILES; do + case "$mf" in + Makefile) dirpart=.;; + */Makefile) dirpart=`echo "$mf" | sed -e 's|/[^/]*$||'`;; + *) continue;; + esac + grep '^DEP_FILES *= *[^ #]' < "$mf" > /dev/null || continue + # Extract the definition of DEP_FILES from the Makefile without + # running `make'. 
+ DEPDIR=`sed -n -e '/^DEPDIR = / s///p' < "$mf"` + test -z "$DEPDIR" && continue + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n -e '/^U = / s///p' < "$mf"` + test -d "$dirpart/$DEPDIR" || mkdir "$dirpart/$DEPDIR" + # We invoke sed twice because it is the simplest approach to + # changing $(DEPDIR) to its actual value in the expansion. + for file in `sed -n -e ' + /^DEP_FILES = .*\\\\$/ { + s/^DEP_FILES = // + :loop + s/\\\\$// + p + n + /\\\\$/ b loop + p + } + /^DEP_FILES = / s/^DEP_FILES = //p' < "$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`echo "$file" | sed -e 's|/[^/]*$||'` + $ac_aux_dir/mkinstalldirs "$dirpart/$fdir" > /dev/null 2>&1 + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done +done +], [AMDEP="$AMDEP" +ac_aux_dir="$ac_aux_dir"])]) + +# Like AC_CONFIG_HEADER, but automatically create stamp file. + +AC_DEFUN(AM_CONFIG_HEADER, +[AC_PREREQ([2.12]) +AC_CONFIG_HEADER([$1]) +dnl When config.status generates a header, we must update the stamp-h file. +dnl This file resides in the same directory as the config header +dnl that is generated. We must strip everything past the first ":", +dnl and everything past the last "/". +AC_OUTPUT_COMMANDS(changequote(<<,>>)dnl +ifelse(patsubst(<<$1>>, <<[^ ]>>, <<>>), <<>>, +<<test -z "<<$>>CONFIG_HEADERS" || echo timestamp > patsubst(<<$1>>, <<^\([^:]*/\)?.*>>, <<\1>>)stamp-h<<>>dnl>>, +<<am_indx=1 +for am_file in <<$1>>; do + case " <<$>>CONFIG_HEADERS " in + *" <<$>>am_file "*<<)>> + echo timestamp > `echo <<$>>am_file | sed -e 's%:.*%%' -e 's%[^/]*$%%'`stamp-h$am_indx + ;; + esac + am_indx=`expr "<<$>>am_indx" + 1` +done<<>>dnl>>) +changequote([,]))]) + +# Add --enable-maintainer-mode option to configure. 
+# From Jim Meyering + +# serial 1 + +AC_DEFUN(AM_MAINTAINER_MODE, +[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode is disabled by default + AC_ARG_ENABLE(maintainer-mode, +[ --enable-maintainer-mode enable make rules and dependencies not useful + (and sometimes confusing) to the casual installer], + USE_MAINTAINER_MODE=$enableval, + USE_MAINTAINER_MODE=no) + AC_MSG_RESULT($USE_MAINTAINER_MODE) + AM_CONDITIONAL(MAINTAINER_MODE, test $USE_MAINTAINER_MODE = yes) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST(MAINT)dnl +] +) + +# Define a conditional. + +AC_DEFUN(AM_CONDITIONAL, +[AC_SUBST($1_TRUE) +AC_SUBST($1_FALSE) +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi]) + + +# serial 42 AC_PROG_LIBTOOL +AC_DEFUN(AC_PROG_LIBTOOL, +[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl + +# Save cache, so that ltconfig can load it +AC_CACHE_SAVE + +# Actually configure libtool. ac_aux_dir is where install-sh is found. +AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \ +MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \ +LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \ +AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \ +objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \ +deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \ +${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \ +$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \ +|| AC_MSG_ERROR([libtool configure failed]) + +# Reload cache, that may have been modified by ltconfig +AC_CACHE_LOAD + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +# Redirect the config.log output again, so that the ltconfig log is not +# clobbered by the next message. 
+exec 5>>./config.log +]) + +AC_DEFUN(AC_LIBTOOL_SETUP, +[AC_PREREQ(2.13)dnl +AC_REQUIRE([AC_ENABLE_SHARED])dnl +AC_REQUIRE([AC_ENABLE_STATIC])dnl +AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_LD])dnl +AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl +AC_REQUIRE([AC_PROG_NM])dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl +AC_REQUIRE([AC_OBJEXT])dnl +AC_REQUIRE([AC_EXEEXT])dnl +dnl + +# Only perform the check for file, if the check method requires it +case "$deplibs_check_method" in +file_magic*) + if test "$file_magic_cmd" = '${MAGIC}'; then + AC_PATH_MAGIC + fi + ;; +esac + +case "$target" in +NONE) lt_target="$host" ;; +*) lt_target="$target" ;; +esac + +AC_CHECK_TOOL(RANLIB, ranlib, :) +AC_CHECK_TOOL(STRIP, strip, :) + +# Check for any special flags to pass to ltconfig. +libtool_flags="--cache-file=$cache_file" +test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared" +test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static" +test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install" +test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc" +test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld" +ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN], +[libtool_flags="$libtool_flags --enable-dlopen"]) +ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], +[libtool_flags="$libtool_flags --enable-win32-dll"]) +AC_ARG_ENABLE(libtool-lock, + [ --disable-libtool-lock avoid locking (might break parallel builds)]) +test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock" +test x"$silent" = xyes && libtool_flags="$libtool_flags --silent" + +AC_ARG_WITH(pic, + [ --with-pic try to use only PIC/non-PIC objects [default=use both]], + pic_mode="$withval", pic_mode=default) +test x"$pic_mode" = xyes && 
libtool_flags="$libtool_flags --prefer-pic" +test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic" + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case "$lt_target" in +*-*-irix6*) + # Find out which ABI we are using. + echo '[#]line __oline__ "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case "`/usr/bin/file conftest.o`" in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_SAVE + AC_LANG_C + AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_RESTORE]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; + +ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], +[*-*-cygwin* | *-*-mingw*) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one + AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain, + [AC_TRY_LINK([], + [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*); + DllMain (0, 0, 0);], + [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])]) + + case "$lt_target/$CC" in + *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*) + # old mingw systems require "-dll" to link a DLL, while more recent ones + # require "-mdll" + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -mdll" + AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch, + [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])]) + CFLAGS="$SAVE_CFLAGS" ;; 
+ *-*-cygwin*) + # cygwin systems need to pass --dll to the linker, and not link + # crt.o which will require a WinMain@16 definition. + lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;; + esac + ;; + ]) +esac +]) + +# AC_LIBTOOL_DLOPEN - enable checks for dlopen support +AC_DEFUN(AC_LIBTOOL_DLOPEN, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])]) + +# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's +AC_DEFUN(AC_LIBTOOL_WIN32_DLL, [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])]) + +# AC_ENABLE_SHARED - implement the --enable-shared flag +# Usage: AC_ENABLE_SHARED[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. +AC_DEFUN(AC_ENABLE_SHARED, [dnl +define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(shared, +changequote(<<, >>)dnl +<< --enable-shared[=PKGS] build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case "$enableval" in +yes) enable_shared=yes ;; +no) enable_shared=no ;; +*) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl +]) + +# AC_DISABLE_SHARED - set the default shared flag to --disable-shared +AC_DEFUN(AC_DISABLE_SHARED, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_SHARED(no)]) + +# AC_ENABLE_STATIC - implement the --enable-static flag +# Usage: AC_ENABLE_STATIC[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. 
+AC_DEFUN(AC_ENABLE_STATIC, [dnl +define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(static, +changequote(<<, >>)dnl +<< --enable-static[=PKGS] build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case "$enableval" in +yes) enable_static=yes ;; +no) enable_static=no ;; +*) + enable_static=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_static=AC_ENABLE_STATIC_DEFAULT)dnl +]) + +# AC_DISABLE_STATIC - set the default static flag to --disable-static +AC_DEFUN(AC_DISABLE_STATIC, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_STATIC(no)]) + + +# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag +# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)] +# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to +# `yes'. +AC_DEFUN(AC_ENABLE_FAST_INSTALL, [dnl +define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE(fast-install, +changequote(<<, >>)dnl +<< --enable-fast-install[=PKGS] optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT], +changequote([, ])dnl +[p=${PACKAGE-default} +case "$enableval" in +yes) enable_fast_install=yes ;; +no) enable_fast_install=no ;; +*) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. 
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac], +enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl +]) + +# AC_ENABLE_FAST_INSTALL - set the default to --disable-fast-install +AC_DEFUN(AC_DISABLE_FAST_INSTALL, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_FAST_INSTALL(no)]) + + +# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library +AC_DEFUN(AC_PATH_TOOL_PREFIX, +[AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC, +[case "$MAGIC" in + /*) + lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path. + ;; + ?:/*) + ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path. + ;; + *) + ac_save_MAGIC="$MAGIC" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="ifelse([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$1; then + lt_cv_path_MAGIC="$ac_dir/$1" + if test -n "$file_magic_test_file"; then + case "$deplibs_check_method" in + "file_magic "*) + file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" + MAGIC="$lt_cv_path_MAGIC" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + egrep "$file_magic_regex" > /dev/null; then + : + else + cat <<EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. 
Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +EOF + fi ;; + esac + fi + break + fi + done + IFS="$ac_save_ifs" + MAGIC="$ac_save_MAGIC" + ;; +esac]) +MAGIC="$lt_cv_path_MAGIC" +if test -n "$MAGIC"; then + AC_MSG_RESULT($MAGIC) +else + AC_MSG_RESULT(no) +fi +]) + + +# AC_PATH_MAGIC - find a file program which can recognise a shared library +AC_DEFUN(AC_PATH_MAGIC, +[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl +AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH) +if test -z "$lt_cv_path_MAGIC"; then + if test -n "$ac_tool_prefix"; then + AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH) + else + MAGIC=: + fi +fi +]) + + +# AC_PROG_LD - find the path to the GNU or non-GNU linker +AC_DEFUN(AC_PROG_LD, +[AC_ARG_WITH(gnu-ld, +[ --with-gnu-ld assume the C compiler uses GNU ld [default=no]], +test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no) +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +ac_prog=ld +if test "$ac_cv_prog_gcc" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by GCC]) + case $lt_target in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case "$ac_prog" in + # Accept absolute paths. +changequote(,)dnl + [\\/]* | [A-Za-z]:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' +changequote([,])dnl + # Canonicalize the path of ld + ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` + while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do + ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. 
+ with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(ac_cv_path_LD, +[if test -z "$LD"; then + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + ac_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some GNU ld's only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then + test "$with_gnu_ld" != no && break + else + test "$with_gnu_ld" != yes && break + fi + fi + done + IFS="$ac_save_ifs" +else + ac_cv_path_LD="$LD" # Let the user override the test with a path. +fi]) +LD="$ac_cv_path_LD" +if test -n "$LD"; then + AC_MSG_RESULT($LD) +else + AC_MSG_RESULT(no) +fi +test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH]) +AC_PROG_LD_GNU +]) + +AC_DEFUN(AC_PROG_LD_GNU, +[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], ac_cv_prog_gnu_ld, +[# I'd rather use --version here, but apparently some GNU ld's only accept -v. +if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then + ac_cv_prog_gnu_ld=yes +else + ac_cv_prog_gnu_ld=no +fi]) +with_gnu_ld=$ac_cv_prog_gnu_ld +]) + +# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker +# -- PORTME Some linkers may need a different reload flag. 
+AC_DEFUN(AC_PROG_LD_RELOAD_FLAG, +[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag, +[lt_cv_ld_reload_flag='-r']) +reload_flag=$lt_cv_ld_reload_flag +test -n "$reload_flag" && reload_flag=" $reload_flag" +]) + +# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies +# -- PORTME fill in with the dynamic library characteristics +AC_DEFUN(AC_DEPLIBS_CHECK_METHOD, +[AC_CACHE_CHECK([how to recognise dependant libraries], +lt_cv_deplibs_check_method, +[lt_cv_file_magic_cmd='${MAGIC}' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [regex]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given egrep regex. +# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. + +case "$host_os" in +aix4* | beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi4*) + changequote(,)dnl + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + changequote([, ])dnl + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin* | mingw*) + lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' 
+ lt_cv_file_magic_cmd='${OBJDUMP} -f' + ;; + +freebsd*) + case "$version_type" in + freebsd-elf*) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +irix5* | irix6*) + case "$host_os" in + irix5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" + ;; + *) + case "$LD" in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + # this will be overridden with pass_all, but let us keep it just in case + changequote(,)dnl + lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1" + changequote([, ])dnl + ;; + esac + lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. +linux-gnu*) + case "$host_cpu" in + alpha* | i*86 | powerpc* | sparc* ) + lt_cv_deplibs_check_method=pass_all ;; + *) + # glibc up to 2.1.1 does not perform some relocations on ARM + changequote(,)dnl + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; + changequote([, ])dnl + esac + lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` + ;; + +osf3* | osf4* | osf5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' + lt_cv_file_magic_test_file=/shlib/libc.so + lt_cv_deplibs_check_method=pass_all + ;; + +sco3.2v5*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + lt_cv_file_magic_test_file=/lib/libc.so + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + case "$host_vendor" in + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + motorola) + changequote(,)dnl + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared 
object|dynamic lib) M[0-9][0-9]* Version [0-9]' + changequote([, ])dnl + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + esac + ;; +esac +]) +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +]) + + +# AC_PROG_NM - find the path to a BSD-compatible name lister +AC_DEFUN(AC_PROG_NM, +[AC_MSG_CHECKING([for BSD-compatible nm]) +AC_CACHE_VAL(ac_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + ac_cv_path_NM="$NM" +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + ac_cv_path_NM="$ac_dir/nm -B" + break + elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + ac_cv_path_NM="$ac_dir/nm -p" + break + else + ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + fi + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm +fi]) +NM="$ac_cv_path_NM" +AC_MSG_RESULT([$NM]) +]) + +# AC_CHECK_LIBM - check for math library +AC_DEFUN(AC_CHECK_LIBM, +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case "$lt_target" in +*-*-beos* | *-*-cygwin*) + # These system don't have libm + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") + AC_CHECK_LIB(m, main, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, main, LIBM="-lm") + ;; +esac +]) + +# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for +# the libltdl convenience library, adds --enable-ltdl-convenience to +# the configure arguments. 
Note that LIBLTDL is not AC_SUBSTed, nor +# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed +# to be `${top_builddir}/libltdl'. Make sure you start DIR with +# '${top_builddir}/' (note the single quotes!) if your package is not +# flat, and, if you're not using automake, define top_builddir as +# appropriate in the Makefiles. +AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl + case "$enable_ltdl_convenience" in + no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;; + "") enable_ltdl_convenience=yes + ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; + esac + LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la + INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl']) +]) + +# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for +# the libltdl installable library, and adds --enable-ltdl-install to +# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor +# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed +# to be `${top_builddir}/libltdl'. Make sure you start DIR with +# '${top_builddir}/' (note the single quotes!) if your package is not +# flat, and, if you're not using automake, define top_builddir as +# appropriate in the Makefiles. +# In the future, this macro may have to be called after AC_PROG_LIBTOOL. 
+AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl + AC_CHECK_LIB(ltdl, main, + [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no], + [if test x"$enable_ltdl_install" = xno; then + AC_MSG_WARN([libltdl not installed, but installation disabled]) + else + enable_ltdl_install=yes + fi + ]) + if test x"$enable_ltdl_install" = x"yes"; then + ac_configure_args="$ac_configure_args --enable-ltdl-install" + LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la + INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl']) + else + ac_configure_args="$ac_configure_args --enable-ltdl-install=no" + LIBLTDL="-lltdl" + INCLTDL= + fi +]) + +dnl old names +AC_DEFUN(AM_PROG_LIBTOOL, [indir([AC_PROG_LIBTOOL])])dnl +AC_DEFUN(AM_ENABLE_SHARED, [indir([AC_ENABLE_SHARED], $@)])dnl +AC_DEFUN(AM_ENABLE_STATIC, [indir([AC_ENABLE_STATIC], $@)])dnl +AC_DEFUN(AM_DISABLE_SHARED, [indir([AC_DISABLE_SHARED], $@)])dnl +AC_DEFUN(AM_DISABLE_STATIC, [indir([AC_DISABLE_STATIC], $@)])dnl +AC_DEFUN(AM_PROG_LD, [indir([AC_PROG_LD])])dnl +AC_DEFUN(AM_PROG_NM, [indir([AC_PROG_NM])])dnl + +dnl This is just to silence aclocal about the macro not being used +ifelse([AC_DISABLE_FAST_INSTALL])dnl + diff --git a/rts/gmp/ansi2knr.1 b/rts/gmp/ansi2knr.1 new file mode 100644 index 0000000000..f9ee5a631c --- /dev/null +++ b/rts/gmp/ansi2knr.1 @@ -0,0 +1,36 @@ +.TH ANSI2KNR 1 "19 Jan 1996" +.SH NAME +ansi2knr \- convert ANSI C to Kernighan & Ritchie C +.SH SYNOPSIS +.I ansi2knr +[--varargs] input_file [output_file] +.SH DESCRIPTION +If no output_file is supplied, output goes to stdout. +.br +There are no error messages. +.sp +.I ansi2knr +recognizes function definitions by seeing a non-keyword identifier at the left +margin, followed by a left parenthesis, with a right parenthesis as the last +character on the line, and with a left brace as the first token on the +following line (ignoring possible intervening comments). 
It will recognize a +multi-line header provided that no intervening line ends with a left or right +brace or a semicolon. These algorithms ignore whitespace and comments, except +that the function name must be the first thing on the line. +.sp +The following constructs will confuse it: +.br + - Any other construct that starts at the left margin and follows the +above syntax (such as a macro or function call). +.br + - Some macros that tinker with the syntax of the function header. +.sp +The --varargs switch is obsolete, and is recognized only for +backwards compatibility. The present version of +.I ansi2knr +will always attempt to convert a ... argument to va_alist and va_dcl. +.SH AUTHOR +L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and +continues to maintain the current version; most of the code in the current +version is his work. ansi2knr also includes contributions by Francois +Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>. diff --git a/rts/gmp/ansi2knr.c b/rts/gmp/ansi2knr.c new file mode 100644 index 0000000000..937c731886 --- /dev/null +++ b/rts/gmp/ansi2knr.c @@ -0,0 +1,677 @@ +/* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved. */ + +/* Convert ANSI C function definitions to K&R ("traditional C") syntax */ + +/* +ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY. No author or distributor accepts responsibility to anyone for the +consequences of using it or for whether it serves any particular purpose or +works at all, unless he says so in writing. Refer to the GNU General Public +License (the "GPL") for full details. + +Everyone is granted permission to copy, modify and redistribute ansi2knr, +but only under the conditions described in the GPL. A copy of this license +is supposed to have been given to you along with ansi2knr so you can know +your rights and responsibilities. 
It should be in a file named COPYLEFT, +or, if there is no file named COPYLEFT, a file named COPYING. Among other +things, the copyright notice and this notice must be preserved on all +copies. + +We explicitly state here what we believe is already implied by the GPL: if +the ansi2knr program is distributed as a separate set of sources and a +separate executable file which are aggregated on a storage medium together +with another program, this in itself does not bring the other program under +the GPL, nor does the mere fact that such a program or the procedures for +constructing it invoke the ansi2knr executable bring any other part of the +program under the GPL. +*/ + +/* + * Usage: + ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]] + * --filename provides the file name for the #line directive in the output, + * overriding input_file (if present). + * If no input_file is supplied, input is read from stdin. + * If no output_file is supplied, output goes to stdout. + * There are no error messages. + * + * ansi2knr recognizes function definitions by seeing a non-keyword + * identifier at the left margin, followed by a left parenthesis, + * with a right parenthesis as the last character on the line, + * and with a left brace as the first token on the following line + * (ignoring possible intervening comments), except that a line + * consisting of only + * identifier1(identifier2) + * will not be considered a function definition unless identifier2 is + * the word "void", and a line consisting of + * identifier1(identifier2, <<arbitrary>>) + * will not be considered a function definition. + * ansi2knr will recognize a multi-line header provided + * that no intervening line ends with a left or right brace or a semicolon. + * These algorithms ignore whitespace and comments, except that + * the function name must be the first thing on the line. 
+ * The following constructs will confuse it: + * - Any other construct that starts at the left margin and + * follows the above syntax (such as a macro or function call). + * - Some macros that tinker with the syntax of function headers. + */ + +/* + * The original and principal author of ansi2knr is L. Peter Deutsch + * <ghost@aladdin.com>. Other authors are noted in the change history + * that follows (in reverse chronological order): + lpd 1999-04-12 added minor fixes from Pavel Roskin + <pavel_roskin@geocities.com> for clean compilation with + gcc -W -Wall + lpd 1999-03-22 added hack to recognize lines consisting of + identifier1(identifier2, xxx) as *not* being procedures + lpd 1999-02-03 made indentation of preprocessor commands consistent + lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an + endless loop; quoted strings within an argument list + confused the parser + lpd 1999-01-24 added a check for write errors on the output, + suggested by Jim Meyering <meyering@ascend.com> + lpd 1998-11-09 added further hack to recognize identifier(void) + as being a procedure + lpd 1998-10-23 added hack to recognize lines consisting of + identifier1(identifier2) as *not* being procedures + lpd 1997-12-08 made input_file optional; only closes input and/or + output file if not stdin or stdout respectively; prints + usage message on stderr rather than stdout; adds + --filename switch (changes suggested by + <ceder@lysator.liu.se>) + lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with + compilers that don't understand void, as suggested by + Tom Lane + lpd 1996-01-15 changed to require that the first non-comment token + on the line following a function header be a left brace, + to reduce sensitivity to macros, as suggested by Tom Lane + <tgl@sss.pgh.pa.us> + lpd 1995-06-22 removed #ifndefs whose sole purpose was to define + undefined preprocessor symbols as 0; changed all #ifdefs + for configuration symbols to #ifs + lpd 1995-04-05 changed 
copyright notice to make it clear that + including ansi2knr in a program does not bring the entire + program under the GPL + lpd 1994-12-18 added conditionals for systems where ctype macros + don't handle 8-bit characters properly, suggested by + Francois Pinard <pinard@iro.umontreal.ca>; + removed --varargs switch (this is now the default) + lpd 1994-10-10 removed CONFIG_BROKETS conditional + lpd 1994-07-16 added some conditionals to help GNU `configure', + suggested by Francois Pinard <pinard@iro.umontreal.ca>; + properly erase prototype args in function parameters, + contributed by Jim Avera <jima@netcom.com>; + correct error in writeblanks (it shouldn't erase EOLs) + lpd 1989-xx-xx original version + */ + +/* Most of the conditionals here are to make ansi2knr work with */ +/* or without the GNU configure machinery. */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <ctype.h> + +#if HAVE_CONFIG_H + +/* + For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h). + This will define HAVE_CONFIG_H and so, activate the following lines. + */ + +# if STDC_HEADERS || HAVE_STRING_H +# include <string.h> +# else +# include <strings.h> +# endif + +#else /* not HAVE_CONFIG_H */ + +/* Otherwise do it the hard way */ + +# ifdef BSD +# include <strings.h> +# else +# ifdef VMS + extern int strlen(), strncmp(); +# else +# include <string.h> +# endif +# endif + +#endif /* not HAVE_CONFIG_H */ + +#if STDC_HEADERS +# include <stdlib.h> +#else +/* + malloc and free should be declared in stdlib.h, + but if you've got a K&R compiler, they probably aren't. + */ +# ifdef MSDOS +# include <malloc.h> +# else +# ifdef VMS + extern char *malloc(); + extern void free(); +# else + extern char *malloc(); + extern int free(); +# endif +# endif + +#endif + +/* Define NULL (for *very* old compilers). */ +#ifndef NULL +# define NULL (0) +#endif + +/* + * The ctype macros don't always handle 8-bit characters correctly. + * Compensate for this here. 
+ */ +#ifdef isascii +# undef HAVE_ISASCII /* just in case */ +# define HAVE_ISASCII 1 +#else +#endif +#if STDC_HEADERS || !HAVE_ISASCII +# define is_ascii(c) 1 +#else +# define is_ascii(c) isascii(c) +#endif + +#define is_space(c) (is_ascii(c) && isspace(c)) +#define is_alpha(c) (is_ascii(c) && isalpha(c)) +#define is_alnum(c) (is_ascii(c) && isalnum(c)) + +/* Scanning macros */ +#define isidchar(ch) (is_alnum(ch) || (ch) == '_') +#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_') + +/* Forward references */ +char *skipspace(); +char *scanstring(); +int writeblanks(); +int test1(); +int convert1(); + +/* The main program */ +int +main(argc, argv) + int argc; + char *argv[]; +{ FILE *in = stdin; + FILE *out = stdout; + char *filename = 0; + char *program_name = argv[0]; + char *output_name = 0; +#define bufsize 5000 /* arbitrary size */ + char *buf; + char *line; + char *more; + char *usage = + "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n"; + /* + * In previous versions, ansi2knr recognized a --varargs switch. + * If this switch was supplied, ansi2knr would attempt to convert + * a ... argument to va_alist and va_dcl; if this switch was not + * supplied, ansi2knr would simply drop any such arguments. + * Now, ansi2knr always does this conversion, and we only + * check for this switch for backward compatibility. 
+ */ + int convert_varargs = 1; + int output_error; + + while ( argc > 1 && argv[1][0] == '-' ) { + if ( !strcmp(argv[1], "--varargs") ) { + convert_varargs = 1; + argc--; + argv++; + continue; + } + if ( !strcmp(argv[1], "--filename") && argc > 2 ) { + filename = argv[2]; + argc -= 2; + argv += 2; + continue; + } + fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name, + argv[1]); + fprintf(stderr, usage); + exit(1); + } + switch ( argc ) + { + default: + fprintf(stderr, usage); + exit(0); + case 3: + output_name = argv[2]; + out = fopen(output_name, "w"); + if ( out == NULL ) { + fprintf(stderr, "%s: Cannot open output file %s\n", + program_name, output_name); + exit(1); + } + /* falls through */ + case 2: + in = fopen(argv[1], "r"); + if ( in == NULL ) { + fprintf(stderr, "%s: Cannot open input file %s\n", + program_name, argv[1]); + exit(1); + } + if ( filename == 0 ) + filename = argv[1]; + /* falls through */ + case 1: + break; + } + if ( filename ) + fprintf(out, "#line 1 \"%s\"\n", filename); + buf = malloc(bufsize); + if ( buf == NULL ) + { + fprintf(stderr, "Unable to allocate read buffer!\n"); + exit(1); + } + line = buf; + while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL ) + { +test: line += strlen(line); + switch ( test1(buf) ) + { + case 2: /* a function header */ + convert1(buf, out, 1, convert_varargs); + break; + case 1: /* a function */ + /* Check for a { at the start of the next line. */ + more = ++line; +f: if ( line >= buf + (bufsize - 1) ) /* overflow check */ + goto wl; + if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL ) + goto wl; + switch ( *skipspace(more, 1) ) + { + case '{': + /* Definitely a function header. */ + convert1(buf, out, 0, convert_varargs); + fputs(more, out); + break; + case 0: + /* The next line was blank or a comment: */ + /* keep scanning for a non-comment. */ + line += strlen(line); + goto f; + default: + /* buf isn't a function header, but */ + /* more might be. 
*/ + fputs(buf, out); + strcpy(buf, more); + line = buf; + goto test; + } + break; + case -1: /* maybe the start of a function */ + if ( line != buf + (bufsize - 1) ) /* overflow check */ + continue; + /* falls through */ + default: /* not a function */ +wl: fputs(buf, out); + break; + } + line = buf; + } + if ( line != buf ) + fputs(buf, out); + free(buf); + if ( output_name ) { + output_error = ferror(out); + output_error |= fclose(out); + } else { /* out == stdout */ + fflush(out); + output_error = ferror(out); + } + if ( output_error ) { + fprintf(stderr, "%s: error writing to %s\n", program_name, + (output_name ? output_name : "stdout")); + exit(1); + } + if ( in != stdin ) + fclose(in); + return 0; +} + +/* Skip over whitespace and comments, in either direction. */ +char * +skipspace(p, dir) + register char *p; + register int dir; /* 1 for forward, -1 for backward */ +{ for ( ; ; ) + { while ( is_space(*p) ) + p += dir; + if ( !(*p == '/' && p[dir] == '*') ) + break; + p += dir; p += dir; + while ( !(*p == '*' && p[dir] == '/') ) + { if ( *p == 0 ) + return p; /* multi-line comment?? */ + p += dir; + } + p += dir; p += dir; + } + return p; +} + +/* Scan over a quoted string, in either direction. */ +char * +scanstring(p, dir) + register char *p; + register int dir; +{ + for (p += dir; ; p += dir) + if (*p == '"' && p[-dir] != '\\') + return p + dir; +} + +/* + * Write blanks over part of a string. + * Don't overwrite end-of-line characters. + */ +int +writeblanks(start, end) + char *start; + char *end; +{ char *p; + for ( p = start; p < end; p++ ) + if ( *p != '\r' && *p != '\n' ) + *p = ' '; + return 0; +} + +/* + * Test whether the string in buf is a function definition. + * The string may contain and/or end with a newline. 
+ * Return as follows: + * 0 - definitely not a function definition; + * 1 - definitely a function definition; + * 2 - definitely a function prototype (NOT USED); + * -1 - may be the beginning of a function definition, + * append another line and look again. + * The reason we don't attempt to convert function prototypes is that + * Ghostscript's declaration-generating macros look too much like + * prototypes, and confuse the algorithms. + */ +int +test1(buf) + char *buf; +{ register char *p = buf; + char *bend; + char *endfn; + int contin; + + if ( !isidfirstchar(*p) ) + return 0; /* no name at left margin */ + bend = skipspace(buf + strlen(buf) - 1, -1); + switch ( *bend ) + { + case ';': contin = 0 /*2*/; break; + case ')': contin = 1; break; + case '{': return 0; /* not a function */ + case '}': return 0; /* not a function */ + default: contin = -1; + } + while ( isidchar(*p) ) + p++; + endfn = p; + p = skipspace(p, 1); + if ( *p++ != '(' ) + return 0; /* not a function */ + p = skipspace(p, 1); + if ( *p == ')' ) + return 0; /* no parameters */ + /* Check that the apparent function name isn't a keyword. */ + /* We only need to check for keywords that could be followed */ + /* by a left parenthesis (which, unfortunately, is most of them). */ + { static char *words[] = + { "asm", "auto", "case", "char", "const", "double", + "extern", "float", "for", "if", "int", "long", + "register", "return", "short", "signed", "sizeof", + "static", "switch", "typedef", "unsigned", + "void", "volatile", "while", 0 + }; + char **key = words; + char *kp; + unsigned len = endfn - buf; + + while ( (kp = *key) != 0 ) + { if ( strlen(kp) == len && !strncmp(kp, buf, len) ) + return 0; /* name is a keyword */ + key++; + } + } + { + char *id = p; + int len; + /* + * Check for identifier1(identifier2) and not + * identifier1(void), or identifier1(identifier2, xxxx). 
+ */ + + while ( isidchar(*p) ) + p++; + len = p - id; + p = skipspace(p, 1); + if (*p == ',' || + (*p == ')' && (len != 4 || strncmp(id, "void", 4))) + ) + return 0; /* not a function */ + } + /* + * If the last significant character was a ), we need to count + * parentheses, because it might be part of a formal parameter + * that is a procedure. + */ + if (contin > 0) { + int level = 0; + + for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1)) + level += (*p == '(' ? 1 : *p == ')' ? -1 : 0); + if (level > 0) + contin = -1; + } + return contin; +} + +/* Convert a recognized function definition or header to K&R syntax. */ +int +convert1(buf, out, header, convert_varargs) + char *buf; + FILE *out; + int header; /* Boolean */ + int convert_varargs; /* Boolean */ +{ char *endfn; + register char *p; + /* + * The breaks table contains pointers to the beginning and end + * of each argument. + */ + char **breaks; + unsigned num_breaks = 2; /* for testing */ + char **btop; + char **bp; + char **ap; + char *vararg = 0; + + /* Pre-ANSI implementations don't agree on whether strchr */ + /* is called strchr or index, so we open-code it here. */ + for ( endfn = buf; *(endfn++) != '('; ) + ; +top: p = endfn; + breaks = (char **)malloc(sizeof(char *) * num_breaks * 2); + if ( breaks == NULL ) + { /* Couldn't allocate break table, give up */ + fprintf(stderr, "Unable to allocate break table!\n"); + fputs(buf, out); + return -1; + } + btop = breaks + num_breaks * 2 - 2; + bp = breaks; + /* Parse the argument list */ + do + { int level = 0; + char *lp = NULL; + char *rp = NULL; + char *end = NULL; + + if ( bp >= btop ) + { /* Filled up break table. */ + /* Allocate a bigger one and start over. 
*/ + free((char *)breaks); + num_breaks <<= 1; + goto top; + } + *bp++ = p; + /* Find the end of the argument */ + for ( ; end == NULL; p++ ) + { switch(*p) + { + case ',': + if ( !level ) end = p; + break; + case '(': + if ( !level ) lp = p; + level++; + break; + case ')': + if ( --level < 0 ) end = p; + else rp = p; + break; + case '/': + if (p[1] == '*') + p = skipspace(p, 1) - 1; + break; + case '"': + p = scanstring(p, 1) - 1; + break; + default: + ; + } + } + /* Erase any embedded prototype parameters. */ + if ( lp && rp ) + writeblanks(lp + 1, rp); + p--; /* back up over terminator */ + /* Find the name being declared. */ + /* This is complicated because of procedure and */ + /* array modifiers. */ + for ( ; ; ) + { p = skipspace(p - 1, -1); + switch ( *p ) + { + case ']': /* skip array dimension(s) */ + case ')': /* skip procedure args OR name */ + { int level = 1; + while ( level ) + switch ( *--p ) + { + case ']': case ')': + level++; + break; + case '[': case '(': + level--; + break; + case '/': + if (p > buf && p[-1] == '*') + p = skipspace(p, -1) + 1; + break; + case '"': + p = scanstring(p, -1) + 1; + break; + default: ; + } + } + if ( *p == '(' && *skipspace(p + 1, 1) == '*' ) + { /* We found the name being declared */ + while ( !isidfirstchar(*p) ) + p = skipspace(p, 1) + 1; + goto found; + } + break; + default: + goto found; + } + } +found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' 
) + { if ( convert_varargs ) + { *bp++ = "va_alist"; + vararg = p-2; + } + else + { p++; + if ( bp == breaks + 1 ) /* sole argument */ + writeblanks(breaks[0], p); + else + writeblanks(bp[-1] - 1, p); + bp--; + } + } + else + { while ( isidchar(*p) ) p--; + *bp++ = p+1; + } + p = end; + } + while ( *p++ == ',' ); + *bp = p; + /* Make a special check for 'void' arglist */ + if ( bp == breaks+2 ) + { p = skipspace(breaks[0], 1); + if ( !strncmp(p, "void", 4) ) + { p = skipspace(p+4, 1); + if ( p == breaks[2] - 1 ) + { bp = breaks; /* yup, pretend arglist is empty */ + writeblanks(breaks[0], p + 1); + } + } + } + /* Put out the function name and left parenthesis. */ + p = buf; + while ( p != endfn ) putc(*p, out), p++; + /* Put out the declaration. */ + if ( header ) + { fputs(");", out); + for ( p = breaks[0]; *p; p++ ) + if ( *p == '\r' || *p == '\n' ) + putc(*p, out); + } + else + { for ( ap = breaks+1; ap < bp; ap += 2 ) + { p = *ap; + while ( isidchar(*p) ) + putc(*p, out), p++; + if ( ap < bp - 1 ) + fputs(", ", out); + } + fputs(") ", out); + /* Put out the argument declarations */ + for ( ap = breaks+2; ap <= bp; ap += 2 ) + (*ap)[-1] = ';'; + if ( vararg != 0 ) + { *vararg = 0; + fputs(breaks[0], out); /* any prior args */ + fputs("va_dcl", out); /* the final arg */ + fputs(bp[0], out); + } + else + fputs(breaks[0], out); + } + free((char *)breaks); + return 0; +} diff --git a/rts/gmp/assert.c b/rts/gmp/assert.c new file mode 100644 index 0000000000..65eccfa30b --- /dev/null +++ b/rts/gmp/assert.c @@ -0,0 +1,52 @@ +/* GMP assertion failure handler. */ + +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + + +int +#if __STDC__ +__gmp_assert_fail (const char *filename, int linenum, + const char *expr) +#else +__gmp_assert_fail (filename, linenum, expr) +char *filename; +int linenum; +char *expr; +#endif +{ + if (filename != NULL && filename[0] != '\0') + { + fprintf (stderr, "%s:", filename); + if (linenum != -1) + fprintf (stderr, "%d: ", linenum); + } + + fprintf (stderr, "GNU MP assertion failed: %s\n", expr); + abort(); + + /*NOTREACHED*/ + return 0; +} diff --git a/rts/gmp/compat.c b/rts/gmp/compat.c new file mode 100644 index 0000000000..ab7529f52f --- /dev/null +++ b/rts/gmp/compat.c @@ -0,0 +1,46 @@ +/* Old function entrypoints retained for binary compatibility. */ + +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + + +/* mpn_divexact_by3 was a function in gmp 3.0, but as of gmp 3.1 it's a + macro calling mpn_divexact_by3c. The macro's result is passed back so that old binaries calling this entry point still receive it. */ +mp_limb_t +__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size) +{ + return mpn_divexact_by3 (dst, src, size); +} + + +/* mpn_divmod_1 was a function in gmp 3.0 and earlier, but marked obsolete + in gmp 2 and 3. As of gmp 3.1 it's a macro calling mpn_divrem_1. The macro's result is passed back as a full limb rather than being dropped. */ +mp_limb_t +__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor) +{ + return mpn_divmod_1 (dst, src, size, divisor); +} + + diff --git a/rts/gmp/config.guess b/rts/gmp/config.guess new file mode 100644 index 0000000000..08018f497d --- /dev/null +++ b/rts/gmp/config.guess @@ -0,0 +1,1373 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000 +# Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Written by Per Bothner <bothner@cygnus.com>. +# Please send patches to <config-patches@gnu.org>. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit system type (host/target name). +# +# Only a few systems have been added to this list; please add others +# (but try to keep the structure clean). +# + + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 8/24/94.) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +dummy=dummy-$$ +trap 'rm -f $dummy.c $dummy.o $dummy ${dummy}1.s ${dummy}2.c ; exit 1' 1 2 15 + +# Use $HOST_CC if defined. $CC may point to a cross-compiler +if test x"$CC_FOR_BUILD" = x; then + if test x"$HOST_CC" != x; then + CC_FOR_BUILD="$HOST_CC" + else + if test x"$CC" != x; then + CC_FOR_BUILD="$CC" + else + echo 'dummy(){}' >$dummy.c + for c in cc c89 gcc; do + ($c $dummy.c -c) >/dev/null 2>&1 + if test $? = 0; then + CC_FOR_BUILD="$c"; break + fi + done + rm -f $dummy.c $dummy.o + if test x"$CC_FOR_BUILD" = x; then + CC_FOR_BUILD=no_compiler_found + fi + fi + fi +fi + + +# First make a best effort at recognizing x86 CPU type and leave it in X86CPU. 
+# If we fail, set X86CPU to UNAME_MACHINE +# +# DJGPP v2 (or 2.03 at least) always gives "pc" for uname -m, and the +# OEM for uname -s. Eg. pc:MS-DOS:6:2 on MS-DOS 6.21. The list of +# possible OEMs is in src/libc/dos/dos/getdos_v.c of djlsr203.zip, but +# just pc:*:*:* seems ok. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + i?86:*:*:* | i86pc:*:*:* | pc:*:*:*) + case "${UNAME_MACHINE}" in + i86pc | pc) UNAME_MACHINE=i386 ;; + esac + cat <<EOF >${dummy}1.s + .globl cpuid + .globl _cpuid +cpuid: +_cpuid: + pushl %esi + pushl %ebx + movl 16(%esp),%eax + .byte 0x0f + .byte 0xa2 + movl 12(%esp),%esi + movl %ebx,(%esi) + movl %edx,4(%esi) + movl %ecx,8(%esi) + popl %ebx + popl %esi + ret +EOF + cat <<EOF >${dummy}2.c +main () +{ + char vendor_string[13]; + char dummy_string[12]; + long fms; + int family, model; + char *modelstr; + + cpuid (vendor_string, 0); + vendor_string[12] = 0; + + fms = cpuid (dummy_string, 1); + + family = (fms >> 8) & 15; + model = (fms >> 4) & 15; + + modelstr = "i486"; + if (strcmp (vendor_string, "GenuineIntel") == 0) + { + switch (family) + { + case 5: + if (model <= 2) + modelstr = "pentium"; + else if (model >= 4) + modelstr = "pentiummmx"; + break; + case 6: + if (model == 1) + modelstr = "pentiumpro"; + else if (model <= 6) + modelstr = "pentium2"; + else + modelstr = "pentium3"; + break; + } + } + else if (strcmp (vendor_string, "AuthenticAMD") == 0) + { + switch (family) + { + case 5: + if (model <= 3) + modelstr = "k5"; + else if (model <= 7) + modelstr = "k6"; + else if (model <= 8) + modelstr = "k62"; + else if (model <= 9) + modelstr = "k63"; + break; + case 6: + modelstr = "athlon"; + break; + } + } + else if (strcmp (vendor_string, "CyrixInstead") == 0) + { + /* Should recognize Cyrix' processors too. */ + } + + printf ("%s\n", modelstr); + return 0; +} +EOF + $CC_FOR_BUILD ${dummy}1.s ${dummy}2.c -o $dummy >/dev/null 2>&1 + if test "$?" 
= 0 ; then + X86CPU=`./$dummy` + fi + + + # Default to believing uname -m if the program fails to compile or + # run. Will fail to run on 386 since cpuid was only added on 486. + if test -z "$X86CPU" + then + X86CPU="$UNAME_MACHINE" + fi + rm -f ${dummy}1.s ${dummy}2.c $dummy + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + alpha:OSF1:*:*) + if test $UNAME_RELEASE = "V4.0"; then + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + fi + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + cat <<EOF >$dummy.s + .data +\$Lformat: + .byte 37,100,45,37,120,10,0 # "%d-%x\n" + + .text + .globl main + .align 4 + .ent main +main: + .frame \$30,16,\$26,0 + ldgp \$29,0(\$27) + .prologue 1 + .long 0x47e03d80 # implver \$0 + lda \$2,-1 + .long 0x47e20c21 # amask \$2,\$1 + lda \$16,\$Lformat + mov \$0,\$17 + not \$1,\$18 + jsr \$26,printf + ldgp \$29,0(\$26) + mov 0,\$16 + jsr \$26,exit + .end main +EOF + $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null + if test "$?" 
= 0 ; then + case `./$dummy` in + 0-0) + UNAME_MACHINE="alpha" + ;; + 1-0) + UNAME_MACHINE="alphaev5" + ;; + 1-1) + UNAME_MACHINE="alphaev56" + ;; + 1-101) + UNAME_MACHINE="alphapca56" + ;; + 2-303) + UNAME_MACHINE="alphaev6" + ;; + 2-307) + UNAME_MACHINE="alphaev67" + ;; + esac + fi + rm -f $dummy.s $dummy + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit 0 ;; + alpha:NetBSD:*:* | alpha:FreeBSD:*:*) + cat <<EOF >$dummy.s + .globl main + .ent main +main: + .frame \$30,0,\$26,0 + .prologue 0 + .long 0x47e03d80 # implver $0 + lda \$2,259 + .long 0x47e20c21 # amask $2,$1 + srl \$1,8,\$2 + sll \$2,2,\$2 + sll \$0,3,\$0 + addl \$1,\$0,\$0 + addl \$2,\$0,\$0 + ret \$31,(\$26),1 + .end main +EOF + $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null + if test "$?" = 0 ; then + ./$dummy + case "$?" in + 7) + UNAME_MACHINE="alpha" + ;; + 15) + UNAME_MACHINE="alphaev5" + ;; + 14) + UNAME_MACHINE="alphaev56" + ;; + 10) + UNAME_MACHINE="alphapca56" + ;; + 16) + UNAME_MACHINE="alphaev6" + ;; + esac + fi + rm -f $dummy.s $dummy + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM}${UNAME_RELEASE} | sed -e 's/^[VTX]//' -e 's/[-(].*//' | tr [[A-Z]] [[a-z]]` + exit 0 ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? 
+ echo alpha-pc-interix + exit 0 ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit 0 ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-cbm-sysv4 + exit 0;; + amiga:NetBSD:*:*) + echo m68k-cbm-netbsd${UNAME_RELEASE} + exit 0 ;; + amiga:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit 0 ;; + arc64:OpenBSD:*:*) + echo mips64el-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + arc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + hkmips:OpenBSD:*:*) + echo mips-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + pmax:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + sgi:OpenBSD:*:*) + echo mips-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + wgrisc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit 0 ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit 0;; + arm32:NetBSD:*:*) + echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + exit 0 ;; + SR2?01:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit 0;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit 0 ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit 0 ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4[md]:SunOS:5.*:*) + echo sparcv8-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4u:SunOS:5.*:*) + echo sparcv9-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + i386:SunOS:5.*:*) + echo ${X86CPU}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4[md]:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparcv8-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit 0 ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit 0 ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit 0 ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit 0 ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit 0 ;; + atari*:NetBSD:*:*) + echo m68k-atari-netbsd${UNAME_RELEASE} + exit 0 ;; + atari*:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + # The situation for MiNT is a little confusing. 
The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit 0 ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit 0 ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit 0 ;; + sun3*:NetBSD:*:*) + echo m68k-sun-netbsd${UNAME_RELEASE} + exit 0 ;; + sun3*:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mac68k:NetBSD:*:*) + echo m68k-apple-netbsd${UNAME_RELEASE} + exit 0 ;; + mac68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + macppc:NetBSD:*:*) + echo powerpc-apple-netbsd${UNAME_RELEASE} + exit 0 ;; + mvme68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvme88k:OpenBSD:*:*) + echo m88k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit 0 ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit 0 ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit 0 ;; + mips:*:*:UMIPS | 
mips:*:*:RISCos) + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy \ + && ./$dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ + && rm $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo mips-mips-riscos${UNAME_RELEASE} + exit 0 ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit 0 ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit 0 ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit 0 ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit 0 ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit 0 ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit 0 ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit 0 ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit 0 ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit 0 ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit 0 ;; + ????????:AIX?:[12].1:2) # AIX 
2.2.1 or AIX 2.1.1 is RT/PC AIX. + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i?86:AIX:*:*) + echo i386-ibm-aix + exit 0 ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + sed 's/^ //' << EOF >$dummy.c + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo rs6000-ibm-aix3.2.5 + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit 0 ;; + *:AIX:*:4) + sed 's/^ //' << EOF >$dummy.c + #include <stdio.h> + #include <sys/systemcfg.h> + main () + { + if (_system_configuration.architecture == POWER_RS + || _system_configuration.implementation == POWER_601) + puts ("power"); + else + { + if (_system_configuration.width == 64) + puts ("powerpc64"); + else + puts ("powerpc"); + } + exit (0); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy + IBM_ARCH=`./$dummy` + rm -f $dummy.c $dummy + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=4.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit 0 ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit 0 ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit 0 ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit 0 ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit 0 ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit 0 ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit 0 ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit 0 ;; + 9000/[34678]??:HP-UX:*:*) + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null ) && HP_ARCH=`./$dummy` + rm -f $dummy.c $dummy + esac + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit 0 ;; + 3050*:HI-UX:*:*) + sed 's/^ //' << EOF >$dummy.c + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. 
*/ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + echo unknown-hitachi-hiuxwe2 + exit 0 ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit 0 ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit 0 ;; + *9??*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit 0 ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit 0 ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit 0 ;; + i?86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit 0 ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit 0 ;; + hppa*:OpenBSD:*:*) + echo hppa-unknown-openbsd + exit 0 ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit 0 ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit 0 ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit 0 ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit 0 ;; + CRAY*X-MP:*:*:*) + echo xmp-cray-unicos + exit 0 ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} + exit 0 ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ + exit 0 ;; + CRAY*TS:*:*:*) + echo 
t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*T3D:*:*:*) + echo alpha-cray-unicos + exit 0 ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY-2:*:*:*) + echo cray2-cray-unicos + exit 0 ;; + F300:UNIX_System_V:*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit 0 ;; + F301:UNIX_System_V:*:*) + echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'` + exit 0 ;; + hp3[0-9][05]:NetBSD:*:*) + echo m68k-hp-netbsd${UNAME_RELEASE} + exit 0 ;; + hp300:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + i?86:BSD/386:*:* | i?86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${X86CPU}-pc-bsdi${UNAME_RELEASE} + exit 0 ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + i386:FreeBSD:*:*) + echo ${X86CPU}-pc-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit 0 ;; + *:FreeBSD:*:*) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit 0 ;; + i386:NetBSD:*:*) + echo ${X86CPU}-pc-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + exit 0 ;; + *:NetBSD:*:*) + echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + exit 0 ;; + i386:OpenBSD:*:*) + echo ${X86CPU}-pc-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + exit 0 ;; + *:OpenBSD:*:*) + echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + exit 0 ;; + i*:CYGWIN*:*) + echo ${X86CPU}-pc-cygwin + exit 0 ;; + i*:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit 0 ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix 
rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # change UNAME_MACHINE based on the output of uname instead of i386? + echo i386-pc-interix + exit 0 ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit 0 ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit 0 ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + *:GNU:*:*) + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit 0 ;; + *:Linux:*:*) + + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + ld_help_string=`cd /; ld --help 2>&1` + ld_supported_emulations=`echo $ld_help_string \ + | sed -ne '/supported emulations:/!d + s/[ ][ ]*/ /g + s/.*supported emulations: *// + s/ .*// + p'` + case "$ld_supported_emulations" in + *ia64) + echo "${UNAME_MACHINE}-unknown-linux" + exit 0 + ;; + i?86linux) + echo "${X86CPU}-pc-linux-gnuaout" + exit 0 + ;; + i?86coff) + echo "${X86CPU}-pc-linux-gnucoff" + exit 0 + ;; + sparclinux) + echo "${UNAME_MACHINE}-unknown-linux-gnuaout" + exit 0 + ;; + armlinux) + echo "${UNAME_MACHINE}-unknown-linux-gnuaout" + exit 0 + ;; + elf32arm*) + echo "${UNAME_MACHINE}-unknown-linux-gnuoldld" + exit 0 + ;; + armelf_linux*) + echo "${UNAME_MACHINE}-unknown-linux-gnu" + exit 0 + ;; + m68klinux) + echo "${UNAME_MACHINE}-unknown-linux-gnuaout" + exit 0 + ;; + elf32ppc | elf32ppclinux) + # Determine Lib Version + cat >$dummy.c <<EOF +#include <features.h> +#if defined(__GLIBC__) +extern char __libc_version[]; +extern char __libc_release[]; +#endif +main(argc, argv) + int argc; + char *argv[]; +{ +#if defined(__GLIBC__) + printf("%s %s\n", __libc_version, __libc_release); +#else + printf("unkown\n"); +#endif + return 0; +} +EOF + LIBC="" + $CC_FOR_BUILD $dummy.c 
-o $dummy 2>/dev/null + if test "$?" = 0 ; then + ./$dummy | grep 1\.99 > /dev/null + if test "$?" = 0 ; then + LIBC="libc1" + fi + fi + rm -f $dummy.c $dummy + echo powerpc-unknown-linux-gnu${LIBC} + exit 0 + ;; + esac + + if test "${UNAME_MACHINE}" = "alpha" ; then + cat <<EOF >$dummy.s + .data + \$Lformat: + .byte 37,100,45,37,120,10,0 # "%d-%x\n" + + .text + .globl main + .align 4 + .ent main + main: + .frame \$30,16,\$26,0 + ldgp \$29,0(\$27) + .prologue 1 + .long 0x47e03d80 # implver \$0 + lda \$2,-1 + .long 0x47e20c21 # amask \$2,\$1 + lda \$16,\$Lformat + mov \$0,\$17 + not \$1,\$18 + jsr \$26,printf + ldgp \$29,0(\$26) + mov 0,\$16 + jsr \$26,exit + .end main +EOF + LIBC="" + $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null + if test "$?" = 0 ; then + case `./$dummy` in + 0-0) + UNAME_MACHINE="alpha" + ;; + 1-0) + UNAME_MACHINE="alphaev5" + ;; + 1-1) + UNAME_MACHINE="alphaev56" + ;; + 1-101) + UNAME_MACHINE="alphapca56" + ;; + 2-303) + UNAME_MACHINE="alphaev6" + ;; + 2-307) + UNAME_MACHINE="alphaev67" + ;; + esac + + objdump --private-headers $dummy | \ + grep ld.so.1 > /dev/null + if test "$?" = 0 ; then + LIBC="libc1" + fi + fi + rm -f $dummy.s $dummy + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0 + elif test "${UNAME_MACHINE}" = "mips" ; then + cat >$dummy.c <<EOF +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif +#ifdef __MIPSEB__ + printf ("%s-unknown-linux-gnu\n", argv[1]); +#endif +#ifdef __MIPSEL__ + printf ("%sel-unknown-linux-gnu\n", argv[1]); +#endif + return 0; +} +EOF + $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + elif test "${UNAME_MACHINE}" = "s390"; then + echo s390-ibm-linux && exit 0 + else + # Either a pre-BFD a.out linker (linux-gnuoldld) + # or one that does not give us useful --help. 
+ # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout. + # If ld does not provide *any* "supported emulations:" + # that means it is gnuoldld. + echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:" + test $? != 0 && echo "${X86CPU}-pc-linux-gnuoldld" && exit 0 + + case "${UNAME_MACHINE}" in + i?86) + VENDOR=pc; + UNAME_MACHINE=${X86CPU} + ;; + *) + VENDOR=unknown; + ;; + esac + # Determine whether the default compiler is a.out or elf + cat >$dummy.c <<EOF +#include <features.h> +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif +#ifdef __ELF__ +# ifdef __GLIBC__ +# if __GLIBC__ >= 2 + printf ("%s-${VENDOR}-linux-gnu\n", argv[1]); +# else + printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); +# endif +# else + printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); +# endif +#else + printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]); +#endif + return 0; +} +EOF + $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0 + rm -f $dummy.c $dummy + fi ;; +# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions +# are messed up and put the nodename in both sysname and nodename. + i?86:DYNIX/ptx:4*:*) + echo i386-sequent-sysv4 + exit 0 ;; + i?86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. 
+ echo ${X86CPU}-pc-sysv4.2uw${UNAME_VERSION} + exit 0 ;; + i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${X86CPU}-univel-sysv${UNAME_REL} + else + echo ${X86CPU}-pc-sysv${UNAME_REL} + fi + exit 0 ;; + i?86:*:5:7*) + # Fixed at (any) Pentium or better + UNAME_MACHINE=i586 + if [ ${UNAME_SYSTEM} = "UnixWare" ] ; then + echo ${X86CPU}-sco-sysv${UNAME_RELEASE}uw${UNAME_VERSION} + else + echo ${X86CPU}-pc-sysv${UNAME_RELEASE} + fi + exit 0 ;; + i?86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo ${X86CPU}-pc-isc$UNAME_REL + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')` + (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|egrep '^Machine.*Pent ?II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|egrep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${X86CPU}-pc-sco$UNAME_REL + else + echo ${X86CPU}-pc-sysv32 + fi + exit 0 ;; + i?86:*DOS:*:*) + echo ${X86CPU}-pc-msdosdjgpp + exit 0 ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386. + echo i386-pc-msdosdjgpp + exit 0 ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit 0 ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit 0 ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit 0 ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit 0 ;; + M68*:*:R3V[567]*:*) + test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; + 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4 && exit 0 ;; + m68*:LynxOS:2.*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit 0 ;; + i?86:LynxOS:2.*:* | i?86:LynxOS:3.[01]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit 0 ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit 0 ;; + PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit 0 ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit 0 ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. 
+ echo i860-stratus-sysv4 + exit 0 ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit 0 ;; + news*:NEWS-OS:*:6*) + echo mips-sony-newsos6 + exit 0 ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit 0 ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit 0 ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit 0 ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit 0 ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit 0 ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit 0 ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + Power*:Mac*OS:*:*) + echo powerpc-apple-macos${UNAME_RELEASE} + exit 0 ;; + *:Mac*OS:*:*) + echo ${UNAME_MACHINE}-apple-macos${UNAME_RELEASE} + exit 0 ;; + *:Darwin:*:*) + echo `uname -p`-apple-darwin${UNAME_RELEASE} + exit 0 ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + if test "${UNAME_MACHINE}" = "x86pc"; then + UNAME_MACHINE=pc + fi + echo `uname -p`-${UNAME_MACHINE}-nto-qnx + exit 0 ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit 0 ;; + NSR-W:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit 0 ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit 0 ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +cat >$dummy.c <<EOF +#ifdef _SEQUENT_ +# include <sys/types.h> +# include <sys/utsname.h> +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include <sys/param.h> + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); /* newline-terminated, like every other branch of this probe */ +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +#if !defined (ultrix) + printf ("vax-dec-bsd\n"); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy && rm $dummy.c $dummy && exit 0 +rm -f $dummy.c $dummy + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit 0 ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + c34*) + echo c34-convex-bsd + exit 0 ;; + c38*) + echo c38-convex-bsd + exit 0 ;; + c4*) + echo c4-convex-bsd + exit 0 ;; + esac +fi + +#echo '(Unable to guess system type)' 1>&2 + +exit 1 diff --git a/rts/gmp/config.in b/rts/gmp/config.in new file mode 100644 index 0000000000..8b2546ef16 --- /dev/null +++ b/rts/gmp/config.in @@ -0,0 +1,162 @@ +/* config.in. Generated automatically from configure.in by autoheader. */ +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + + +/* Define if a limb is long long. */ +#undef _LONG_LONG_LIMB + +/* Define if we have native implementation of function. */ +#undef HAVE_NATIVE_ +#undef HAVE_NATIVE_mpn_add +#undef HAVE_NATIVE_mpn_add_1 +#undef HAVE_NATIVE_mpn_add_n +#undef HAVE_NATIVE_mpn_add_nc +#undef HAVE_NATIVE_mpn_addmul_1 +#undef HAVE_NATIVE_mpn_addmul_1c +#undef HAVE_NATIVE_mpn_addsub_n +#undef HAVE_NATIVE_mpn_addsub_nc +#undef HAVE_NATIVE_mpn_and_n +#undef HAVE_NATIVE_mpn_andn_n +#undef HAVE_NATIVE_mpn_bdivmod +#undef HAVE_NATIVE_mpn_cmp +#undef HAVE_NATIVE_mpn_com_n +#undef HAVE_NATIVE_mpn_copyd +#undef HAVE_NATIVE_mpn_copyi +#undef HAVE_NATIVE_mpn_divexact_by3c +#undef HAVE_NATIVE_mpn_divrem +#undef HAVE_NATIVE_mpn_divrem_1 +#undef HAVE_NATIVE_mpn_divrem_1c +#undef HAVE_NATIVE_mpn_divrem_2 +#undef HAVE_NATIVE_mpn_divrem_newton +#undef HAVE_NATIVE_mpn_divrem_classic +#undef HAVE_NATIVE_mpn_dump +#undef HAVE_NATIVE_mpn_gcd +#undef HAVE_NATIVE_mpn_gcd_1 +#undef HAVE_NATIVE_mpn_gcdext +#undef HAVE_NATIVE_mpn_get_str +#undef HAVE_NATIVE_mpn_hamdist +#undef HAVE_NATIVE_mpn_invert_limb +#undef HAVE_NATIVE_mpn_ior_n +#undef HAVE_NATIVE_mpn_iorn_n +#undef HAVE_NATIVE_mpn_lshift +#undef HAVE_NATIVE_mpn_mod_1 +#undef HAVE_NATIVE_mpn_mod_1c +#undef HAVE_NATIVE_mpn_mul +#undef HAVE_NATIVE_mpn_mul_1 +#undef HAVE_NATIVE_mpn_mul_1c +#undef HAVE_NATIVE_mpn_mul_basecase +#undef HAVE_NATIVE_mpn_mul_n +#undef HAVE_NATIVE_mpn_nand_n +#undef HAVE_NATIVE_mpn_nior_n +#undef HAVE_NATIVE_mpn_perfect_square_p +#undef HAVE_NATIVE_mpn_popcount +#undef HAVE_NATIVE_mpn_preinv_mod_1 +#undef HAVE_NATIVE_mpn_random2 +#undef HAVE_NATIVE_mpn_random +#undef HAVE_NATIVE_mpn_rawrandom +#undef HAVE_NATIVE_mpn_rshift +#undef HAVE_NATIVE_mpn_scan0 +#undef HAVE_NATIVE_mpn_scan1 
+#undef HAVE_NATIVE_mpn_set_str +#undef HAVE_NATIVE_mpn_sqrtrem +#undef HAVE_NATIVE_mpn_sqr_basecase +#undef HAVE_NATIVE_mpn_sub +#undef HAVE_NATIVE_mpn_sub_1 +#undef HAVE_NATIVE_mpn_sub_n +#undef HAVE_NATIVE_mpn_sub_nc +#undef HAVE_NATIVE_mpn_submul_1 +#undef HAVE_NATIVE_mpn_submul_1c +#undef HAVE_NATIVE_mpn_udiv_w_sdiv +#undef HAVE_NATIVE_mpn_umul_ppmm +#undef HAVE_NATIVE_mpn_udiv_qrnnd +#undef HAVE_NATIVE_mpn_xor_n +#undef HAVE_NATIVE_mpn_xnor_n + +/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't. + */ +#undef HAVE_DECL_OPTARG + +/* ./configure --enable-assert option, to enable some ASSERT()s */ +#undef WANT_ASSERT + +/* Define if you have the <sys/sysctl.h> header file. */ +#undef HAVE_SYS_SYSCTL_H + +/* Define if you have the `strtoul' function. */ +#undef HAVE_STRTOUL + +/* Name of package */ +#undef PACKAGE + +/* Define if you have the `sysctlbyname' function. */ +#undef HAVE_SYSCTLBYNAME + +/* Define if the system has the type `void'. */ +#undef HAVE_VOID + +/* Define if you have the `popen' function. */ +#undef HAVE_POPEN + +/* ./configure --disable-alloca option, to use stack-alloc.c, not alloca */ +#undef USE_STACK_ALLOC + +/* Define if cpp supports the ANSI # stringizing operator. */ +#undef HAVE_STRINGIZE + +/* Define if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define if you have the `sysconf' function. */ +#undef HAVE_SYSCONF + +/* Define if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define if you have the `processor_info' function. */ +#undef HAVE_PROCESSOR_INFO + +/* Version number of package */ +#undef VERSION + +/* Define if you have the `getopt_long' function. */ +#undef HAVE_GETOPT_LONG + +/* Define if you have the <getopt.h> header file. */ +#undef HAVE_GETOPT_H + +/* Define if you have the ANSI C header files. 
*/ +#undef STDC_HEADERS + +/* Define if a speed_cyclecounter exists (for the tune programs) */ +#undef HAVE_SPEED_CYCLECOUNTER + +/* Define if mpn/tests has calling conventions checking for the CPU */ +#undef HAVE_CALLING_CONVENTIONS + +/* ./configure --enable-fft option, to enable FFTs for multiplication */ +#undef WANT_FFT + +/* Define if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H diff --git a/rts/gmp/config.sub b/rts/gmp/config.sub new file mode 100644 index 0000000000..c4123f28ff --- /dev/null +++ b/rts/gmp/config.sub @@ -0,0 +1,1273 @@ +#! /bin/sh +# Configuration validation subroutine script, version 1.1. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000 +# Free Software Foundation, Inc. +# +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Written by Per Bothner <bothner@cygnus.com>. +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +if [ x$1 = x ] +then + echo Configuration name missing. 1>&2 + echo "Usage: $0 CPU-MFR-OPSYS" 1>&2 + echo "or $0 ALIAS" 1>&2 + echo where ALIAS is a recognized configuration type. 1>&2 + exit 1 +fi + +# First pass through any local machine types. +case $1 in + *local*) + echo $1 + exit 0 + ;; + *) + ;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. 
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + # Every name matched by this arm is canonicalized to CPU-unknown. + tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \ + | arme[lb] | pyramid | mn10200 | mn10300 | tron | a29k \ + | 580 | i960 | h8300 \ + | x86 | ppcbe | mipsbe | mipsle | shbe | shle | armbe | armle \ + | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \ + | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \ + | alphaev6[78] \ + | we32k | ns16k | clipper | i370 | sh | powerpc | powerpcle \ + | 1750a | dsp16xx | pdp11 | mips16 | mips64 | mipsel | mips64el \ + | mips64orion | mips64orionel | mipstx39 | mipstx39el \ + | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \ + | mips64vr5000 | mips64vr5000el | mcore \ + | sparc | sparclet | sparclite | sparc64 | sparcv9 | v850 | c4x \ + | powerpc64 | sparcv8 | supersparc | microsparc | ultrasparc \ + | thumb | d10v | fr30 | avr) + basic_machine=$basic_machine-unknown + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | h8500 | w65 | pj | pjl) + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users.
+ i[34567]86 | pentium[23] | k[56] | k6[23] | athlon) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + # A CPU accepted bare (and turned into CPU-unknown) must also be listed + # here as CPU-*, otherwise the canonical output is rejected as input. + vax-* | tahoe-* | i[34567]86-* | pentium[23]-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \ + | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \ + | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \ + | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \ + | xmp-* | ymp-* \ + | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* | armbe-* | armle-* \ + | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* | hppa2.0n-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \ + | alphaev6[78]-* \ + | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \ + | clipper-* | orion-* \ + | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \ + | sparc64-* | sparcv9-* | sparc86x-* | mips16-* | mips64-* | mipsel-* \ + | mips64el-* | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mipstx39-* | mipstx39el-* | mcore-* \ + | f301-* | armv*-* | s390-* | sv1-* | t3e-* \ + | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \ + | k[56]-* | k6[23]-* | athlon-* | powerpc64-* \ + | sparcv8-* | supersparc-* | microsparc-* | ultrasparc-* \ + | thumb-* | v850-* | d30v-* | tic30-* | c30-* | fr30-* ) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS.
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-cbm + ;; + amigaos | amigados) + basic_machine=m68k-cbm + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-cbm + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | ymp) + basic_machine=ymp-cray + os=-unicos + ;; + cray2) + basic_machine=cray2-cray + os=-unicos + ;; + [ctj]90-cray) + basic_machine=c90-cray + os=-unicos + ;; + crds | unos) + basic_machine=m68k-crds + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | 
OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + basic_machine=hppa2.0-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + basic_machine=hppa2.0-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
+ i[34567]86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i[34567]86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i[34567]86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i[34567]86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + i386-go32 | go32) + basic_machine=i386-unknown + os=-go32 + ;; + i386-mingw32 | mingw32) + basic_machine=i386-unknown + os=-mingw32 + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + macppc*) + basic_machine=powerpc-apple + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mipsel*-linux*) + basic_machine=mipsel-unknown + os=-linux-gnu + ;; + mips*-linux*) + basic_machine=mips-unknown + os=-linux-gnu + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + mmix*) + basic_machine=mmix-knuth + os=-mmixware + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + msdos) + basic_machine=i386-unknown + os=-msdos + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + 
os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pentiummmx | p55) + basic_machine=pentiummmx-pc + ;; + pentium | p5 | i586) + basic_machine=pentium-pc + ;; + pentiumpro | p6) + basic_machine=pentiumpro-pc + ;; + pentiummmx-* | p55-*) + basic_machine=pentiummmx-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium-* | p5-* | i586-*) + basic_machine=pentium-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-*) + basic_machine=pentiumpro-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + nexen | nexgen) + # "nexen" is the historical misspelling of this alias; it is still + # accepted so that existing callers keep working. + # We don't have specific support for Nexgen yet, so just call it a Pentium + basic_machine=i586-nexgen + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=rs6000-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) + basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) +
basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sparclite-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=t3e-cray + os=-unicos + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + 
os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xmp) + basic_machine=xmp-cray + os=-unicos + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + mips) + if [ x$os = x-linux-gnu ]; then + basic_machine=mips-unknown + else + basic_machine=mips-mips + fi + ;; + romp) + basic_machine=romp-ibm + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sparc | sparcv9) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + c4x*) + basic_machine=c4x-none + os=-coff + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. 
+ -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ + | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i[34567]86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto*) + os=-nto-qnx + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -*MiNT) + os=-mint + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. 
Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f301-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -vxsim* | -vxworks*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -*MiNT) + vendor=atari + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os diff --git a/rts/gmp/configure b/rts/gmp/configure new file mode 100644 index 0000000000..8294680486 --- /dev/null +++ b/rts/gmp/configure @@ -0,0 +1,5216 @@ +#! /bin/sh +# From configure.in Revision: 1.129.2.2 +# Guess values for system-dependent variables and create Makefiles. +# Generated automatically using Autoconf version 2.14a. +# Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000 +# Free Software Foundation, Inc. +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + +# Defaults: +ac_default_prefix=/usr/local +# Any additions from configure.in: + +# Initialize some variables set by options. +ac_init_help=false +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. 
+build=NONE +cache_file=./config.cache +exec_prefix=NONE +host=NONE +no_create= +nonopt=NONE +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +target=NONE +verbose= +x_includes=NONE +x_libraries=NONE +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +# Initialize some other variables. +subdirs= +MFLAGS= MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} +# Maximum number of lines to put in a shell here document. +: ${ac_max_here_lines=48} +# Sed expression to map a string onto a valid sh and CPP variable names. +ac_tr_sh='sed -e y%*+%pp%;s%[^a-zA-Z0-9_]%_%g' +ac_tr_cpp='sed -e y%*abcdefghijklmnopqrstuvwxyz%PABCDEFGHIJKLMNOPQRSTUVWXYZ%;s%[^A-Z0-9_]%_%g' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`echo "$ac_option" | sed -n 's/^[^=]*=//p'` + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case "$ac_option" in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir="$ac_optarg" ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build="$ac_optarg" ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file="$ac_optarg" ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir="$ac_optarg" ;; + + -disable-* | --disable-*) + ac_feature=`echo "$ac_option"|sed -e 's/-*disable-//'` + # Reject names that are not valid shell variable names. + if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then + { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + eval "enable_${ac_feature}=no" ;; + + -enable-* | --enable-*) + ac_feature=`echo "$ac_option"|sed -e 's/-*enable-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. 
+ if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then + { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + case "$ac_option" in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_${ac_feature}='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix="$ac_optarg" ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=: ;; + -host | --host | --hos | --ho) + ac_prev=host ;; + -host=* | --host=* | --hos=* | --ho=*) + host="$ac_optarg" ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir="$ac_optarg" ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir="$ac_optarg" ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir="$ac_optarg" ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir="$ac_optarg" ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | 
--localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir="$ac_optarg" ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir="$ac_optarg" ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir="$ac_optarg" ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix="$ac_optarg" ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix="$ac_optarg" ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + 
-program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix="$ac_optarg" ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name="$ac_optarg" ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir="$ac_optarg" ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir="$ac_optarg" ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site="$ac_optarg" ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | 
--src=* | --sr=*) + srcdir="$ac_optarg" ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir="$ac_optarg" ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target="$ac_optarg" ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`echo "$ac_option"|sed -e 's/-*with-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. + if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then + { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + case "$ac_option" in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_${ac_package}='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`echo "$ac_option"|sed -e 's/-*without-//'` + # Reject names that are not valid shell variable names. + if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then + { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + eval "with_${ac_package}=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes="$ac_optarg" ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries="$ac_optarg" ;; + + -*) { echo "configure: error: unrecognized option: $ac_option +Try \`configure --help' for more information." 1>&2; exit 1; } + ;; + + *=*) + ac_envvar=`echo "$ac_option" | sed -e 's/=.*//'` + # Reject names that are not valid shell variable names. + if echo "$ac_envvar" | grep '[^a-zA-Z0-9_]' >/dev/null 2>&1; then + { echo "configure: error: invalid variable name: $ac_envvar" 1>&2; exit 1; } + fi + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + if echo "$ac_option" | grep '[^-a-zA-Z0-9.]' >/dev/null 2>&1; then + echo "configure: warning: invalid host type: $ac_option" 1>&2 + fi + if test "x$nonopt" != xNONE; then + { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } + fi + nonopt="$ac_option" + ;; + + esac +done + +if test -n "$ac_prev"; then + { echo "configure: error: missing argument to --\`echo $ac_prev | sed 's/_/-/g'\`" 1>&2; exit 1; } +fi +if $ac_init_help; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<\EOF +`configure' configures software source code packages to adapt to many kinds +of systems. + +Usage: configure [OPTION]... [VAR=VALUE]... 
[HOST] + +To safely assign special values to environment variables (e.g., CC, +CFLAGS...), give to `configure' the definition as VAR=VALUE. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help print this message + -V, --version print the version of autoconf that created configure + -q, --quiet, --silent do not print `checking...' messages + --cache-file=FILE cache test results in FILE + -n, --no-create do not create output files + +EOF + + cat <<EOF +Directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [same as prefix] + --bindir=DIR user executables in DIR [EPREFIX/bin] + --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] + --libexecdir=DIR program executables in DIR [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data in DIR + [PREFIX/share] + --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data in DIR + [PREFIX/com] + --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] + --libdir=DIR object code libraries in DIR [EPREFIX/lib] + --includedir=DIR C header files in DIR [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] + --infodir=DIR info documentation in DIR [PREFIX/info] + --mandir=DIR man documentation in DIR [PREFIX/man] + --srcdir=DIR find the sources in DIR [configure dir or ..] 
+EOF + + cat <<\EOF + +Host type: + --build=BUILD configure for building on BUILD [BUILD=HOST] + --host=HOST configure for HOST [guessed] + --target=TARGET configure for TARGET [TARGET=HOST] +EOF + + cat <<\EOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +Optional Features: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE=ARG include FEATURE ARG=yes + --disable-dependency-tracking Speeds up one-time builds + --enable-dependency-tracking Do not reject slow dependency extractors + --enable-maintainer-mode enable make rules and dependencies not useful + (and sometimes confusing) to the casual installer + --enable-assert enable ASSERT checking default=no + --enable-alloca use alloca for temp space default=yes + --enable-fft enable FFTs for multiplication default=no + --enable-mpbsd build Berkley MP compatibility library default=no + --enable-mpfr build MPFR default=no + --enable-shared=PKGS build shared libraries default=yes + --enable-static=PKGS build static libraries default=yes + --enable-fast-install=PKGS optimize for fast installation default=yes + --disable-libtool-lock avoid locking (might break parallel builds) + +Optional Packages: + --with-PACKAGE=ARG use PACKAGE ARG=yes + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-gnu-ld assume the C compiler uses GNU ld default=no + --with-pic try to use only PIC/non-PIC objects default=use both +EOF + exit 0 +fi +if $ac_init_version; then + cat <<\EOF +Generated automatically using Autoconf version 2.14a. +Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000 +Free Software Foundation, Inc. + +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. 
+EOF + exit 0 +fi +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Also quote any args containing shell meta-characters. +ac_configure_args= +for ac_arg +do + case "$ac_arg" in + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) ;; + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` + ac_configure_args="$ac_configure_args '$ac_arg'" ;; + *) ac_configure_args="$ac_configure_args $ac_arg" ;; + esac +done + +# File descriptor usage: +# 0 standard input +# 1 file creation +# 2 errors and warnings +# 3 some systems may open it to /dev/tty +# 4 used on the Kubota Titan +# 6 checking for... messages and results +# 5 compiler messages saved in config.log +if test "$silent" = yes; then + exec 6>/dev/null +else + exec 6>&1 +fi +exec 5>./config.log + +echo "\ +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by configure version 2.14a, executed with + > $0 $ac_configure_args +" 1>&5 + +# NLS nuisances. +# Only set these to C if already set. These must not be set unconditionally +# because not all systems understand e.g. LANG=C (notably SCO). +# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! +# Non-C LC_CTYPE values break the ctype check. 
# Force the C locale, but only for variables the user already has set
# (the rationale is given in the NLS comment immediately above).
if test "${LANG+set}" = set; then LANG=C; export LANG; fi
if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi
if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi

# confdefs.h avoids OS command line length limits that DEFS can exceed.
rm -rf conftest* confdefs.h
# AIX cpp loses on an empty file, so make sure it contains at least a newline.
echo >confdefs.h

# A filename unique to this package, relative to the directory that
# configure is in, which we can look for to find out if srcdir is correct.
# It is empty here, so the checks below only test that $srcdir/ is readable.
ac_unique_file=

# Find the source files, if location was not specified.
if test -z "$srcdir"; then
  ac_srcdir_defaulted=yes
  # Try the directory containing this script, then its parent.
  ac_prog=$0
  ac_confdir=`echo "$ac_prog" | sed 's%/[^/][^/]*$%%'`
  # If sed changed nothing, $0 had no directory part: use the current dir.
  test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
  srcdir=$ac_confdir
  if test ! -r $srcdir/$ac_unique_file; then
    srcdir=..
  fi
else
  ac_srcdir_defaulted=no
fi
if test ! -r $srcdir/$ac_unique_file; then
  if test "$ac_srcdir_defaulted" = yes; then
    { echo "configure: error: cannot find sources in $ac_confdir or .." 1>&2; exit 1; }
  else
    { echo "configure: error: cannot find sources in $srcdir" 1>&2; exit 1; }
  fi
fi
# Strip trailing slashes from srcdir.
srcdir=`echo "$srcdir" | sed 's%\([^/]\)/*$%\1%'`

# Prefer explicitly selected file to automatically selected ones.
if test -z "$CONFIG_SITE"; then
  if test "x$prefix" != xNONE; then
    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
  else
    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
  fi
fi
for ac_site_file in $CONFIG_SITE; do
  if test -r "$ac_site_file"; then
    echo "loading site script $ac_site_file"
    . "$ac_site_file"
  fi
done

# Load the results cache if it exists, otherwise create an empty one.
if test -r "$cache_file"; then
  echo "loading cache $cache_file"
  # Only source regular files (a readable special file such as /dev/null
  # is skipped).
  if test -f "$cache_file"; then
    # The `.' builtin searches PATH when its argument contains no slash,
    # so a bare name such as `config.cache' could pick up the wrong file
    # or none at all.  Anchor relative names to the current directory.
    case "$cache_file" in
      [\\/]* | ?:[\\/]* ) . "$cache_file" ;;
      *)                  . "./$cache_file" ;;
    esac
  fi
else
  echo "creating cache $cache_file"
  >"$cache_file"
fi

ac_ext=c
# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
# These command templates are single-quoted: the variables inside them are
# expanded later, when the templates are eval'ed by individual tests.
ac_cpp='$CPP $CPPFLAGS'
ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
cross_compiling=$ac_cv_prog_cc_cross

ac_exeext=
ac_objext=o
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
#include <sys/types.h>
#if STDC_HEADERS
# include <stdlib.h>
# include <stddef.h>
#else
# if HAVE_STDLIB_H
# include <stdlib.h>
# endif
#endif
#if HAVE_STRING_H
# if !STDC_HEADERS && HAVE_MEMORY_H
# include <memory.h>
# endif
# include <string.h>
#else
# if HAVE_STRINGS_H
# include <strings.h>
# endif
#endif
#if HAVE_INTTYPES_H
# include <inttypes.h>
#endif
#if HAVE_UNISTD_H
# include <unistd.h>
#endif"

# Work out how to print a string without a trailing newline:
#   echo $ECHO_N "text$ECHO_C"
# ECHO_T is the prefix written before a check's result.
# NOTE(review): in the branch where neither `\c' nor `-n' works, ECHO_C is a
# literal newline and ECHO_T is a single whitespace character; upstream
# Autoconf appears to use a TAB there -- confirm against a pristine copy.
if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
  # Stardent Vistra SVR4 grep lacks -e, says Kaveh R. Ghazi.
  if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
    ECHO_N= ECHO_C='
' ECHO_T=' '
  else
    ECHO_N=-n ECHO_C= ECHO_T=
  fi
else
  ECHO_N= ECHO_C='\c' ECHO_T=
fi

# Locate the directory holding the auxiliary build scripts by looking for an
# install script in srcdir and its two nearest ancestors.
ac_aux_dir=
for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
  if test -f $ac_dir/install-sh; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/install-sh -c"
    break
  elif test -f $ac_dir/install.sh; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/install.sh -c"
    break
  elif test -f $ac_dir/shtool; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/shtool install -c"
    break
  fi
done
if test -z "$ac_aux_dir"; then
  { echo "configure: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; }
fi
ac_config_guess="$SHELL $ac_aux_dir/config.guess"
ac_config_sub="$SHELL $ac_aux_dir/config.sub"
ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.

# ---------------------------------------------------------------------------
# Canonical system types.
#
# For each of host, target and build: reuse the cached canonical name unless
# it is missing or the user asked for a different alias; otherwise pick an
# alias, run it through config.sub, and split the result into
# cpu / vendor / os.
# ---------------------------------------------------------------------------

# --- host ---
echo $ECHO_N "checking host system type... $ECHO_C" 1>&6
echo "configure:636: checking host system type" 1>&5
if test "x$ac_cv_host" = "x" ||
   (test "x$host" != "xNONE" && test "x$host" != "x$ac_cv_host_alias"); then

  # config.sub has to be runnable before anything can be canonicalized.
  $ac_config_sub sun4 >/dev/null 2>&1 ||
    { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }

  # Alias precedence: --host, then the bare non-option argument, then
  # whatever config.guess reports for this machine.
  ac_cv_host_alias=$host
  if test "x$ac_cv_host_alias" = xNONE; then
    if test "x$nonopt" = xNONE; then
      ac_cv_host_alias=`$ac_config_guess` ||
        { echo "configure: error: cannot guess host type; you must specify one" 1>&2; exit 1; }
    else
      ac_cv_host_alias=$nonopt
    fi
  fi

  ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || exit 1
  ac_cv_host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
  ac_cv_host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
  ac_cv_host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
else
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
fi

echo "$ECHO_T""$ac_cv_host" 1>&6

host=$ac_cv_host
host_alias=$ac_cv_host_alias
host_cpu=$ac_cv_host_cpu
host_vendor=$ac_cv_host_vendor
host_os=$ac_cv_host_os

# --- target ---
echo $ECHO_N "checking target system type... $ECHO_C" 1>&6
echo "configure:672: checking target system type" 1>&5
if test "x$ac_cv_target" = "x" ||
   (test "x$target" != "xNONE" && test "x$target" != "x$ac_cv_target_alias"); then

  # config.sub has to be runnable before anything can be canonicalized.
  $ac_config_sub sun4 >/dev/null 2>&1 ||
    { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }

  # Alias precedence: --target, then the non-option argument, then the
  # host alias chosen above.
  ac_cv_target_alias=$target
  if test "x$ac_cv_target_alias" = xNONE; then
    if test "x$nonopt" = xNONE; then
      ac_cv_target_alias=$host_alias
    else
      ac_cv_target_alias=$nonopt
    fi
  fi

  ac_cv_target=`$ac_config_sub $ac_cv_target_alias` || exit 1
  ac_cv_target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
  ac_cv_target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
  ac_cv_target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
else
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
fi

echo "$ECHO_T""$ac_cv_target" 1>&6

target=$ac_cv_target
target_alias=$ac_cv_target_alias
target_cpu=$ac_cv_target_cpu
target_vendor=$ac_cv_target_vendor
target_os=$ac_cv_target_os

# --- build ---
echo $ECHO_N "checking build system type... $ECHO_C" 1>&6
echo "configure:707: checking build system type" 1>&5
if test "x$ac_cv_build" = "x" ||
   (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then

  # config.sub has to be runnable before anything can be canonicalized.
  $ac_config_sub sun4 >/dev/null 2>&1 ||
    { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }

  # Alias precedence: --build, then the non-option argument, then the
  # host alias chosen above.
  ac_cv_build_alias=$build
  if test "x$ac_cv_build_alias" = xNONE; then
    if test "x$nonopt" = xNONE; then
      ac_cv_build_alias=$host_alias
    else
      ac_cv_build_alias=$nonopt
    fi
  fi

  ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1
  ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
  ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
  ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
else
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
fi

echo "$ECHO_T""$ac_cv_build" 1>&6

build=$ac_cv_build
build_alias=$ac_cv_build_alias
build_cpu=$ac_cv_build_cpu
build_vendor=$ac_cv_build_vendor
build_os=$ac_cv_build_os

# Do some error checking and defaulting for the host and target type.
# The inputs are:
#    configure --host=HOST --target=TARGET --build=BUILD NONOPT
#
# The rules are:
# 1. You are not allowed to specify --host, --target, and nonopt at the
#    same time.
# 2. Host defaults to nonopt.
# 3. If nonopt is not specified, then host defaults to the current host,
#    as determined by config.guess.
# 4. Target and build default to nonopt.
# 5. If nonopt is not specified, then target and build default to host.

# The aliases save the names the user supplied, while $host etc.
# will get canonicalized.
# At least one of the three values must still be the NONE placeholder.
case $host---$target---$nonopt in
NONE---*---* | *---NONE---* | *---*---NONE)
  ;;
*)
  { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; }
  ;;
esac

# When host and target aliases differ and the user supplied no program
# prefix, suffix or transform, prefix installed program names with the
# target alias.
if test "$host_alias" != "$target_alias" &&
   test "$program_prefix$program_suffix$program_transform_name" = NONENONEs,x,x,
then
  program_prefix=${target_alias}-
fi

# Find a good install program.  We prefer a C program (faster),
# so one script is as good as another.
# But avoid the broken or
# incompatible versions:
# SysV /etc/install, /usr/sbin/install
# SunOS /usr/etc/install
# IRIX /sbin/install
# AIX /bin/install
# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
# AFS /usr/afsws/bin/install, which mishandles nonexistent args
# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
# ./install, which can be erroneously created by make from ./install.sh.
#
# A user-supplied $INSTALL wins; otherwise the cached or newly found program
# is used, falling back to the bundled install script ($ac_install_sh).
# NOTE(review): the `${IFS= }' defaults in this block show a single space;
# upstream Autoconf appears to use space+TAB -- whitespace may have been
# normalized in this copy, confirm against a pristine configure.
echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6
echo "configure:778: checking for a BSD compatible install" 1>&5
if test -z "$INSTALL"; then
if test "${ac_cv_path_install+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
else
  IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":"
  for ac_dir in $PATH; do
    # Account for people who put trailing slashes in PATH elements.
    case "$ac_dir/" in
    /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
    *)
      # OSF1 and SCO ODT 3.0 have their own names for install.
      # Don't use installbsd from OSF since it installs stuff as root
      # by default.
      for ac_prog in ginstall scoinst install; do
        if test -f $ac_dir/$ac_prog; then
          if test $ac_prog = install &&
             grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
            # AIX install. It has an incompatible calling convention.
            :
          elif test $ac_prog = install &&
               grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then
            # program-specific install script used by HP pwplus--don't use.
            :
          else
            ac_cv_path_install="$ac_dir/$ac_prog -c"
            # Leave both the program loop and the PATH loop.
            break 2
          fi
        fi
      done
      ;;
    esac
  done
  IFS="$ac_save_IFS"

fi
  if test "${ac_cv_path_install+set}" = set; then
    INSTALL="$ac_cv_path_install"
  else
    # As a last resort, use the slow shell script. We don't cache a
    # path for INSTALL within a source directory, because that will
    # break other packages using the cache if that directory is
    # removed, or if the path is relative.
    INSTALL="$ac_install_sh"
  fi
fi
echo "$ECHO_T""$INSTALL" 1>&6

# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
# It thinks the first close brace ends the variable substitution.
test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'

test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'

test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'

echo $ECHO_N "checking whether build environment is sane... $ECHO_C" 1>&6
echo "configure:835: checking whether build environment is sane" 1>&5
# Just in case
sleep 1
echo timestamp > conftestfile
# Do `set' in a subshell so we don't clobber the current shell's
# arguments. Must try -L first in case configure is actually a
# symlink; some systems play weird games with the mod time of symlinks
# (eg FreeBSD returns the mod time of the symlink's containing
# directory).
if (
   set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null`
   if test "$*" = "X"; then
      # -L didn't work.
      set X `ls -t $srcdir/configure conftestfile`
   fi
   if test "$*" != "X $srcdir/configure conftestfile" \
      && test "$*" != "X conftestfile $srcdir/configure"; then

      # If neither matched, then we have a broken ls. This can happen
      # if, for instance, CONFIG_SHELL is bash and it inherits a
      # broken ls alias from the environment. This has actually
      # happened. Such a system could not be considered "sane".
      { echo "configure: error: ls -t appears to fail. Make sure there is not a broken
alias in your environment" 1>&2; exit 1; }
   fi

   # `ls -t' lists newest first, so the fresh file must be listed first.
   test "$2" = conftestfile
   )
then
   # Ok.
   :
else
   { echo "configure: error: newly created file is older than distributed files!
Check your system clock" 1>&2; exit 1; }
fi
rm -f conftest*
echo "$ECHO_T""yes" 1>&6

# Build the sed program that renames installed programs.  `s,x,x,' is the
# "no transformation" placeholder value.
if test "$program_transform_name" = s,x,x,; then
  program_transform_name=
else
  # Double any \ or $. echo might interpret backslashes.
  cat <<\EOF >conftestsed
s,\\,\\\\,g; s,\$,$$,g
EOF
  program_transform_name=`echo $program_transform_name | sed -f conftestsed`
  rm -f conftestsed
fi
test "$program_prefix" != NONE &&
  program_transform_name="s,^,${program_prefix},;$program_transform_name"
# Use a double $ so make ignores it.
test "$program_suffix" != NONE &&
  program_transform_name="s,\$\$,${program_suffix},;$program_transform_name"

# sed with no file args requires a program.
test "$program_transform_name" = "" && program_transform_name="s,x,x,"

# Locate the `missing' helper script (absolute path inside the aux dir)
# unless the user provided MISSING, and check that it understands --run.
test x"${MISSING+set}" = xset || \
  MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing"
if eval "$MISSING --run :"; then
  am_missing_run="$MISSING --run "
else
  am_missing_run=
  am_backtick='`'
  echo "configure: warning: ${am_backtick}missing' script is too old or missing" 1>&2
fi

# Pick the first awk implementation that can be found on PATH.
for ac_prog in mawk gawk nawk awk
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
echo "configure:906: checking for $ac_word" 1>&5
if test "${ac_cv_prog_AWK+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
else
  if test -n "$AWK"; then
  ac_cv_prog_AWK="$AWK" # Let the user override the test.
else
  # The command substitution prints every PATH entry containing $ac_word;
  # the outer loop runs at most once, so any hit selects $ac_prog.
  for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
ac_dummy="$PATH"
for ac_dir in $ac_dummy; do
  test -z "$ac_dir" && ac_dir=.
  if test -f $ac_dir/$ac_word; then
    echo "$ac_dir/$ac_word"
  fi
done
IFS="$ac_save_ifs"
`; do
    ac_cv_prog_AWK="$ac_prog"
    break
  done
fi
fi
AWK="$ac_cv_prog_AWK"
if test -n "$AWK"; then
  echo "$ECHO_T""$AWK" 1>&6
else
  echo "$ECHO_T""no" 1>&6
fi

test -n "$AWK" && break
done

# Does make predefine $(MAKE)?  If not, SET_MAKE carries an explicit
# assignment.
echo $ECHO_N "checking whether ${MAKE-make} sets \${MAKE}... $ECHO_C" 1>&6
echo "configure:939: checking whether ${MAKE-make} sets \${MAKE}" 1>&5
# Turn the make command name into something usable in a variable name.
set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'`
if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then
  echo $ECHO_N "(cached) $ECHO_C" 1>&6
else
  # The recipe line below must start with a literal TAB.
  cat >conftestmake <<\EOF
all:
	@echo 'ac_maketemp="${MAKE}"'
EOF
# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=`
if test -n "$ac_maketemp"; then
  eval ac_cv_prog_make_${ac_make}_set=yes
else
  eval ac_cv_prog_make_${ac_make}_set=no
fi
rm -f conftestmake
fi
if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
  echo "$ECHO_T""yes" 1>&6
  SET_MAKE=
else
  echo "$ECHO_T""no" 1>&6
  SET_MAKE="MAKE=${MAKE-make}"
fi

# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given.
if test "${enable_dependency_tracking+set}" = set; then
  enableval="$enable_dependency_tracking"

fi
# AMDEP is "#" when dependency tracking is off (disabled by the user or no
# depcomp script in the aux dir) and empty when it is on.
if test "x$enable_dependency_tracking" = xno; then
  AMDEP="#"
else
  am_depcomp="$ac_aux_dir/depcomp"
  if test ! -f "$am_depcomp"; then
    AMDEP="#"
  else
    AMDEP=
  fi
fi

if test -z "$AMDEP"; then
  AMDEPBACKSLASH='\'
else
  AMDEPBACKSLASH=
fi

# Use .deps if such a directory exists or can be created, else _deps.
if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then
  DEPDIR=.deps
else
  DEPDIR=_deps
fi

PACKAGE=gmp

VERSION=3.1.1

# Refuse a separate-directory build when the source tree itself has already
# been configured.  The inner quotes are escaped so that they actually
# appear in the message instead of silently ending the string.
if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" &&
   test -f $srcdir/config.status; then
  { echo "configure: error: source directory already configured; run \"make distclean\" there first" 1>&2; exit 1; }
fi
cat >>confdefs.h <<EOF
#define PACKAGE "$PACKAGE"
EOF

cat >>confdefs.h <<EOF
#define VERSION "$VERSION"
EOF

# Maintainer tools: keep a user-supplied value, otherwise run the tool
# through `missing --run' (am_missing_run is empty if that is unavailable).
ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal"}

AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}

AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake"}

AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}

MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}

AMTAR=${AMTAR-"${am_missing_run}tar"}

if test -z "$install_sh"; then
  install_sh="$ac_aux_dir/install-sh"
  test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh"
  # Last resort: go through `missing'.  The aux directory variable is
  # ac_aux_dir; the previous spelling `ac_auxdir' is never set, which
  # produced the bogus path "/install-sh".
  test -f "$install_sh" || install_sh="${am_missing_run}${ac_aux_dir}/install-sh"
  # Drop the literal ${SHELL} text that comes along with am_missing_run.
  install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`"
fi

echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" 1>&6
echo "configure:1029: checking whether to enable maintainer-specific portions of Makefiles" 1>&5
# Check whether --enable-maintainer-mode or --disable-maintainer-mode was given.
# Maintainer mode defaults to "no"; report the value in effect as the result
# of the "checking whether to enable maintainer-specific portions" message.
if test "${enable_maintainer_mode+set}" = set; then
  enableval="$enable_maintainer_mode"
  USE_MAINTAINER_MODE=$enableval
else
  USE_MAINTAINER_MODE=no
fi
echo "$ECHO_T""$USE_MAINTAINER_MODE" 1>&6

# MAINTAINER_MODE_TRUE/FALSE: the one matching the current mode is empty,
# the other is '#'.  The value is quoted because it comes straight from the
# command line: an empty or multi-word argument would otherwise make
# `test' fail with a syntax error.
if test "$USE_MAINTAINER_MODE" = yes; then
  MAINTAINER_MODE_TRUE=
  MAINTAINER_MODE_FALSE='#'
else
  MAINTAINER_MODE_TRUE='#'
  MAINTAINER_MODE_FALSE=
fi
MAINT=$MAINTAINER_MODE_TRUE

# Names of the config.m4 output and its temporary pieces; remove any
# temporaries that already exist.
gmp_configm4="config.m4"
gmp_tmpconfigm4=cnfm4.tmp
gmp_tmpconfigm4i=cnfm4i.tmp
gmp_tmpconfigm4p=cnfm4p.tmp
test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p

# Each feature switch below accepts only "yes" or "no"; any other value is
# a fatal error.

# Check whether --enable-assert or --disable-assert was given.
if test "${enable_assert+set}" = set; then
  enableval="$enable_assert"
  case "${enableval}" in
  yes|no) ;;
  *) { echo "configure: error: bad value ${enableval} for --enable-assert, need yes or no" 1>&2; exit 1; } ;;
  esac
else
  enable_assert=no
fi

if test "$enable_assert" = "yes"; then
  cat >>confdefs.h <<\EOF
#define WANT_ASSERT 1
EOF

fi

# Check whether --enable-alloca or --disable-alloca was given.
if test "${enable_alloca+set}" = set; then
  enableval="$enable_alloca"
  case "${enableval}" in
  yes|no) ;;
  *) { echo "configure: error: bad value ${enableval} for --enable-alloca, need yes or no" 1>&2; exit 1; } ;;
  esac
else
  enable_alloca=yes
fi

# Note the polarity: USE_STACK_ALLOC is defined when alloca is *disabled*.
if test "$enable_alloca" = "no"; then
  cat >>confdefs.h <<\EOF
#define USE_STACK_ALLOC 1
EOF

fi

# Check whether --enable-fft or --disable-fft was given.
if test "${enable_fft+set}" = set; then
  enableval="$enable_fft"
  case "${enableval}" in
  yes|no) ;;
  *) { echo "configure: error: bad value ${enableval} for --enable-fft, need yes or no" 1>&2; exit 1; } ;;
  esac
else
  enable_fft=no
fi

if test "$enable_fft" = "yes"; then
  cat >>confdefs.h <<\EOF
#define WANT_FFT 1
EOF

fi

# Check whether --enable-mpbsd or --disable-mpbsd was given.
+if test "${enable_mpbsd+set}" = set; then + enableval="$enable_mpbsd" + case "${enableval}" in +yes|no) ;; +*) { echo "configure: error: bad value ${enableval} for --enable-mpbsd, need yes or no" 1>&2; exit 1; } ;; +esac +else + enable_mpbsd=no +fi + +if test "$enable_mpbsd" = "yes"; then + WANT_MPBSD_TRUE= + WANT_MPBSD_FALSE='#' +else + WANT_MPBSD_TRUE='#' + WANT_MPBSD_FALSE= +fi + +# Check whether --enable-mpfr or --disable-mpfr was given. +if test "${enable_mpfr+set}" = set; then + enableval="$enable_mpfr" + case "${enableval}" in +yes|no) ;; +*) { echo "configure: error: bad value ${enableval} for --enable-mpfr, need yes or no" 1>&2; exit 1; } ;; +esac +else + enable_mpfr=no +fi + +if test "$enable_mpfr" = "yes"; then + WANT_MPFR_TRUE= + WANT_MPFR_FALSE='#' +else + WANT_MPFR_TRUE='#' + WANT_MPFR_FALSE= +fi + +os_64bit="no" +cclist="gcc cc" # FIXME: Prefer c89 to cc. +gmp_cflags_gcc="-g -O2" +gmp_cflags64_gcc="-g -O2" +gmp_cflags_cc="-g" +gmp_cflags64_cc="-g" + +case "$target" in + # Alpha + alpha*-cray-unicos*) + # Don't perform any assembly syntax tests on this beast. + gmp_no_asm_syntax_testing=yes + cclist=cc + gmp_cflags_cc="$gmp_cflags_cc -O" + ;; + alpha*-*-osf*) + flavour=`echo $target_cpu | sed 's/^alpha//g'` + if test -n "$flavour"; then + case $flavour in # compilers don't seem to understand `ev67' and such. + ev6? | ev7*) flavour=ev6;; + esac + gmp_optcflags_gcc="-mcpu=$flavour" + # FIXME: We shouldn't fail fatally if none of these work, but that's + # how xoptcflags work and we don't have any other mechanism right now. + # Why do we need this here and not for alpha*-*-* below? + gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}" + gmp_optcflags_cc="-arch $flavour -tune $flavour" + fi + ;; + alpha*-*-*) + cclist="gcc" + flavour=`echo $target_cpu | sed 's/^alpha//g'` + if test -n "$flavour"; then + case $flavour in + ev6? | ev7*) flavour=ev6;; + esac + gmp_optcflags_gcc="-mcpu=$flavour" + fi + ;; + # Cray vector machines. 
This must come after alpha* so that we can + # recognize present and future vector processors with a wildcard. + *-cray-unicos*) + # Don't perform any assembly syntax tests on this beast. + gmp_no_asm_syntax_testing=yes + cclist=cc + # Don't inherit default gmp_cflags_cc value; it comes with -g which + # disables all optimization on Cray vector systems + gmp_cflags_cc="-O" + ;; + + # AMD and Intel x86 configurations + i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*) + # Rumour has it -O2 used to give worse register allocation than just -O. + gmp_cflags_gcc="-g -O -fomit-frame-pointer" + + case "${target}" in + i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";; + i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";; + i586*-*-* | pentium-*-* | pentiummmx-*-*) + gmp_optcflags_gcc="-mcpu=pentium -march=pentium";; + + # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4) + # tickles a bug in gcc 2.95.2 (believed fixed in 2.96). + i686*-*-* | pentiumpro-*-* | pentium[23]-*-*) + gmp_optcflags_gcc="-mcpu=pentiumpro";; + + k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";; + + # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6 + # cycle mul, 40 cycle div, pairable adc, ... + # FIXME: Change this when gcc gets something specific for Athlon. + # -march=pentiumpro not used, per i686 above. + athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";; + esac + ;; + + # Sparc + ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9]) + os_64bit=yes + gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus" + gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8" + gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4" + ;; + sparc64-*-linux*) + # Need to think more about the options passed here. This isn't good for + # some sparc64 linux distros, since we end up not optimizing when all the + # options below fail. 
+ os_64bit=yes + gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9" + gmp_cflags_gcc="$gmp_cflags_gcc -m32" + gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis" + ;; + ultrasparc*-*-* | sparcv9-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus" + gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + ;; + sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*) + gmp_cflags_gcc="$gmp_cflags_gcc" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + ;; + sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD + cclist="gcc acc cc" + gmp_cflags_gcc="$gmp_cflags_gcc" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_acc="-g -O2 -cg92" + gmp_cflags_cc="-O2" # FIXME: Flag for v8? + ;; + supersparc*-*-solaris2.*) + gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC" + ;; + supersparc*-*-*) # SunOS, Linux, *BSD + cclist="gcc acc cc" + gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC" + gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8? 
+ ;; + *sparc*-*-*) + cclist="gcc acc cc" + gmp_cflags_acc="-g -O2" + gmp_cflags_cc="-g -O2" + ;; + + # POWER/PowerPC + powerpc64-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64" + gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3" + ;; + powerpc*-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc" + gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2" + ;; + power-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -mpower" + gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2" + ;; + powerpc64*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64" + cat >>confdefs.h <<\EOF +#define _LONG_LONG_LIMB 1 +EOF + ;; + powerpc-apple-darwin* | powerpc-apple-macosx*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp" + ;; + powerpc*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc" + ;; + + # MIPS + mips-sgi-irix6.*) + os_64bit=yes + gmp_cflags64_gcc="-g -O2 -mabi=n32" + gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32" + ;; + + # Motorola 68k family + m88110*-*-*) + gmp_cflags_gcc="-g -O -m88110" ;; + m68*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer" + ;; + + # HP + hppa1.0*-*-*) + cclist="gcc c89 cc" + gmp_cflags_c89="$gmp_cflags_cc +O2" + gmp_cflags_cc="$gmp_cflags_cc +O2" + ;; + hppa2.0w*-*-*) + cclist="c89 cc" + gmp_cflags_c89="+DD64 +O3" + gmp_cflags_cc="+DD64 +O3" + ;; + hppa2.0*-*-*) + os_64bit=yes + cclist="gcc c89 cc" + gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB" + # +O2 to cc triggers bug in mpz/powm.c (1.4) + gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB" + gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB" + gmp_cflags_c89="$gmp_cflags_cc +O2" + gmp_cflags_cc="$gmp_cflags_cc +O2" + ;; + + # VAX + vax*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer" + ;; + + # Fujitsu + f30[01]-fujitsu-sysv*) + cclist="gcc vcc" + gmp_cflags_vcc="-g" # FIXME: flags for vcc? 
+ ;; +esac + +case "${target}" in + *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";; +esac + +echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" 1>&6 +echo "configure:1352: checking for BSD-compatible nm" 1>&5 +if test "${ac_cv_path_NM+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$NM"; then + # Let the user override the test. + ac_cv_path_NM="$NM" +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then + # Check to see if the nm accepts a BSD-compat flag. + # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + ac_cv_path_NM="$ac_dir/nm -B" + break + elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then + ac_cv_path_NM="$ac_dir/nm -p" + break + else + ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + fi + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm +fi +fi + +NM="$ac_cv_path_NM" +echo "$ECHO_T""$NM" 1>&6 + # nm on 64-bit AIX needs to know the object file format +case "$target" in + powerpc64*-*-aix*) + NM="$NM -X 64" + ;; +esac + +# Save CFLAGS given on command line. +gmp_user_CFLAGS="$CFLAGS" + +if test -z "$CC"; then + # Find compiler. + +if test $host != $build; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + +gmp_cc_list="$cclist" +gmp_req_64bit_cc="$os_64bit" + +CC32= +CC64= +for c in $gmp_cc_list; do + # Avoid cache hits. + unset CC + unset ac_cv_prog_CC + +# Extract the first word of "${ac_tool_prefix}$c", so it can be a program name with args. +set dummy ${ac_tool_prefix}$c; ac_word=$2 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" 1>&6 +echo "configure:1418: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_CC="${ac_tool_prefix}$c" + break + done +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ECHO_T""$CC" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +if test -z "$ac_cv_prog_CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "$c", so it can be a program name with args. +set dummy $c; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:1452: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_CC="$c" + break + done + test -z "$ac_cv_prog_CC" && ac_cv_prog_CC="$c" +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ECHO_T""$CC" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + + else + CC="$c" + fi +fi + + if test -n "$CC"; then + eval c_flags=\$gmp_cflags_$c + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
+ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + CC="$CC" +CFLAGS="$c_flags" +echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6 +echo "configure:1498: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5 + +# Simple test for all targets. +cat >conftest.$ac_ext <<EOF + +#line 1503 "configure" +#include "confdefs.h" + +int main(){return(0);} +EOF +if { (eval echo configure:1508: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + tmp_works=yes + # If we can't run a trivial program, we are probably using a cross compiler. + if (./conftest; exit) 2>/dev/null; then + tmp_cross=no + else + tmp_cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + tmp_works=no +fi +rm -fr conftest* + +# Target specific tests. +if test "$tmp_works" = "yes"; then + case "$target" in + *-*-aix*) # Returning a funcptr. + cat >conftest.$ac_ext <<EOF +#line 1528 "configure" +#include "confdefs.h" + +int +main () +{ +} void *g(); void *f() { return g(); } int bar(){ + ; + return 0; +} +EOF +if { (eval echo configure:1539: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + tmp_works=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + tmp_works=no +fi +rm -f conftest* + ;; + esac +fi + +if test "$tmp_works" = "yes"; then + gmp_prog_cc_works=yes +else + gmp_prog_cc_works=no +fi + +echo "$ECHO_T""$tmp_works" 1>&6 + + if test "$gmp_prog_cc_works" != "yes"; then + continue + fi + + # Save first working compiler, whether 32- or 64-bit capable. + if test -z "$CC32"; then + CC32="$CC" + fi + if test "$gmp_req_64bit_cc" = "yes"; then + eval c_flags=\$gmp_cflags64_$c + + # Verify that the compiler works in 64-bit mode as well. 
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link. + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + CC="$c" +CFLAGS="$c_flags" +echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6 +echo "configure:1583: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5 + +# Simple test for all targets. +cat >conftest.$ac_ext <<EOF + +#line 1588 "configure" +#include "confdefs.h" + +int main(){return(0);} +EOF +if { (eval echo configure:1593: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + tmp_works=yes + # If we can't run a trivial program, we are probably using a cross compiler. + if (./conftest; exit) 2>/dev/null; then + tmp_cross=no + else + tmp_cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + tmp_works=no +fi +rm -fr conftest* + +# Target specific tests. +if test "$tmp_works" = "yes"; then + case "$target" in + *-*-aix*) # Returning a funcptr. + cat >conftest.$ac_ext <<EOF +#line 1613 "configure" +#include "confdefs.h" + +int +main () +{ +} void *g(); void *f() { return g(); } int bar(){ + ; + return 0; +} +EOF +if { (eval echo configure:1624: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + tmp_works=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + tmp_works=no +fi +rm -f conftest* + ;; + esac +fi + +if test "$tmp_works" = "yes"; then + gmp_prog_cc_works=yes +else + gmp_prog_cc_works=no +fi + +echo "$ECHO_T""$tmp_works" 1>&6 + + if test "$gmp_prog_cc_works" = "yes"; then + + gmp_tmp_CC_save="$CC" + CC="$c" + echo $ECHO_N "checking whether the C compiler ($CC) is 64-bit capable... 
$ECHO_C" 1>&6 +echo "configure:1651: checking whether the C compiler ($CC) is 64-bit capable" 1>&5 + if test -z "$NM"; then + echo; echo "configure: GMP_CHECK_CC_64BIT: fatal: need nm" + exit 1 + fi + gmp_tmp_CFLAGS_save="$CFLAGS" + CFLAGS="$c_flags" + + case "$target" in + hppa2.0*-*-*) + # FIXME: If gcc is installed under another name than "gcc", we will + # test the wrong thing. + if test "$CC" != "gcc"; then + echo >conftest.c + gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"` + rm conftest* + gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'` + gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'` + gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'` + gmp_cv_cc_64bit=no + test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \ + && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \ + && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \ + && gmp_cv_cc_64bit=yes + else # gcc + # FIXME: Compile a minimal file and determine if the resulting object + # file is an ELF file. If so, gcc can produce 64-bit code. + # Do we have file(1) for target? + gmp_cv_cc_64bit=no + fi + ;; + mips-sgi-irix6.*) + # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit + # arithmetic but not 64-bit pointers, so the general test for sizeof + # (void *) is not valid. + # Simply try to compile an empty main. If that succeeds return + # true. + cat >conftest.$ac_ext <<EOF +#line 1689 "configure" +#include "confdefs.h" + +int +main () +{ + + ; + return 0; +} +EOF +if { (eval echo configure:1700: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + gmp_cv_cc_64bit=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + gmp_cv_cc_64bit=no +fi +rm -f conftest* + ;; + *-*-*) + # Allocate an array of size sizeof (void *) and use nm to determine its + # size. We depend on the first declared variable being put at address 0. 
+ cat >conftest.c <<EOF +char arr[sizeof (void *)]={0}; +char post=0; +EOF + gmp_compile="$CC $CFLAGS -c conftest.c 1>&5" + if { (eval echo configure:1719: \"$gmp_compile\") 1>&5; (eval $gmp_compile) 2>&5; }; then + gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + if test "$gmp_tmp_val" = "8"; then + gmp_cv_cc_64bit=yes + else + gmp_cv_cc_64bit=no + fi + else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + gmp_cv_cc_64bit=no + fi + rm -f conftest* + ;; + esac + + CC="$gmp_tmp_CC_save" + CFLAGS="$gmp_tmp_CFLAGS_save" + echo "$ECHO_T""$gmp_cv_cc_64bit" 1>&6 + + if test "$gmp_cv_cc_64bit" = "yes"; then + test -z "$CC64" && CC64="$c" + test -z "$CFLAGS64" && CFLAGS64="$c_flags" + # We have CC64 so we're done. + break + fi + fi + else + # We have CC32, and we don't need a 64-bit compiler so we're done. + break + fi + fi +done +CC="$CC32" + + # If 64-bit OS and we have a 64-bit compiler, use it. + if test -n "$os_64bit" && test -n "$CC64"; then + CC=$CC64 + CFLAGS=$CFLAGS64 + else + eval CFLAGS=\$gmp_cflags_$CC + fi + + # Try compiler flags that may work with only some compiler versions. + # gmp_optcflags: All or nothing. + eval optcflags=\$gmp_optcflags_$CC + if test -n "$optcflags"; then + CFLAGS_save="$CFLAGS" + CFLAGS="$CFLAGS $optcflags" + echo $ECHO_N "checking whether $CC accepts $optcflags... $ECHO_C" 1>&6 +echo "configure:1770: checking whether $CC accepts $optcflags" 1>&5 + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
+ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + + cat >conftest.$ac_ext <<EOF + +#line 1780 "configure" +#include "confdefs.h" + +int main(){return(0);} +EOF +if { (eval echo configure:1785: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + optok=yes + # If we can't run a trivial program, we are probably using a cross compiler. + if (./conftest; exit) 2>/dev/null; then + cross=no + else + cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + optok=no +fi +rm -fr conftest* + if test "$optok" = "yes"; then + echo "$ECHO_T""yes" 1>&6 + else + echo "$ECHO_T""no" 1>&6 + CFLAGS="$CFLAGS_save" + fi + fi + # gmp_xoptcflags: First is best, one has to work. + eval xoptcflags=\$gmp_xoptcflags_$CC + if test -n "$xoptcflags"; then + gmp_found="no" + for xopt in $xoptcflags; do + CFLAGS_save="$CFLAGS" + CFLAGS="$CFLAGS $xopt" + echo $ECHO_N "checking whether $CC accepts $xopt... $ECHO_C" 1>&6 +echo "configure:1814: checking whether $CC accepts $xopt" 1>&5 + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + + cat >conftest.$ac_ext <<EOF + +#line 1824 "configure" +#include "confdefs.h" + +int main(){return(0);} +EOF +if { (eval echo configure:1829: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + optok=yes + # If we can't run a trivial program, we are probably using a cross compiler. 
+ if (./conftest; exit) 2>/dev/null; then + cross=no + else + cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + optok=no +fi +rm -fr conftest* + if test "$optok" = "yes"; then + echo "$ECHO_T""yes" 1>&6 + gmp_found="yes" + break + else + echo "$ECHO_T""no" 1>&6 + CFLAGS="$CFLAGS_save" + fi + done + if test "$gmp_found" = "no"; then + echo "$0: fatal: need a compiler that understands one of $xoptcflags" + exit 1 + fi + fi +fi + +# Restore CFLAGS given on command line. +# FIXME: We've run through quite some unnecessary code looking for a +# nice compiler and working flags for it, just to spoil that with user +# supplied flags. +test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS" + +# Select chosen compiler. + +echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works... $ECHO_C" 1>&6 +echo "configure:1868: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works" 1>&5 + +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +cat >conftest.$ac_ext <<EOF + +#line 1879 "configure" +#include "confdefs.h" + +int main(){return(0);} +EOF +if { (eval echo configure:1884: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + ac_cv_prog_cc_works=yes + # If we can't run a trivial program, we are probably using a cross compiler. + if (./conftest; exit) 2>/dev/null; then + ac_cv_prog_cc_cross=no + else + ac_cv_prog_cc_cross=yes + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + ac_cv_prog_cc_works=no +fi +rm -fr conftest* +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
+ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +echo "$ECHO_T""$ac_cv_prog_cc_works" 1>&6 +if test $ac_cv_prog_cc_works = no; then + { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 77; } +fi +echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler... $ECHO_C" 1>&6 +echo "configure:1910: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler" 1>&5 +echo "$ECHO_T""$ac_cv_prog_cc_cross" 1>&6 +cross_compiling=$ac_cv_prog_cc_cross + +echo $ECHO_N "checking whether we are using GNU C... $ECHO_C" 1>&6 +echo "configure:1915: checking whether we are using GNU C" 1>&5 +if test "${ac_cv_prog_gcc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.c <<EOF +#ifdef __GNUC__ + yes; +#endif +EOF +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1924: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then + ac_cv_prog_gcc=yes +else + ac_cv_prog_gcc=no +fi +fi +echo "$ECHO_T""$ac_cv_prog_gcc" 1>&6 + +if test "$ac_cv_prog_gcc" = "yes"; then + GCC=yes +else + GCC= +fi + +# Set CFLAGS if not already set. +if test -z "$CFLAGS"; then + CFLAGS="-g" + if test "$GCC" = "yes"; then + CFLAGS="$CFLAGS -O2" + fi +fi + +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + ac_cv_prog_CC="$CC" +fi + +# How to assemble. +CCAS="$CC -c" + +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" 1>&6 +echo "configure:1956: checking how to run the C preprocessor" 1>&5 +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then +if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + # This must be in double quotes, not single quotes, because CPP may get + # substituted into the Makefile and "${CC-cc}" will confuse make. + CPP="${CC-cc} -E" + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. + +cat >conftest.$ac_ext <<EOF +#line 1972 "configure" +#include "confdefs.h" +#include <assert.h> +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1978: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP="${CC-cc} -E -traditional-cpp" + +cat >conftest.$ac_ext <<EOF +#line 1990 "configure" +#include "confdefs.h" +#include <assert.h> +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:1996: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP="${CC-cc} -nologo -E" + +cat >conftest.$ac_ext <<EOF +#line 2008 "configure" +#include "confdefs.h" +#include <assert.h> +Syntax Error +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:2014: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + CPP=/lib/cpp +fi +rm -f conftest* +fi +rm -f conftest* +fi +rm -f conftest* + 
ac_cv_prog_CPP="$CPP" +fi + CPP="$ac_cv_prog_CPP" +else + ac_cv_prog_CPP="$CPP" +fi +echo "$ECHO_T""$CPP" 1>&6 + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# ./install, which can be erroneously created by make from ./install.sh. +echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6 +echo "configure:2050: checking for a BSD compatible install" 1>&5 +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" + for ac_dir in $PATH; do + # Account for people who put trailing slashes in PATH elements. + case "$ac_dir/" in + /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + if test -f $ac_dir/$ac_prog; then + if test $ac_prog = install && + grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + ac_cv_path_install="$ac_dir/$ac_prog -c" + break 2 + fi + fi + done + ;; + esac + done + IFS="$ac_save_IFS" + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL="$ac_cv_path_install" + else + # As a last resort, use the slow shell script. 
We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL="$ac_install_sh" + fi +fi +echo "$ECHO_T""$INSTALL" 1>&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +echo $ECHO_N "checking whether ln -s works... $ECHO_C" 1>&6 +echo "configure:2107: checking whether ln -s works" 1>&5 +if test "${ac_cv_prog_LN_S+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + rm -f conftestdata +if ln -s X conftestdata 2>/dev/null +then + rm -f conftestdata + ac_cv_prog_LN_S="ln -s" +else + ac_cv_prog_LN_S=ln +fi +fi +LN_S="$ac_cv_prog_LN_S" +if test "$ac_cv_prog_LN_S" = "ln -s"; then + echo "$ECHO_T""yes" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +echo $ECHO_N "checking for suitable m4... $ECHO_C" 1>&6 +echo "configure:2128: checking for suitable m4" 1>&5 +if test "${gmp_cv_prog_m4+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$M4"; then + gmp_cv_prog_m4="$M4" +else + cat >conftest.m4 <<\EOF +define(dollarhash,``$#'')dnl +ifelse(dollarhash(x),1,`define(t1,Y)', +``bad: $# not supported (SunOS /usr/bin/m4) +'')dnl +ifelse(eval(89),89,`define(t2,Y)', +`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4) +')dnl +ifelse(t1`'t2,YY,`good +')dnl +EOF + echo "trying m4" 1>&5 + gmp_tmp_val="`(m4 conftest.m4) 2>&5`" + echo "$gmp_tmp_val" 1>&5 + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="m4" + else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="$PATH:/usr/5bin" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ echo "trying $ac_dir/m4" 1>&5 + gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&5`" + echo "$gmp_tmp_val" 1>&5 + if test "$gmp_tmp_val" = good; then + gmp_cv_prog_m4="$ac_dir/m4" + break + fi + done + IFS="$ac_save_ifs" + if test -z "$gmp_cv_prog_m4"; then + { echo "configure: error: No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." 1>&2; exit 1; } + fi + fi + rm -f conftest.m4 +fi +fi +echo "$ECHO_T""$gmp_cv_prog_m4" 1>&6 +M4="$gmp_cv_prog_m4" + +# Extract the first word of "ar", so it can be a program name with args. +set dummy ar; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:2178: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_AR+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_AR="ar" + break + done +fi +fi +AR="$ac_cv_prog_AR" +if test -n "$AR"; then + echo "$ECHO_T""$AR" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +# ar on AIX needs to know the object file format +case "$target" in + powerpc64*-*-aix*) + AR="$AR -X 64" + ;; +esac + +if test "$gmp_no_asm_syntax_testing" != "yes"; then + echo $ECHO_N "checking how to switch to text section... 
$ECHO_C" 1>&6 +echo "configure:2216: checking how to switch to text section" 1>&5 +if test "${gmp_cv_check_asm_text+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$target" in + *-*-aix*) + + gmp_cv_check_asm_text=".csect .text[PR]" + + ;; + *-*-hpux*) gmp_cv_check_asm_text=".code" ;; + *) gmp_cv_check_asm_text=".text" ;; +esac + +fi +echo "$ECHO_T""$gmp_cv_check_asm_text" 1>&6 +echo "define(<TEXT>, <$gmp_cv_check_asm_text>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking how to switch to data section... $ECHO_C" 1>&6 +echo "configure:2235: checking how to switch to data section" 1>&5 +if test "${gmp_cv_check_asm_data+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$target" in + *-*-aix*) + + gmp_cv_check_asm_data=".csect .data[RW]" + + ;; + *) gmp_cv_check_asm_data=".data" ;; +esac + +fi +echo "$ECHO_T""$gmp_cv_check_asm_data" 1>&6 +echo "define(<DATA>, <$gmp_cv_check_asm_data>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking how to export a symbol... $ECHO_C" 1>&6 +echo "configure:2253: checking how to export a symbol" 1>&5 +if test "${gmp_cv_check_asm_globl+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$target" in + *-*-hpux*) gmp_cv_check_asm_globl=".export" ;; + *) gmp_cv_check_asm_globl=".globl" ;; +esac + +fi +echo "$ECHO_T""$gmp_cv_check_asm_globl" 1>&6 +echo "define(<GLOBL>, <$gmp_cv_check_asm_globl>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking what assembly label suffix to use... 
$ECHO_C" 1>&6 +echo "configure:2267: checking what assembly label suffix to use" 1>&5 +if test "${gmp_cv_check_asm_label_suffix+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$target" in + *-*-hpux*) gmp_cv_check_asm_label_suffix="" ;; + *) gmp_cv_check_asm_label_suffix=":" ;; +esac + +fi +echo "$ECHO_T""$gmp_cv_check_asm_label_suffix" 1>&6 +echo "define(<LABEL_SUFFIX>, <\$1$gmp_cv_check_asm_label_suffix>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking how the .type assembly directive should be used... $ECHO_C" 1>&6 +echo "configure:2281: checking how the .type assembly directive should be used" 1>&5 +if test "${gmp_cv_check_asm_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +for gmp_tmp_prefix in @ \# %; do + echo " .type sym,${gmp_tmp_prefix}function" > conftest.s + if { (eval echo configure:2288: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + gmp_cv_check_asm_type=".type \$1,${gmp_tmp_prefix}\$2" + break + fi +done +if test -z "$gmp_cv_check_asm_type"; then + gmp_cv_check_asm_type="dnl" +fi + +fi +echo "$ECHO_T""$gmp_cv_check_asm_type" 1>&6 +echo "define(<TYPE>, <$gmp_cv_check_asm_type>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking if the .size assembly directive works... $ECHO_C" 1>&6 +echo "configure:2302: checking if the .size assembly directive works" 1>&5 +if test "${gmp_cv_check_asm_size+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +echo ' .size sym,1' > conftest.s +if { (eval echo configure:2308: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + gmp_cv_check_asm_size=".size \$1,\$2" +else + gmp_cv_check_asm_size="dnl" +fi + +fi +echo "$ECHO_T""$gmp_cv_check_asm_size" 1>&6 +echo "define(<SIZE>, <$gmp_cv_check_asm_size>)" >> $gmp_tmpconfigm4 + +echo $ECHO_N "checking what prefix to use for a local label... 
$ECHO_C" 1>&6 +echo "configure:2319: checking what prefix to use for a local label" 1>&5 +if test "${gmp_cv_check_asm_lsym_prefix+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -z "$NM"; then + echo; echo "GMP_CHECK_ASM_LSYM_PREFIX: fatal: need nm" + exit 1 +fi +ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +gmp_cv_check_asm_lsym_prefix="L" +for gmp_tmp_pre in L .L $ L$; do + cat > conftest.s <<EOF +dummy${gmp_cv_check_asm_label_suffix} +${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if { (eval echo configure:2335: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + $NM conftest.o >/dev/null 2>&1 + gmp_rc=$? + if test "$gmp_rc" != "0"; then + echo "configure: $NM failure, using default" + break + fi + if $NM conftest.o | grep gurkmacka >/dev/null; then true; else + gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre" + break + fi + else + echo "configure: failed program was:" >&5 + cat conftest.s >&5 + # Use default. + fi +done +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_lsym_prefix" 1>&6 +echo "define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)" >> $gmp_tmpconfigm4 + +# GMP_CHECK_ASM_W32: choose the data directive (.long or .word) that places the following symbol at offset 4. +echo $ECHO_N "checking how to define a 32-bit word... $ECHO_C" 1>&6 +echo "configure:2359: checking how to define a 32-bit word" 1>&5 +if test "${gmp_cv_check_asm_w32+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -z "$NM"; then + echo; echo "configure: GMP_CHECK_ASM_W32: fatal: need nm" + exit 1 +fi + +# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption +# that it's at 0x0. We'll have to declare another symbol before the +# .long/.word and look at the distance between the two symbols. The +# only problem is that the sed expression(s) barfs (on Solaris, for +# example) for the symbol with value 0. For now, HPUX uses .word. 
+ +case "$target" in + *-*-hpux*) + gmp_cv_check_asm_w32=".word" + ;; + *-*-*) + ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" + for gmp_tmp_op in .long .word; do + cat > conftest.s <<EOF + $gmp_cv_check_asm_data + $gmp_cv_check_asm_globl foo + $gmp_tmp_op 0 +foo${gmp_cv_check_asm_label_suffix} + .byte 0 +EOF + if { (eval echo configure:2388: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + if test "$gmp_tmp_val" = "4"; then + gmp_cv_check_asm_w32="$gmp_tmp_op" + break + fi + fi + done + ;; +esac + +if test -z "$gmp_cv_check_asm_w32"; then + echo; echo "configure: GMP_CHECK_ASM_W32: fatal: do not know how to define a 32-bit word" + exit 1 +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_w32" 1>&6 +echo "define(<W32>, <$gmp_cv_check_asm_w32>)" >> $gmp_tmpconfigm4 + + echo $ECHO_N "checking if symbols are prefixed by underscore... $ECHO_C" 1>&6 +echo "configure:2412: checking if symbols are prefixed by underscore" 1>&5 +if test "${gmp_cv_check_asm_underscore+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat > conftest.$ac_ext <<EOF +#line 2417 "configure" +#include "confdefs.h" +int underscore_test() { +return; } +EOF +if { (eval echo configure:2422: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + if grep _underscore_test conftest* >/dev/null; then + gmp_cv_check_asm_underscore=yes + else + gmp_cv_check_asm_underscore=no + fi +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_underscore" 1>&6 +if test "$gmp_cv_check_asm_underscore" = "yes"; then + +echo 'define(<GSYM_PREFIX>, <_>)' >> $gmp_tmpconfigm4 + + underscore=yes +else + +echo 'define(<GSYM_PREFIX>, <>)' >> $gmp_tmpconfigm4 + + underscore=no +fi + +echo $ECHO_N "checking if .align assembly directive is logarithmic... 
$ECHO_C" 1>&6 +echo "configure:2449: checking if .align assembly directive is logarithmic" 1>&5 +if test "${gmp_cv_check_asm_align_log+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -z "$NM"; then + echo; echo "configure: GMP_CHECK_ASM_ALIGN_LOG: fatal: need nm" + exit 1 +fi +cat > conftest.s <<EOF + $gmp_cv_check_asm_data + .align 4 + $gmp_cv_check_asm_globl foo + .byte 1 + .align 4 +foo$gmp_cv_check_asm_label_suffix + .byte 2 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +if { (eval echo configure:2467: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + + gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \ + -e 's;[^1-9]*\([0-9]*\).*;\1;'` + if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then + gmp_cv_check_asm_align_log=yes + else + gmp_cv_check_asm_align_log=no + fi +else + echo "configure: failed program was:" >&5 + cat conftest.s >&5 +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_align_log" 1>&6 + +echo "define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)" >> $gmp_tmpconfigm4 + +if test "$gmp_cv_check_asm_align_log" = "yes"; then + asm_align=log +else + asm_align=nolog +fi + +fi + +family=generic + +case ${target} in + arm*-*-*) + path="arm" + ;; + sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*) + if test -n "$CC64" + then path="sparc64" + else path="sparc32/v9 sparc32/v8 sparc32" + fi + ;; + sparc64-*-linux*) + if test -n "$CC64" + then path="sparc64" + else path="sparc32/v9 sparc32/v8 sparc32" + fi + ;; + sparcv8*-*-* | microsparc*-*-*) + path="sparc32/v8 sparc32" + if test x${floating_point} = xno + then extra_functions="udiv_nfp" + else extra_functions="udiv_fp" + fi + ;; + sparcv9*-*-* | ultrasparc*-*-*) + path="sparc32/v9 sparc32/v8 sparc32" + extra_functions="udiv_fp" + ;; + supersparc*-*-*) + path="sparc32/v8/supersparc sparc32/v8 sparc32" + extra_functions="udiv" + ;; + sparc*-*-*) path="sparc32" + if test 
x${floating_point} = xno + then extra_functions="udiv_nfp" + else extra_functions="udiv_fp" + fi + ;; + hppa7000*-*-*) + path="hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd" + ;; + hppa1.0*-*-*) + path="hppa" + extra_functions="udiv_qrnnd" + ;; + hppa2.0w-*-*) + path="pa64w" + extra_functions="umul_ppmm udiv_qrnnd" + ;; + hppa2.0*-*-*) + if test -n "$CC64"; then + path="pa64" + extra_functions="umul_ppmm udiv_qrnnd" + # We need to use the system compiler, or actually the system assembler, + # since GAS has not been ported to understand the 2.0 instructions. + CCAS="$CC64 -c" + else + # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa" + path="hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd" + fi + ;; + hppa*-*-*) #assume pa7100 + path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd";; + f30[01]-fujitsu-sysv*) + path=fujitsu;; + alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";; + alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";; + alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";; + # Cray vector machines. This must come after alpha* so that we can + # recognize present and future vector processors with a wildcard. + *-cray-unicos*) + path="cray" + extra_functions="mulww";; + am29000*-*-*) path="a29k";; + a29k*-*-*) path="a29k";; + + # AMD and Intel x86 configurations + + i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*) + gmp_m4postinc="x86/x86-defs.m4" + extra_functions="udiv umul" + CALLING_CONVENTIONS_OBJS="x86call.o x86check.o" + +echo $ECHO_N "checking if the assembler takes cl with shldl... 
$ECHO_C" 1>&6 +echo "configure:2583: checking if the assembler takes cl with shldl" 1>&5 +if test "${gmp_cv_check_asm_shldl_cl+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat > conftest.s <<EOF + $gmp_cv_check_asm_text + shldl %cl, %eax, %ebx +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +if { (eval echo configure:2592: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + gmp_cv_check_asm_shldl_cl=yes +else + gmp_cv_check_asm_shldl_cl=no +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_shldl_cl" 1>&6 +if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then + +echo 'define(<WANT_SHLDL_CL>, <1>)' >> $gmp_tmpconfigm4 + +else + +echo 'define(<WANT_SHLDL_CL>, <0>)' >> $gmp_tmpconfigm4 + +fi + + echo $ECHO_N "checking if the .align directive accepts an 0x90 fill in .text... $ECHO_C" 1>&6 +echo "configure:2612: checking if the .align directive accepts an 0x90 fill in .text" 1>&5 +if test "${gmp_cv_check_asm_align_fill_0x90+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat > conftest.s <<EOF + $gmp_cv_check_asm_text + .align 4, 0x90 + .byte 0 + .align 4, 0x90 +EOF +gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`" +if test $? = 0; then + echo "$gmp_tmp_val" 1>&5 + if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then + echo "Supressing this warning by omitting 0x90" 1>&5 + gmp_cv_check_asm_align_fill_0x90=no + else + gmp_cv_check_asm_align_fill_0x90=yes + fi +else + echo "Non-zero exit code" 1>&5 + echo "$gmp_tmp_val" 1>&5 + gmp_cv_check_asm_align_fill_0x90=no +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_align_fill_0x90" 1>&6 + +echo "define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)" >> $gmp_tmpconfigm4 + + # the CPUs below wanting to know about mmx + case ${target} in + pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*) + +echo $ECHO_N "checking if the assembler knows about MMX instructions... 
$ECHO_C" 1>&6 +echo "configure:2649: checking if the assembler knows about MMX instructions" 1>&5 +if test "${gmp_cv_check_asm_mmx+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat > conftest.s <<EOF + $gmp_cv_check_asm_text + por %mm0, %mm0 +EOF +ac_assemble="$CCAS $CFLAGS conftest.s 1>&5" +if { (eval echo configure:2658: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then + gmp_cv_check_asm_mmx=yes +else + gmp_cv_check_asm_mmx=no +fi +rm -f conftest* + +fi +echo "$ECHO_T""$gmp_cv_check_asm_mmx" 1>&6 +if test "$gmp_cv_check_asm_mmx" = "yes"; then + tmp_mmx=yes +else + echo "configure: warning: +----------------------------------------------------------" 1>&2 + echo "configure: warning: | WARNING WARNING WARNING" 1>&2 + echo "configure: warning: | Target CPU has MMX code, but it can't be assembled by" 1>&2 + echo "configure: warning: | $CCAS $CFLAGS" 1>&2 + echo "configure: warning: | Non-MMX replacements will be used." 1>&2 + echo "configure: warning: | This will be an inferior build." 1>&2 + echo "configure: warning: +----------------------------------------------------------" 1>&2 + tmp_mmx=no +fi + + ;; + esac + + # default for anything not otherwise mentioned + path="x86" + + case ${target} in + i[34]86*-*-*) + path="x86" + ;; + k5*-*-*) + # don't know what best suits k5 + path="x86" + ;; + i586*-*-* | pentium-*-*) + path="x86/pentium x86" + ;; + pentiummmx-*-*) + path="x86/pentium x86" + if test "$tmp_mmx" = yes; then + path="x86/pentium/mmx $path" + fi + ;; + i686*-*-* | pentiumpro-*-*) + path="x86/p6 x86" + ;; + pentium2-*-*) + path="x86/p6 x86" + # The pentium/mmx lshift and rshift are good on p6 and can be used + # until there's something specific for p6. + if test "$tmp_mmx" = yes; then + path="x86/p6/mmx x86/pentium/mmx $path" + fi + ;; + pentium3-*-*) + path="x86/p6 x86" + # The pentium/mmx lshift and rshift are good on p6 and can be used + # until there's something specific for p6. 
+ if test "$tmp_mmx" = yes; then + path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path" + fi + ;; + k6[23]*-*-*) + path="x86/k6 x86" + if test "$tmp_mmx" = yes; then + path="x86/k6/k62mmx x86/k6/mmx $path" + fi + ;; + k6*-*-*) + path="x86/k6 x86" + if test "$tmp_mmx" = yes; then + path="x86/k6/mmx $path" + fi + ;; + athlon-*-*) + path="x86/k7 x86" + if test "$tmp_mmx" = yes; then + path="x86/k7/mmx $path" + fi + ;; + esac + ;; + + i960*-*-*) path="i960";; + + ia64*-*-*) path="ia64";; + +# Motorola 68k configurations. Let m68k mean 68020-68040. + m680[234]0*-*-* | m68k*-*-* | \ + m68*-next-nextstep*) # Nexts are at least '020 + path="m68k/mc68020 m68k" + family=m68k + ;; + m68000*-*-*) + path="m68k" + family=m68k + ;; + + m88k*-*-* | m88k*-*-*) path="m88k";; + m88110*-*-*) path="m88k/mc88110 m88k";; + ns32k*-*-*) path="ns32k";; + + pyramid-*-*) path="pyr";; + + ppc601-*-*) path="power powerpc32";; + powerpc64*-*-*) path="powerpc64";; + powerpc*-*-*) path="powerpc32";; + rs6000-*-* | power-*-* | power2-*-*) + path="power" + extra_functions="udiv_w_sdiv" + ;; + + sh-*-*) path="sh";; + sh2-*-*) path="sh/sh2 sh";; + + mips[34]*-*-*) path="mips3";; + mips*-*-irix6*) path="mips3";; + mips*-*-*) path="mips2";; + + vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";; + + z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";; + z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";; + + clipper*-*-*) path="clipper";; +esac + +if test -n "$CALLING_CONVENTIONS_OBJS"; then + cat >>confdefs.h <<\EOF +#define HAVE_CALLING_CONVENTIONS 1 +EOF + +fi + +case ${target} in + i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*) + # rdtsc is in pentium and up, not in i386 and i486 + SPEED_CYCLECOUNTER_OBJS=pentium.lo + ;; + alpha*-*-*) + SPEED_CYCLECOUNTER_OBJS=alpha.lo + ;; + sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*) + SPEED_CYCLECOUNTER_OBJS=sparcv9.lo + ;; + hppa2*-*-*) + SPEED_CYCLECOUNTER_OBJS=hppa2.lo + ;; + hppa*-*-*) + SPEED_CYCLECOUNTER_OBJS=hppa.lo + ;; +esac + 
+if test -n "$SPEED_CYCLECOUNTER_OBJS" +then + cat >>confdefs.h <<\EOF +#define HAVE_SPEED_CYCLECOUNTER 1 +EOF + +fi + +echo $ECHO_N "checking for Cygwin environment... $ECHO_C" 1>&6 +echo "configure:2822: checking for Cygwin environment" 1>&5 +if test "${ac_cv_cygwin+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 2827 "configure" +#include "confdefs.h" + +int +main () +{ +#ifndef __CYGWIN__ +# define __CYGWIN__ __CYGWIN32__ +#endif +return __CYGWIN__; + ; + return 0; +} +EOF +if { (eval echo configure:2841: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_cygwin=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_cygwin=no +fi +rm -f conftest* +fi +echo "$ECHO_T""$ac_cv_cygwin" 1>&6 +CYGWIN= +test "$ac_cv_cygwin" = yes && CYGWIN=yes +echo $ECHO_N "checking for mingw32 environment... $ECHO_C" 1>&6 +echo "configure:2856: checking for mingw32 environment" 1>&5 +if test "${ac_cv_mingw32+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 2861 "configure" +#include "confdefs.h" + +int +main () +{ +return __MINGW32__; + ; + return 0; +} +EOF +if { (eval echo configure:2872: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_mingw32=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_mingw32=no +fi +rm -f conftest* +fi +echo "$ECHO_T""$ac_cv_mingw32" 1>&6 +MINGW32= +test "$ac_cv_mingw32" = yes && MINGW32=yes +echo $ECHO_N "checking for EMX OS/2 environment... 
$ECHO_C" 1>&6 +echo "configure:2887: checking for EMX OS/2 environment" 1>&5 +if test "${ac_cv_emxos2+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 2892 "configure" +#include "confdefs.h" + +int +main () +{ +return __EMX__; + ; + return 0; +} +EOF +if { (eval echo configure:2903: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_emxos2=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_emxos2=no +fi +rm -f conftest* +fi +echo "$ECHO_T""$ac_cv_emxos2" 1>&6 +EMXOS2= +test "$ac_cv_emxos2" = yes && EMXOS2=yes + +echo $ECHO_N "checking for executable suffix... $ECHO_C" 1>&6 +echo "configure:2919: checking for executable suffix" 1>&5 +if test "${ac_cv_exeext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test "$CYGWIN" = yes || test "$MINGW32" = yes || test "$EMXOS2" = yes; then + ac_cv_exeext=.exe +else + rm -f conftest* + echo 'int main () { return 0; }' >conftest.$ac_ext + ac_cv_exeext= + if { (eval echo configure:2929: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then + for ac_file in conftest.*; do + case $ac_file in + *.c | *.C | *.o | *.obj | *.xcoff) ;; + *) ac_cv_exeext=`echo $ac_file | sed -e s/conftest//` ;; + esac + done + else + { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } + fi + rm -f conftest* + test x"${ac_cv_exeext}" = x && ac_cv_exeext=no +fi +fi + +EXEEXT="" +test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} +echo "$ECHO_T""${ac_cv_exeext}" 1>&6 +ac_exeext=$EXEEXT + +echo $ECHO_N "checking for object suffix... 
$ECHO_C" 1>&6 +echo "configure:2950: checking for object suffix" 1>&5 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + rm -f conftest* +echo 'int i = 1;' >conftest.$ac_ext +if { (eval echo configure:2956: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + for ac_file in conftest.*; do + case $ac_file in + *.c) ;; + *) ac_cv_objext=`echo $ac_file | sed -e s/conftest.//` ;; + esac + done +else + { echo "configure: error: installation or configuration problem; compiler does not work" 1>&2; exit 1; } +fi +rm -f conftest* +fi + +echo "$ECHO_T""$ac_cv_objext" 1>&6 +OBJEXT=$ac_cv_objext +ac_objext=$ac_cv_objext + +case "$target" in + *-*-aix4.[3-9]*) enable_shared=no ;; +esac +# Check whether --enable-shared or --disable-shared was given. +if test "${enable_shared+set}" = set; then + enableval="$enable_shared" + p=${PACKAGE-default} +case "$enableval" in +yes) enable_shared=yes ;; +no) enable_shared=no ;; +*) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_shared=yes +fi +# Check whether --enable-static or --disable-static was given. +if test "${enable_static+set}" = set; then + enableval="$enable_static" + p=${PACKAGE-default} +case "$enableval" in +yes) enable_static=yes ;; +no) enable_static=no ;; +*) + enable_static=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_static=yes +fi +# Check whether --enable-fast-install or --disable-fast-install was given. 
+if test "${enable_fast_install+set}" = set; then + enableval="$enable_fast_install" + p=${PACKAGE-default} +case "$enableval" in +yes) enable_fast_install=yes ;; +no) enable_fast_install=no ;; +*) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," + for pkg in $enableval; do + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS="$ac_save_ifs" + ;; +esac +else + enable_fast_install=yes +fi + +echo $ECHO_N "checking build system type... $ECHO_C" 1>&6 +echo "configure:3044: checking build system type" 1>&5 +if test "x$ac_cv_build" = "x" || (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then + + # Make sure we can run config.sub. + if $ac_config_sub sun4 >/dev/null 2>&1; then :; else + { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; } + fi + + ac_cv_build_alias=$build + case "$ac_cv_build_alias" in + NONE) + case $nonopt in + NONE) + ac_cv_build_alias=$host_alias ;; + *) ac_cv_build_alias=$nonopt ;; + esac ;; + esac + + ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1 + ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` + ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` + ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` +else + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +fi + +echo "$ECHO_T""$ac_cv_build" 1>&6 + +build=$ac_cv_build +build_alias=$ac_cv_build_alias +build_cpu=$ac_cv_build_cpu +build_vendor=$ac_cv_build_vendor +build_os=$ac_cv_build_os + +# Check whether --with-gnu-ld or --without-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then + withval="$with_gnu_ld" + test "$withval" = no || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test "$ac_cv_prog_gcc" = yes; then + # Check if gcc -print-prog-name=ld gives a path. + echo $ECHO_N "checking for ld used by GCC... 
$ECHO_C" 1>&6 +echo "configure:3090: checking for ld used by GCC" 1>&5 + case $target in + *-*-mingw*) + # gcc leaves a trailing carriage return which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case "$ac_prog" in + # Accept absolute paths. + [\\/]* | [A-Za-z]:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the path of ld + ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` + while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do + ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` + done + test -z "$LD" && LD="$ac_prog" + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test "$with_gnu_ld" = yes; then + echo $ECHO_N "checking for GNU ld... $ECHO_C" 1>&6 +echo "configure:3120: checking for GNU ld" 1>&5 +else + echo $ECHO_N "checking for non-GNU ld... $ECHO_C" 1>&6 +echo "configure:3123: checking for non-GNU ld" 1>&5 +fi +if test "${ac_cv_path_LD+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -z "$LD"; then + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + ac_cv_path_LD="$ac_dir/$ac_prog" + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some GNU ld's only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then + test "$with_gnu_ld" != no && break + else + test "$with_gnu_ld" != yes && break + fi + fi + done + IFS="$ac_save_ifs" +else + ac_cv_path_LD="$LD" # Let the user override the test with a path. 
+fi +fi + +LD="$ac_cv_path_LD" +if test -n "$LD"; then + echo "$ECHO_T""$LD" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi +test -z "$LD" && { echo "configure: error: no acceptable ld found in \$PATH" 1>&2; exit 1; } +echo $ECHO_N "checking if the linker ($LD) is GNU ld... $ECHO_C" 1>&6 +echo "configure:3158: checking if the linker ($LD) is GNU ld" 1>&5 +if test "${ac_cv_prog_gnu_ld+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + # I'd rather use --version here, but apparently some GNU ld's only accept -v. +if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then + ac_cv_prog_gnu_ld=yes +else + ac_cv_prog_gnu_ld=no +fi +fi +echo "$ECHO_T""$ac_cv_prog_gnu_ld" 1>&6 +with_gnu_ld=$ac_cv_prog_gnu_ld + +echo $ECHO_N "checking for $LD option to reload object files... $ECHO_C" 1>&6 +echo "configure:3173: checking for $LD option to reload object files" 1>&5 +if test "${lt_cv_ld_reload_flag+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + lt_cv_ld_reload_flag='-r' +fi +echo "$ECHO_T""$lt_cv_ld_reload_flag" 1>&6 +reload_flag=$lt_cv_ld_reload_flag +test -n "$reload_flag" && reload_flag=" $reload_flag" + +echo $ECHO_N "checking how to recognise dependant libraries... $ECHO_C" 1>&6 +echo "configure:3184: checking how to recognise dependant libraries" 1>&5 +if test "${lt_cv_deplibs_check_method+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + lt_cv_file_magic_cmd='${MAGIC}' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# `unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [regex]' -- check by looking for files in library path +# which responds to the $file_magic_cmd with a given egrep regex. 
+# If you have `file' or equivalent on your system and you're not sure +# whether `pass_all' will *always* work, you probably want this one. + +case "$host_os" in +aix4* | beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi4*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin* | mingw*) + lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' + lt_cv_file_magic_cmd='${OBJDUMP} -f' + ;; + +freebsd*) + case "$version_type" in + freebsd-elf*) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +irix5* | irix6*) + case "$host_os" in + irix5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" + ;; + *) + case "$LD" in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1" + ;; + esac + lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. 
+linux-gnu*) + case "$host_cpu" in + alpha* | i*86 | powerpc* | sparc* ) + lt_cv_deplibs_check_method=pass_all ;; + *) + # glibc up to 2.1.1 does not perform some relocations on ARM + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; + esac + lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` + ;; + +osf3* | osf4* | osf5*) + # this will be overridden with pass_all, but let us keep it just in case + lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' + lt_cv_file_magic_test_file=/shlib/libc.so + lt_cv_deplibs_check_method=pass_all + ;; + +sco3.2v5*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + lt_cv_file_magic_test_file=/lib/libc.so + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + case "$host_vendor" in + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + esac + ;; +esac + +fi +echo "$ECHO_T""$lt_cv_deplibs_check_method" 1>&6 +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method + +if test $host != $build; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + +# Only perform the check for file, if the check method requires it +case "$deplibs_check_method" in +file_magic*) + if test "$file_magic_cmd" = '${MAGIC}'; then + +echo $ECHO_N "checking for ${ac_tool_prefix}file... $ECHO_C" 1>&6 +echo "configure:3308: checking for ${ac_tool_prefix}file" 1>&5 +if test "${lt_cv_path_MAGIC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$MAGIC" in + /*) + lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path. + ;; + ?:/*) + ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path. 
+ ;; + *) + ac_save_MAGIC="$MAGIC" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="/usr/bin:$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/${ac_tool_prefix}file; then + lt_cv_path_MAGIC="$ac_dir/${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case "$deplibs_check_method" in + "file_magic "*) + file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" + MAGIC="$lt_cv_path_MAGIC" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + egrep "$file_magic_regex" > /dev/null; then + : + else + cat <<EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +EOF + fi ;; + esac + fi + break + fi + done + IFS="$ac_save_ifs" + MAGIC="$ac_save_MAGIC" + ;; +esac +fi + +MAGIC="$lt_cv_path_MAGIC" +if test -n "$MAGIC"; then + echo "$ECHO_T""$MAGIC" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +if test -z "$lt_cv_path_MAGIC"; then + if test -n "$ac_tool_prefix"; then + echo $ECHO_N "checking for file... $ECHO_C" 1>&6 +echo "configure:3370: checking for file" 1>&5 +if test "${lt_cv_path_MAGIC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + case "$MAGIC" in + /*) + lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path. + ;; + ?:/*) + ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path. + ;; + *) + ac_save_MAGIC="$MAGIC" + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" + ac_dummy="/usr/bin:$PATH" + for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/file; then + lt_cv_path_MAGIC="$ac_dir/file" + if test -n "$file_magic_test_file"; then + case "$deplibs_check_method" in + "file_magic "*) + file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" + MAGIC="$lt_cv_path_MAGIC" + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + egrep "$file_magic_regex" > /dev/null; then + : + else + cat <<EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +EOF + fi ;; + esac + fi + break + fi + done + IFS="$ac_save_ifs" + MAGIC="$ac_save_MAGIC" + ;; +esac +fi + +MAGIC="$lt_cv_path_MAGIC" +if test -n "$MAGIC"; then + echo "$ECHO_T""$MAGIC" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + + else + MAGIC=: + fi +fi + + fi + ;; +esac + +case "$target" in +NONE) lt_target="$host" ;; +*) lt_target="$target" ;; +esac + +# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:3446: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + break + done +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ECHO_T""$RANLIB" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +if test -z "$ac_cv_prog_RANLIB"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:3480: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_RANLIB="ranlib" + break + done + test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ECHO_T""$RANLIB" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + + else + RANLIB=":" + fi +fi + +# Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:3518: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + break + done +fi +fi +STRIP="$ac_cv_prog_STRIP" +if test -n "$STRIP"; then + echo "$ECHO_T""$STRIP" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + +if test -z "$ac_cv_prog_STRIP"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6 +echo "configure:3552: checking for $ac_word" 1>&5 +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else + for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" +ac_dummy="$PATH" +for ac_dir in $ac_dummy; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + echo "$ac_dir/$ac_word" + fi +done +IFS="$ac_save_ifs" +`; do + ac_cv_prog_STRIP="strip" + break + done + test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":" +fi +fi +STRIP="$ac_cv_prog_STRIP" +if test -n "$STRIP"; then + echo "$ECHO_T""$STRIP" 1>&6 +else + echo "$ECHO_T""no" 1>&6 +fi + + else + STRIP=":" + fi +fi + +# Check for any special flags to pass to ltconfig. +libtool_flags="--cache-file=$cache_file" +test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared" +test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static" +test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install" +test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc" +test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld" + +# Check whether --enable-libtool-lock or --disable-libtool-lock was given. 
+if test "${enable_libtool_lock+set}" = set; then + enableval="$enable_libtool_lock" + +fi +test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock" +test x"$silent" = xyes && libtool_flags="$libtool_flags --silent" + +# Check whether --with-pic or --without-pic was given. +if test "${with_pic+set}" = set; then + withval="$with_pic" + pic_mode="$withval" +else + pic_mode=default +fi +test x"$pic_mode" = xyes && libtool_flags="$libtool_flags --prefer-pic" +test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic" + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case "$lt_target" in +*-*-irix6*) + # Find out which ABI we are using. + echo '#line 3618 "configure"' > conftest.$ac_ext + if { (eval echo configure:3619: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + case "`/usr/bin/file conftest.o`" in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + echo $ECHO_N "checking whether the C compiler needs -belf... $ECHO_C" 1>&6 +echo "configure:3640: checking whether the C compiler needs -belf" 1>&5 +if test "${lt_cv_cc_needs_belf+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
+ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + + cat >conftest.$ac_ext <<EOF +#line 3653 "configure" +#include "confdefs.h" + +int +main() +{ + + ; + return 0; +} +EOF +if { (eval echo configure:3664: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + lt_cv_cc_needs_belf=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + lt_cv_cc_needs_belf=no +fi +rm -f conftest* + + ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='$CPP $CPPFLAGS' +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +cross_compiling=$ac_cv_prog_cc_cross + +fi +echo "$ECHO_T""$lt_cv_cc_needs_belf" 1>&6 + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; + +esac + +# Save cache, so that ltconfig can load it +cat >confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. +# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. 
+# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. +# +EOF +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +(set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote substitution + # turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + -e "s/'/'\\\\''/g" \ + -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' + ;; + esac >>confcache +if cmp -s $cache_file confcache; then :; else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +# Actually configure libtool. ac_aux_dir is where install-sh is found. +AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \ +MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \ +LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \ +AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \ +objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \ +deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \ +${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \ +$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \ +|| { echo "configure: error: libtool configure failed" 1>&2; exit 1; } + +# Reload cache, that may have been modified by ltconfig +if test -r "$cache_file"; then + echo "loading cache $cache_file" + test -f "$cache_file" && . 
$cache_file +else + echo "creating cache $cache_file" + >$cache_file +fi + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh" + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + +# Redirect the config.log output again, so that the ltconfig log is not +# clobbered by the next message. +exec 5>>./config.log + +echo $ECHO_N "checking whether optarg is declared... $ECHO_C" 1>&6 +echo "configure:3769: checking whether optarg is declared" 1>&5 +if test "${ac_cv_have_decl_optarg+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 3774 "configure" +#include "confdefs.h" +$ac_includes_default +int +main () +{ +#ifndef optarg + char *p = (char *) optarg; +#endif + + ; + return 0; +} +EOF +if { (eval echo configure:3788: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_have_decl_optarg=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_have_decl_optarg=no +fi +rm -f conftest* +fi +echo "$ECHO_T""$ac_cv_have_decl_optarg" 1>&6 +if test $ac_cv_have_decl_optarg = yes; then + cat >>confdefs.h <<EOF +#define HAVE_DECL_OPTARG 1 +EOF + +else + cat >>confdefs.h <<EOF +#define HAVE_DECL_OPTARG 0 +EOF + +fi + +echo $ECHO_N "checking for ANSI C header files... 
$ECHO_C" 1>&6 +echo "configure:3813: checking for ANSI C header files" 1>&5 +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat >conftest.$ac_ext <<EOF +#line 3819 "configure" +#include "confdefs.h" +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <float.h> + +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:3828: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + ac_cv_header_stdc=yes +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + +cat >conftest.$ac_ext <<EOF +#line 3846 "configure" +#include "confdefs.h" +#include <string.h> + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "memchr" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + +cat >conftest.$ac_ext <<EOF +#line 3866 "configure" +#include "confdefs.h" +#include <stdlib.h> + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "free" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. +if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<EOF +#line 3888 "configure" +#include "confdefs.h" +#include <ctype.h> +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + exit(2); + exit (0); +} +EOF +if { (eval echo configure:3913: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + : +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + ac_cv_header_stdc=no +fi +rm -fr conftest* + +fi + +fi +fi +echo "$ECHO_T""$ac_cv_header_stdc" 1>&6 +if test $ac_cv_header_stdc = yes; then + cat >>confdefs.h <<\EOF +#define STDC_HEADERS 1 +EOF + +fi + +for ac_header in getopt.h unistd.h sys/sysctl.h sys/time.h +do +ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh` +echo $ECHO_N "checking for $ac_header... $ECHO_C" 1>&6 +echo "configure:3940: checking for $ac_header" 1>&5 +if eval "test \"\${$ac_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat >conftest.$ac_ext <<EOF +#line 3946 "configure" +#include "confdefs.h" +#include <$ac_header> + +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:3952: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "$ac_ac_Header=yes" +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "$ac_ac_Header=no" +fi +rm -f conftest* +fi +echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6 +if test `eval echo '${'$ac_ac_Header'}'` = yes; then + cat >>confdefs.h <<EOF +#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1 +EOF + +fi +done + +echo $ECHO_N "checking for void... 
$ECHO_C" 1>&6 +echo "configure:3976: checking for void" 1>&5 +if test "${ac_cv_type_void+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 3981 "configure" +#include "confdefs.h" +$ac_includes_default +int +main () +{ +if ((void *) 0) + return 0; +if (sizeof (void)) + return 0; + ; + return 0; +} +EOF +if { (eval echo configure:3995: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + rm -rf conftest* + ac_cv_type_void=yes +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_type_void=no +fi +rm -f conftest* +fi +echo "$ECHO_T""$ac_cv_type_void" 1>&6 +if test $ac_cv_type_void = yes; then + cat >>confdefs.h <<EOF +#define HAVE_VOID 1 +EOF + +fi + +echo $ECHO_N "checking for preprocessor stringizing operator... $ECHO_C" 1>&6 +echo "configure:4015: checking for preprocessor stringizing operator" 1>&5 +if test "${ac_cv_c_stringize+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat >conftest.$ac_ext <<EOF +#line 4021 "configure" +#include "confdefs.h" + +#define x(y) #y + +char *s = x(teststring); + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "#teststring" >/dev/null 2>&1; then + rm -rf conftest* + ac_cv_c_stringize=no +else + rm -rf conftest* + ac_cv_c_stringize=yes +fi +rm -f conftest* + +fi + +if test "${ac_cv_c_stringize}" = yes; then + cat >>confdefs.h <<\EOF +#define HAVE_STRINGIZE 1 +EOF + +fi +echo "$ECHO_T""${ac_cv_c_stringize}" 1>&6 + +for ac_func in getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname +do +ac_ac_var=`echo "ac_cv_func_$ac_func" | $ac_tr_sh` +echo $ECHO_N "checking for $ac_func... 
$ECHO_C" 1>&6 +echo "configure:4053: checking for $ac_func" 1>&5 +if eval "test \"\${$ac_ac_var+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.$ac_ext <<EOF +#line 4058 "configure" +#include "confdefs.h" +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func(); below. */ +#include <assert.h> +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func(); +char (*f)(); + +int +main() +{ + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +f = $ac_func; +#endif + + ; + return 0; +} +EOF +if { (eval echo configure:4086: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "$ac_ac_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "$ac_ac_var=no" +fi +rm -f conftest* + +fi +echo "$ECHO_T""`eval echo '${'$ac_ac_var'}'`" 1>&6 +if test `eval echo '${'$ac_ac_var'}'` = yes; then + cat >>confdefs.h <<EOF +#define `echo "HAVE_$ac_func" | $ac_tr_cpp` 1 +EOF + +fi +done + +echo $ECHO_N "checking if ansi2knr should be used... 
$ECHO_C" 1>&6 +echo "configure:4108: checking if ansi2knr should be used" 1>&5 +if test "${gmp_cv_c_ansi2knr+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + cat >conftest.c <<EOF +int main (int argc, char *argv) { return 0; } +EOF +if { (eval echo configure:4115: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then + gmp_cv_c_ansi2knr=no +else + gmp_cv_c_ansi2knr=yes +fi +rm -f conftest.* + +fi +echo "$ECHO_T""$gmp_cv_c_ansi2knr" 1>&6 +if test $gmp_cv_c_ansi2knr = no; then + U= ANSI2KNR= +else + U=_ ANSI2KNR=./ansi2knr + # Ensure some checks needed by ansi2knr itself. + +echo $ECHO_N "checking for ANSI C header files... $ECHO_C" 1>&6 +echo "configure:4131: checking for ANSI C header files" 1>&5 +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat >conftest.$ac_ext <<EOF +#line 4137 "configure" +#include "confdefs.h" +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <float.h> + +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:4146: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + ac_cv_header_stdc=yes +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + +cat >conftest.$ac_ext <<EOF +#line 4164 "configure" +#include "confdefs.h" +#include <string.h> + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "memchr" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. 
+ +cat >conftest.$ac_ext <<EOF +#line 4184 "configure" +#include "confdefs.h" +#include <stdlib.h> + +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "free" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. +if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<EOF +#line 4206 "configure" +#include "confdefs.h" +#include <ctype.h> +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + exit(2); + exit (0); +} +EOF +if { (eval echo configure:4231: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + : +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + ac_cv_header_stdc=no +fi +rm -fr conftest* + +fi + +fi +fi +echo "$ECHO_T""$ac_cv_header_stdc" 1>&6 +if test $ac_cv_header_stdc = yes; then + cat >>confdefs.h <<\EOF +#define STDC_HEADERS 1 +EOF + +fi + + for ac_header in string.h +do +ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh` +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" 1>&6 +echo "configure:4258: checking for $ac_header" 1>&5 +if eval "test \"\${$ac_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" 1>&6 +else + +cat >conftest.$ac_ext <<EOF +#line 4264 "configure" +#include "confdefs.h" +#include <$ac_header> + +EOF +ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +{ (eval echo configure:4270: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "$ac_ac_Header=yes" +else + echo "$ac_err" >&5 + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "$ac_ac_Header=no" +fi +rm -f conftest* +fi +echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6 +if test `eval echo '${'$ac_ac_Header'}'` = yes; then + cat >>confdefs.h <<EOF +#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1 +EOF + +fi +done + +fi + +syntax= +# For now, we use the old switch for setting syntax. +# FIXME: Remove when conversion to .asm is completed. +case "${target}" in + m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \ + m68k-next-nextstep* | \ + m68000*-*-*) + syntax=mit + ;; + m680[234]0*-*-linux* | m68k*-*-linux*) + syntax=elf + ;; + m680[234]0*-*-* | m68k*-*-*) + syntax=mit + ;; +esac + +# Now build an asm-syntax.h file for targets that include that from the +# assembly files. +# FIXME: Remove when conversion to .asm is completed. +case "${family}-${underscore}-${asm_align}-${syntax}" in + m68k-yes-log-mit) + echo '#define MIT_SYNTAX' >asm-syntax.h + cat $srcdir/mpn/underscore.h >>asm-syntax.h + echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;; + m68k-no-nolog-elf) + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h + echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;; +esac + +# The pattern here tests for an absolute path the same way as +# _AC_OUTPUT_FILES in autoconf acgeneral.m4. 
+ +echo "dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir" >> $gmp_tmpconfigm4 + +case "$srcdir" in +[\\/]* | ?:[\\/]* ) + +echo "define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)" >> $gmp_tmpconfigm4 + ;; +*) + +echo "define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)" >> $gmp_tmpconfigm4 + ;; +esac + +echo "include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')" >> $gmp_tmpconfigm4p + +# Must be after asm-defs.m4 + +echo "define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')" >> $gmp_tmpconfigm4p + +case "$target" in + alpha*-cray-unicos*) + gmp_m4postinc="alpha/unicos.m4" + ;; + alpha*-*-*) + gmp_m4postinc="alpha/default.m4" + ;; + power*-*-*) + case "$target" in + *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*) + ;; # these use non-conventional assembly syntax. + powerpc64-*-aix*) + gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4" + ;; + *-*-aix*) + gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4" + ;; + *) + gmp_m4postinc="powerpc32/regmap.m4" + ;; + esac + ;; +esac + +for tmp_f in $gmp_m4postinc; do + +echo "include_mpn(\`$tmp_f')" >> $gmp_tmpconfigm4p + +done + +# Set up `gmp_links'. It's a list of link:file pairs that configure will +# process to create link -> file. +gmp_links= + +# If the user specified `MPN_PATH', use that instead of the path we've +# come up with. +if test -z "$MPN_PATH"; then + path="$path generic" +else + path="$MPN_PATH" +fi + +# Pick the correct source files in $path and link them to mpn/. +# $gmp_mpn_functions lists all functions we need. +# +# The rule is to find a file with the function name and a .asm, .S, +# .s, or .c extension. Certain multi-function files with special names +# can provide some functions too. (mpn/Makefile.am passes +# -DOPERATION_<func> to get them to generate the right code.) + +# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since +# there's some versions of those files which should be checked for bit +# rot first. 
Put them in $extra_functions for each target for now, +# change to standard optionals when all are ready. + +# Note: The following lines defining $gmp_mpn_functions_optional +# and $gmp_mpn_functions are parsed by the "macos/configure" +# Perl script. So if you change the lines in a major way +# make sure to run and examine the output from +# +# % (cd macos; perl configure) + +gmp_mpn_functions_optional="copyi copyd com_n \ + and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n" + +gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \ + submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \ + mod_1 mod_1_rs pre_mod_1 dump \ + mul mul_fft mul_n mul_basecase sqr_basecase random \ + random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \ + bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \ + $gmp_mpn_functions_optional" + +# mpn_objects and mpn_objs_in_libgmp: the lists of all object files used by +# mpn/Makefile.in and the top-level Makefile.in, respectively +mpn_objects= +mpn_objs_in_libgmp="mpn/mp_bases.lo" + +# SLPJ trace +echo "Peering at file structure (takes a while)..." 1>&6 + +# For each function in $gmp_mpn_functions, start by removing any existing +# mpn/<func>.S, .s, .c and .asm file. +for tmp_fn in ${gmp_mpn_functions} ; do +# SLPJ trace + echo "...$tmp_fn..." 1>&6 + +# This line was +# rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm +# but I found that on my NT workstation the command +# would unpredictably hang. rm wasn't an active process, +# but absolutely nothing was happening. +# I *think* that expanding the [Ssc] cures the problem +# SLPJ May 01 + rm -f mpn/${tmp_fn}.S mpn/${tmp_fn}.s mpn/${tmp_fn}.c mpn/${tmp_fn}.asm + + echo "...$tmp_fn (done rm)..."
1>&6 + + # functions that can be provided by multi-function files + tmp_mulfunc= + case $tmp_fn in + add_n|sub_n) tmp_mulfunc="aors_n" ;; + addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;; + popcount|hamdist) tmp_mulfunc="popham" ;; + and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n) + tmp_mulfunc="logops_n" ;; + esac + + # Look in each directory of $path, in order, for the function's own file + # or its multi-function file, trying the extensions asm, S, s, c; stop at + # the first file that exists. + found=no + for tmp_dir in $path; do + +# SLPJ trace +# We get stuck sometimes + echo " ...dir $tmp_dir..." 1>&6 + for tmp_base in $tmp_fn $tmp_mulfunc; do + +# SLPJ trace +# We get stuck sometimes + echo " ...base $tmp_base..." 1>&6 + for tmp_ext in asm S s c; do + tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext + +# SLPJ trace +# We get stuck sometimes + echo " ...$tmp_file..." 1>&6 + + if test -f $tmp_file; then + found=yes + + mpn_objects="$mpn_objects ${tmp_fn}.lo" + mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo" + gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext" + + # Define HAVE_NATIVE_<name> for every name found in a + # MULFUNC_PROLOGUE(...) or PROLOGUE...(...) line of the chosen file. + # duplicate AC_DEFINEs are harmless, so it doesn't matter + # that multi-function files get grepped here repeatedly + gmp_ep="` + sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ; + sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file + `" + for gmp_tmp in $gmp_ep; do + cat >>confdefs.h <<EOF +#define HAVE_NATIVE_${gmp_tmp} 1 +EOF + + done + + break + fi + done + if test $found = yes; then break ; fi + done + if test $found = yes; then break ; fi + done + + # Nothing found: that is an error unless the function is listed in + # $gmp_mpn_functions_optional. + if test $found = no; then + for tmp_optional in $gmp_mpn_functions_optional; do + if test $tmp_optional = $tmp_fn; then + found=yes + fi + done + if test $found = no; then + { echo "configure: error: no version of $tmp_fn found in path: $path" 1>&2; exit 1; } + fi + fi +done + + +# Create link for gmp-mparam.h. + +# SLPJ trace +echo "Creating link for gmp-mparam.h..."
1>&6 + +for tmp_dir in $path ; do + rm -f gmp-mparam.h + if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then + gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h" + + # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4. + # Some versions of sqr_basecase.asm use this. + tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`" + if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then + +echo "define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4 + + fi + + break + fi +done + +# SLPJ trace +echo "Digging out links to include in DISTCLEANFILES..." 1>&6 + +# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES. +gmp_srclinks= +for f in $gmp_links; do + gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`" +done + +echo "creating $gmp_configm4" +echo "dnl $gmp_configm4. Generated automatically by configure." > $gmp_configm4 +if test -f $gmp_tmpconfigm4; then + echo "changequote(<,>)dnl" >> $gmp_configm4 + echo "ifdef(<__CONFIG_M4_INCLUDED__>,,<" >> $gmp_configm4 + cat $gmp_tmpconfigm4 >> $gmp_configm4 + echo ">)" >> $gmp_configm4 + echo "changequote(\`,')dnl" >> $gmp_configm4 + rm $gmp_tmpconfigm4 +fi +echo "ifdef(\`__CONFIG_M4_INCLUDED__',,\`" >> $gmp_configm4 +if test -f $gmp_tmpconfigm4i; then + cat $gmp_tmpconfigm4i >> $gmp_configm4 + rm $gmp_tmpconfigm4i +fi +if test -f $gmp_tmpconfigm4p; then + cat $gmp_tmpconfigm4p >> $gmp_configm4 + rm $gmp_tmpconfigm4p +fi +echo "')" >> $gmp_configm4 +echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4 + +trap '' 1 2 15 +cat >confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. 
+# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. +# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. +# +EOF +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +(set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote substitution + # turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + -e "s/'/'\\\\''/g" \ + -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' + ;; + esac >>confcache +if cmp -s $cache_file confcache; then :; else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Any assignment to VPATH causes Sun make to only execute +# the first set of double-colon rules, so remove it if not needed. +# If there is a colon in the path, we need to keep it. 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' +fi + +DEFS=-DHAVE_CONFIG_H + +: ${CONFIG_STATUS=./config.status} +trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 +echo creating $CONFIG_STATUS +cat >$CONFIG_STATUS <<EOF +#! /bin/sh +# Generated automatically by configure. +# Run this file to recreate the current configuration. +# This directory was configured as follows, +# on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# +# $0 $ac_configure_args +# +# Compiler output produced by configure, useful for debugging +# configure, is in ./config.log if it exists. + +# Files that config.status was made for. +config_files="\\ + Makefile mpn/Makefile mpz/Makefile" +config_headers="\\ + config.h:config.in" +config_links="\\ + $gmp_links" +config_commands="\\ + default-1" + +ac_cs_usage="\\ +\\\`$CONFIG_STATUS' instantiates files from templates according to the +current configuration. + +Usage: $CONFIG_STATUS [OPTIONS] FILE... + + --recheck Update $CONFIG_STATUS by reconfiguring in the same conditions + --version Print the version of Autoconf and exit + --help Display this help and exit + --file=FILE[:TEMPLATE] + Instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + Instantiate the configuration header FILE + +Configuration files: +\$config_files + +Configuration headers: +\$config_headers + +Configuration links: +\$config_links + +Configuration commands: +\$config_commands + +Report bugs to <bug-autoconf@gnu.org>." + +ac_cs_version="\\ +$CONFIG_STATUS generated by autoconf version 2.14a. +Configured on host `(hostname || uname -n) 2>/dev/null | sed 1q` by + `echo "$0 $ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'`" + +# Root of the tmp file names. Use pid to allow concurrent executions. +ac_cs_root=cs\$\$ +ac_given_srcdir=$srcdir +ac_given_INSTALL="$INSTALL" + +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. 
+ac_need_defaults=: +while test \$# != 0 +do + case "\$1" in + --*=*) + ac_option=\`echo "\$1" | sed -e 's/=.*//'\` + ac_optarg=\`echo "\$1" | sed -e 's/[^=]*=//'\` + shift + set dummy "\$ac_option" "\$ac_optarg" \${1+"\$@"} + shift + ;; + -*);; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_need_defaults=false;; + esac + + case "\$1" in + + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + echo "running \${CONFIG_SHELL-/bin/sh} $0 `echo "$ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'` --no-create --no-recursion" + exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; + -version | --version | --versio | --versi | --vers | --ver | --ve | --v) + echo "\$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + echo "$CONFIG_STATUS: ambiguous option: \$ac_option +Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;; + -help | --help | --hel ) + echo "\$ac_cs_usage"; exit 0 ;; + --file | --fil | --fi | --f ) + shift + CONFIG_FILES="\$CONFIG_FILES \$1" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + shift + CONFIG_HEADERS="\$CONFIG_FILES \$1" + ac_need_defaults=false;; + + # Handling of arguments. + 'Makefile' ) CONFIG_FILES="\$CONFIG_FILES Makefile" ;; + 'mpz/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpz/Makefile" ;; + 'mpn/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpn/Makefile" ;; + '$gmp_links' ) CONFIG_LINKS="\$CONFIG_LINKS $gmp_links" ;; + 'default-1' ) CONFIG_COMMANDS="\$CONFIG_COMMANDS default-1" ;; + 'config.h' ) CONFIG_HEADERS="\$CONFIG_HEADERS config.h:config.in" ;; + + # This is an error. 
+ -*) echo "$CONFIG_STATUS: unrecognized option: \$1 +Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;; + *) echo "$CONFIG_STATUS: invalid argument: \$1"; exit 1 ;; + esac + shift +done + +EOF + +cat >>$CONFIG_STATUS <<\EOF +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +if $ac_need_defaults; then + : ${CONFIG_FILES=$config_files} + : ${CONFIG_HEADERS=$config_headers} + : ${CONFIG_LINKS=$config_links} + : ${CONFIG_COMMANDS=$config_commands} +fi + +# Trap to remove the temp files. +trap 'rm -fr $ac_cs_root*; exit 1' 1 2 15 + +EOF + +cat >>$CONFIG_STATUS <<EOF +# +# INIT-COMMANDS section. +# + +EOF + +cat >>$CONFIG_STATUS <<EOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. 
+ sed 's/%@/@@/; s/@%/@@/; s/%;t t\$/@;t t/; /@;t t\$/s/[\\\\&%]/\\\\&/g; + s/@@/%@/; s/@@/@%/; s/@;t t\$/%;t t/' >\$ac_cs_root.subs <<\\CEOF +s%@exec_prefix@%$exec_prefix%;t t +s%@prefix@%$prefix%;t t +s%@program_transform_name@%$program_transform_name%;t t +s%@bindir@%$bindir%;t t +s%@sbindir@%$sbindir%;t t +s%@libexecdir@%$libexecdir%;t t +s%@datadir@%$datadir%;t t +s%@sysconfdir@%$sysconfdir%;t t +s%@sharedstatedir@%$sharedstatedir%;t t +s%@localstatedir@%$localstatedir%;t t +s%@libdir@%$libdir%;t t +s%@includedir@%$includedir%;t t +s%@oldincludedir@%$oldincludedir%;t t +s%@infodir@%$infodir%;t t +s%@mandir@%$mandir%;t t +s%@SHELL@%$SHELL%;t t +s%@ECHO_C@%$ECHO_C%;t t +s%@ECHO_N@%$ECHO_N%;t t +s%@ECHO_T@%$ECHO_T%;t t +s%@CFLAGS@%$CFLAGS%;t t +s%@CPPFLAGS@%$CPPFLAGS%;t t +s%@CXXFLAGS@%$CXXFLAGS%;t t +s%@FFLAGS@%$FFLAGS%;t t +s%@DEFS@%$DEFS%;t t +s%@LDFLAGS@%$LDFLAGS%;t t +s%@LIBS@%$LIBS%;t t +s%@host@%$host%;t t +s%@host_alias@%$host_alias%;t t +s%@host_cpu@%$host_cpu%;t t +s%@host_vendor@%$host_vendor%;t t +s%@host_os@%$host_os%;t t +s%@target@%$target%;t t +s%@target_alias@%$target_alias%;t t +s%@target_cpu@%$target_cpu%;t t +s%@target_vendor@%$target_vendor%;t t +s%@target_os@%$target_os%;t t +s%@build@%$build%;t t +s%@build_alias@%$build_alias%;t t +s%@build_cpu@%$build_cpu%;t t +s%@build_vendor@%$build_vendor%;t t +s%@build_os@%$build_os%;t t +s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%;t t +s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%;t t +s%@INSTALL_DATA@%$INSTALL_DATA%;t t +s%@PACKAGE@%$PACKAGE%;t t +s%@VERSION@%$VERSION%;t t +s%@ACLOCAL@%$ACLOCAL%;t t +s%@AUTOCONF@%$AUTOCONF%;t t +s%@AUTOMAKE@%$AUTOMAKE%;t t +s%@AUTOHEADER@%$AUTOHEADER%;t t +s%@MAKEINFO@%$MAKEINFO%;t t +s%@AMTAR@%$AMTAR%;t t +s%@install_sh@%$install_sh%;t t +s%@AWK@%$AWK%;t t +s%@SET_MAKE@%$SET_MAKE%;t t +s%@AMDEP@%$AMDEP%;t t +s%@AMDEPBACKSLASH@%$AMDEPBACKSLASH%;t t +s%@DEPDIR@%$DEPDIR%;t t +s%@MAINTAINER_MODE_TRUE@%$MAINTAINER_MODE_TRUE%;t t +s%@MAINTAINER_MODE_FALSE@%$MAINTAINER_MODE_FALSE%;t t 
+s%@MAINT@%$MAINT%;t t +s%@WANT_MPBSD_TRUE@%$WANT_MPBSD_TRUE%;t t +s%@WANT_MPBSD_FALSE@%$WANT_MPBSD_FALSE%;t t +s%@WANT_MPFR_TRUE@%$WANT_MPFR_TRUE%;t t +s%@WANT_MPFR_FALSE@%$WANT_MPFR_FALSE%;t t +s%@CC@%$CC%;t t +s%@CCAS@%$CCAS%;t t +s%@CPP@%$CPP%;t t +s%@LN_S@%$LN_S%;t t +s%@M4@%$M4%;t t +s%@AR@%$AR%;t t +s%@CALLING_CONVENTIONS_OBJS@%$CALLING_CONVENTIONS_OBJS%;t t +s%@SPEED_CYCLECOUNTER_OBJS@%$SPEED_CYCLECOUNTER_OBJS%;t t +s%@EXEEXT@%$EXEEXT%;t t +s%@OBJEXT@%$OBJEXT%;t t +s%@RANLIB@%$RANLIB%;t t +s%@STRIP@%$STRIP%;t t +s%@LIBTOOL@%$LIBTOOL%;t t +s%@U@%$U%;t t +s%@ANSI2KNR@%$ANSI2KNR%;t t +s%@mpn_objects@%$mpn_objects%;t t +s%@mpn_objs_in_libgmp@%$mpn_objs_in_libgmp%;t t +s%@gmp_srclinks@%$gmp_srclinks%;t t +CEOF + +EOF + + cat >>$CONFIG_STATUS <<\EOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds="" + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag + else + sed "${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag + fi + if test ! -s $ac_cs_root.sfrag; then + ac_more_lines=false + rm -f $ac_cs_root.sfrag + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. 
+ (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $ac_cs_root.sfrag) >$ac_cs_root.s$ac_sed_frag + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $ac_cs_root.s$ac_sed_frag" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $ac_cs_root.s$ac_sed_frag" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +EOF +cat >>$CONFIG_STATUS <<\EOF +for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'` + ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. + + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'` + if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then + # The file is in a subdirectory. + test ! -d "$ac_dir" && mkdir "$ac_dir" + ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" + # A "../" for each directory in $ac_dir_suffix. + ac_dots=`echo "$ac_dir_suffix" | sed 's%/[^/]*%../%g'` + else + ac_dir_suffix= ac_dots= + fi + + case "$ac_given_srcdir" in + .) srcdir=. + if test -z "$ac_dots"; then top_srcdir=. + else top_srcdir=`echo $ac_dots | sed 's%/$%%'`; fi ;; + [\\/]* | ?:[\\/]* ) + srcdir="$ac_given_srcdir$ac_dir_suffix"; + top_srcdir=$ac_given_srcdir ;; + *) # Relative path. + srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" + top_srcdir="$ac_dots$ac_given_srcdir" ;; + esac + + case "$ac_given_INSTALL" in + [\\/$]* | ?:[\\/]* ) INSTALL="$ac_given_INSTALL" ;; + *) INSTALL="$ac_dots$ac_given_INSTALL" ;; + esac + + echo creating "$ac_file" + rm -f "$ac_file" + configure_input="Generated automatically from `echo $ac_file_in | + sed 's%.*/%%'` by configure." 
+ case "$ac_file" in + *[Mm]akefile*) ac_comsub="1i\\ +# $configure_input" ;; + *) ac_comsub= ;; + esac + + # Don't redirect the output to AC_FILE directly: use `mv' so that updating + # is atomic, and doesn't need trapping. + ac_file_inputs=`echo "$ac_file_in" | + sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"` + for ac_file_input in $ac_file_inputs; + do + test -f "$ac_file_input" || + { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; } + done +EOF +cat >>$CONFIG_STATUS <<EOF + sed -e "$ac_comsub +$ac_vpsub +$extrasub +EOF +cat >>$CONFIG_STATUS <<\EOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s%@configure_input@%$configure_input%;t t +s%@srcdir@%$srcdir%;t t +s%@top_srcdir@%$top_srcdir%;t t +s%@INSTALL@%$INSTALL%;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$ac_cs_root.out + mv $ac_cs_root.out $ac_file + +fi; done +rm -f $ac_cs_root.s* +EOF +cat >>$CONFIG_STATUS <<\EOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$%\1#\2' +ac_dC=' ' +ac_dD='%;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$%\1#\2define\3' +ac_uC=' ' +ac_uD='%;t' + +for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". 
+ case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'` + ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + echo creating $ac_file + + rm -f $ac_cs_root.frag $ac_cs_root.in $ac_cs_root.out + ac_file_inputs=`echo "$ac_file_in" | + sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"` + for ac_file_input in $ac_file_inputs; + do + test -f "$ac_file_input" || + { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; } + done + # Remove the trailing spaces. + sed -e 's/[ ]*$//' $ac_file_inputs >$ac_cs_root.in + +EOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +ac_cs_root=conftest +cat >$ac_cs_root.hdr <<\EOF +s/[\\&%]/\\&/g +s%[\\$`]%\\&%g +t clear +: clear +s%^[ ]*#[ ]*define[ ][ ]*\(\([^ (][^ (]*\)([^)]*)\)[ ]*\(.*\)$%${ac_dA}\2${ac_dB}\1${ac_dC}\3${ac_dD}%gp +t cleanup +s%^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp +: cleanup +EOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. Nevertheless, we may not want to +# sort them, since we want the *last* AC_DEFINE to be honored. +uniq confdefs.h | sed -n -f $ac_cs_root.hdr >conftest.defines +sed -e 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f $ac_cs_root.hdr + +# This sed command replaces #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. 
+cat >>conftest.undefs <<\EOF +s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% +EOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if egrep "^[ ]*#[ ]*define" $ac_cs_root.in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null +do + # Write a limited-size here document to $ac_cs_root.frag. + echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out + rm -f $ac_cs_root.in + mv $ac_cs_root.out $ac_cs_root.in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines +echo ' fi # egrep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.undefs >/dev/null +do + # Write a limited-size here document to $ac_cs_root.frag. 
+ echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out + rm -f $ac_cs_root.in + mv $ac_cs_root.out $ac_cs_root.in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs + +cat >>$CONFIG_STATUS <<\EOF + rm -f $ac_cs_root.frag $ac_cs_root.h + echo "/* $ac_file. Generated automatically by configure. */" >$ac_cs_root.h + cat $ac_cs_root.in >>$ac_cs_root.h + rm -f $ac_cs_root.in + if cmp -s $ac_file $ac_cs_root.h 2>/dev/null; then + echo "$ac_file is unchanged" + rm -f $ac_cs_root.h + else + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'` + if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then + # The file is in a subdirectory. + test ! -d "$ac_dir" && mkdir "$ac_dir" + fi + rm -f $ac_file + mv $ac_cs_root.h $ac_file + fi +fi; done +EOF +cat >>$CONFIG_STATUS <<\EOF + +# +# CONFIG_LINKS section. +# +srcdir=$ac_given_srcdir + +for ac_file in : $CONFIG_LINKS; do if test "x$ac_file" != x:; then + ac_dest=`echo "$ac_file" | sed 's%:.*%%'` + ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'` + + echo "copying $srcdir/$ac_source to $ac_dest" + + if test ! -r $srcdir/$ac_source; then + { echo "configure: error: $srcdir/$ac_source: File not found" 1>&2; exit 1; } + fi + rm -f $ac_dest + + # Make relative symlinks. + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dest_dir=`echo $ac_dest | sed 's%/[^/][^/]*$%%'` + if test "$ac_dest_dir" != "$ac_dest" && test "$ac_dest_dir" != .; then + # The dest file is in a subdirectory. + test ! 
-d "$ac_dest_dir" && mkdir "$ac_dest_dir" + ac_dest_dir_suffix="/`echo $ac_dest_dir|sed 's%^\./%%'`" + # A "../" for each directory in $ac_dest_dir_suffix. + ac_dots=`echo $ac_dest_dir_suffix|sed 's%/[^/]*%../%g'` + else + ac_dest_dir_suffix= ac_dots= + fi + + case "$srcdir" in + [\\/$]* | ?:[\\/]* ) ac_rel_source="$srcdir/$ac_source" ;; + *) ac_rel_source="$ac_dots$srcdir/$ac_source" ;; + esac + + # Note: Dodgy local mods to 'make things work' in an environment (cygwin) + # that supports symlinks (through silly hack) using tools that don't + # understand them (mingw). The end sometimes justifies the means, son. + # + # Make a symlink if possible; otherwise try a hard link. + #if ln -s $ac_rel_source $ac_dest 2>/dev/null || + # ln $srcdir/$ac_source $ac_dest; then : + # + # Note: If the -p offends your 'cp', just drop it; no harm done, you'll just + # get more recompilations. + # On failure, report the copy as source -> destination, like the cp itself. + if cp -p $srcdir/$ac_source $ac_dest; then : + else + { echo "configure: error: cannot copy $srcdir/$ac_source to $ac_dest" 1>&2; exit 1; } + fi +fi; done +EOF +cat >>$CONFIG_STATUS <<\EOF + +# +# CONFIG_COMMANDS section. +# +for ac_file in .. $CONFIG_COMMANDS; do if test "x$ac_file" != x..; then + ac_dest=`echo "$ac_file" | sed 's%:.*%%'` + ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'` + + case "$ac_dest" in + default-1 ) test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h ;; + esac +fi;done +EOF + +cat >>$CONFIG_STATUS <<\EOF + +exit 0 +EOF +chmod +x $CONFIG_STATUS +rm -fr confdefs* $ac_clean_files +trap 'exit 1' 1 2 15 + +test "$no_create" = yes || $SHELL $CONFIG_STATUS || exit 1 diff --git a/rts/gmp/configure.in b/rts/gmp/configure.in new file mode 100644 index 0000000000..18f610fe29 --- /dev/null +++ b/rts/gmp/configure.in @@ -0,0 +1,950 @@ +dnl Process this file with autoconf to produce a configure script. + + +dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +AC_REVISION($Revision: 1.8 $)dnl +AC_PREREQ(2.14)dnl +AC_INIT(gmp-impl.h) + +dnl Check system. +AC_CANONICAL_SYSTEM + +dnl Automake +AM_INIT_AUTOMAKE(gmp, GMP_VERSION) +AM_CONFIG_HEADER(config.h:config.in) +AM_MAINTAINER_MODE + +dnl GMP specific +GMP_INIT(config.m4) + + +AC_ARG_ENABLE(assert, +AC_HELP_STRING([--enable-assert],[enable ASSERT checking [default=no]]), +[case "${enableval}" in +yes|no) ;; +*) AC_MSG_ERROR([bad value ${enableval} for --enable-assert, need yes or no]) ;; +esac], +[enable_assert=no]) + +if test "$enable_assert" = "yes"; then + AC_DEFINE(WANT_ASSERT,1, + [./configure --enable-assert option, to enable some ASSERT()s]) +fi + + +AC_ARG_ENABLE(alloca, +AC_HELP_STRING([--enable-alloca],[use alloca for temp space [default=yes]]), +[case "${enableval}" in +yes|no) ;; +*) AC_MSG_ERROR([bad value ${enableval} for --enable-alloca, need yes or no]) ;; +esac], +[enable_alloca=yes]) + +if test "$enable_alloca" = "no"; then + AC_DEFINE(USE_STACK_ALLOC,1, + [./configure --disable-alloca option, to use stack-alloc.c, not alloca]) +fi + + +AC_ARG_ENABLE(fft, +AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [default=no]]), +[case 
"${enableval}" in +yes|no) ;; +*) AC_MSG_ERROR([bad value ${enableval} for --enable-fft, need yes or no]) ;; +esac], +[enable_fft=no]) + +if test "$enable_fft" = "yes"; then + AC_DEFINE(WANT_FFT,1, + [./configure --enable-fft option, to enable FFTs for multiplication]) +fi + + +AC_ARG_ENABLE(mpbsd, +AC_HELP_STRING([--enable-mpbsd],[build Berkley MP compatibility library [default=no]]), +[case "${enableval}" in +yes|no) ;; +*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpbsd, need yes or no]) ;; +esac], +[enable_mpbsd=no]) +AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes") + + +AC_ARG_ENABLE(mpfr, +AC_HELP_STRING([--enable-mpfr],[build MPFR [default=no]]), +[case "${enableval}" in +yes|no) ;; +*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpfr, need yes or no]) ;; +esac], +[enable_mpfr=no]) +AM_CONDITIONAL(WANT_MPFR, test "$enable_mpfr" = "yes") + + +dnl Switch on OS and determine what compiler to use. +dnl +dnl os_64bit Set to "yes" if OS is 64-bit capable. +dnl FIXME: Rename to `check_64bit_compiler'! +dnl cclist List of compilers, best first. +dnl gmp_cflags_{cc} Flags for compiler named {cc}. +dnl gmp_cflags64_{cc} Flags for compiler named {cc} for 64-bit code. +dnl gmp_optcflags_{cc} Optional compiler flags. +dnl gmp_xoptcflags_{cc} Exclusive optional compiler flags. +dnl +os_64bit="no" +cclist="gcc cc" # FIXME: Prefer c89 to cc. +gmp_cflags_gcc="-g -O2" +gmp_cflags64_gcc="-g -O2" +gmp_cflags_cc="-g" +gmp_cflags64_cc="-g" + +case "$target" in + # Alpha + alpha*-cray-unicos*) + # Don't perform any assembly syntax tests on this beast. + gmp_no_asm_syntax_testing=yes + cclist=cc + gmp_cflags_cc="$gmp_cflags_cc -O" + ;; + alpha*-*-osf*) + flavour=`echo $target_cpu | sed 's/^alpha//g'` + if test -n "$flavour"; then + case $flavour in # compilers don't seem to understand `ev67' and such. + ev6? 
| ev7*) flavour=ev6;; + esac + gmp_optcflags_gcc="-mcpu=$flavour" + # FIXME: We shouldn't fail fatally if none of these work, but that's + # how xoptcflags work and we don't have any other mechanism right now. + # Why do we need this here and not for alpha*-*-* below? + gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}" + gmp_optcflags_cc="-arch $flavour -tune $flavour" + fi + ;; + alpha*-*-*) + cclist="gcc" + flavour=`echo $target_cpu | sed 's/^alpha//g'` + if test -n "$flavour"; then + case $flavour in + ev6? | ev7*) flavour=ev6;; + esac + gmp_optcflags_gcc="-mcpu=$flavour" + fi + ;; + # Cray vector machines. This must come after alpha* so that we can + # recognize present and future vector processors with a wildcard. + *-cray-unicos*) + # Don't perform any assembly syntax tests on this beast. + gmp_no_asm_syntax_testing=yes + cclist=cc + # Don't inherit default gmp_cflags_cc value; it comes with -g which + # disables all optimization on Cray vector systems + gmp_cflags_cc="-O" + ;; + + # AMD and Intel x86 configurations + [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*]) + # Rumour has it -O2 used to give worse register allocation than just -O. + gmp_cflags_gcc="-g -O -fomit-frame-pointer" + + case "${target}" in + i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";; + i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";; + i586*-*-* | pentium-*-* | pentiummmx-*-*) + gmp_optcflags_gcc="-mcpu=pentium -march=pentium";; + + # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4) + # tickles a bug in gcc 2.95.2 (believed fixed in 2.96). + [i686*-*-* | pentiumpro-*-* | pentium[23]-*-*]) + gmp_optcflags_gcc="-mcpu=pentiumpro";; + + k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";; + + # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6 + # cycle mul, 40 cycle div, pairable adc, ... + # FIXME: Change this when gcc gets something specific for Athlon. + # -march=pentiumpro not used, per i686 above. 
+ athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";; + esac + ;; + + # Sparc + [ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9]]) + os_64bit=yes + gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus" + gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8" + gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4" + ;; + sparc64-*-linux*) + # Need to think more about the options passed here. This isn't good for + # some sparc64 linux distros, since we end up not optimizing when all the + # options below fail. NOTE(review): the other sparc arms set gmp_xoptcflags_gcc; gmp_xoptflags_gcc below may be a typo -- confirm. + os_64bit=yes + gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9" + gmp_cflags_gcc="$gmp_cflags_gcc -m32" + gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis" + ;; + ultrasparc*-*-* | sparcv9-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus" + gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + ;; + sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*) + gmp_cflags_gcc="$gmp_cflags_gcc" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4" + ;; + sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD + cclist="gcc acc cc" + gmp_cflags_gcc="$gmp_cflags_gcc" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_acc="-g -O2 -cg92" + gmp_cflags_cc="-O2" # FIXME: Flag for v8? + ;; + supersparc*-*-solaris2.*) + gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC" + ;; + supersparc*-*-*) # SunOS, Linux, *BSD + cclist="gcc acc cc" + gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC" + gmp_xoptcflags_gcc="-mcpu=v8 -mv8" + gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC" + gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8? 
+ ;; + *sparc*-*-*) + cclist="gcc acc cc" + gmp_cflags_acc="-g -O2" + gmp_cflags_cc="-g -O2" + ;; + + # POWER/PowerPC + powerpc64-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64" + gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3" + ;; + powerpc*-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc" + gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2" + ;; + power-*-aix*) + cclist="gcc xlc" + gmp_cflags_gcc="$gmp_cflags_gcc -mpower" + gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2" + ;; + powerpc64*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64" + AC_DEFINE(_LONG_LONG_LIMB) dnl FIXME: Remove. + ;; + powerpc-apple-darwin* | powerpc-apple-macosx*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp" + ;; + powerpc*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc" + ;; + + # MIPS + mips-sgi-irix6.*) + os_64bit=yes + gmp_cflags64_gcc="-g -O2 -mabi=n32" + gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32" + ;; + + # Motorola 88k and 68k families + m88110*-*-*) + gmp_cflags_gcc="-g -O -m88110" dnl FIXME: Use `-O2'? + ;; + m68*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer" + ;; + + # HP + hppa1.0*-*-*) + cclist="gcc c89 cc" + gmp_cflags_c89="$gmp_cflags_cc +O2" + gmp_cflags_cc="$gmp_cflags_cc +O2" + ;; + hppa2.0w*-*-*) + cclist="c89 cc" + gmp_cflags_c89="+DD64 +O3" + gmp_cflags_cc="+DD64 +O3" + ;; + hppa2.0*-*-*) + os_64bit=yes + cclist="gcc c89 cc" + gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB" + # +O2 to cc triggers bug in mpz/powm.c (1.4). NOTE(review): -mWHAT above looks like a placeholder rather than a real gcc option -- confirm. + gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB" + gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB" + gmp_cflags_c89="$gmp_cflags_cc +O2" + gmp_cflags_cc="$gmp_cflags_cc +O2" + ;; + + # VAX + vax*-*-*) + gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer" + ;; + + # Fujitsu + [f30[01]-fujitsu-sysv*]) + cclist="gcc vcc" + gmp_cflags_vcc="-g" # FIXME: flags for vcc? 
+ ;; +esac + +case "${target}" in + *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";; +esac + +dnl Check for programs needed by macros for finding compiler. +dnl More programs are checked for below, when a compiler is found. +AC_PROG_NM dnl Macro from Libtool. +# nm on 64-bit AIX needs to know the object file format +case "$target" in + powerpc64*-*-aix*) + NM="$NM -X 64" + ;; +esac + +# Save CFLAGS given on command line. +gmp_user_CFLAGS="$CFLAGS" + +if test -z "$CC"; then + # Find compiler. + GMP_PROG_CC_FIND($cclist, $os_64bit) + + # If 64-bit OS and we have a 64-bit compiler, use it. + if test -n "$os_64bit" && test -n "$CC64"; then + CC=$CC64 + CFLAGS=$CFLAGS64 + else + eval CFLAGS=\$gmp_cflags_$CC + fi + + # Try compiler flags that may work with only some compiler versions. + # gmp_optcflags: All or nothing. + eval optcflags=\$gmp_optcflags_$CC + if test -n "$optcflags"; then + CFLAGS_save="$CFLAGS" + CFLAGS="$CFLAGS $optcflags" + AC_MSG_CHECKING([whether $CC accepts $optcflags]) + AC_LANG_C + AC_TRY_COMPILER([int main(){return(0);}], optok, cross) + if test "$optok" = "yes"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + CFLAGS="$CFLAGS_save" + fi + fi + # gmp_xoptcflags: First is best, one has to work. + eval xoptcflags=\$gmp_xoptcflags_$CC + if test -n "$xoptcflags"; then + gmp_found="no" + for xopt in $xoptcflags; do + CFLAGS_save="$CFLAGS" + CFLAGS="$CFLAGS $xopt" + AC_MSG_CHECKING([whether $CC accepts $xopt]) + AC_LANG_C + AC_TRY_COMPILER([int main(){return(0);}], optok, cross) + if test "$optok" = "yes"; then + AC_MSG_RESULT([yes]) + gmp_found="yes" + break + else + AC_MSG_RESULT([no]) + CFLAGS="$CFLAGS_save" + fi + done + if test "$gmp_found" = "no"; then + echo ["$0: fatal: need a compiler that understands one of $xoptcflags"] + exit 1 + fi + fi +fi + +# Restore CFLAGS given on command line. 
+# FIXME: We've run through quite some unnecessary code looking for a +# nice compiler and working flags for it, just to spoil that with user +# supplied flags. +test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS" + +# Select chosen compiler. +GMP_PROG_CC_SELECT + +# How to assemble. +CCAS="$CC -c" +AC_SUBST(CCAS) + +dnl Checks for programs. +dnl -------------------- +AC_PROG_CPP +AC_PROG_INSTALL +AC_PROG_LN_S +GMP_PROG_M4 +AC_CHECK_PROG(AR, ar, ar) +# ar on AIX needs to know the object file format +case "$target" in + powerpc64*-*-aix*) + AR="$AR -X 64" + ;; +esac +dnl FIXME: Find good ld? /usr/ucb/ld on Solaris won't work. + +dnl Checks for assembly syntax. +if test "$gmp_no_asm_syntax_testing" != "yes"; then + GMP_CHECK_ASM_TEXT + GMP_CHECK_ASM_DATA + GMP_CHECK_ASM_GLOBL + GMP_CHECK_ASM_LABEL_SUFFIX + GMP_CHECK_ASM_TYPE + GMP_CHECK_ASM_SIZE + GMP_CHECK_ASM_LSYM_PREFIX + GMP_CHECK_ASM_W32 + GMP_CHECK_ASM_UNDERSCORE(underscore=yes, underscore=no) + GMP_CHECK_ASM_ALIGN_LOG(asm_align=log, asm_align=nolog) +fi + +dnl FIXME: Check for FPU and set `floating_point' appropriately. + +dnl ======================================== +dnl Configuring mpn. +dnl ---------------------------------------- +dnl Set the following target specific variables: +dnl path where to search for source files +dnl family processor family (Needed for building +dnl asm-syntax.h for now. FIXME: Remove.) 
+dnl extra_functions extra functions + +family=generic + +case ${target} in + arm*-*-*) + path="arm" + ;; + [sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*]) + if test -n "$CC64" + then path="sparc64" + else path="sparc32/v9 sparc32/v8 sparc32" + fi + ;; + sparc64-*-linux*) + if test -n "$CC64" + then path="sparc64" + else path="sparc32/v9 sparc32/v8 sparc32" + fi + ;; + sparcv8*-*-* | microsparc*-*-*) + path="sparc32/v8 sparc32" + if test x${floating_point} = xno + then extra_functions="udiv_nfp" + else extra_functions="udiv_fp" + fi + ;; + sparcv9*-*-* | ultrasparc*-*-*) + path="sparc32/v9 sparc32/v8 sparc32" + extra_functions="udiv_fp" + ;; + supersparc*-*-*) + path="sparc32/v8/supersparc sparc32/v8 sparc32" + extra_functions="udiv" + ;; + sparc*-*-*) path="sparc32" + if test x${floating_point} = xno + then extra_functions="udiv_nfp" + else extra_functions="udiv_fp" + fi + ;; + hppa7000*-*-*) + path="hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd" + ;; + hppa1.0*-*-*) + path="hppa" + extra_functions="udiv_qrnnd" + ;; + hppa2.0w-*-*) + path="pa64w" + extra_functions="umul_ppmm udiv_qrnnd" + ;; + hppa2.0*-*-*) + if test -n "$CC64"; then + path="pa64" + extra_functions="umul_ppmm udiv_qrnnd" + # We need to use the system compiler, or actually the system assembler, + # since GAS has not been ported to understand the 2.0 instructions. + CCAS="$CC64 -c" + else + # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa" + path="hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd" + fi + ;; + hppa*-*-*) #assume pa7100 + path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa" + extra_functions="udiv_qrnnd";; + [f30[01]-fujitsu-sysv*]) + path=fujitsu;; + alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";; + alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";; + alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";; + # Cray vector machines. 
This must come after alpha* so that we can + # recognize present and future vector processors with a wildcard. + *-cray-unicos*) + path="cray" + extra_functions="mulww";; + am29000*-*-*) path="a29k";; + a29k*-*-*) path="a29k";; + + # AMD and Intel x86 configurations + + [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*]) + gmp_m4postinc="x86/x86-defs.m4" + extra_functions="udiv umul" + CALLING_CONVENTIONS_OBJS="x86call.o x86check.o" + + GMP_CHECK_ASM_SHLDL_CL( + [GMP_DEFINE(WANT_SHLDL_CL,1)], + [GMP_DEFINE(WANT_SHLDL_CL,0)]) + GMP_CHECK_ASM_ALIGN_FILL_0x90 + + # the CPUs below wanting to know about mmx + case ${target} in + [pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*]) + GMP_CHECK_ASM_MMX(tmp_mmx=yes, tmp_mmx=no) + ;; + esac + + # default for anything not otherwise mentioned + path="x86" + + case ${target} in + [i[34]86*-*-*]) + path="x86" + ;; + k5*-*-*) + # don't know what best suits k5 + path="x86" + ;; + i586*-*-* | pentium-*-*) + path="x86/pentium x86" + ;; + pentiummmx-*-*) + path="x86/pentium x86" + if test "$tmp_mmx" = yes; then + path="x86/pentium/mmx $path" + fi + ;; + i686*-*-* | pentiumpro-*-*) + path="x86/p6 x86" + ;; + pentium2-*-*) + path="x86/p6 x86" + # The pentium/mmx lshift and rshift are good on p6 and can be used + # until there's something specific for p6. + if test "$tmp_mmx" = yes; then + path="x86/p6/mmx x86/pentium/mmx $path" + fi + ;; + pentium3-*-*) + path="x86/p6 x86" + # The pentium/mmx lshift and rshift are good on p6 and can be used + # until there's something specific for p6. 
+ if test "$tmp_mmx" = yes; then + path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path" + fi + ;; + [k6[23]*-*-*]) + path="x86/k6 x86" + if test "$tmp_mmx" = yes; then + path="x86/k6/k62mmx x86/k6/mmx $path" + fi + ;; + k6*-*-*) + path="x86/k6 x86" + if test "$tmp_mmx" = yes; then + path="x86/k6/mmx $path" + fi + ;; + athlon-*-*) + path="x86/k7 x86" + if test "$tmp_mmx" = yes; then + path="x86/k7/mmx $path" + fi + ;; + esac + ;; + + + i960*-*-*) path="i960";; + + ia64*-*-*) path="ia64";; + +# Motorola 68k configurations. Let m68k mean 68020-68040. + [m680[234]0*-*-* | m68k*-*-* | \ + m68*-next-nextstep*]) # Nexts are at least '020 + path="m68k/mc68020 m68k" + family=m68k + ;; + m68000*-*-*) + path="m68k" + family=m68k + ;; + + m88k*-*-* | m88k*-*-*) path="m88k";; + m88110*-*-*) path="m88k/mc88110 m88k";; + ns32k*-*-*) path="ns32k";; + + pyramid-*-*) path="pyr";; + + ppc601-*-*) path="power powerpc32";; + powerpc64*-*-*) path="powerpc64";; + powerpc*-*-*) path="powerpc32";; + rs6000-*-* | power-*-* | power2-*-*) + path="power" + extra_functions="udiv_w_sdiv" + ;; + + sh-*-*) path="sh";; + sh2-*-*) path="sh/sh2 sh";; + + [mips[34]*-*-*]) path="mips3";; + mips*-*-irix6*) path="mips3";; + mips*-*-*) path="mips2";; + + vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";; + + z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";; + z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";; + + clipper*-*-*) path="clipper";; +esac + +AC_SUBST(CALLING_CONVENTIONS_OBJS) +if test -n "$CALLING_CONVENTIONS_OBJS"; then + AC_DEFINE(HAVE_CALLING_CONVENTIONS,1, + [Define if mpn/tests has calling conventions checking for the CPU]) +fi + + +case ${target} in + [i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*]) + # rdtsc is in pentium and up, not in i386 and i486 + SPEED_CYCLECOUNTER_OBJS=pentium.lo + ;; + alpha*-*-*) + SPEED_CYCLECOUNTER_OBJS=alpha.lo + ;; + sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*) + SPEED_CYCLECOUNTER_OBJS=sparcv9.lo + ;; + hppa2*-*-*) + 
SPEED_CYCLECOUNTER_OBJS=hppa2.lo + ;; + hppa*-*-*) + SPEED_CYCLECOUNTER_OBJS=hppa.lo + ;; +esac + +AC_SUBST(SPEED_CYCLECOUNTER_OBJS) + +if test -n "$SPEED_CYCLECOUNTER_OBJS" +then + AC_DEFINE(HAVE_SPEED_CYCLECOUNTER, 1, + [Define if a speed_cyclecounter exists (for the tune programs)]) +fi + + +dnl Extensions for executable and object files. +dnl ------------------------------------------- +AC_EXEEXT +AC_OBJEXT + +dnl Use Libtool. +dnl ------------ +dnl FIXME: Shared libs seem to fail on aix4.3. +dnl FIXME: Should invoke [AC_DISABLE_SHARED], but m4 recurses to death. +case "$target" in + [*-*-aix4.[3-9]*]) enable_shared=no ;; +esac +AC_PROG_LIBTOOL + +dnl Checks for libraries. +dnl --------------------- +AC_CHECK_DECLS((optarg)) + +dnl Checks for header files. +dnl ------------------------ +AC_HEADER_STDC +AC_CHECK_HEADERS(getopt.h unistd.h sys/sysctl.h sys/time.h) + +dnl Checks for typedefs, structures, and compiler characteristics. +dnl -------------------------------------------------------------- +AC_CHECK_TYPES((void)) +AC_C_STRINGIZE + +dnl Checks for library functions. +dnl ----------------------------- +dnl Most of these are only for the benefit of supplementary programs. The +dnl library itself doesn't use anything weird. +dnl AC_FUNC_MEMCMP +dnl AC_TYPE_SIGNAL +dnl AC_CHECK_FUNCS(strtol) +AC_CHECK_FUNCS(getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname) + +dnl Trick automake into thinking we've run AM_C_PROTOTYPES which it wants +dnl for ansi2knr, and instead use our own test. (It's only a warning +dnl automake prints, but it's good to suppress it.) +ifelse(0,1,[ +AM_C_PROTOTYPES +]) +GMP_C_ANSI2KNR + + +dnl Set `syntax' to one of <blank>, "mit", "elf", "aix", "macho". +syntax= +# For now, we use the old switch for setting syntax. +# FIXME: Remove when conversion to .asm is completed. 
+changequote(,)dnl +case "${target}" in + m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \ + m68k-next-nextstep* | \ + m68000*-*-*) + syntax=mit + ;; + m680[234]0*-*-linux* | m68k*-*-linux*) + syntax=elf + ;; + m680[234]0*-*-* | m68k*-*-*) + syntax=mit + ;; +esac +changequote([,])dnl + +dnl ---------------------------------------- +# Now build an asm-syntax.h file for targets that include that from the +# assembly files. +# FIXME: Remove when conversion to .asm is completed. +case "${family}-${underscore}-${asm_align}-${syntax}" in + m68k-yes-log-mit) + echo '#define MIT_SYNTAX' >asm-syntax.h + cat $srcdir/mpn/underscore.h >>asm-syntax.h + echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;; + m68k-no-nolog-elf) + echo '#define ELF_SYNTAX' >asm-syntax.h + echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h + echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;; +esac + + +# The pattern here tests for an absolute path the same way as +# _AC_OUTPUT_FILES in autoconf acgeneral.m4. +GMP_DEFINE_RAW(["dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir"]) +case "$srcdir" in +[[\\/]]* | ?:[[\\/]]* ) + GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)"]) ;; +*) GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)"]) ;; +esac + +GMP_DEFINE_RAW(["include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')"], POST) + +# Must be after asm-defs.m4 +GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')", POST) + + +dnl config.m4 post-includes +dnl ----------------------- +dnl (Note x86 post include set with $path above.) +changequote(,)dnl +case "$target" in + alpha*-cray-unicos*) + gmp_m4postinc="alpha/unicos.m4" + ;; + alpha*-*-*) + gmp_m4postinc="alpha/default.m4" + ;; + power*-*-*) + case "$target" in + *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*) + ;; # these use non-conventional assembly syntax. 
+ powerpc64-*-aix*) + gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4" + ;; + *-*-aix*) + gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4" + ;; + *) + gmp_m4postinc="powerpc32/regmap.m4" + ;; + esac + ;; +esac +changequote([, ])dnl + +for tmp_f in $gmp_m4postinc; do + GMP_DEFINE_RAW(["include_mpn(\`$tmp_f')"], POST) +done + + +# Set up `gmp_links'. It's a list of link:file pairs that configure will +# process to create link -> file. +gmp_links= + +# If the user specified `MPN_PATH', use that instead of the path we've +# come up with. +if test -z "$MPN_PATH"; then + path="$path generic" +else + path="$MPN_PATH" +fi + +# Pick the correct source files in $path and link them to mpn/. +# $gmp_mpn_functions lists all functions we need. +# +# The rule is to find a file with the function name and a .asm, .S, +# .s, or .c extension. Certain multi-function files with special names +# can provide some functions too. (mpn/Makefile.am passes +# -DOPERATION_<func> to get them to generate the right code.) + +# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since +# there's some versions of those files which should be checked for bit +# rot first. Put them in $extra_functions for each target for now, +# change to standard optionals when all are ready. + +# Note: The following lines defining $gmp_mpn_functions_optional +# and $gmp_mpn_functions are parsed by the "macos/configure" +# Perl script. 
So if you change the lines in a major way +# make sure to run and examine the output from +# +# % (cd macos; perl configure) + +gmp_mpn_functions_optional="copyi copyd com_n \ + and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n" + +gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \ + submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \ + mod_1 mod_1_rs pre_mod_1 dump \ + mul mul_fft mul_n mul_basecase sqr_basecase random \ + random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \ + bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \ + $gmp_mpn_functions_optional" + +# the list of all object files used by mpn/Makefile.in and the +# top-level Makefile.in, respectively +mpn_objects= +mpn_objs_in_libgmp="mpn/mp_bases.lo" + +for tmp_fn in ${gmp_mpn_functions} ; do + [rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm] + + # functions that can be provided by multi-function files + tmp_mulfunc= + case $tmp_fn in + add_n|sub_n) tmp_mulfunc="aors_n" ;; + addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;; + popcount|hamdist) tmp_mulfunc="popham" ;; + and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n) + tmp_mulfunc="logops_n" ;; + esac + + found=no + for tmp_dir in $path; do + for tmp_base in $tmp_fn $tmp_mulfunc; do + for tmp_ext in asm S s c; do + tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext + if test -f $tmp_file; then + found=yes + + mpn_objects="$mpn_objects ${tmp_fn}.lo" + mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo" + gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext" + + # duplicate AC_DEFINEs are harmless, so it doesn't matter + # that multi-function files get grepped here repeatedly + gmp_ep=["` + sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ; + sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file + `"] + for gmp_tmp in $gmp_ep; do + AC_DEFINE_UNQUOTED(HAVE_NATIVE_${gmp_tmp}) + done + + break + fi + done + if test $found = yes; then break ; fi + done + if 
test $found = yes; then break ; fi + done + + if test $found = no; then + for tmp_optional in $gmp_mpn_functions_optional; do + if test $tmp_optional = $tmp_fn; then + found=yes + fi + done + if test $found = no; then + AC_MSG_ERROR([no version of $tmp_fn found in path: $path]) + fi + fi +done + +# Create link for gmp-mparam.h. +for tmp_dir in $path ; do + rm -f gmp-mparam.h + if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then + gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h" + + # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4. + # Some versions of sqr_basecase.asm use this. + tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`" + if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then + GMP_DEFINE_RAW(["define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"]) + fi + + break + fi +done + +# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES. +gmp_srclinks= +for f in $gmp_links; do + gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`" +done + +AC_SUBST(mpn_objects) +AC_SUBST(mpn_objs_in_libgmp) +AC_SUBST(gmp_srclinks) + +dnl ---------------------------------------- +dnl Make links. +AC_CONFIG_LINKS($gmp_links) + +dnl Create config.m4. +GMP_FINISH + +dnl Create Makefiles +dnl FIXME: Upcoming version of autoconf/automake may not like broken lines. +AC_OUTPUT(Makefile mpz/Makefile mpn/Makefile) diff --git a/rts/gmp/depcomp b/rts/gmp/depcomp new file mode 100644 index 0000000000..7906096738 --- /dev/null +++ b/rts/gmp/depcomp @@ -0,0 +1,269 @@ +#! /bin/sh + +# depcomp - compile a program generating dependencies as side-effects +# Copyright (C) 1999 Free Software Foundation, Inc. 
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>. + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi +# `libtool' can also be set to `yes' or `no'. + +depfile=${depfile-`echo "$object" | sed 's,\([^/]*\)$,.deps/\1,;s/\.\([^.]*\)$/.P\1/'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. 
+## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + if "$@" -Wp,"$gccflag$tmpdepfile"; then : + else + stat=$? + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +dashmd) + # The Java front end to gcc doesn't run cpp, so we can't use the -Wp + # trick. Instead we must use -M and then rename the resulting .d + # file. This is also the case for older versions of gcc, which + # don't implement -Wp. + if "$@" -MD; then : + else + stat=$? + rm -f FIXME + exit $stat + fi + FIXME: rewrite the file + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wc,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? 
+ if test $stat -eq 0; then : + else + # Exit with the compiler's status saved in $stat above. Re-reading $? + # at this point would yield the status of the test command instead. + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + test -z "$dashmflag" && dashmflag=-M + ( IFS=" " + case " $* " in + *" --mode=compile "*) # this is libtool, let us make it quiet + for arg + do # cycle over the arguments + case "$arg" in + "--mode=compile") + # insert --quiet before "--mode=compile" + set fnord "$@" --quiet + shift # fnord + ;; + esac + set fnord "$@" "$arg" + shift # fnord + shift # "$arg" + done + ;; + esac + "$@" $dashmflag | sed 's:^[^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + ) & + proc=$! + "$@" + stat=$? + wait "$proc" + if test "$stat" != 0; then exit $stat; fi + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. 
+ exit 1 + ;; + +makedepend) + # X makedepend + ( + shift + cleared=no + for arg in "$@"; do + case $cleared in no) + set ""; shift + cleared=yes + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift;; + -*) + ;; + *) + set fnord "$@" "$arg"; shift;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} 2>/dev/null -o"$obj_suffix" -f"$tmpdepfile" "$@" + ) & + proc=$! + "$@" + stat=$? + wait "$proc" + if test "$stat" != 0; then exit $stat; fi + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Drop the first two lines of the makedepend output. sed is used because + # the obsolete "tail +3" form is not accepted by POSIX-conforming tail. + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + ( IFS=" " + case " $* " in + *" --mode=compile "*) + for arg + do # cycle over the arguments + case "$arg" in + "--mode=compile") + # insert --quiet before "--mode=compile" + set fnord "$@" --quiet + shift # fnord + ;; + esac + set fnord "$@" "$arg" + shift # fnord + shift # "$arg" + done + ;; + esac + "$@" -E | + sed -n '/^# [0-9][0-9]* "\([^"]*\)"/ s::'"$object"'\: \1:p' > "$tmpdepfile" + ) & + proc=$! + "$@" + stat=$? 
+ wait "$proc" + if test "$stat" != 0; then exit $stat; fi + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed < "$tmpdepfile" -e 's/^[^:]*: //' -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 diff --git a/rts/gmp/errno.c b/rts/gmp/errno.c new file mode 100644 index 0000000000..7dd223c19c --- /dev/null +++ b/rts/gmp/errno.c @@ -0,0 +1,26 @@ +/* gmp_errno -- The largest and most complex file in GMP. + +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#include "gmp.h" +#include "gmp-impl.h" + +int gmp_errno = 0; diff --git a/rts/gmp/extract-dbl.c b/rts/gmp/extract-dbl.c new file mode 100644 index 0000000000..2d70d9a3b2 --- /dev/null +++ b/rts/gmp/extract-dbl.c @@ -0,0 +1,187 @@ +/* __gmp_extract_double -- convert from double to array of mp_limb_t. + +Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#ifdef XDEBUG +#undef _GMP_IEEE_FLOATS +#endif + +#ifndef _GMP_IEEE_FLOATS +#define _GMP_IEEE_FLOATS 0 +#endif + +/* Extract a non-negative double in d. */ + +int +#if __STDC__ +__gmp_extract_double (mp_ptr rp, double d) +#else +__gmp_extract_double (rp, d) + mp_ptr rp; + double d; +#endif +{ + long exp; + unsigned sc; + mp_limb_t manh, manl; + + /* BUGS + + 1. Should handle Inf and NaN in IEEE specific code. + 2. Handle Inf and NaN also in default code, to avoid hangs. + 3. Generalize to handle all BITS_PER_MP_LIMB >= 32. + 4. This lits is incomplete and misspelled. + */ + + if (d == 0.0) + { + rp[0] = 0; + rp[1] = 0; +#if BITS_PER_MP_LIMB == 32 + rp[2] = 0; +#endif + return 0; + } + +#if _GMP_IEEE_FLOATS + { +#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8 + /* Work around alpha-specific bug in GCC 2.8.x. */ + volatile +#endif + union ieee_double_extract x; + x.d = d; + exp = x.s.exp; +#if BITS_PER_MP_LIMB == 64 + manl = (((mp_limb_t) 1 << 63) + | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11)); + if (exp == 0) + { + /* Denormalized number. Don't try to be clever about this, + since it is not an important case to make fast. 
*/ + exp = 1; + do + { + manl = manl << 1; + exp--; + } + while ((mp_limb_signed_t) manl >= 0); + } +#else + manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21); + manl = x.s.manl << 11; + if (exp == 0) + { + /* Denormalized number. Don't try to be clever about this, + since it is not an important case to make fast. */ + exp = 1; + do + { + manh = (manh << 1) | (manl >> 31); + manl = manl << 1; + exp--; + } + while ((mp_limb_signed_t) manh >= 0); + } +#endif + exp -= 1022; /* Remove IEEE bias. */ + } +#else + { + /* Unknown (or known to be non-IEEE) double format. */ + exp = 0; + if (d >= 1.0) + { + if (d * 0.5 == d) + abort (); + + while (d >= 32768.0) + { + d *= (1.0 / 65536.0); + exp += 16; + } + while (d >= 1.0) + { + d *= 0.5; + exp += 1; + } + } + else if (d < 0.5) + { + while (d < (1.0 / 65536.0)) + { + d *= 65536.0; + exp -= 16; + } + while (d < 0.5) + { + d *= 2.0; + exp -= 1; + } + } + + d *= MP_BASE_AS_DOUBLE; +#if BITS_PER_MP_LIMB == 64 + manl = d; +#else + manh = d; + manl = (d - manh) * MP_BASE_AS_DOUBLE; +#endif + } +#endif + + sc = (unsigned) exp % BITS_PER_MP_LIMB; + + /* We add something here to get rounding right. */ + exp = (exp + 2048) / BITS_PER_MP_LIMB - 2048 / BITS_PER_MP_LIMB + 1; + +#if BITS_PER_MP_LIMB == 64 + if (sc != 0) + { + rp[1] = manl >> (BITS_PER_MP_LIMB - sc); + rp[0] = manl << sc; + } + else + { + rp[1] = manl; + rp[0] = 0; + exp--; + } +#else + if (sc != 0) + { + rp[2] = manh >> (BITS_PER_MP_LIMB - sc); + rp[1] = (manl >> (BITS_PER_MP_LIMB - sc)) | (manh << sc); + rp[0] = manl << sc; + } + else + { + rp[2] = manh; + rp[1] = manl; + rp[0] = 0; + exp--; + } +#endif + + return exp; +} diff --git a/rts/gmp/gmp-impl.h b/rts/gmp/gmp-impl.h new file mode 100644 index 0000000000..3c7ac26e7d --- /dev/null +++ b/rts/gmp/gmp-impl.h @@ -0,0 +1,1072 @@ +/* Include file for internal GNU MP types and definitions. 
+ + THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO + BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "config.h" +#include "gmp-mparam.h" +/* #include "longlong.h" */ + +/* When using gcc, make sure to use its builtin alloca. */ +#if ! defined (alloca) && defined (__GNUC__) +#define alloca __builtin_alloca +#define HAVE_ALLOCA 1 +#endif + +/* When using cc, do whatever necessary to allow use of alloca. For many + machines, this means including alloca.h. IBM's compilers need a #pragma + in "each module that needs to use alloca". */ +#if ! defined (alloca) +/* We need lots of variants for MIPS, to cover all versions and perversions + of OSes for MIPS. 
*/ +#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \ + || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \ + || defined (__alpha) || defined (__sparc) || defined (sparc) \ + || defined (__ksr__) +#include <alloca.h> +#define HAVE_ALLOCA +#endif +#if defined (_IBMR2) +#pragma alloca +#define HAVE_ALLOCA +#endif +#if defined (__DECC) +#define alloca(x) __ALLOCA(x) +#define HAVE_ALLOCA +#endif +#endif + +#if defined (alloca) +# ifndef HAVE_ALLOCA +#define HAVE_ALLOCA +# endif +#endif + +#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC +#include "stack-alloc.h" +#else +#define TMP_DECL(m) +#define TMP_ALLOC(x) alloca(x) +#define TMP_MARK(m) +#define TMP_FREE(m) +#endif + +/* Allocating various types. */ +#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type))) +#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t) +#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr) + + +#if ! defined (__GNUC__) /* FIXME: Test for C++ compilers here, + __DECC understands __inline */ +#define inline /* Empty */ +#endif + +/* Absolute value, minimum, maximum and array element count. Every macro + argument is parenthesized so that an expression such as ABS (a - b) + expands correctly. Arguments may be evaluated more than once, so they + must not have side effects. */ +#define ABS(x) ((x) >= 0 ? (x) : -(x)) +#define MIN(l,o) ((l) < (o) ? (l) : (o)) +#define MAX(h,i) ((h) > (i) ? (h) : (i)) +#define numberof(x) (sizeof (x) / sizeof ((x)[0])) + +/* Field access macros. */ +#define SIZ(x) ((x)->_mp_size) +#define ABSIZ(x) ABS (SIZ (x)) +#define PTR(x) ((x)->_mp_d) +#define LIMBS(x) ((x)->_mp_d) +#define EXP(x) ((x)->_mp_exp) +#define PREC(x) ((x)->_mp_prec) +#define ALLOC(x) ((x)->_mp_alloc) + +/* Extra casts because shorts are promoted to ints by "~" and "<<". "-1" + rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers + about arithmetic overflow. 
*/ +#define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0) +#define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1)) +#define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1)) +#define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type)) +#define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type) + +#define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t) +#define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t) + +#define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t) + +#ifndef ULONG_MAX +#define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long) +#endif +#define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long) +#define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long) +#ifndef LONG_MAX +#define LONG_MAX SIGNED_TYPE_MAX (long) +#endif + +#ifndef USHORT_MAX +#define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short) +#endif +#define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short) +#define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short) +#ifndef SHORT_MAX +#define SHORT_MAX SIGNED_TYPE_MAX (short) +#endif + + +/* Swap macros. 
*/ + +#define MP_LIMB_T_SWAP(x, y) \ + do { \ + mp_limb_t __mp_limb_t_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_limb_t_swap__tmp; \ + } while (0) +#define MP_SIZE_T_SWAP(x, y) \ + do { \ + mp_size_t __mp_size_t_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_size_t_swap__tmp; \ + } while (0) + +#define MP_PTR_SWAP(x, y) \ + do { \ + mp_ptr __mp_ptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_ptr_swap__tmp; \ + } while (0) +#define MP_SRCPTR_SWAP(x, y) \ + do { \ + mp_srcptr __mp_srcptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_srcptr_swap__tmp; \ + } while (0) + +#define MPN_PTR_SWAP(xp,xs, yp,ys) \ + do { \ + MP_PTR_SWAP (xp, yp); \ + MP_SIZE_T_SWAP (xs, ys); \ + } while(0) +#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \ + do { \ + MP_SRCPTR_SWAP (xp, yp); \ + MP_SIZE_T_SWAP (xs, ys); \ + } while(0) + +#define MPZ_PTR_SWAP(x, y) \ + do { \ + mpz_ptr __mpz_ptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mpz_ptr_swap__tmp; \ + } while (0) +#define MPZ_SRCPTR_SWAP(x, y) \ + do { \ + mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mpz_srcptr_swap__tmp; \ + } while (0) + + +#if defined (__cplusplus) +extern "C" { +#endif + +/* FIXME: These are purely internal, so do a search and replace to change + them to __gmp forms, rather than using these macros. 
*/ +#define _mp_allocate_func __gmp_allocate_func +#define _mp_reallocate_func __gmp_reallocate_func +#define _mp_free_func __gmp_free_func +#define _mp_default_allocate __gmp_default_allocate +#define _mp_default_reallocate __gmp_default_reallocate +#define _mp_default_free __gmp_default_free + +extern void * (*_mp_allocate_func) _PROTO ((size_t)); +extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t)); +extern void (*_mp_free_func) _PROTO ((void *, size_t)); + +void *_mp_default_allocate _PROTO ((size_t)); +void *_mp_default_reallocate _PROTO ((void *, size_t, size_t)); +void _mp_default_free _PROTO ((void *, size_t)); + +#define _MP_ALLOCATE_FUNC_TYPE(n,type) \ + ((type *) (*_mp_allocate_func) ((n) * sizeof (type))) +#define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t) + +#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type)) +#define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t) + + +#if (__STDC__-0) || defined (__cplusplus) + +#else + +#define const /* Empty */ +#define signed /* Empty */ + +#endif + +#if defined (__GNUC__) && defined (__i386__) +#if 0 /* check that these actually improve things */ +#define MPN_COPY_INCR(DST, SRC, N) \ + __asm__ ("cld\n\trep\n\tmovsl" : : \ + "D" (DST), "S" (SRC), "c" (N) : \ + "cx", "di", "si", "memory") +#define MPN_COPY_DECR(DST, SRC, N) \ + __asm__ ("std\n\trep\n\tmovsl" : : \ + "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \ + "cx", "di", "si", "memory") +#define MPN_NORMALIZE_NOT_ZERO(P, N) \ + do { \ + __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \ + "a" (0), "D" ((P) + (N) - 1), "0" (N) : \ + "cx", "di"); \ + (N)++; \ + } while (0) +#endif +#endif + +#if HAVE_NATIVE_mpn_copyi +#define mpn_copyi __MPN(copyi) +void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#endif + +/* Remap names of internal mpn functions. 
*/ +#define __clz_tab __MPN(clz_tab) +#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv) +#define mpn_reciprocal __MPN(reciprocal) + +#define mpn_sb_divrem_mn __MPN(sb_divrem_mn) +#define mpn_bz_divrem_n __MPN(bz_divrem_n) +/* #define mpn_tdiv_q __MPN(tdiv_q) */ + +#define mpn_kara_mul_n __MPN(kara_mul_n) +void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); + +#define mpn_kara_sqr_n __MPN(kara_sqr_n) +void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); + +#define mpn_toom3_mul_n __MPN(toom3_mul_n) +void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr)); + +#define mpn_toom3_sqr_n __MPN(toom3_sqr_n) +void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); + +#define mpn_fft_best_k __MPN(fft_best_k) +int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)); + +#define mpn_mul_fft __MPN(mul_fft) +void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl, + mp_srcptr n, mp_size_t nl, + mp_srcptr m, mp_size_t ml, + int k)); + +#define mpn_mul_fft_full __MPN(mul_fft_full) +void mpn_mul_fft_full _PROTO ((mp_ptr op, + mp_srcptr n, mp_size_t nl, + mp_srcptr m, mp_size_t ml)); + +#define mpn_fft_next_size __MPN(fft_next_size) +mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)); + +mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); +mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t)); +/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */ + +/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. 
*/ +#ifndef MPN_COPY_INCR +#if HAVE_NATIVE_mpn_copyi +#define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS) +#else +#define MPN_COPY_INCR(DST, SRC, NLIMBS) \ + do { \ + mp_size_t __i; \ + for (__i = 0; __i < (NLIMBS); __i++) \ + (DST)[__i] = (SRC)[__i]; \ + } while (0) +#endif +#endif + +#if HAVE_NATIVE_mpn_copyd +#define mpn_copyd __MPN(copyd) +void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#endif + +/* NLIMBS==0 allowed */ +#ifndef MPN_COPY_DECR +#if HAVE_NATIVE_mpn_copyd +#define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS) +#else +#define MPN_COPY_DECR(DST, SRC, NLIMBS) \ + do { \ + mp_size_t __i; \ + for (__i = (NLIMBS) - 1; __i >= 0; __i--) \ + (DST)[__i] = (SRC)[__i]; \ + } while (0) +#endif +#endif + +/* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro, + rely on function inlining. */ +#if defined (_CRAY) || defined (__uxp__) +static inline void +_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n; +{ + int i; /* Faster for Cray with plain int */ +#pragma _CRI ivdep /* Cray PVP systems */ +#pragma loop noalias d,s /* Fujitsu VPP systems */ + for (i = 0; i < n; i++) + d[i] = s[i]; +} +#define MPN_COPY _MPN_COPY +#endif + +#ifndef MPN_COPY +#define MPN_COPY MPN_COPY_INCR +#endif + +/* Zero NLIMBS *limbs* AT DST. */ +#ifndef MPN_ZERO +#define MPN_ZERO(DST, NLIMBS) \ + do { \ + mp_size_t __i; \ + for (__i = 0; __i < (NLIMBS); __i++) \ + (DST)[__i] = 0; \ + } while (0) +#endif + +#ifndef MPN_NORMALIZE +#define MPN_NORMALIZE(DST, NLIMBS) \ + do { \ + while (NLIMBS > 0) \ + { \ + if ((DST)[(NLIMBS) - 1] != 0) \ + break; \ + NLIMBS--; \ + } \ + } while (0) +#endif +#ifndef MPN_NORMALIZE_NOT_ZERO +#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \ + do { \ + while (1) \ + { \ + if ((DST)[(NLIMBS) - 1] != 0) \ + break; \ + NLIMBS--; \ + } \ + } while (0) +#endif + +/* Strip least significant zero limbs from ptr,size by incrementing ptr and + decrementing size. The number in ptr,size must be non-zero, ie. 
size!=0 + and somewhere a non-zero limb. */ +#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \ + do \ + { \ + ASSERT ((size) != 0); \ + while ((ptr)[0] == 0) \ + { \ + (ptr)++; \ + (size)--; \ + ASSERT (size >= 0); \ + } \ + } \ + while (0) + +/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a + temporary variable; it will be automatically cleared out at function + return. We use __x here to make it possible to accept both mpz_ptr and + mpz_t arguments. */ +#define MPZ_TMP_INIT(X, NLIMBS) \ + do { \ + mpz_ptr __x = (X); \ + __x->_mp_alloc = (NLIMBS); \ + __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \ + } while (0) + +/* Reallocate the mpz_t WHAT if it has fewer than NEEDED limbs allocated. */ +#define MPZ_REALLOC(what,needed) \ + do { \ + if ((needed) > ALLOC (what)) \ + _mpz_realloc (what, needed); \ + } while (0) + +/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 32 +#endif + +/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a + value which is good on most machines. */ +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 256 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD) +#endif + +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD) +#endif + +/* First k to use for an FFT modF multiply. A modF FFT is an order + log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, + whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ +#define FFT_FIRST_K 4 + +/* Threshold at which FFT should be used to do a modF NxN -> N multiply. 
*/ +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3) +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3) +#endif + +/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This + will be a size where FFT is using k=7 or k=8, since an FFT-k used for an + NxN->2N multiply and not recursing into itself is an order + log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which + is the first better than toom3. */ +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10) +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10) +#endif + +/* Table of thresholds for successive modF FFT "k"s. The first entry is + where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2, + etc. See mpn_fft_best_k(). */ +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE \ + { TOOM3_MUL_THRESHOLD * 4, /* k=5 */ \ + TOOM3_MUL_THRESHOLD * 8, /* k=6 */ \ + TOOM3_MUL_THRESHOLD * 16, /* k=7 */ \ + TOOM3_MUL_THRESHOLD * 32, /* k=8 */ \ + TOOM3_MUL_THRESHOLD * 96, /* k=9 */ \ + TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \ + 0 } +#endif +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE \ + { TOOM3_SQR_THRESHOLD * 4, /* k=5 */ \ + TOOM3_SQR_THRESHOLD * 8, /* k=6 */ \ + TOOM3_SQR_THRESHOLD * 16, /* k=7 */ \ + TOOM3_SQR_THRESHOLD * 32, /* k=8 */ \ + TOOM3_SQR_THRESHOLD * 96, /* k=9 */ \ + TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \ + 0 } +#endif + +#ifndef FFT_TABLE_ATTRS +#define FFT_TABLE_ATTRS static const +#endif + +#define MPN_FFT_TABLE_SIZE 16 + + +/* Return non-zero if xp,xsize and yp,ysize overlap. + If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no + overlap. If both these are false, there's an overlap. */ +#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \ + ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) + + +/* ASSERT() is a private assertion checking scheme, similar to <assert.h>. 
+ ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS() + does it always. Generally assertions are meant for development, but + might help when looking for a problem later too. + + ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if + assertion checking is disabled, the expression is still evaluated. This + is meant for use with routines like mpn_add_n() where the return value + represents a carry or whatever that shouldn't occur. For example, + ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */ + +#ifdef __LINE__ +#define ASSERT_LINE __LINE__ +#else +#define ASSERT_LINE -1 +#endif + +#ifdef __FILE__ +#define ASSERT_FILE __FILE__ +#else +#define ASSERT_FILE "" +#endif + +int __gmp_assert_fail _PROTO((const char *filename, int linenum, + const char *expr)); + +#if HAVE_STRINGIZE +#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr) +#else +#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr") +#endif + +#if HAVE_VOID +#define CAST_TO_VOID (void) +#else +#define CAST_TO_VOID +#endif + +#define ASSERT_ALWAYS(expr) ((expr) ? 
0 : ASSERT_FAIL (expr)) + +#if WANT_ASSERT +#define ASSERT(expr) ASSERT_ALWAYS (expr) +#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0) + +#else +#define ASSERT(expr) (CAST_TO_VOID 0) +#define ASSERT_NOCARRY(expr) (expr) +#endif + + +#if HAVE_NATIVE_mpn_com_n +#define mpn_com_n __MPN(com_n) +void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +#else +#define mpn_com_n(d,s,n) \ + do \ + { \ + mp_ptr __d = (d); \ + mp_srcptr __s = (s); \ + mp_size_t __n = (n); \ + do \ + *__d++ = ~ *__s++; \ + while (--__n); \ + } \ + while (0) +#endif + +#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \ + do \ + { \ + mp_ptr __d = (d); \ + mp_srcptr __s1 = (s1); \ + mp_srcptr __s2 = (s2); \ + mp_size_t __n = (n); \ + do \ + *__d++ = dop (*__s1++ op s2op *__s2++); \ + while (--__n); \ + } \ + while (0) + +#if HAVE_NATIVE_mpn_and_n +#define mpn_and_n __MPN(and_n) +void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, ) +#endif + +#if HAVE_NATIVE_mpn_andn_n +#define mpn_andn_n __MPN(andn_n) +void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~) +#endif + +#if HAVE_NATIVE_mpn_nand_n +#define mpn_nand_n __MPN(nand_n) +void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, ) +#endif + +#if HAVE_NATIVE_mpn_ior_n +#define mpn_ior_n __MPN(ior_n) +void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, ) +#endif + +#if HAVE_NATIVE_mpn_iorn_n +#define mpn_iorn_n __MPN(iorn_n) +void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~) +#endif + +#if HAVE_NATIVE_mpn_nior_n +#define mpn_nior_n __MPN(nior_n) +void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, 
mp_size_t)); +#else +#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, ) +#endif + +#if HAVE_NATIVE_mpn_xor_n +#define mpn_xor_n __MPN(xor_n) +void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, ) +#endif + +#if HAVE_NATIVE_mpn_xnor_n +#define mpn_xnor_n __MPN(xnor_n) +void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +#else +#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, ) +#endif + +/* Structure for conversion between internal binary format and + strings in base 2..36. */ +struct bases +{ + /* Number of digits in the conversion base that always fits in an mp_limb_t. + For example, for base 10 on a machine where a mp_limb_t has 32 bits this + is 9, since 10**9 is the largest number that fits into a mp_limb_t. */ + int chars_per_limb; + + /* log(2)/log(conversion_base) */ + double chars_per_bit_exactly; + + /* base**chars_per_limb, i.e. the biggest number that fits a word, built by + factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base), + i.e. the number of bits used to represent each digit in the base. */ + mp_limb_t big_base; + + /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a + fixed-point number. Instead of dividing by big_base an application can + choose to multiply by big_base_inverted. */ + mp_limb_t big_base_inverted; +}; + +#define __mp_bases __MPN(mp_bases) +extern const struct bases __mp_bases[]; +extern mp_size_t __gmp_default_fp_limb_precision; + +#if defined (__i386__) +#define TARGET_REGISTER_STARVED 1 +#else +#define TARGET_REGISTER_STARVED 0 +#endif + +/* Use a library function for invert_limb, if available. */ +#if ! 
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb +#define mpn_invert_limb __MPN(invert_limb) +mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)); +#define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl)) +#endif + +/* Generic invert_limb: INVXL is set to all ones when XL << 1 is zero, + otherwise to the quotient udiv_qrnnd produces for the two-limb value + (-XL,,0) divided by XL. XL is expanded several times, so it is + parenthesized and should have no side effects. */ +#ifndef invert_limb +#define invert_limb(invxl,xl) \ + do { \ + mp_limb_t dummy; \ + if ((xl) << 1 == 0) \ + invxl = ~(mp_limb_t) 0; \ + else \ + udiv_qrnnd (invxl, dummy, -(xl), 0, (xl)); \ + } while (0) +#endif + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + If this would yield overflow, DI should be the largest possible number + (i.e., only ones). For correct operation, the most significant bit of D + has to be set. Put the quotient in Q and the remainder in R. */ +#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ + do { \ + mp_limb_t _q, _ql, _r; \ + mp_limb_t _xh, _xl; \ + umul_ppmm (_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\ + umul_ppmm (_xh, _xl, _q, (d)); \ + sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ + if (_xh != 0) \ + { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q += 1; \ + if (_xh != 0) \ + { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q += 1; \ + } \ + } \ + if (_r >= (d)) \ + { \ + _r -= (d); \ + _q += 1; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) +/* Like udiv_qrnnd_preinv, but for any value D. DNORM is D shifted left + so that its most significant bit is set. LGUP is ceil(log2(D)). 
*/ +/* _n2 and _n10 are the high and low limbs of (NH,,NL) shifted left by + BITS_PER_MP_LIMB - LGUP. The NL >> LGUP part of _n2 is written as two + shifts (>> 1, then >> (LGUP - 1)), presumably to keep each shift count + below BITS_PER_MP_LIMB when LGUP equals it. */ +#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \ + do { \ + mp_limb_t _n2, _n10, _n1, _nadj, _q1; \ + mp_limb_t _xh, _xl; \ + _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\ + _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \ + _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \ + _nadj = _n10 + (_n1 & (dnorm)); \ + umul_ppmm (_xh, _xl, di, _n2 - _n1); \ + add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \ + _q1 = ~(_n2 + _xh); \ + umul_ppmm (_xh, _xl, _q1, d); \ + add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \ + _xh -= (d); \ + (r) = _xl + ((d) & _xh); \ + (q) = _xh - _q1; \ + } while (0) +/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which + version to use. */ +#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \ + do { \ + mp_limb_t _n2, _n10, _n1, _nadj, _q1; \ + mp_limb_t _xh, _xl; \ + _n2 = (nh); \ + _n10 = (nl); \ + _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \ + _nadj = _n10 + (_n1 & (d)); \ + umul_ppmm (_xh, _xl, di, _n2 - _n1); \ + add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \ + _q1 = ~(_n2 + _xh); \ + umul_ppmm (_xh, _xl, _q1, d); \ + add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \ + _xh -= (d); \ + (r) = _xl + ((d) & _xh); \ + (q) = _xh - _q1; \ + } while (0) + + +/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo + 2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB. + "n" must be odd (otherwise such an inverse doesn't exist). + + This is not to be confused with invert_limb(), which is completely + different. + + The table lookup gives an inverse with the low 8 bits valid, and each + multiply step doubles the number of bits. See Jebelean's exact division + paper, end of section 4 (reference in gmp.texi). 
*/ + +#define modlimb_invert_table __gmp_modlimb_invert_table +extern const unsigned char modlimb_invert_table[128]; + +#if BITS_PER_MP_LIMB <= 32 +#define modlimb_invert(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __inv; \ + ASSERT ((__n & 1) == 1); \ + __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \ + __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \ + __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) +#endif + +#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64 +#define modlimb_invert(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __inv; \ + ASSERT ((__n & 1) == 1); \ + __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \ + __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \ + __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \ + __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) +#endif + + +/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish + between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced + until then. */ +#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__) +/* Define stuff for longlong.h. */ +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; +#if defined _LONGLONG || defined _LONG_LONG_LIMB +typedef long long int DItype; +typedef unsigned long long int UDItype; +#else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. 
*/ +typedef long int DItype; +typedef unsigned long int UDItype; +#endif +#endif + +typedef mp_limb_t UWtype; +typedef unsigned int UHWtype; +#define W_TYPE_SIZE BITS_PER_MP_LIMB + +/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */ + +#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__))) +/* Special case for little endian ARM since floats remain in big-endian. */ +#define _GMP_IEEE_FLOATS 1 +union ieee_double_extract +{ + struct + { + unsigned int manh:20; + unsigned int exp:11; + unsigned int sig:1; + unsigned int manl:32; + } s; + double d; +}; +#else +#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \ + || defined (__alpha) \ + || defined (__clipper__) \ + || defined (__cris) \ + || defined (__i386__) \ + || defined (__i860__) \ + || defined (__i960__) \ + || defined (MIPSEL) || defined (_MIPSEL) \ + || defined (__ns32000__) \ + || defined (__WINNT) || defined (_WIN32) +#define _GMP_IEEE_FLOATS 1 +union ieee_double_extract +{ + struct + { + unsigned int manl:32; + unsigned int manh:20; + unsigned int exp:11; + unsigned int sig:1; + } s; + double d; +}; +#else /* Need this as an #else since the tests aren't made exclusive. 
*/ +#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \ + || defined (__a29k__) || defined (_AM29K) \ + || defined (__arm__) \ + || (defined (__convex__) && defined (_IEEE_FLOAT_)) \ + || defined (_CRAYMPP) \ + || defined (__i370__) || defined (__mvs__) \ + || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\ + || defined(mc68020) \ + || defined (__m88000__) \ + || defined (MIPSEB) || defined (_MIPSEB) \ + || defined (__hppa) || defined (__hppa__) \ + || defined (__pyr__) \ + || defined (__ibm032__) \ + || defined (_IBMR2) || defined (_ARCH_PPC) \ + || defined (__sh__) \ + || defined (__sparc) || defined (sparc) \ + || defined (__we32k__) +#define _GMP_IEEE_FLOATS 1 +union ieee_double_extract +{ + struct + { + unsigned int sig:1; + unsigned int exp:11; + unsigned int manh:20; + unsigned int manl:32; + } s; + double d; +}; +#endif +#endif +#endif + +/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on + SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0, + presumably due to treating the mp_limb_t constant as signed rather than + unsigned. 
*/ +#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2))) +#if BITS_PER_MP_LIMB == 64 +#define LIMBS_PER_DOUBLE 2 +#else +#define LIMBS_PER_DOUBLE 3 +#endif + +double __gmp_scale2 _PROTO ((double, int)); +int __gmp_extract_double _PROTO ((mp_ptr, double)); + +extern int __gmp_junk; +extern const int __gmp_0; +#define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0) +#define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO) +#define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE) + +#if defined _LONG_LONG_LIMB +#if defined (__STDC__) +#define CNST_LIMB(C) C##LL +#else +#define CNST_LIMB(C) C/**/LL +#endif +#else /* not _LONG_LONG_LIMB */ +#if defined (__STDC__) +#define CNST_LIMB(C) C##L +#else +#define CNST_LIMB(C) C/**/L +#endif +#endif /* _LONG_LONG_LIMB */ + +/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/ +#if BITS_PER_MP_LIMB == 32 +#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */ +#define PP_INVERTED 0x53E5645CL +#define PP_MAXPRIME 29 +#define PP_MASK 0x208A28A8L +#endif + +#if BITS_PER_MP_LIMB == 64 +#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */ +#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) +#define PP_MAXPRIME 53 +#define PP_MASK CNST_LIMB(0x208A20A08A28A8) +#endif + + +/* BIT1 means a result value in bit 1 (second least significant bit), with a + zero bit representing +1 and a one bit representing -1. Bits other than + bit 1 are garbage. + + JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base + and their speed is important. Expressions are used rather than + conditionals to accumulate sign changes, which effectively means XORs + instead of conditional JUMPs. 
*/ + +/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */ +#define JACOBI_S0(a) \ + (((a) == 1) | ((a) == -1)) + +/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */ +#define JACOBI_U0(a) \ + ((a) == 1) + +/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise + An mpz_t always has at least one limb of allocated space, so the fetch of + the low limb is valid. */ +#define JACOBI_Z0(a) \ + (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1)) + +/* Convert a bit1 to +1 or -1. */ +#define JACOBI_BIT1_TO_PN(result_bit1) \ + (1 - ((result_bit1) & 2)) + +/* (2/b), with b unsigned and odd; + is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and + hence obtained from (b>>1)^b */ +#define JACOBI_TWO_U_BIT1(b) \ + (ASSERT (b & 1), (((b) >> 1) ^ (b))) + +/* (2/b)^twos, with b unsigned and odd */ +#define JACOBI_TWOS_U_BIT1(twos, b) \ + (((twos) << 1) & JACOBI_TWO_U_BIT1 (b)) + +/* (2/b)^twos, with b unsigned and odd */ +#define JACOBI_TWOS_U(twos, b) \ + (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b))) + +/* (a/b) effect due to sign of a: signed/unsigned, b odd; + is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */ +#define JACOBI_ASGN_SU_BIT1(a, b) \ + ((((a) < 0) << 1) & (b)) + +/* (a/b) effect due to sign of b: signed/mpz; + is -1 if a and b both negative, +1 otherwise */ +#define JACOBI_BSGN_SZ_BIT1(a, b) \ + ((((a) < 0) & (SIZ(b) < 0)) << 1) + +/* (a/b) effect due to sign of b: mpz/signed */ +#define JACOBI_BSGN_ZS_BIT1(a, b) \ + JACOBI_BSGN_SZ_BIT1(b, a) + +/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd. + Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if + both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd + because this is used in a couple of places with only bit 1 of a or b + valid. */ +#define JACOBI_RECIP_UU_BIT1(a, b) \ + ((a) & (b)) + + +/* For testing and debugging. 
*/ +#define MPZ_CHECK_FORMAT(z) \ + (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \ + ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z))) +#define MPZ_PROVOKE_REALLOC(z) \ + do { ALLOC(z) = ABSIZ(z); } while (0) + + +#if TUNE_PROGRAM_BUILD +/* Some extras wanted when recompiling some .c files for use by the tune + program. Not part of a normal build. */ + +extern mp_size_t mul_threshold[]; +extern mp_size_t fft_modf_mul_threshold; +extern mp_size_t sqr_threshold[]; +extern mp_size_t fft_modf_sqr_threshold; +extern mp_size_t bz_threshold[]; +extern mp_size_t fib_threshold[]; +extern mp_size_t powm_threshold[]; +extern mp_size_t gcd_accel_threshold[]; +extern mp_size_t gcdext_threshold[]; + +#undef KARATSUBA_MUL_THRESHOLD +#undef TOOM3_MUL_THRESHOLD +#undef FFT_MUL_TABLE +#undef FFT_MUL_THRESHOLD +#undef FFT_MODF_MUL_THRESHOLD +#undef KARATSUBA_SQR_THRESHOLD +#undef TOOM3_SQR_THRESHOLD +#undef FFT_SQR_TABLE +#undef FFT_SQR_THRESHOLD +#undef FFT_MODF_SQR_THRESHOLD +#undef BZ_THRESHOLD +#undef FIB_THRESHOLD +#undef POWM_THRESHOLD +#undef GCD_ACCEL_THRESHOLD +#undef GCDEXT_THRESHOLD + +#define KARATSUBA_MUL_THRESHOLD mul_threshold[0] +#define TOOM3_MUL_THRESHOLD mul_threshold[1] +#define FFT_MUL_TABLE 0 +#define FFT_MUL_THRESHOLD mul_threshold[2] +#define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold +#define KARATSUBA_SQR_THRESHOLD sqr_threshold[0] +#define TOOM3_SQR_THRESHOLD sqr_threshold[1] +#define FFT_SQR_TABLE 0 +#define FFT_SQR_THRESHOLD sqr_threshold[2] +#define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold +#define BZ_THRESHOLD bz_threshold[0] +#define FIB_THRESHOLD fib_threshold[0] +#define POWM_THRESHOLD powm_threshold[0] +#define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0] +#define GCDEXT_THRESHOLD gcdext_threshold[0] + +#define TOOM3_MUL_THRESHOLD_LIMIT 700 + +#undef FFT_TABLE_ATTRS +#define FFT_TABLE_ATTRS +extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE]; + +#endif /* TUNE_PROGRAM_BUILD */ + +#if defined (__cplusplus) +} +#endif diff --git 
a/rts/gmp/gmp.h b/rts/gmp/gmp.h new file mode 100644 index 0000000000..0f1b9510e9 --- /dev/null +++ b/rts/gmp/gmp.h @@ -0,0 +1,1083 @@ +/* gmp.h -- Definitions for GNU multiple precision functions. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#ifndef __GMP_H__ + +#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */ +#define __GNU_MP__ 2 +#define __need_size_t +#include <stddef.h> +#undef __need_size_t + +#ifndef STG_H +/* Get DLL_IMPORT */ +#include "../../includes/ghcconfig.h" +#include "../../includes/StgDLL.h" +#endif + +#if defined (__mips) && defined (_ABIN32) +/* Force the use of 64-bit limbs for all 64-bit MIPS CPUs if ABI permits. 
*/ +#define _LONG_LONG_LIMB +#endif + +#if (__STDC__-0) || defined (__cplusplus) +#define __gmp_const const +#define __gmp_signed signed +#else +#define __gmp_const +#define __gmp_signed +#endif + +#if defined (__GNUC__) +#define __gmp_inline __inline__ +#else +#define __gmp_inline +#endif + +#ifndef _EXTERN_INLINE +#ifdef __GNUC__ +#define _EXTERN_INLINE extern __inline__ +#else +#define _EXTERN_INLINE static +#endif +#endif + +#ifdef _SHORT_LIMB +typedef unsigned int mp_limb_t; +typedef int mp_limb_signed_t; +#else +#ifdef _LONG_LONG_LIMB +typedef unsigned long long int mp_limb_t; +typedef long long int mp_limb_signed_t; +#else +typedef unsigned long int mp_limb_t; +typedef long int mp_limb_signed_t; +#endif +#endif + +typedef mp_limb_t * mp_ptr; +typedef __gmp_const mp_limb_t * mp_srcptr; +#if defined (_CRAY) && ! defined (_CRAYMPP) +/* plain `int' is much faster (48 bits) */ +typedef int mp_size_t; +typedef int mp_exp_t; +#else +typedef long int mp_size_t; +typedef long int mp_exp_t; +#endif + +typedef struct +{ + int _mp_alloc; /* Number of *limbs* allocated and pointed + to by the _mp_d field. */ + int _mp_size; /* abs(_mp_size) is the number of limbs the + last field points to. If _mp_size is + negative this is a negative number. */ + mp_limb_t *_mp_d; /* Pointer to the limbs. */ +} __mpz_struct; +#endif /* __GNU_MP__ */ + +typedef __mpz_struct MP_INT; +typedef __mpz_struct mpz_t[1]; + +typedef struct +{ + __mpz_struct _mp_num; + __mpz_struct _mp_den; +} __mpq_struct; + +typedef __mpq_struct MP_RAT; +typedef __mpq_struct mpq_t[1]; + +typedef struct +{ + int _mp_prec; /* Max precision, in number of `mp_limb_t's. + Set by mpf_init and modified by + mpf_set_prec. The area pointed to by the + _mp_d field contains `prec' + 1 limbs. */ + int _mp_size; /* abs(_mp_size) is the number of limbs the + last field points to. If _mp_size is + negative this is a negative number. */ + mp_exp_t _mp_exp; /* Exponent, in the base of `mp_limb_t'. 
*/ + mp_limb_t *_mp_d; /* Pointer to the limbs. */ +} __mpf_struct; + +/* typedef __mpf_struct MP_FLOAT; */ +typedef __mpf_struct mpf_t[1]; + +/* Available random number generation algorithms. */ +typedef enum +{ + GMP_RAND_ALG_DEFAULT = 0, + GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential. */ +} gmp_randalg_t; + +/* Linear congruential data struct. */ +typedef struct { + mpz_t a; /* Multiplier. */ + unsigned long int c; /* Adder. */ + mpz_t m; /* Modulus (valid only if m2exp == 0). */ + unsigned long int m2exp; /* If != 0, modulus is 2 ^ m2exp. */ +} __gmp_randata_lc; + +/* Random state struct. */ +typedef struct +{ + mpz_t seed; /* Current seed. */ + gmp_randalg_t alg; /* Algorithm used. */ + union { /* Algorithm specific data. */ + __gmp_randata_lc *lc; /* Linear congruential. */ + } algdata; +} __gmp_randstate_struct; +typedef __gmp_randstate_struct gmp_randstate_t[1]; + +/* Types for function declarations in gmp files. */ +/* ??? Should not pollute user name space with these ??? 
*/ +typedef __gmp_const __mpz_struct *mpz_srcptr; +typedef __mpz_struct *mpz_ptr; +typedef __gmp_const __mpf_struct *mpf_srcptr; +typedef __mpf_struct *mpf_ptr; +typedef __gmp_const __mpq_struct *mpq_srcptr; +typedef __mpq_struct *mpq_ptr; + +#ifndef _PROTO +#if (__STDC__-0) || defined (__cplusplus) +#define _PROTO(x) x +#else +#define _PROTO(x) () +#endif +#endif + +#ifndef __MPN +/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */ +#if defined (__STDC__) || defined (__cplusplus) +#define __MPN(x) __gmpn_##x +#else +#define __MPN(x) __gmpn_/**/x +#endif +#endif + +#if defined (FILE) || defined (H_STDIO) || defined (_H_STDIO) \ + || defined (_STDIO_H) || defined (_STDIO_H_) || defined (__STDIO_H__) \ + || defined (_STDIO_INCLUDED) || defined (__dj_include_stdio_h_) +#define _GMP_H_HAVE_FILE 1 +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +#define mp_set_memory_functions __gmp_set_memory_functions +DLL_IMPORT void mp_set_memory_functions _PROTO ((void *(*) (size_t), + void *(*) (void *, size_t, size_t), + void (*) (void *, size_t))); + +#define mp_bits_per_limb __gmp_bits_per_limb +DLL_IMPORT extern __gmp_const int mp_bits_per_limb; + +#if defined (__cplusplus) +} +#endif + + +/**************** Random number routines. 
****************/ + +#define _gmp_rand __gmp_rand +#define gmp_randinit __gmp_randinit +#define gmp_randinit_lc __gmp_randinit_lc +#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp +#define gmp_randseed __gmp_randseed +#define gmp_randseed_ui __gmp_randseed_ui +#define gmp_randclear __gmp_randclear + +#if defined (__cplusplus) +extern "C" { +#endif + +DLL_IMPORT void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int)); +DLL_IMPORT void gmp_randinit _PROTO ((gmp_randstate_t, gmp_randalg_t, ...)); +DLL_IMPORT void gmp_randinit_lc _PROTO ((gmp_randstate_t, mpz_t, unsigned long int, + mpz_t)); +DLL_IMPORT void gmp_randinit_lc_2exp _PROTO ((gmp_randstate_t, mpz_t, unsigned long int, + unsigned long int)); +DLL_IMPORT void gmp_randseed _PROTO ((gmp_randstate_t, mpz_t)); +DLL_IMPORT void gmp_randseed_ui _PROTO ((gmp_randstate_t, unsigned long int)); +DLL_IMPORT void gmp_randclear _PROTO ((gmp_randstate_t)); + +#if defined (__cplusplus) +} +#endif + +/**************** Integer (i.e. Z) routines. 
****************/ + +#define _mpz_realloc __gmpz_realloc +#define mpz_realloc __gmpz_realloc +#define mpz_abs __gmpz_abs +#define mpz_add __gmpz_add +#define mpz_add_ui __gmpz_add_ui +#define mpz_addmul_ui __gmpz_addmul_ui +#define mpz_and __gmpz_and +#define mpz_array_init __gmpz_array_init +#define mpz_bin_ui __gmpz_bin_ui +#define mpz_bin_uiui __gmpz_bin_uiui +#define mpz_cdiv_q __gmpz_cdiv_q +#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui +#define mpz_cdiv_qr __gmpz_cdiv_qr +#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui +#define mpz_cdiv_r __gmpz_cdiv_r +#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui +#define mpz_cdiv_ui __gmpz_cdiv_ui +#define mpz_clear __gmpz_clear +#define mpz_clrbit __gmpz_clrbit +#define mpz_cmp __gmpz_cmp +#define _mpz_cmp_si __gmpz_cmp_si +#define _mpz_cmp_ui __gmpz_cmp_ui +#define mpz_cmpabs __gmpz_cmpabs +#define mpz_cmpabs_ui __gmpz_cmpabs_ui +#define mpz_com __gmpz_com +#define mpz_divexact __gmpz_divexact +#define mpz_dump __gmpz_dump +#define mpz_fac_ui __gmpz_fac_ui +#define mpz_fdiv_q __gmpz_fdiv_q +#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp +#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui +#define mpz_fdiv_qr __gmpz_fdiv_qr +#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui +#define mpz_fdiv_r __gmpz_fdiv_r +#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp +#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui +#define mpz_fdiv_ui __gmpz_fdiv_ui +#define mpz_fib_ui __gmpz_fib_ui +#define mpz_fits_sint_p __gmpz_fits_sint_p +#define mpz_fits_slong_p __gmpz_fits_slong_p +#define mpz_fits_sshort_p __gmpz_fits_sshort_p +#define mpz_fits_uint_p __gmpz_fits_uint_p +#define mpz_fits_ulong_p __gmpz_fits_ulong_p +#define mpz_fits_ushort_p __gmpz_fits_ushort_p +#define mpz_gcd __gmpz_gcd +#define mpz_gcd_ui __gmpz_gcd_ui +#define mpz_gcdext __gmpz_gcdext +#define mpz_get_d __gmpz_get_d +#define mpz_get_si __gmpz_get_si +#define mpz_get_str __gmpz_get_str +#define mpz_get_ui __gmpz_get_ui +#define mpz_getlimbn __gmpz_getlimbn +#define mpz_hamdist __gmpz_hamdist +#define mpz_init __gmpz_init +#define 
mpz_inp_binary __gmpz_inp_binary +#define mpz_inp_raw __gmpz_inp_raw +#define mpz_inp_str __gmpz_inp_str +#define mpz_init_set __gmpz_init_set +#define mpz_init_set_d __gmpz_init_set_d +#define mpz_init_set_si __gmpz_init_set_si +#define mpz_init_set_str __gmpz_init_set_str +#define mpz_init_set_ui __gmpz_init_set_ui +#define mpz_invert __gmpz_invert +#define mpz_ior __gmpz_ior +#define mpz_jacobi __gmpz_jacobi +#define mpz_lcm __gmpz_lcm +#define mpz_legendre __gmpz_legendre +#define mpz_mod __gmpz_mod +#define mpz_mul __gmpz_mul +#define mpz_mul_2exp __gmpz_mul_2exp +#define mpz_neg __gmpz_neg +#define mpz_nextprime __gmpz_nextprime +#define mpz_out_binary __gmpz_out_binary +#define mpz_out_raw __gmpz_out_raw +#define mpz_out_str __gmpz_out_str +#define mpz_perfect_power_p __gmpz_perfect_power_p +#define mpz_perfect_square_p __gmpz_perfect_square_p +#define mpz_popcount __gmpz_popcount +#define mpz_pow_ui __gmpz_pow_ui +#define mpz_powm __gmpz_powm +#define mpz_powm_ui __gmpz_powm_ui +#define mpz_probab_prime_p __gmpz_probab_prime_p +#define mpz_random __gmpz_random +#define mpz_random2 __gmpz_random2 +#define mpz_remove __gmpz_remove +#define mpz_root __gmpz_root +#define mpz_rrandomb __gmpz_rrandomb +#define mpz_scan0 __gmpz_scan0 +#define mpz_scan1 __gmpz_scan1 +#define mpz_set __gmpz_set +#define mpz_set_d __gmpz_set_d +#define mpz_set_f __gmpz_set_f +#define mpz_set_q __gmpz_set_q +#define mpz_set_si __gmpz_set_si +#define mpz_set_str __gmpz_set_str +#define mpz_set_ui __gmpz_set_ui +#define mpz_setbit __gmpz_setbit +#define mpz_size __gmpz_size +#define mpz_sizeinbase __gmpz_sizeinbase +#define mpz_sqrt __gmpz_sqrt +#define mpz_sqrtrem __gmpz_sqrtrem +#define mpz_sub __gmpz_sub +#define mpz_sub_ui __gmpz_sub_ui +#define mpz_swap __gmpz_swap +#define mpz_tdiv_ui __gmpz_tdiv_ui +#define mpz_tdiv_q __gmpz_tdiv_q +#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp +#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui +#define mpz_tdiv_qr __gmpz_tdiv_qr +#define mpz_tdiv_qr_ui 
__gmpz_tdiv_qr_ui +#define mpz_tdiv_r __gmpz_tdiv_r +#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp +#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui +#define mpz_tstbit __gmpz_tstbit +#define mpz_ui_pow_ui __gmpz_ui_pow_ui +#define mpz_urandomb __gmpz_urandomb +#define mpz_urandomm __gmpz_urandomm +#define mpz_xor __gmpz_xor +#define mpz_eor __gmpz_xor + +#if defined (__cplusplus) +extern "C" { +#endif +DLL_IMPORT void *_mpz_realloc _PROTO ((mpz_ptr, mp_size_t)); + +DLL_IMPORT void mpz_abs _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_add _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_add_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_addmul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_and _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_array_init _PROTO ((mpz_ptr, mp_size_t, mp_size_t)); +DLL_IMPORT void mpz_bin_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_bin_uiui _PROTO ((mpz_ptr, unsigned long int, unsigned long int)); +DLL_IMPORT void mpz_cdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_cdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_cdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_cdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_cdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_cdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_cdiv_ui _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_clear _PROTO ((mpz_ptr)); +DLL_IMPORT void mpz_clrbit _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT int mpz_cmp _PROTO ((mpz_srcptr, mpz_srcptr)); +DLL_IMPORT int _mpz_cmp_si _PROTO ((mpz_srcptr, signed long int)); +DLL_IMPORT int _mpz_cmp_ui _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT int mpz_cmpabs 
_PROTO ((mpz_srcptr, mpz_srcptr)); +DLL_IMPORT int mpz_cmpabs_ui _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_com _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_divexact _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_dump _PROTO ((mpz_srcptr)); +DLL_IMPORT void mpz_fac_ui _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT void mpz_fdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_fdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_fdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_fdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_fdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_fdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_fdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_fdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_fdiv_ui _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_fib_ui _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT int mpz_fits_sint_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_fits_slong_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_fits_sshort_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_fits_uint_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_fits_ulong_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_fits_ushort_p _PROTO ((mpz_srcptr)); +DLL_IMPORT void mpz_gcd _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_gcd_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_gcdext _PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT double mpz_get_d _PROTO ((mpz_srcptr)); +/* signed */ long int mpz_get_si _PROTO ((mpz_srcptr)); +DLL_IMPORT char *mpz_get_str _PROTO ((char *, int, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_get_ui 
_PROTO ((mpz_srcptr)); +DLL_IMPORT mp_limb_t mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t)); +DLL_IMPORT unsigned long int mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_init _PROTO ((mpz_ptr)); +#ifdef _GMP_H_HAVE_FILE +DLL_IMPORT size_t mpz_inp_binary _PROTO ((mpz_ptr, FILE *)); +DLL_IMPORT size_t mpz_inp_raw _PROTO ((mpz_ptr, FILE *)); +DLL_IMPORT size_t mpz_inp_str _PROTO ((mpz_ptr, FILE *, int)); +#endif +DLL_IMPORT void mpz_init_set _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_init_set_d _PROTO ((mpz_ptr, double)); +DLL_IMPORT void mpz_init_set_si _PROTO ((mpz_ptr, signed long int)); +DLL_IMPORT int mpz_init_set_str _PROTO ((mpz_ptr, __gmp_const char *, int)); +DLL_IMPORT void mpz_init_set_ui _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT int mpz_invert _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_ior _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT int mpz_jacobi _PROTO ((mpz_srcptr, mpz_srcptr)); + +#define mpz_kronecker_si __gmpz_kronecker_si +DLL_IMPORT int mpz_kronecker_si _PROTO ((mpz_srcptr, long)); + +#define mpz_kronecker_ui __gmpz_kronecker_ui +DLL_IMPORT int mpz_kronecker_ui _PROTO ((mpz_srcptr, unsigned long)); + +#define mpz_si_kronecker __gmpz_si_kronecker +DLL_IMPORT int mpz_si_kronecker _PROTO ((long, mpz_srcptr)); + +#define mpz_ui_kronecker __gmpz_ui_kronecker +DLL_IMPORT int mpz_ui_kronecker _PROTO ((unsigned long, mpz_srcptr)); + +DLL_IMPORT void mpz_lcm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT int mpz_legendre _PROTO ((mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_mod _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); + +#define mpz_mul_si __gmpz_mul_si +DLL_IMPORT void mpz_mul_si _PROTO ((mpz_ptr, mpz_srcptr, long int)); + +#define mpz_mul_ui __gmpz_mul_ui +DLL_IMPORT void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned 
long int)); + +DLL_IMPORT void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_nextprime _PROTO ((mpz_ptr, mpz_srcptr)); +#ifdef _GMP_H_HAVE_FILE +DLL_IMPORT size_t mpz_out_binary _PROTO ((FILE *, mpz_srcptr)); +DLL_IMPORT size_t mpz_out_raw _PROTO ((FILE *, mpz_srcptr)); +DLL_IMPORT size_t mpz_out_str _PROTO ((FILE *, int, mpz_srcptr)); +#endif +DLL_IMPORT int mpz_perfect_power_p _PROTO ((mpz_srcptr)); +DLL_IMPORT int mpz_perfect_square_p _PROTO ((mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_popcount _PROTO ((mpz_srcptr)); +DLL_IMPORT void mpz_pow_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_powm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_powm_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr)); +DLL_IMPORT int mpz_probab_prime_p _PROTO ((mpz_srcptr, int)); +DLL_IMPORT void mpz_random _PROTO ((mpz_ptr, mp_size_t)); +DLL_IMPORT void mpz_random2 _PROTO ((mpz_ptr, mp_size_t)); +DLL_IMPORT unsigned long int mpz_remove _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT int mpz_root _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_rrandomb _PROTO ((mpz_ptr, gmp_randstate_t, unsigned long int)); +DLL_IMPORT unsigned long int mpz_scan0 _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_scan1 _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_set _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_set_d _PROTO ((mpz_ptr, double)); +DLL_IMPORT void mpz_set_f _PROTO ((mpz_ptr, mpf_srcptr)); +DLL_IMPORT void mpz_set_q _PROTO ((mpz_ptr, mpq_srcptr)); +DLL_IMPORT void mpz_set_si _PROTO ((mpz_ptr, signed long int)); +DLL_IMPORT int mpz_set_str _PROTO ((mpz_ptr, __gmp_const char *, int)); +DLL_IMPORT void mpz_set_ui _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT void mpz_setbit _PROTO ((mpz_ptr, unsigned long int)); +DLL_IMPORT size_t mpz_size _PROTO ((mpz_srcptr)); +DLL_IMPORT size_t mpz_sizeinbase _PROTO ((mpz_srcptr, 
int)); +DLL_IMPORT void mpz_sqrt _PROTO ((mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_sqrtrem _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr)); +DLL_IMPORT void mpz_sub _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_sub_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_swap _PROTO ((mpz_ptr, mpz_ptr)); +DLL_IMPORT void mpz_tdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_tdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_tdiv_ui _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_tdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_tdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT unsigned long int mpz_tdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_tdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +DLL_IMPORT void mpz_tdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT unsigned long int mpz_tdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); +DLL_IMPORT int mpz_tstbit _PROTO ((mpz_srcptr, unsigned long int)); +DLL_IMPORT void mpz_ui_pow_ui _PROTO ((mpz_ptr, unsigned long int, unsigned long int)); +DLL_IMPORT void mpz_urandomb _PROTO ((mpz_t, gmp_randstate_t, unsigned long int)); +DLL_IMPORT void mpz_urandomm _PROTO ((mpz_t, gmp_randstate_t, mpz_t)); +DLL_IMPORT void mpz_xor _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); +#if defined (__cplusplus) +} +#endif + +/**************** Rational (i.e. Q) routines. 
****************/ + +#define mpq_init __gmpq_init +#define mpq_clear __gmpq_clear +#define mpq_set __gmpq_set +#define mpq_set_ui __gmpq_set_ui +#define mpq_set_si __gmpq_set_si +#define mpq_set_z __gmpq_set_z +#define mpq_add __gmpq_add +#define mpq_sub __gmpq_sub +#define mpq_mul __gmpq_mul +#define mpq_div __gmpq_div +#define mpq_neg __gmpq_neg +#define mpq_cmp __gmpq_cmp +#define _mpq_cmp_ui __gmpq_cmp_ui +#define mpq_equal __gmpq_equal +#define mpq_inv __gmpq_inv +#define mpq_set_num __gmpq_set_num +#define mpq_set_den __gmpq_set_den +#define mpq_get_num __gmpq_get_num +#define mpq_get_den __gmpq_get_den +#define mpq_get_d __gmpq_get_d +#define mpq_set_d __gmpq_set_d +#define mpq_canonicalize __gmpq_canonicalize + +#if defined (__cplusplus) +extern "C" { +#endif +DLL_IMPORT void mpq_init _PROTO ((mpq_ptr)); +DLL_IMPORT void mpq_clear _PROTO ((mpq_ptr)); +DLL_IMPORT void mpq_set _PROTO ((mpq_ptr, mpq_srcptr)); +DLL_IMPORT void mpq_set_ui _PROTO ((mpq_ptr, unsigned long int, unsigned long int)); +DLL_IMPORT void mpq_set_si _PROTO ((mpq_ptr, signed long int, unsigned long int)); +DLL_IMPORT void mpq_set_z _PROTO ((mpq_ptr, mpz_srcptr)); +DLL_IMPORT void mpq_add _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); +DLL_IMPORT void mpq_sub _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); +DLL_IMPORT void mpq_mul _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); +DLL_IMPORT void mpq_div _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr)); +DLL_IMPORT void mpq_neg _PROTO ((mpq_ptr, mpq_srcptr)); +DLL_IMPORT int mpq_cmp _PROTO ((mpq_srcptr, mpq_srcptr)); +DLL_IMPORT int _mpq_cmp_ui _PROTO ((mpq_srcptr, unsigned long int, unsigned long int)); +DLL_IMPORT int mpq_equal _PROTO ((mpq_srcptr, mpq_srcptr)); +DLL_IMPORT void mpq_inv _PROTO ((mpq_ptr, mpq_srcptr)); +DLL_IMPORT void mpq_set_num _PROTO ((mpq_ptr, mpz_srcptr)); +DLL_IMPORT void mpq_set_den _PROTO ((mpq_ptr, mpz_srcptr)); +DLL_IMPORT void mpq_get_num _PROTO ((mpz_ptr, mpq_srcptr)); +DLL_IMPORT void mpq_get_den _PROTO ((mpz_ptr, mpq_srcptr)); 
+DLL_IMPORT double mpq_get_d _PROTO ((mpq_srcptr)); +DLL_IMPORT void mpq_set_d _PROTO ((mpq_ptr, double)); +DLL_IMPORT void mpq_canonicalize _PROTO ((mpq_ptr)); + +#define mpq_swap __gmpq_swap +DLL_IMPORT void mpq_swap _PROTO ((mpq_ptr, mpq_ptr)); + +#ifdef _GMP_H_HAVE_FILE +#define mpq_out_str __gmpq_out_str +DLL_IMPORT size_t mpq_out_str _PROTO ((FILE *, int, mpq_srcptr)); +#endif + +#if defined (__cplusplus) +} +#endif + +/**************** Float (i.e. F) routines. ****************/ + +#define mpf_abs __gmpf_abs +#define mpf_add __gmpf_add +#define mpf_add_ui __gmpf_add_ui +#define mpf_ceil __gmpf_ceil +#define mpf_clear __gmpf_clear +#define mpf_cmp __gmpf_cmp +#define mpf_cmp_si __gmpf_cmp_si +#define mpf_cmp_ui __gmpf_cmp_ui +#define mpf_div __gmpf_div +#define mpf_div_2exp __gmpf_div_2exp +#define mpf_div_ui __gmpf_div_ui +#define mpf_dump __gmpf_dump +#define mpf_floor __gmpf_floor +#define mpf_eq __gmpf_eq +#define mpf_get_d __gmpf_get_d +#define mpf_get_prec __gmpf_get_prec +#define mpf_get_str __gmpf_get_str +#define mpf_init __gmpf_init +#define mpf_init2 __gmpf_init2 +#define mpf_inp_str __gmpf_inp_str +#define mpf_init_set __gmpf_init_set +#define mpf_init_set_d __gmpf_init_set_d +#define mpf_init_set_si __gmpf_init_set_si +#define mpf_init_set_str __gmpf_init_set_str +#define mpf_init_set_ui __gmpf_init_set_ui +#define mpf_mul __gmpf_mul +#define mpf_mul_2exp __gmpf_mul_2exp +#define mpf_mul_ui __gmpf_mul_ui +#define mpf_neg __gmpf_neg +#define mpf_out_str __gmpf_out_str +#define mpf_pow_ui __gmpf_pow_ui +#define mpf_random2 __gmpf_random2 +#define mpf_reldiff __gmpf_reldiff +#define mpf_set __gmpf_set +#define mpf_set_d __gmpf_set_d +#define mpf_set_default_prec __gmpf_set_default_prec +#define mpf_set_prec __gmpf_set_prec +#define mpf_set_prec_raw __gmpf_set_prec_raw +#define mpf_set_q __gmpf_set_q +#define mpf_set_si __gmpf_set_si +#define mpf_set_str __gmpf_set_str +#define mpf_set_ui __gmpf_set_ui +#define mpf_set_z __gmpf_set_z +#define mpf_size 
__gmpf_size +#define mpf_sqrt __gmpf_sqrt +#define mpf_sqrt_ui __gmpf_sqrt_ui +#define mpf_sub __gmpf_sub +#define mpf_sub_ui __gmpf_sub_ui +#define mpf_trunc __gmpf_trunc +#define mpf_ui_div __gmpf_ui_div +#define mpf_ui_sub __gmpf_ui_sub +#define mpf_urandomb __gmpf_urandomb + +#if defined (__cplusplus) +extern "C" { +#endif +DLL_IMPORT void mpf_abs _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_add _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); +DLL_IMPORT void mpf_add_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_ceil _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_clear _PROTO ((mpf_ptr)); +DLL_IMPORT int mpf_cmp _PROTO ((mpf_srcptr, mpf_srcptr)); +DLL_IMPORT int mpf_cmp_si _PROTO ((mpf_srcptr, signed long int)); +DLL_IMPORT int mpf_cmp_ui _PROTO ((mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_div _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); +DLL_IMPORT void mpf_div_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_div_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_dump _PROTO ((mpf_srcptr)); +DLL_IMPORT int mpf_eq _PROTO ((mpf_srcptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_floor _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT double mpf_get_d _PROTO ((mpf_srcptr)); +DLL_IMPORT unsigned long int mpf_get_prec _PROTO ((mpf_srcptr)); +char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr)); +DLL_IMPORT void mpf_init _PROTO ((mpf_ptr)); +DLL_IMPORT void mpf_init2 _PROTO ((mpf_ptr, unsigned long int)); +#ifdef _GMP_H_HAVE_FILE +DLL_IMPORT size_t mpf_inp_str _PROTO ((mpf_ptr, FILE *, int)); +#endif +DLL_IMPORT void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_init_set_d _PROTO ((mpf_ptr, double)); +DLL_IMPORT void mpf_init_set_si _PROTO ((mpf_ptr, signed long int)); +DLL_IMPORT int mpf_init_set_str _PROTO ((mpf_ptr, __gmp_const char *, int)); +DLL_IMPORT void mpf_init_set_ui _PROTO ((mpf_ptr, unsigned long int)); 
+DLL_IMPORT void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); +DLL_IMPORT void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr)); +#ifdef _GMP_H_HAVE_FILE +DLL_IMPORT size_t mpf_out_str _PROTO ((FILE *, int, size_t, mpf_srcptr)); +#endif +DLL_IMPORT void mpf_pow_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_random2 _PROTO ((mpf_ptr, mp_size_t, mp_exp_t)); +DLL_IMPORT void mpf_reldiff _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); +DLL_IMPORT void mpf_set _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_set_d _PROTO ((mpf_ptr, double)); +DLL_IMPORT void mpf_set_default_prec _PROTO ((unsigned long int)); +DLL_IMPORT void mpf_set_prec _PROTO ((mpf_ptr, unsigned long int)); +DLL_IMPORT void mpf_set_prec_raw _PROTO ((mpf_ptr, unsigned long int)); +DLL_IMPORT void mpf_set_q _PROTO ((mpf_ptr, mpq_srcptr)); +DLL_IMPORT void mpf_set_si _PROTO ((mpf_ptr, signed long int)); +DLL_IMPORT int mpf_set_str _PROTO ((mpf_ptr, __gmp_const char *, int)); +DLL_IMPORT void mpf_set_ui _PROTO ((mpf_ptr, unsigned long int)); +DLL_IMPORT void mpf_set_z _PROTO ((mpf_ptr, mpz_srcptr)); +DLL_IMPORT size_t mpf_size _PROTO ((mpf_srcptr)); +DLL_IMPORT void mpf_sqrt _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_sqrt_ui _PROTO ((mpf_ptr, unsigned long int)); +DLL_IMPORT void mpf_sub _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); +DLL_IMPORT void mpf_sub_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); +DLL_IMPORT void mpf_trunc _PROTO ((mpf_ptr, mpf_srcptr)); +DLL_IMPORT void mpf_ui_div _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); +DLL_IMPORT void mpf_ui_sub _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); +DLL_IMPORT void mpf_urandomb _PROTO ((mpf_t, gmp_randstate_t, unsigned long int)); + +#define mpf_swap __gmpf_swap +DLL_IMPORT void mpf_swap _PROTO ((mpf_ptr, mpf_ptr)); + +#if defined 
(__cplusplus) +} +#endif +/************ Low level positive-integer (i.e. N) routines. ************/ + +/* This is ugly, but we need to make user calls reach the prefixed function. */ +#define mpn_add __MPN(add) +#define mpn_add_1 __MPN(add_1) +#define mpn_add_n __MPN(add_n) +#define mpn_add_nc __MPN(add_nc) +#define mpn_addmul_1 __MPN(addmul_1) +#define mpn_addsub_n __MPN(addsub_n) +#define mpn_addsub_nc __MPN(addsub_nc) +/* #define mpn_and_n __MPN(and_n) */ +/* #define mpn_andn_n __MPN(andn_n) */ +#define mpn_bdivmod __MPN(bdivmod) +#define mpn_cmp __MPN(cmp) +/* #define mpn_com_n __MPN(com_n) */ +#define mpn_copyd __MPN(copyd) +#define mpn_copyi __MPN(copyi) +#define mpn_divrem __MPN(divrem) +#define mpn_divrem_1 __MPN(divrem_1) +#define mpn_divrem_2 __MPN(divrem_2) +#define mpn_dump __MPN(dump) +#define mpn_gcd __MPN(gcd) +#define mpn_gcd_1 __MPN(gcd_1) +#define mpn_gcdext __MPN(gcdext) +#define mpn_get_str __MPN(get_str) +#define mpn_hamdist __MPN(hamdist) +#define mpn_invert_limb __MPN(invert_limb) +/* #define mpn_ior_n __MPN(ior_n) */ +/* #define mpn_iorn_n __MPN(iorn_n) */ +/* #define mpn_kara_mul_n __MPN(kara_mul_n) internal */ +/* #define mpn_kara_sqr_n __MPN(kara_sqr_n) internal */ +#define mpn_lshift __MPN(lshift) +#define mpn_lshiftc __MPN(lshiftc) +#define mpn_mod_1 __MPN(mod_1) +#define mpn_mul __MPN(mul) +#define mpn_mul_1 __MPN(mul_1) +#define mpn_mul_basecase __MPN(mul_basecase) +#define mpn_mul_n __MPN(mul_n) +#define mpn_perfect_square_p __MPN(perfect_square_p) +#define mpn_popcount __MPN(popcount) +#define mpn_preinv_mod_1 __MPN(preinv_mod_1) +/* #define mpn_nand_n __MPN(nand_n) */ +/* #define mpn_nior_n __MPN(nior_n) */ +#define mpn_random __MPN(random) +#define mpn_random2 __MPN(random2) +#define mpn_rshift __MPN(rshift) +#define mpn_rshiftc __MPN(rshiftc) +#define mpn_scan0 __MPN(scan0) +#define mpn_scan1 __MPN(scan1) +#define mpn_set_str __MPN(set_str) +#define mpn_sqr_basecase __MPN(sqr_basecase) +#define mpn_sqr_n __MPN(sqr_n) +#define 
mpn_sqrtrem __MPN(sqrtrem) +#define mpn_sub __MPN(sub) +#define mpn_sub_1 __MPN(sub_1) +#define mpn_sub_n __MPN(sub_n) +#define mpn_sub_nc __MPN(sub_nc) +#define mpn_submul_1 __MPN(submul_1) +/* #define mpn_toom3_mul_n __MPN(toom3_mul_n) internal */ +/* #define mpn_toom3_sqr_n __MPN(toom3_sqr_n) internal */ +/* #define mpn_xnor_n __MPN(xnor_n) */ +/* #define mpn_xor_n __MPN(xor_n) */ + +#if defined (__cplusplus) +extern "C" { +#endif + +DLL_IMPORT mp_limb_t mpn_add _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t)); +DLL_IMPORT mp_limb_t mpn_add_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); +DLL_IMPORT mp_limb_t mpn_add_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +DLL_IMPORT mp_limb_t mpn_add_nc _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); + +DLL_IMPORT mp_limb_t mpn_addmul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_addmul_1c __MPN(addmul_1c) +DLL_IMPORT mp_limb_t mpn_addmul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); + +DLL_IMPORT mp_limb_t mpn_addsub_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +DLL_IMPORT mp_limb_t mpn_bdivmod _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, unsigned long int)); +DLL_IMPORT int mpn_cmp _PROTO ((mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_divexact_by3(dst, src, size) mpn_divexact_by3c (dst, src, size, 0) + +#define mpn_divexact_by3c __MPN(divexact_by3c) +DLL_IMPORT mp_limb_t mpn_divexact_by3c _PROTO ((mp_ptr dst, mp_srcptr src, + mp_size_t size, mp_limb_t carry)); + +#define mpn_divmod_1(qp,np,nsize,dlimb) mpn_divrem_1 (qp,0,np,nsize,dlimb) + +DLL_IMPORT mp_limb_t mpn_divrem _PROTO((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); + +DLL_IMPORT mp_limb_t mpn_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_divrem_1c __MPN(divrem_1c) +DLL_IMPORT mp_limb_t mpn_divrem_1c _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, + mp_limb_t, mp_limb_t)); + +DLL_IMPORT 
mp_limb_t mpn_divrem_2 _PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)); +DLL_IMPORT void mpn_dump _PROTO ((mp_srcptr, mp_size_t)); +mp_size_t mpn_gcd _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +DLL_IMPORT mp_limb_t mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t)); +mp_size_t mpn_gcdext _PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +DLL_IMPORT size_t mpn_get_str _PROTO ((unsigned char *, int, mp_ptr, mp_size_t)); +DLL_IMPORT unsigned long int mpn_hamdist _PROTO ((mp_srcptr, mp_srcptr, mp_size_t)); + +#define mpn_jacobi_base __MPN(jacobi_base) +DLL_IMPORT int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)); + +DLL_IMPORT mp_limb_t mpn_lshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int)); +DLL_IMPORT mp_limb_t mpn_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_mod_1c __MPN(mod_1c) +DLL_IMPORT mp_limb_t mpn_mod_1c _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); + +#define mpn_mod_1_rshift __MPN(mod_1_rshift) +DLL_IMPORT mp_limb_t mpn_mod_1_rshift _PROTO ((mp_srcptr, mp_size_t, unsigned,mp_limb_t)); + +DLL_IMPORT mp_limb_t mpn_mul _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); +DLL_IMPORT mp_limb_t mpn_mul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); + +#define mpn_mul_1c __MPN(mul_1c) +DLL_IMPORT mp_limb_t mpn_mul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); + +DLL_IMPORT void mpn_mul_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); +DLL_IMPORT void mpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +DLL_IMPORT int mpn_perfect_square_p _PROTO ((mp_srcptr, mp_size_t)); +DLL_IMPORT unsigned long int mpn_popcount _PROTO ((mp_srcptr, mp_size_t)); +DLL_IMPORT mp_limb_t mpn_preinv_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); +DLL_IMPORT void mpn_random _PROTO ((mp_ptr, mp_size_t)); +DLL_IMPORT void mpn_random2 _PROTO ((mp_ptr, mp_size_t)); +DLL_IMPORT mp_limb_t 
/* mpn_incr_u -- add the single limb INCR, in place, to the multi-limb
   number whose least significant limb is at P.

   The low limb is incremented by INCR; if that wraps, higher limbs are
   incremented by one for as long as each of them wraps to zero.  There is
   no length argument, so the caller must guarantee that the carry stops
   inside the operand.

   Both arguments are evaluated exactly once and are fully parenthesized,
   so arbitrary expressions (e.g. a conditional expression) may be passed
   as P or INCR.  */
#define mpn_incr_u(p,incr) \
  do { mp_limb_t __x; mp_limb_t __incr = (incr); mp_ptr __p = (p);	\
    __x = *__p + __incr;						\
    *__p = __x;								\
    if (__x < __incr)		/* low limb wrapped: propagate carry */	\
      while (++(*(++__p)) == 0)						\
        ;								\
  } while (0)

/* mpn_decr_u -- subtract the single limb INCR, in place, from the
   multi-limb number whose least significant limb is at P.

   The low limb is decremented by INCR; if that borrows, higher limbs are
   decremented by one for as long as the limb being decremented was zero.
   There is no length argument, so the caller must guarantee that the
   borrow stops inside the operand.

   Both arguments are evaluated exactly once and are fully parenthesized.  */
#define mpn_decr_u(p,incr) \
  do { mp_limb_t __x; mp_limb_t __incr = (incr); mp_ptr __p = (p);	\
    __x = *__p;								\
    *__p = __x - __incr;						\
    if (__x < __incr)		/* low limb borrowed: propagate */	\
      while ((*(++__p))-- == 0)						\
        ;								\
  } while (0)
register mp_srcptr s1_ptr; + register mp_size_t s1_size; + register mp_limb_t s2_limb; +#endif +{ + register mp_limb_t x; + + x = *s1_ptr++; + s2_limb = x + s2_limb; + *res_ptr++ = s2_limb; + if (s2_limb < x) + { + while (--s1_size != 0) + { + x = *s1_ptr++ + 1; + *res_ptr++ = x; + if (x != 0) + goto fin; + } + + return 1; + } + + fin: + if (res_ptr != s1_ptr) + { + mp_size_t i; + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + +_EXTERN_INLINE mp_limb_t +#if (__STDC__-0) || defined (__cplusplus) +mpn_add (register mp_ptr res_ptr, + register mp_srcptr s1_ptr, + register mp_size_t s1_size, + register mp_srcptr s2_ptr, + register mp_size_t s2_size) +#else +mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_size_t s1_size; + register mp_srcptr s2_ptr; + register mp_size_t s2_size; +#endif +{ + mp_limb_t cy_limb = 0; + + if (s2_size != 0) + cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size != 0) + cy_limb = mpn_add_1 (res_ptr + s2_size, + s1_ptr + s2_size, + s1_size - s2_size, + cy_limb); + return cy_limb; +} + +_EXTERN_INLINE mp_limb_t +#if (__STDC__-0) || defined (__cplusplus) +mpn_sub_1 (register mp_ptr res_ptr, + register mp_srcptr s1_ptr, + register mp_size_t s1_size, + register mp_limb_t s2_limb) +#else +mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_size_t s1_size; + register mp_limb_t s2_limb; +#endif +{ + register mp_limb_t x; + + x = *s1_ptr++; + s2_limb = x - s2_limb; + *res_ptr++ = s2_limb; + if (s2_limb > x) + { + while (--s1_size != 0) + { + x = *s1_ptr++; + *res_ptr++ = x - 1; + if (x != 0) + goto fin; + } + + return 1; + } + + fin: + if (res_ptr != s1_ptr) + { + mp_size_t i; + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + +_EXTERN_INLINE mp_limb_t +#if (__STDC__-0) || defined (__cplusplus) +mpn_sub (register mp_ptr 
/* Allow faster testing for negative, zero, and positive.  Each macro
   yields -1, 0 or 1 from the sign of the _mp_size field (for mpq, the
   _mp_size of the numerator).  The argument is evaluated twice.  */
#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0)
#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0)
#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0)

/* When using GCC, optimize certain common comparisons.

   A comparison against a compile-time constant zero reduces to a sign
   test, and mpz_cmp_si against a compile-time positive constant is routed
   to _mpz_cmp_ui.  Everything else goes to the out-of-line function.

   In mpz_cmp_si the whole of SI is parenthesized before the cast to
   unsigned long; otherwise the cast would bind to the first operand of an
   expression argument only, and e.g. `-4 / -2' would be evaluated in
   unsigned arithmetic.  */
#if defined (__GNUC__)
#define mpz_cmp_ui(Z,UI) \
  (__builtin_constant_p (UI) && (UI) == 0				\
   ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI))
#define mpz_cmp_si(Z,SI) \
  (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z)			\
   : __builtin_constant_p (SI) && (SI) > 0				\
    ? _mpz_cmp_ui (Z, (unsigned long int) (SI))				\
   : _mpz_cmp_si (Z,SI))
#define mpq_cmp_ui(Q,NUI,DUI) \
  (__builtin_constant_p (NUI) && (NUI) == 0				\
   ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI))
#else
/* Other compilers: always call the out-of-line functions.  */
#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI)
#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI)
#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI)
#endif
/* Compatibility with GMP 1.

   The old "mdiv"/"mmod" names map onto the floor-division (fdiv) family.
   The two function-like macros pick the variant without a remainder
   operand when R compares equal to 0, and the variant that takes R
   otherwise.  R is parenthesized in the test so that an expression
   argument (for instance a conditional expression) is compared as a
   whole.  Note that R is expanded twice when it is non-zero.  */
#define mpz_mdiv	mpz_fdiv_q
#define mpz_mdivmod	mpz_fdiv_qr
#define mpz_mmod	mpz_fdiv_r
#define mpz_mdiv_ui	mpz_fdiv_q_ui
#define mpz_mdivmod_ui(q,r,n,d) \
  (((r) == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
#define mpz_mmod_ui(r,n,d) \
  (((r) == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
/* __gmp_scale2 -- return D multiplied by 2 raised to the power EXP.

   Two implementations are selected at compile time:

   * _GMP_IEEE_FLOATS non-zero: the biased exponent field of the IEEE
     double is adjusted directly.  A resulting biased exponent of 2047 or
     more yields +-infinity; one below 1 is clamped to 1 and the value is
     then halved until the requested scale is reached or it becomes zero.
     NOTE(review): apart from zero, D is presumably expected to be a
     normalized finite number here; denormal, Inf and NaN inputs are not
     treated specially -- confirm against the callers.

   * otherwise: D is multiplied by 2.0 (or 0.5 for negative EXP) raised to
     |EXP| using binary exponentiation, relying on ordinary floating-point
     overflow/underflow.

   A zero D is returned unchanged by both variants.  */
double
#if __STDC__
__gmp_scale2 (double d, int exp)
#else
__gmp_scale2 (d, exp)
     double d;
     int exp;
#endif
{
#if _GMP_IEEE_FLOATS
  {
#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
    /* Work around alpha-specific bug in GCC 2.8.x.  */
    volatile
#endif
    union ieee_double_extract x;

    /* Zero has an all-zero exponent field; writing a new exponent into it
       would fabricate a non-zero power of two (or infinity).  Scaling zero
       must give zero, as the generic code below does.  */
    if (d == 0)
      return d;

    x.d = d;
    exp += x.s.exp;
    x.s.exp = exp;
    if (exp >= 2047)
      {
	/* Return +-infinity */
	x.s.exp = 2047;
	x.s.manl = x.s.manh = 0;
      }
    else if (exp < 1)
      {
	x.s.exp = 1;		/* smallest exponent (biased) */
	/* Divide result by 2 until we have scaled it to the right IEEE
	   denormalized number, but stop if it becomes zero.  */
	while (exp < 1 && x.d != 0)
	  {
	    x.d *= 0.5;
	    exp++;
	  }
      }
    return x.d;
  }
#else
  {
    double factor, r;
    unsigned int n;

    /* Work with the magnitude of EXP as an unsigned value.  Negating in
       unsigned arithmetic is well defined even for the most negative int,
       where `exp = -exp' would overflow and the shift loop below would
       never reach zero.  */
    if (exp < 0)
      {
	factor = 0.5;
	n = 0U - (unsigned int) exp;
      }
    else
      {
	factor = 2.0;
	n = (unsigned int) exp;
      }

    /* Binary exponentiation: FACTOR runs through base^(2^k), and is
       multiplied into R for every set bit k of N.  */
    r = d;
    if (n != 0)
      {
	if ((n & 1) != 0)
	  r *= factor;
	n >>= 1;
	while (n != 0)
	  {
	    factor *= factor;
	    if ((n & 1) != 0)
	      r *= factor;
	    n >>= 1;
	  }
      }
    return r;
  }
#endif
}
+ +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + chmodcmd="" + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. 
+ + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. 
+ + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/rts/gmp/longlong.h b/rts/gmp/longlong.h new file mode 100644 index 0000000000..9a12755053 --- /dev/null +++ b/rts/gmp/longlong.h @@ -0,0 +1,1347 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000 Free Software +Foundation, Inc. + +This file is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this file; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype. + UDWtype -- An unsigned type, at least twice as large a UWtype + W_TYPE_SIZE -- size in bits of UWtype + + SItype, USItype -- Signed and unsigned 32 bit types. + DItype, UDItype -- Signed and unsigned 64 bit types. + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +#ifndef _PROTO +#if (__STDC__-0) || defined (__cplusplus) +#define _PROTO(x) x +#else +#define _PROTO(x) () +#endif +#endif + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + word product in HIGH_PROD and LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + UDWtype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed by the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). 
Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first non-zero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and + LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +/* The CPUs come in alphabetical order below. + + Please add support for more CPUs here, or improve the current support + for the CPUs below!
*/ + +#if defined (__alpha) && W_TYPE_SIZE == 64 +#if defined (__GNUC__) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh %r1,%2,%0" \ + : "=r" (ph) \ + : "%rJ" (m0), "rI" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 18 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#define UDIV_NEEDS_NORMALIZATION 1 +#define UDIV_TIME 220 +long __MPN(count_leading_zeros) (); +#define count_leading_zeros(count, x) \ + ((count) = __MPN(count_leading_zeros) (x)) +#endif /* LONGLONG_STANDALONE */ +#else /* ! __GNUC__ */ +#include <machine/builtins.h> +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __UMULH (m0, m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#endif /* __alpha */ + +#if defined (__hppa) && W_TYPE_SIZE == 64 +/* We put the result pointer parameter last here, since it makes passing + of the other parameters more efficient. 
*/ +#ifndef LONGLONG_STANDALONE +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UDItype __p0; \ + (wh) = __MPN(umul_ppmm) (u, v, &__p0); \ + (wl) = __p0; \ + } while (0) +extern UDItype __MPN(umul_ppmm) _PROTO ((UDItype, UDItype, UDItype *)); +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \ + (r) = __r; \ + } while (0) +extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *)); +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#endif /* LONGLONG_STANDALONE */ +#endif /* hppa */ + +#if defined (__ia64) && W_TYPE_SIZE == 64 +#if defined (__GNUC__) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("xma.hu %0 = %1, %2, f0" \ + : "=e" (ph) \ + : "e" (m0), "e" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#endif + + +#if defined (__GNUC__) && !defined (NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. 
*/ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl)) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" (xl) \ + : "r" (__m0), "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" (xh) \ + : "r" (__m0), "r" (__m1)); \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" (q), "=q" (r) \ + : "1" (n1), "r" (n0), "r" (d)) +#define count_leading_zeros(count, x) \ + __asm__ ("clz %0,%1" \ + : "=r" (count) \ + : "r" (x)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __a29k__ */ + +#if defined (__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl)) +#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) +#define smul_ppmm(xh, xl, a, b) \ + __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) +#define UMUL_TIME 5 +#else +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + "mov %|r0, %2, lsr #16\n" \ + "mov %|r2, %3, lsr #16\n" \ + "bic %|r1, %2, %|r0, 
lsl #16\n" \ + "bic %|r2, %3, %|r2, lsl #16\n" \ + "mul %1, %|r1, %|r2\n" \ + "mul %|r2, %|r0, %|r2\n" \ + "mul %|r1, %0, %|r1\n" \ + "mul %0, %|r0, %0\n" \ + "adds %|r1, %|r2, %|r1\n" \ + "addcs %0, %0, #65536\n" \ + "adds %1, %1, %|r1, lsl #16\n" \ + "adc %0, %0, %|r1, lsr #16" \ + : "=&r" (xh), "=r" (xl) \ + : "r" (a), "r" (b) \ + : "r0", "r1", "r2") +#define UMUL_TIME 20 +#endif +#define UDIV_TIME 100 +#endif /* __arm__ */ + +#if defined (__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__x.__ll) \ + : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __x; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__x.__ll) \ + : "%0" ((SItype)(u)), "r" ((SItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +/* Fujitsu vector computers. 
*/ +#if defined (__uxp__) && W_TYPE_SIZE == 32 +#define umul_ppmm(ph, pl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\ + (ph) = __x.__i.__h; \ + (pl) = __x.__i.__l; \ + } while (0) +#define smul_ppmm(ph, pl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \ + (ph) = __x.__i.__h; \ + (pl) = __x.__i.__l; \ + } while (0) +#endif + +#if defined (__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n\taddx %3,%0" \ + : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \ + : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bsch/1 %1,%0" \ + : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0)) +#endif + +#if defined (__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl)) +#if defined (_PA_RISC1_1) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + 
union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype)); +#endif /* LONGLONG_STANDALONE */ +#define count_leading_zeros(count, x) \ + do { \ + USItype __tmp; \ + __asm__ ( \ + "ldi 2,%0\n" \ + "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ + "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ + "ldo 16(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ + "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ + "ldo 8(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ + "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ + "ldo 4(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ + "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ + "ldo 2(%0),%0 ; Yes. 
Perform add.\n" \ + "extru %1,30,1,%1 ; Extract bit 1.\n" \ + "sub %0,%1,%0 ; Subtract it.\n" \ + : "=r" (count), "=r" (__tmp) : "1" (x)); \ + } while (0) +#endif /* hppa */ + +#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32 +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mr %0,%3" \ + : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \ + : "%1" (m0), "r" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__x.__ll) \ + : "0" (__x.__ll), "r" (d)); \ + (q) = __x.__i.__l; (r) = __x.__i.__h; \ + } while (0) +#endif + +#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1\n\tadcl %3,%0" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1\n\tsbbl %3,%0" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" (w0), "=d" (w1) \ + : "%0" ((USItype)(u)), "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" (q), "=d" (r) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define count_trailing_zeros(count, x) \ + __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#ifndef UMUL_TIME +#define UMUL_TIME 10 +#endif +#ifndef UDIV_TIME +#define 
UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +#if defined (__i860__) && W_TYPE_SIZE == 32 +/* rshift_rhlc: one asm statement with output r and inputs h, l, c. The output operand list must be introduced by a colon after the template string. */ +#define rshift_rhlc(r,h,l,c) \ + __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \ + : "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +#if defined (__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "%dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl)) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \ + __w; }) +/* udiv_qrnnd: __nn carries the two-word numerator (nh:nl) into the asm; __rq receives the asm result, whose low word is stored to r and high word to q. NOTE(review): the %d and %n operand references in the ediv template look suspect -- confirm against the i960 backend. */ +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn, __rq; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ + } while (0) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 (-32) /* sic */ +#if defined (__i960mx) /* what is the proper symbol to test???
*/ +/* rshift_rhlc: packs h and l into a two-word value and passes it with c to shre, writing r. Wrapped in do { } while (0) so that the macro followed by a semicolon expands to exactly one statement. */ +#define rshift_rhlc(r,h,l,c) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ + } while (0) +#endif /* i960mx */ +#endif /* i960 */ + +#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \ + || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \ + || defined (__mc5307__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ + : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ + : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.
*/ +#if defined (__mc68020__) || defined(mc68020) \ + || defined (__mc68030__) || defined (mc68030) \ + || defined (__mc68040__) || defined (mc68040) \ + || defined (__mc68332__) || defined (mc68332) \ + || defined (__NeXT__) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) +#else /* for other 68k family members use 16x16->32 multiplication */ +#define umul_ppmm(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm\n" \ + "move%.l %5,%3\n" \ + "move%.l %2,%0\n" \ + "move%.w %3,%1\n" \ + "swap %3\n" \ + "swap %0\n" \ + "mulu%.w %2,%1\n" \ + "mulu%.w %3,%0\n" \ + "mulu%.w %2,%3\n" \ + "swap %2\n" \ + "mulu%.w %5,%2\n" \ + "add%.l %3,%2\n" \ + "jcc 1f\n" \ + "add%.l %#0x10000,%0\n" \ +"1: move%.l %2,%3\n" \ + "clr%.w %2\n" \ + "swap %2\n" \ + "swap %3\n" \ + "clr%.w %3\n" \ + "add%.l %3,%1\n" \ + "addx%.l %2,%0\n" \ + "| End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +/* The '020, '030, '040 and '060 have bitfield insns. 
*/ +#if defined (__mc68020__) || defined (mc68020) \ + || defined (__mc68030__) || defined (mc68030) \ + || defined (__mc68040__) || defined (mc68040) \ + || defined (__mc68060__) || defined (mc68060) \ + || defined (__NeXT__) +#define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" ((USItype) (count)) \ + : "od" ((USItype) (x)), "n" (0)) +#define COUNT_LEADING_ZEROS_0 32 +#endif +#endif /* mc68000 */ + +#if defined (__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ + : "=r" (sh), "=&r" (sl) \ + : "%rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl)) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 63 /* sic */ +#if defined (__m88110__) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +/* udiv_qrnnd: __x carries the two-word numerator (n1:n0) into divu.d and __q receives its result. The quotient is read from the low word of __q, which must be reached through the __i member of the union; the remainder is recomputed as n0 - quotient * d. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +#if defined (__mips) && W_TYPE_SIZE == 32 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) +#else +#define umul_ppmm(w1, w0, u, v) \ +
__asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \ + : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) +#endif +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips */ + +#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \ + : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips */ + +#if defined (__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("meid %2,%0" \ + : "=g" (__x.__ll) \ + : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__x.__ll) \ + : "0" (__x.__ll), "g" ((USItype)(d))); \ + (r) = __x.__i.__l; (q) = __x.__i.__h; }) +#define count_trailing_zeros(count,x) \ + do { \ + __asm__ ("ffsd %2,%0" \ + : "=r" ((USItype) (count)) \ + : "0" ((USItype) 0), "r" ((USItype) (x))); \ + } while (0) +#endif /* __ns32000__ */ + +/* We should test _IBMR2 here when we add assembly support for the system + vendor compilers. 
*/ +#if (defined (_ARCH_PPC) || defined (_ARCH_PWR) || defined (__powerpc__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 32 +#if defined (_ARCH_PPC) || defined (__powerpc__) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) 
+#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d)) +#define UDIV_TIME 100 +#endif +#endif /* 32-bit POWER architecture variants. */ + +/* We should test _IBMR2 here when we add assembly support for the system + vendor compilers. */ +#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 
~(UDItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 64 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + DItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 /* ??? */ +#define UDIV_TIME 120 /* ??? */ +#endif /* 64-bit PowerPC. */ + +#if defined (__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1\n\taddwc %3,%0" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1\n\tsubwb %3,%0" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. 
*/ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("movw %1,%R0\n\tuemul %2,%0" \ + : "=&r" (__x.__ll) \ + : "g" ((USItype) (u)), "g" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#endif /* __pyr__ */ + +#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n\tae %0,%3" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n\tse %0,%3" \ + : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), "r" ((USItype)(bl))) +#define smul_ppmm(ph, pl, m0, m1) \ + __asm__ ( \ + "s r2,r2\n" \ + "mts r10,%2\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "cas %0,r2,r0\n" \ + "mfs r10,%1" \ + : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \ + : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \ + : "r2"); \ +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#define count_leading_zeros(count, x) \ + do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \ + else \ + { \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \ + (count) += 16; \ + } \ + } while (0) +#endif /* RT/ROMP */ + +#if defined (__sh2__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \ + : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach") +#define UMUL_TIME 5 +#endif + +#if defined (__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc 
%r4,%5,%1\n\taddx %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "%rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \ + __CLOBBER_CC) +#if defined (__sparc_v9__) || defined (__sparcv9) +/* Perhaps we should use floating-point operations here? */ +#if 0 +/* Triggers a bug making mpz/tests/t-gcd.c fail. + Perhaps we simply need explicitly zero-extend the inputs? */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \ + "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1") +#else +/* Use v8 umul until above bug is fixed. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) +#endif +/* Use a plain v8 divide for v9. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +#else +#if defined (__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) +#define UMUL_TIME 5 +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +#define UDIV_TIME 25 +#else +#define UDIV_TIME 60 /* SuperSPARC timing */ +#endif /* SUPERSPARC */ +#else /* ! 
__sparc_v8__ */ +#if defined (__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ + "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ + "tst %%g0\n" \ + "divscc %3,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%0\n" \ + "rd %%y,%1\n" \ + "bl,a 1f\n" \ + "add %1,%4,%1\n" \ +"1: ! End of inline udiv_qrnnd" \ + : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \ + : "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#define count_leading_zeros(count, x) \ + __asm__ ("scan %1,0,%0" : "=r" (x) : "r" (count)) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +#endif /* __sparc_v9__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. 
*/ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ + "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ + "sra %3,31,%%g2 ! Don't move this insn\n" \ + "and %2,%%g2,%%g2 ! Don't move this insn\n" \ + "andcc %%g0,0,%%g1 ! Don't move this insn\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,0,%%g1\n" \ + "add %%g1,%%g2,%0\n" \ + "rd %%y,%1" \ + : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +#endif +#ifndef udiv_qrnnd +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype)); +#ifndef UDIV_TIME +#define UDIV_TIME 140 +#endif +#endif /* LONGLONG_STANDALONE */ +#endif /* udiv_qrnnd */ +#endif /* __sparc__ */ + +#if defined (__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ + : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ + : "%0" 
((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ + : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \ + } while (0) +#endif /* __vax__ */ + +#if defined (__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {long int __ll; \ + struct {unsigned int __h, __l;} __i; \ + } __x; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \ + : "%1" (m0), "rQR" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ + } while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + + +#if !defined (umul_ppmm) && defined (__umulsidi3) +#define 
umul_ppmm(ph, pl, m0, m1) \ + { \ + UDWtype __ll = __umulsidi3 (m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ + } +#endif + +#if !defined (__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm (__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + + +/* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA + versions above are different and we don't want to conflict. */ + +#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm +#define mpn_umul_ppmm __MPN(umul_ppmm) +extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t)); +#define umul_ppmm(wh, wl, u, v) \ + do { \ + mp_limb_t __umul_ppmm__p0; \ + (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \ + (mp_limb_t) (u), (mp_limb_t) (v)); \ + (wl) = __umul_ppmm__p0; \ + } while (0) +#endif + +#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd +#define mpn_udiv_qrnnd __MPN(udiv_qrnnd) +extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *, + mp_limb_t, mp_limb_t, mp_limb_t)); +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + mp_limb_t __udiv_qrnnd__r; \ + (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \ + (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \ + (r) = __udiv_qrnnd__r; \ + } while (0) +#endif + + +/* If this machine has no inline assembler, use C macros. */ + +#if !defined (add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) +#endif + +#if !defined (sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) +#endif + +/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of + smul_ppmm. 
*/ +#if !defined (umul_ppmm) && defined (smul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __xm0 = (u), __xm1 = (v); \ + smul_ppmm (__w1, w0, __xm0, __xm1); \ + (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ + + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ + } while (0) +#endif + +/* If we still don't have umul_ppmm, define it using plain C. */ +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \ + } while (0) +#endif + +/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will + exist in one form or another. */ +#if !defined (smul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __xm0 = (u), __xm1 = (v); \ + umul_ppmm (__w1, w0, __xm0, __xm1); \ + (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ + - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ + } while (0) +#endif + +/* Define this unconditionally, so it can be used for debugging. 
*/ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __q1 = (n1) / __d1; \ + __r1 = (n1) - __q1 * __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __q0 = __r1 / __d1; \ + __r0 = __r1 - __q0 * __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + UWtype __r; \ + (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) +#endif + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined (udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#if !defined (count_leading_zeros) +extern +#if __STDC__ +const +#endif +unsigned char __clz_tab[]; +#define count_leading_zeros(count, x) \ + do { \ + UWtype __xr = (x); \ + UWtype __a; \ + \ + if (W_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((UWtype) 1 << 2*__BITS4) \ + ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ + : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\ + } \ + else \ + { \ + for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) +/* This version gives a well-defined value for zero. 
*/ +#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#endif + +#if !defined (count_trailing_zeros) +/* Define count_trailing_zeros using count_leading_zeros. The latter might be + defined in asm, but if it is not, the C version above is good enough. */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + UWtype __ctz_c; \ + count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ + (count) = W_TYPE_SIZE - 1 - __ctz_c; \ + } while (0) +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif + +/* Give defaults for UMUL_TIME and UDIV_TIME. */ +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* count_trailing_zeros is often on the slow side, so make that the default */ +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 15 /* cycles */ +#endif + + diff --git a/rts/gmp/ltconfig b/rts/gmp/ltconfig new file mode 100644 index 0000000000..6d8cf33e8f --- /dev/null +++ b/rts/gmp/ltconfig @@ -0,0 +1,3109 @@ +#! /bin/sh + +# ltconfig - Create a system-specific libtool. +# Copyright (C) 1996-2000 Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996 +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# A lot of this script is taken from autoconf-2.10. + +# Check that we are running under the correct shell. +SHELL=${CONFIG_SHELL-/bin/sh} +echo=echo +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell. + exec "$SHELL" "$0" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat <<EOF +$* +EOF + exit 0 +fi + +# Find the correct PATH separator. Usually this is `:', but +# DJGPP uses `;' like DOS. +if test "X${PATH_SEPARATOR+set}" != Xset; then + UNAME=${UNAME-`uname 2>/dev/null`} + case X$UNAME in + *-DOS) PATH_SEPARATOR=';' ;; + *) PATH_SEPARATOR=':' ;; + esac +fi + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi + +if test "X${echo_test_string+set}" != Xset; then + # find a string as large as possible, as long as the shell can cope with it + for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do + # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ... 
+ if (echo_test_string="`eval $cmd`") 2>/dev/null && + echo_test_string="`eval $cmd`" && + (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null; then + break + fi + done +fi + +if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + : +else + # The Solaris, AIX, and Digital Unix default echo programs unquote + # backslashes. This makes it impossible to quote backslashes using + # echo "$something" | sed 's/\\/\\\\/g' + # + # So, first we look for a working echo in the user's PATH. + + IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" + for dir in $PATH /usr/ucb; do + if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && + test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$dir/echo" + break + fi + done + IFS="$save_ifs" + + if test "X$echo" = Xecho; then + # We didn't find a better echo, so look for alternatives. + if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # This shell has a builtin print -r that does the trick. + echo='print -r' + elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && + test "X$CONFIG_SHELL" != X/bin/ksh; then + # If we have ksh, try running ltconfig again with it. + ORIGINAL_CONFIG_SHELL="${CONFIG_SHELL-/bin/sh}" + export ORIGINAL_CONFIG_SHELL + CONFIG_SHELL=/bin/ksh + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" --no-reexec ${1+"$@"} + else + # Try using printf. 
+ echo='printf "%s\n"' + if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # Cool, printf works + : + elif echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + CONFIG_SHELL="$ORIGINAL_CONFIG_SHELL" + export CONFIG_SHELL + SHELL="$CONFIG_SHELL" + export SHELL + echo="$CONFIG_SHELL $0 --fallback-echo" + elif echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$CONFIG_SHELL $0 --fallback-echo" + else + # maybe with a smaller string... + prev=: + + for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do + if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null; then + break + fi + prev="$cmd" + done + + if test "$prev" != 'sed 50q "$0"'; then + echo_test_string=`eval $prev` + export echo_test_string + exec "${ORIGINAL_CONFIG_SHELL}" "$0" ${1+"$@"} + else + # Oops. We lost completely, so just stick with echo. + echo=echo + fi + fi + fi + fi +fi + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e s/^X//' +sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. 
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# The name of this program. +progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'` + +# Constants: +PROGRAM=ltconfig +PACKAGE=libtool +VERSION=1.3c +TIMESTAMP=" (1.696 2000/03/14 20:22:42)" +ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' +ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' +rm="rm -f" + +help="Try \`$progname --help' for more information." + +# Global variables: +default_ofile=libtool +can_build_shared=yes +enable_shared=yes +# All known linkers require a `.a' archive for static linking (except M$VC, +# which needs '.lib'). +enable_static=yes +enable_fast_install=yes +enable_dlopen=unknown +enable_win32_dll=no +pic_mode=default +ltmain= +silent= +srcdir= +ac_config_guess= +ac_config_sub= +host= +build=NONE +nonopt=NONE +ofile="$default_ofile" +verify_host=yes +with_gcc=no +with_gnu_ld=no +need_locks=yes +ac_ext=c +libext=a +cache_file= + +old_AR="$AR" +old_CC="$CC" +old_CFLAGS="$CFLAGS" +old_CPPFLAGS="$CPPFLAGS" +old_LDFLAGS="$LDFLAGS" +old_LIBS="$LIBS" +old_MAGIC="$MAGIC" +old_LD="$LD" +old_LN_S="$LN_S" +old_NM="$NM" +old_RANLIB="$RANLIB" +old_STRIP="$STRIP" +old_AS="$AS" +old_DLLTOOL="$DLLTOOL" +old_OBJDUMP="$OBJDUMP" +old_OBJEXT="$OBJEXT" +old_EXEEXT="$EXEEXT" +old_reload_Flag="$reload_flag" +old_deplibs_check_method="$deplibs_check_method" +old_file_magic_cmd="$file_magic_cmd" + +# Parse the command line options. +args= +prev= +for option +do + case "$option" in + -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + eval "$prev=\$option" + prev= + continue + fi + + case "$option" in + --help) cat <<EOM +Usage: $progname [OPTION]... LTMAIN [HOST] + +Generate a system-specific libtool script. 
+ + --build configure for building on BUILD [BUILD=HOST] + --debug enable verbose shell tracing + --disable-shared do not build shared libraries + --disable-static do not build static libraries + --disable-fast-install do not optimize for fast installation + --enable-dlopen enable dlopen support + --enable-win32-dll enable building dlls on win32 hosts + --help display this help and exit + --no-verify do not verify that HOST is a valid host type +-o, --output=FILE specify the output file [default=$default_ofile] + --quiet same as \`--silent' + --silent do not print informational messages + --srcdir=DIR find \`config.guess' in DIR + --version output version information and exit + --with-gcc assume that the GNU C compiler will be used + --with-gnu-ld assume that the C compiler uses the GNU linker + --prefer-pic try to use only PIC objects + --prefer-non-pic try to use only non-PIC objects + --disable-lock disable file locking + --cache-file=FILE configure cache file + +LTMAIN is the \`ltmain.sh' shell script fragment or \`ltmain.c' program +that provides basic libtool functionality. + +HOST is the canonical host system name [default=guessed]. 
+EOM + exit 0 + ;; + + --build) prev=build ;; + --build=*) build="$optarg" ;; + + --debug) + echo "$progname: enabling shell trace mode" + set -x + ;; + + --disable-shared) enable_shared=no ;; + + --disable-static) enable_static=no ;; + + --disable-fast-install) enable_fast_install=no ;; + + --enable-dlopen) enable_dlopen=yes ;; + + --enable-win32-dll) enable_win32_dll=yes ;; + + --quiet | --silent) silent=yes ;; + + --srcdir) prev=srcdir ;; + --srcdir=*) srcdir="$optarg" ;; + + --no-verify) verify_host=no ;; + + --output | -o) prev=ofile ;; + --output=*) ofile="$optarg" ;; + + --version) echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"; exit 0 ;; + + --with-gcc) with_gcc=yes ;; + --with-gnu-ld) with_gnu_ld=yes ;; + + --prefer-pic) pic_mode=yes ;; + --prefer-non-pic) pic_mode=no ;; + + --disable-lock) need_locks=no ;; + + --cache-file=*) cache_file="$optarg" ;; + + -*) + echo "$progname: unrecognized option \`$option'" 1>&2 + echo "$help" 1>&2 + exit 1 + ;; + + *) + if test -z "$ltmain"; then + ltmain="$option" + elif test -z "$host"; then +# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1 +# if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then +# echo "$progname: warning \`$option' is not a valid host type" 1>&2 +# fi + host="$option" + else + echo "$progname: too many arguments" 1>&2 + echo "$help" 1>&2 + exit 1 + fi ;; + esac +done + +if test -z "$ltmain"; then + echo "$progname: you must specify a LTMAIN file" 1>&2 + echo "$help" 1>&2 + exit 1 +fi + +if test ! -f "$ltmain"; then + echo "$progname: \`$ltmain' does not exist" 1>&2 + echo "$help" 1>&2 + exit 1 +fi + +# Quote any args containing shell metacharacters. +ltconfig_args= +for arg +do + case "$arg" in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) + ltconfig_args="$ltconfig_args '$arg'" ;; + *) ltconfig_args="$ltconfig_args $arg" ;; + esac +done + +# A relevant subset of AC_INIT. 
+ +# File descriptor usage: +# 0 standard input +# 1 file creation +# 2 errors and warnings +# 3 some systems may open it to /dev/tty +# 4 used on the Kubota Titan +# 5 compiler messages saved in config.log +# 6 checking for... messages and results +if test "$silent" = yes; then + exec 6>/dev/null +else + exec 6>&1 +fi +exec 5>>./config.log + +# NLS nuisances. +# Only set LANG and LC_ALL to C if already set. +# These must not be set unconditionally because not all systems understand +# e.g. LANG=C (notably SCO). +if test "X${LC_ALL+set}" = Xset; then LC_ALL=C; export LC_ALL; fi +if test "X${LANG+set}" = Xset; then LANG=C; export LANG; fi + +if test -n "$cache_file" && test -r "$cache_file"; then + echo "loading cache $cache_file within ltconfig" + . $cache_file +fi + +if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then + # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. + if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then + ac_n= ac_c=' +' ac_t=' ' + else + ac_n=-n ac_c= ac_t= + fi +else + ac_n= ac_c='\c' ac_t= +fi + +if test -z "$srcdir"; then + # Assume the source directory is the same one as the path to LTMAIN. + srcdir=`$echo "X$ltmain" | $Xsed -e 's%/[^/]*$%%'` + test "$srcdir" = "$ltmain" && srcdir=. +fi + +trap "$rm conftest*; exit 1" 1 2 15 +if test "$verify_host" = yes; then + # Check for config.guess and config.sub. + ac_aux_dir= + for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do + if test -f $ac_dir/config.guess; then + ac_aux_dir=$ac_dir + break + fi + done + if test -z "$ac_aux_dir"; then + echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2 + echo "$help" 1>&2 + exit 1 + fi + ac_config_guess=$ac_aux_dir/config.guess + ac_config_sub=$ac_aux_dir/config.sub + + # Make sure we can run config.sub. 
+ if $SHELL $ac_config_sub sun4 >/dev/null 2>&1; then : + else + echo "$progname: cannot run $ac_config_sub" 1>&2 + echo "$help" 1>&2 + exit 1 + fi + + echo $ac_n "checking host system type""... $ac_c" 1>&6 + + host_alias=$host + case "$host_alias" in + "") + if host_alias=`$SHELL $ac_config_guess`; then : + else + echo "$progname: cannot guess host type; you must specify one" 1>&2 + echo "$help" 1>&2 + exit 1 + fi ;; + esac + host=`$SHELL $ac_config_sub $host_alias` + echo "$ac_t$host" 1>&6 + + # Make sure the host verified. + test -z "$host" && exit 1 + + # Check for the build system type + echo $ac_n "checking build system type... $ac_c" 1>&6 + + build_alias=$build + case "$build_alias" in + NONE) + case $nonopt in + NONE) build_alias=$host_alias ;; + *) build_alias=$nonopt ;; + esac ;; + esac + + build=`$SHELL $ac_config_sub $build_alias` + build_cpu=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` + build_vendor=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` + build_os=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + echo "$ac_t""$build" 1>&6 + +elif test -z "$host"; then + echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2 + echo "$help" 1>&2 + exit 1 +else + host_alias=$host + build_alias=$host_alias + build=$host +fi + +if test x"$host" != x"$build"; then + ac_tool_prefix=${host_alias}- +else + ac_tool_prefix= +fi + +host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + +# Transform linux* to *-*-linux-gnu*, to support old configure scripts. +case "$host_os" in +linux-gnu*) ;; +linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` +esac + +case "$host_os" in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. 
+ if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR cru $oldlib$oldobjs$old_deplibs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +# Set sane defaults for various variables +test -z "$AR" && AR=ar +test -z "$AS" && AS=as +test -z "$CC" && CC=cc +test -z "$DLLTOOL" && DLLTOOL=dlltool +test -z "$MAGIC" && MAGIC=file +test -z "$LD" && LD=ld +test -z "$LN_S" && LN_S="ln -s" +test -z "$NM" && NM=nm +test -z "$OBJDUMP" && OBJDUMP=objdump +test -z "$RANLIB" && RANLIB=: +test -z "$STRIP" && STRIP=: +test -z "$objext" && objext=o + +echo $ac_n "checking for objdir... $ac_c" 1>&6 +rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + objdir=_libs +fi +rmdir .libs 2>/dev/null +echo "$ac_t$objdir" 1>&6 + +# Allow CC to be a program name with arguments. +set dummy $CC +compiler="$2" + +# We assume here that the value for ac_cv_prog_cc_pic will not be cached +# in isolation, and that seeing it set (from the cache) indicates that +# the associated values are set (in the cache) correctly too. +echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6 +echo "$progname:563:checking for $compiler option to produce PIC" 1>&5 +if test "X${ac_cv_prog_cc_pic+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_cv_prog_cc_pic= + ac_cv_prog_cc_shlib= + ac_cv_prog_cc_wl= + ac_cv_prog_cc_static= + ac_cv_prog_cc_no_builtin= + ac_cv_prog_cc_can_build_shared=$can_build_shared + + if test "$with_gcc" = yes; then + ac_cv_prog_cc_wl='-Wl,' + ac_cv_prog_cc_static='-static' + + case "$host_os" in + beos* | irix5* | irix6* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. 
+ ;; + aix*) + # Below there is a dirty hack to force normal static linking with -ldl. + # The problem is that libdl is dynamically linked with both libc and + # libC (the AIX C++ library), which gcc obviously does not include in its + # library list. This causes undefined symbols with -static flags. + # This hack allows C programs to be linked with "-static -ldl", but + # we are not sure about C++ programs. + ac_cv_prog_cc_static="$ac_cv_prog_cc_static ${ac_cv_prog_cc_wl}-lC" + ;; + cygwin* | mingw* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + ac_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + amigaos*) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + ac_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4' + ;; + sysv4*MP*) + if test -d /usr/nec; then + ac_cv_prog_cc_pic=-Kconform_pic + fi + ;; + *) + ac_cv_prog_cc_pic='-fPIC' + ;; + esac + else + # PORTME Check for PIC flags for the system compiler. + case "$host_os" in + aix3* | aix4*) + # All AIX code is PIC. + ac_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' + ;; + + hpux9* | hpux10* | hpux11*) + # Is there a better ac_cv_prog_cc_static that works with the bundled CC? + ac_cv_prog_cc_wl='-Wl,' + ac_cv_prog_cc_static="${ac_cv_prog_cc_wl}-a ${ac_cv_prog_cc_wl}archive" + ac_cv_prog_cc_pic='+Z' + ;; + + irix5* | irix6*) + ac_cv_prog_cc_wl='-Wl,' + ac_cv_prog_cc_static='-non_shared' + # PIC (with -KPIC) is the default. + ;; + + cygwin* | mingw* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + ac_cv_prog_cc_pic='-DDLL_EXPORT' + ;; + + osf3* | osf4* | osf5*) + # All OSF/1 code is PIC.
+ ac_cv_prog_cc_wl='-Wl,' + ac_cv_prog_cc_static='-non_shared' + ;; + + sco3.2v5*) + ac_cv_prog_cc_pic='-Kpic' + ac_cv_prog_cc_static='-dn' + ac_cv_prog_cc_shlib='-belf' + ;; + + solaris*) + ac_cv_prog_cc_pic='-KPIC' + ac_cv_prog_cc_static='-Bstatic' + ac_cv_prog_cc_wl='-Wl,' + ;; + + sunos4*) + ac_cv_prog_cc_pic='-PIC' + ac_cv_prog_cc_static='-Bstatic' + ac_cv_prog_cc_wl='-Qoption ld ' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + ac_cv_prog_cc_pic='-KPIC' + ac_cv_prog_cc_static='-Bstatic' + ac_cv_prog_cc_wl='-Wl,' + ;; + + uts4*) + ac_cv_prog_cc_pic='-pic' + ac_cv_prog_cc_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + ac_cv_prog_cc_pic='-Kconform_pic' + ac_cv_prog_cc_static='-Bstatic' + fi + ;; + + *) + ac_cv_prog_cc_can_build_shared=no + ;; + esac + fi +fi +if test -z "$ac_cv_prog_cc_pic"; then + echo "$ac_t"none 1>&6 +else + echo "$ac_t""$ac_cv_prog_cc_pic" 1>&6 + + # Check to make sure the pic_flag actually works. + echo $ac_n "checking if $compiler PIC flag $ac_cv_prog_cc_pic works... $ac_c" 1>&6 + echo "$progname:693:checking that $compiler PIC flag $ac_cv_prog_cc_pic works." 1>&5 + if test "X${ac_cv_prog_cc_pic_works+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 + else + ac_cv_prog_cc_pic_works=yes + $rm conftest* + echo "int some_variable = 0;" > conftest.c + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $ac_cv_prog_cc_pic -DPIC" + if { (eval echo $progname:702: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.$objext; then + # Append any warnings to the config.log. + cat conftest.err 1>&5 + + case "$host_os" in + hpux9* | hpux10* | hpux11*) + # On HP-UX, both CC and GCC only warn that PIC is supported... then + # they create non-PIC objects. So, if there were any warnings, we + # assume that PIC is not supported. 
+ if test -s conftest.err; then + ac_cv_prog_cc_pic_works=no + ac_cv_prog_cc_can_build_shared=no + ac_cv_prog_cc_pic= + else + ac_cv_prog_cc_pic_works=yes + ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic" + fi + ;; + *) + ac_cv_prog_cc_pic_works=yes + ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic" + ;; + esac + else + # Append any errors to the config.log. + cat conftest.err 1>&5 + ac_cv_prog_cc_pic_works=no + ac_cv_prog_cc_can_build_shared=no + ac_cv_prog_cc_pic= + fi + CFLAGS="$save_CFLAGS" + $rm conftest* + fi + # Belt *and* braces to stop my trousers falling down: + if test "X$ac_cv_prog_cc_pic_works" = Xno; then + ac_cv_prog_cc_pic= + ac_cv_prog_cc_can_build_shared=no + fi + echo "$ac_t""$ac_cv_prog_cc_pic_works" 1>&6 +fi + +# Check for any special shared library compilation flags. +if test -n "$ac_cv_prog_cc_shlib"; then + echo "$progname: warning: \`$CC' requires \`$ac_cv_prog_cc_shlib' to build shared libraries" 1>&2 + if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$ac_cv_prog_cc_shlib[ ]" >/dev/null; then : + else + echo "$progname: add \`$ac_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure" 1>&2 + ac_cv_prog_cc_can_build_shared=no + fi +fi + +echo $ac_n "checking if $compiler static flag $ac_cv_prog_cc_static works... 
$ac_c" 1>&6 +echo "$progname:754: checking if $compiler static flag $ac_cv_prog_cc_static works" >&5 +if test "X${ac_cv_prog_cc_static_works+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + $rm conftest* + echo 'main(){return(0);}' > conftest.c + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $ac_cv_prog_cc_static" + if { (eval echo $progname:762: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then + ac_cv_prog_cc_static_works=yes + else + ac_cv_prog_cc_static_works=no + ac_cv_prog_cc_static= + fi + LDFLAGS="$save_LDFLAGS" + $rm conftest* +fi +# Belt *and* braces to stop my trousers falling down: +if test "X$ac_cv_prog_cc_static_works" = Xno; then + ac_cv_prog_cc_static= +fi +echo "$ac_t""$ac_cv_prog_cc_static_works" 1>&6 +pic_flag="$ac_cv_prog_cc_pic" +special_shlib_compile_flags="$ac_cv_prog_cc_shlib" +wl="$ac_cv_prog_cc_wl" +link_static_flag="$ac_cv_prog_cc_static" +no_builtin_flag="$ac_cv_prog_cc_no_builtin" +can_build_shared="$ac_cv_prog_cc_can_build_shared" + +# Check to see if options -o and -c are simultaneously supported by compiler +echo $ac_n "checking if $compiler supports -c -o file.o... $ac_c" 1>&6 +$rm -r conftest 2>/dev/null +mkdir conftest +cd conftest +$rm conftest* +echo "int some_variable = 0;" > conftest.c +mkdir out +# According to Tom Tromey, Ian Lance Taylor reported there are C compilers +# that will create temporary files in the current directory regardless of +# the output directory. Thus, making CWD read-only will cause this test +# to fail, enabling locking or at least warning the user not to do parallel +# builds. +chmod -w . 
+save_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -o out/conftest2.o" +echo "$progname:799: checking if $compiler supports -c -o file.o" >&5 +if { (eval echo $progname:800: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.o; then + + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s out/conftest.err; then + echo "$ac_t"no 1>&6 + compiler_c_o=no + else + echo "$ac_t"yes 1>&6 + compiler_c_o=yes + fi +else + # Append any errors to the config.log. + cat out/conftest.err 1>&5 + compiler_c_o=no + echo "$ac_t"no 1>&6 +fi +CFLAGS="$save_CFLAGS" +chmod u+w . +$rm conftest* out/* +rmdir out +cd .. +rmdir conftest +$rm -r conftest 2>/dev/null + +if test x"$compiler_c_o" = x"yes"; then + # Check to see if we can write to a .lo + echo $ac_n "checking if $compiler supports -c -o file.lo... $ac_c" 1>&6 + $rm conftest* + echo "int some_variable = 0;" > conftest.c + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -c -o conftest.lo" + echo "$progname:832: checking if $compiler supports -c -o file.lo" >&5 +if { (eval echo $progname:833: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.lo; then + + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + echo "$ac_t"no 1>&6 + compiler_o_lo=no + else + echo "$ac_t"yes 1>&6 + compiler_o_lo=yes + fi + else + # Append any errors to the config.log. + cat conftest.err 1>&5 + compiler_o_lo=no + echo "$ac_t"no 1>&6 + fi + CFLAGS="$save_CFLAGS" + $rm conftest* +else + compiler_o_lo=no +fi + +# Check to see if we can do hard links to lock some files if needed +hard_links="nottested" +if test "$compiler_c_o" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + echo $ac_n "checking if we can lock with hard links... 
$ac_c" 1>&6 + hard_links=yes + $rm conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + echo "$ac_t$hard_links" 1>&6 + $rm conftest* + if test "$hard_links" = no; then + echo "*** WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2 + need_locks=warn + fi +else + need_locks=no +fi + +if test "$with_gcc" = yes; then + # Check to see if options -fno-rtti -fno-exceptions are supported by compiler + echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions ... $ac_c" 1>&6 + $rm conftest* + echo "int some_variable = 0;" > conftest.c + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.c" + echo "$progname:884: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 + if { (eval echo $progname:885: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then + + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + echo "$ac_t"no 1>&6 + compiler_rtti_exceptions=no + else + echo "$ac_t"yes 1>&6 + compiler_rtti_exceptions=yes + fi + else + # Append any errors to the config.log. + cat conftest.err 1>&5 + compiler_rtti_exceptions=no + echo "$ac_t"no 1>&6 + fi + CFLAGS="$save_CFLAGS" + $rm conftest* + + if test "$compiler_rtti_exceptions" = "yes"; then + no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions' + else + no_builtin_flag=' -fno-builtin' + fi + +fi + +# See if the linker supports building shared libraries. +echo $ac_n "checking whether the linker ($LD) supports shared libraries... 
$ac_c" 1>&6 + +allow_undefined_flag= +no_undefined_flag= +need_lib_prefix=unknown +need_version=unknown +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +archive_cmds= +archive_expsym_cmds= +old_archive_from_new_cmds= +old_archive_from_expsyms_cmds= +striplib= +old_striplib= +export_dynamic_flag_spec= +whole_archive_flag_spec= +thread_safe_flag_spec= +hardcode_into_libs=no +hardcode_libdir_flag_spec= +hardcode_libdir_separator= +hardcode_direct=no +hardcode_minus_L=no +hardcode_shlibpath_var=unsupported +runpath_var= +link_all_deplibs=unknown +always_export_symbols=no +export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols' +# include_expsyms should be a list of space-separated symbols to be *always* +# included in the symbol list +include_expsyms= +# exclude_expsyms can be an egrep regular expression of symbols to exclude +# it will be wrapped by ` (' and `)$', so one must not match beginning or +# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', +# as well as any symbol that contains `d'. +exclude_expsyms="_GLOBAL_OFFSET_TABLE_" +# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out +# platforms (ab)use it in PIC code, but their linkers get confused if +# the symbol is explicitly referenced. Since portable code cannot +# rely on this symbol name, it's probably fine to never include it in +# preloaded symbol tables. +extract_expsyms_cmds= + +case "$host_os" in +cygwin* | mingw*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + if test "$with_gcc" != yes; then + with_gnu_ld=no + fi + ;; + +esac + +ld_shlibs=yes +if test "$with_gnu_ld" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # See if GNU ld supports shared libraries. 
+ case "$host_os" in + aix3* | aix4*) + # On AIX, the GNU linker is very broken + ld_shlibs=no + cat <<EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.9.1, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to modify your PATH +*** so that a non-GNU linker is found, and then restart. + +EOF + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + + # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports + # that the semantics of dynamic libraries on AmigaOS, at least up + # to version 4, is to share data among multiple programs linked + # with the same dynamic library. Since this doesn't match the + # behavior of shared libraries on other platforms, we cannot use + # them. + ld_shlibs=no + ;; + + beos*) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach <jrb3@best.com> says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw*) + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs.
+ hardcode_libdir_flag_spec='-L$libdir' + allow_undefined_flag=unsupported + always_export_symbols=yes + + extract_expsyms_cmds='test -f $output_objdir/impgen.c || \ + sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/impgen.c~ + test -f $output_objdir/impgen.exe || (cd $output_objdir && \ + if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \ + else $CC -o impgen impgen.c ; fi)~ + $output_objdir/impgen $dir/$soname > $output_objdir/$soname-def' + + old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib' + + # cygwin and mingw dlls have different entry points and sets of symbols + # to exclude. + # FIXME: what about values for MSVC? + dll_entry=__cygwin_dll_entry@12 + dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~ + case "$host_os" in + mingw*) + # mingw values + dll_entry=_DllMainCRTStartup@12 + dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~ + ;; + esac + + # mingw and cygwin differ, and it's simplest to just exclude the union + # of the two symbol sets. 
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12 + + # recent cygwin and mingw systems supply a stub DllMain which the user + # can override, but on older systems we have to supply one (in ltdll.c) + if test "x$lt_cv_need_dllmain" = "xyes"; then + ltdll_obj='$output_objdir/$soname-ltdll.'"$objext " + ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/$soname-ltdll.c~ + test -f $output_objdir/$soname-ltdll.$objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~' + else + ltdll_obj= + ltdll_cmds= + fi + + # Extract the symbol export list from an `--export-all' def file, + # then regenerate the def file from the symbol export list, so that + # the compiled dll only exports the symbol export list. + # Be careful not to strip the DATA tag left by newer dlltools. + export_symbols_cmds="$ltdll_cmds"' + $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~ + sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols' + + # If DATA tags from a recent dlltool are present, honour them!
+ archive_expsym_cmds='echo EXPORTS > $output_objdir/$soname-def~ + _lt_hint=1; + cat $export_symbols | while read symbol; do + set dummy \$symbol; + case \$# in + 2) echo " \$2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; + *) echo " \$2 @ \$_lt_hint \$3 ; " >> $output_objdir/$soname-def;; + esac; + _lt_hint=`expr 1 + \$_lt_hint`; + done~ + '"$ltdll_cmds"' + $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~ + $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~ + $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~ + $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags' + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + fi + ;; + + solaris* | sysv5*) + if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. 
Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +EOF + elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test "$ld_shlibs" = yes; then + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' + export_dynamic_flag_spec='${wl}--export-dynamic' + case $host_os in + cygwin* | mingw*) + # dlltool doesn't understand --whole-archive et al. + whole_archive_flag_spec= + ;; + *) + # ancient GNU ld didn't support --whole-archive et al.
+ if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + whole_archive_flag_spec= + fi + ;; + esac + fi +else + # PORTME fill in a description of your system's linker (not GNU ld) + case "$host_os" in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR cru $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + hardcode_minus_L=yes + if test "$with_gcc" = yes && test -z "$link_static_flag"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix4*) + hardcode_libdir_flag_spec='${wl}-b ${wl}nolibpath ${wl}-b ${wl}libpath:$libdir:/usr/lib:/lib' + hardcode_libdir_separator=':' + if test "$with_gcc" = yes; then + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + hardcode_direct=yes + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. 
Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + shared_flag='-shared' + else + shared_flag='${wl}-bM:SRE' + hardcode_direct=yes + fi + allow_undefined_flag=' ${wl}-berok' + archive_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bexpall ${wl}-bnoentry${allow_undefined_flag}' + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bE:$export_symbols ${wl}-bnoentry${allow_undefined_flag}' + case "$host_os" in aix4.[01]|aix4.[01].*) + # According to Greg Wooledge, -bexpall is only supported from AIX 4.2 on + always_export_symbols=yes ;; + esac + ;; + + amigaos*) + archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + # see comment about different semantics on the GNU ld section + ld_shlibs=no + ;; + + cygwin* | mingw*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs' + fix_srcfile_path='`cygpath -w $srcfile`' + ;; + + freebsd1*) + ld_shlibs=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd*) + archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9* | hpux10* | hpux11*) + case "$host_os" in + hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;; + *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;; + esac + hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_minus_L=yes # Not in the search PATH, but as the default + # location of the library. 
+ export_dynamic_flag_spec='${wl}-E' + ;; + + irix5* | irix6*) + if test "$with_gcc" = yes; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + link_all_deplibs=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='${wl}-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + openbsd*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$with_gcc" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname 
${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$with_gcc" = yes; then + allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' + archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' + hardcode_libdir_separator=: + ;; + + sco3.2v5*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ;; + + solaris*) + no_undefined_flag=' -z text' + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + 
hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case "$host_os" in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) # Supported since Solaris 2.6 (maybe 2.5.1?) + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv5*) + no_undefined_flag=' -z text' + # $CC -shared without GNU ld will not create a library from C++ + # object files and a static libstdc++, better avoid it by now + archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' + hardcode_libdir_flag_spec= + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + 
hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4.2uw2*) + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=no + hardcode_shlibpath_var=no + hardcode_runpath_var=yes + runpath_var=LD_RUN_PATH + ;; + + unixware7*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac +fi +echo "$ac_t$ld_shlibs" 1>&6 +test "$ld_shlibs" = no && can_build_shared=no + +# Check hardcoding attributes. +echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6 +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || \ + test -n "$runpath_var"; then + + # We can hardcode non-existent directories. + if test "$hardcode_direct" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$hardcode_shlibpath_var" != no && + test "$hardcode_minus_L" != no; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action=unsupported +fi +echo "$ac_t$hardcode_action" 1>&6 + +echo $ac_n "checking whether stripping libraries is possible...
$ac_c" 1>&6 +if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + echo "${ac_t}yes" 1>&6 +else + echo "${ac_t}no" 1>&6 +fi + +reload_cmds='$LD$reload_flag -o $output$reload_objs' +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +# PORTME Fill in your ld.so characteristics +library_names_spec= +libname_spec='lib$name' +soname_spec= +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + +echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6 +case "$host_os" in +aix3*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}.so$major' + ;; + +aix4*) + version_type=linux + # AIX has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + # We preserve .a as extension for shared libraries though AIX4.2 + # and later linker supports .so + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.a' + shlibpath_var=LIBPATH + ;; + +amigaos*) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' + ;; + +beos*) + library_names_spec='${libname}.so' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + +bsdi4*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + export_dynamic_flag_spec=-rdynamic + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw*) + version_type=windows + need_version=no + need_lib_prefix=no + if test "$with_gcc" = yes; then + library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll' + else + library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib' + fi + dynamic_linker='Win32 ld.exe' + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + +freebsd1*) + dynamic_linker=no + ;; + +freebsd*) + objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout` + version_type=freebsd-$objformat + case "$version_type" in + freebsd-elf*) + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case "$host_os" in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + *) # from 3.2 on + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + dynamic_linker="$host_os dld.sl" + version_type=sunos + need_lib_prefix=no + need_version=no + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl' + soname_spec='${libname}${release}.sl$major' + # HP-UX runs *really* slowly unless shared libraries are mode 555. 
+ postinstall_cmds='chmod 555 $lib' + ;; + +irix5* | irix6*) + version_type=irix + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}.so.$major' + library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major ${libname}${release}.so $libname.so' + case "$host_os" in + irix5*) + libsuff= shlibsuff= + ;; + *) + case "$LD" in # libtool.m4 will add one of these switches to LD + *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. +linux-gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + if test -f /lib/ld.so.1; then + dynamic_linker='GNU ld.so' + else + # Only the GNU ld.so supports shared libraries on MkLinux. 
+ case "$host_cpu" in + powerpc*) dynamic_linker=no ;; + *) dynamic_linker='Linux ld.so' ;; + esac + fi + ;; + +netbsd*) + version_type=sunos + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so' + soname_spec='${libname}${release}.so$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + ;; + +openbsd*) + version_type=sunos + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + need_version=no + fi + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + ;; + +os2*) + libname_spec='$name' + need_lib_prefix=no + library_names_spec='$libname.dll $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_version=no + soname_spec='${libname}${release}.so' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +sco3.2v5*) + version_type=osf + soname_spec='${libname}${release}.so$major' + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + shlibpath_var=LD_LIBRARY_PATH + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + 
hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + case "$host_vendor" in + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' + soname_spec='${libname}${release}.so$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so' + soname_spec='$libname.so.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +*) + dynamic_linker=no + ;; +esac +echo "$ac_t$dynamic_linker" 1>&6 +test "$dynamic_linker" = no && can_build_shared=no + +# Check for command to grab the raw symbol name followed by C symbol from nm. +echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6 + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! 
;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Transform the above into a raw symbol and a C symbol. +symxfrm='\1 \2\3 \3' + +# Transform an extracted symbol line into a proper C declaration +global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'" + +# Define system-specific variables. +case "$host_os" in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw*) + symcode='[ABCDGISTW]' + ;; +hpux*) # Its linker distinguishes data from code symbols + global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^. .* \(.*\)$/extern char \1;/p'" + ;; +irix*) + symcode='[BCDEGRST]' + ;; +solaris* | sysv5*) + symcode='[BDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# Handle CRLF in mingw tool chain +opt_cr= +case "$host_os" in +mingw*) + opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then + symcode='[ABCDGISTW]' +fi + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Write the raw and C identifiers. +global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'" + + # Check to see that the pipe works correctly. + pipe_works=no + $rm conftest* + cat > conftest.c <<EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(){} +#ifdef __cplusplus +} +#endif +main(){nm_test_var='a';nm_test_func();return(0);} +EOF + + echo "$progname:1867: checking if global_symbol_pipe works" >&5 + if { (eval echo $progname:1868: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.$objext; then + # Now try to grab the symbols. 
+ nlist=conftest.nm + if { echo "$progname:1871: eval \"$NM conftest.$objext | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.$objext | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then + + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if egrep ' nm_test_var$' "$nlist" >/dev/null; then + if egrep ' nm_test_func$' "$nlist" >/dev/null; then + cat <<EOF > conftest.c +#ifdef __cplusplus +extern "C" { +#endif + +EOF + # Now generate the symbol file. + eval "$global_symbol_to_cdecl"' < "$nlist" >> conftest.c' + + cat <<EOF >> conftest.c +#if defined (__STDC__) && __STDC__ +# define lt_ptr_t void * +#else +# define lt_ptr_t char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr_t address; +} +lt_preloaded_symbols[] = +{ +EOF + sed 's/^. \(.*\) \(.*\)$/ {"\2", (lt_ptr_t) \&\2},/' < "$nlist" >> conftest.c + cat <<\EOF >> conftest.c + {0, (lt_ptr_t) 0} +}; + +#ifdef __cplusplus +} +#endif +EOF + # Now try linking the two files. + mv conftest.$objext conftstm.$objext + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="conftstm.$objext" + CFLAGS="$CFLAGS$no_builtin_flag" + if { (eval echo $progname:1923: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then + pipe_works=yes + else + echo "$progname: failed program was:" >&5 + cat conftest.c >&5 + fi + LIBS="$save_LIBS" + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.c >&5 + fi + $rm conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. 
+ if test "$pipe_works" = yes; then + break + else + global_symbol_pipe= + fi +done +if test "$pipe_works" = yes; then + echo "${ac_t}ok" 1>&6 +else + echo "${ac_t}failed" 1>&6 +fi + +if test -z "$global_symbol_pipe"; then + global_symbol_to_cdecl= +fi + +# Report the final consequences. +echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6 + +# Only try to build win32 dlls if AC_LIBTOOL_WIN32_DLL was used in +# configure.in, otherwise build static only libraries. +case "$host_os" in +cygwin* | mingw* | os2*) + if test x$can_build_shared = xyes; then + test x$enable_win32_dll = xno && can_build_shared=no + echo "checking if package supports dlls... $can_build_shared" 1>&6 + fi +;; +esac + +echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6 +test "$can_build_shared" = "no" && enable_shared=no + +# On AIX, shared libraries and static libraries use the same namespace, and +# are all built from PIC. +case "$host_os" in +aix3*) + test "$enable_shared" = yes && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + +aix4*) + test "$enable_shared" = yes && enable_static=no + ;; +esac + +echo "$ac_t$enable_shared" 1>&6 + +# Make sure either enable_shared or enable_static is yes. +test "$enable_shared" = yes || enable_static=yes + +echo "checking whether to build static libraries... 
$enable_static" 1>&6 + +if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi + +# Check whether we must set pic_mode to default +test -z "$pic_flag" && pic_mode=default +# On Cygwin there's no "real" PIC flag so we must build both object types +case "$host_os" in +cygwin* | mingw* | os2*) + pic_mode=default + ;; +esac +if test $pic_mode = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default +fi + +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else +if test "X${lt_cv_dlopen+set}" != Xset; then + lt_cv_dlopen=no lt_cv_dlopen_libs= +echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6 +echo "$progname:2032: checking for dlopen in -ldl" >&5 +if test "X${ac_cv_lib_dl_dlopen+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldl $LIBS" +cat > conftest.$ac_ext <<EOF +#line 2039 "ltconfig" +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dlopen(); + +int main() { +dlopen() +; return 0; } +EOF +if { (eval echo $progname:2052: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + ac_cv_lib_dl_dlopen=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_lib_dl_dlopen=no +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if test "X$ac_cv_lib_dl_dlopen" = Xyes; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dlopen""... $ac_c" 1>&6 +echo "$progname:2071: checking for dlopen" >&5 +if test "X${ac_cv_func_dlopen+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <<EOF +#line 2076 "ltconfig" +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char dlopen(); below. */ +#include <assert.h> +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_dlopen) || defined (__stub___dlopen) +choke me +#else +dlopen(); +#endif + +; return 0; } +EOF +if { (eval echo $progname:2101: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + ac_cv_func_dlopen=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_func_dlopen=no +fi +rm -f conftest* +fi +if test "X$ac_cv_func_dlopen" = Xyes; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dlopen" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6 +echo "$progname:2118: checking for dld_link in -ldld" >&5 +if test "X${ac_cv_lib_dld_dld_link+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldld $LIBS" +cat > conftest.$ac_ext <<EOF +#line 2125 "ltconfig" +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dld_link(); + +int main() { +dld_link() +; return 0; } +EOF +if { (eval echo $progname:2138: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + ac_cv_lib_dld_dld_link=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_lib_dld_dld_link=no +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if test "X$ac_cv_lib_dld_dld_link" = Xyes; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for shl_load""... 
$ac_c" 1>&6 +echo "$progname:2157: checking for shl_load" >&5 +if test "X${ac_cv_func_shl_load+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <<EOF +#line 2162 "ltconfig" +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char shl_load(); below. */ +#include <assert.h> +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char shl_load(); + +int main() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_shl_load) || defined (__stub___shl_load) +choke me +#else +shl_load(); +#endif + +; return 0; } +EOF +if { (eval echo $progname:2187: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + ac_cv_func_shl_load=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_func_shl_load=no +fi +rm -f conftest* +fi + +if test "X$ac_cv_func_shl_load" = Xyes; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="shl_load" +else + echo "$ac_t""no" 1>&6 +echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6 +echo "$progname:2205: checking for shl_load in -ldld" >&5 +if test "X${ac_cv_lib_dld_shl_load+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldld $LIBS" +cat > conftest.$ac_ext <<EOF +#line 2212 "ltconfig" +#include "confdefs.h" +/* Override any gcc2 internal prototype to avoid an error. */ +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load(); + +int main() { +shl_load() +; return 0; } +EOF +if { (eval echo $progname:2226: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + ac_cv_lib_dld_shl_load=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + ac_cv_lib_dld_shl_load=no +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if test "X$ac_cv_lib_dld_shl_load" = Xyes; then + echo "$ac_t""yes" 1>&6 + lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" +else + echo "$ac_t""no" 1>&6 +fi + + +fi + + +fi + + +fi + + +fi + +fi + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + fi + + case "$lt_cv_dlopen" in + dlopen) +for ac_hdr in dlfcn.h; do +ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` +echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 +echo "$progname:2269: checking for $ac_hdr" >&5 +if eval "test \"`echo 'X$''{'ac_cv_header_$ac_safe'+set}'`\" = Xset"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext <<EOF +#line 2274 "ltconfig" +#include <$ac_hdr> +int fnord = 0; +int main () { } +EOF +ac_try="$ac_compile >/dev/null 2>conftest.out" +{ (eval echo $progname:2280: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` +if test -z "$ac_err"; then + rm -rf conftest* + eval "ac_cv_header_$ac_safe=yes" +else + echo "$ac_err" >&5 + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_header_$ac_safe=no" +fi +rm -f conftest* +fi +if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi +done + + if test "x$ac_cv_header_dlfcn_h" = xyes; then + CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + fi + eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + LIBS="$lt_cv_dlopen_libs $LIBS" + + echo $ac_n "checking whether a program can dlopen itself""... 
$ac_c" 1>&6 +echo "$progname:2308: checking whether a program can dlopen itself" >&5 +if test "X${lt_cv_dlopen_self+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + lt_cv_dlopen_self=cross + else + cat > conftest.c <<EOF +#line 2316 "ltconfig" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LTDL_GLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LTDL_GLOBAL DL_GLOBAL +# else +# define LTDL_GLOBAL 0 +# endif +#endif + +/* We may have to define LTDL_LAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LTDL_LAZY_OR_NOW +# ifdef RTLD_LAZY +# define LTDL_LAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LTDL_LAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LTDL_LAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LTDL_LAZY_OR_NOW DL_NOW +# else +# define LTDL_LAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +fnord() { int i=42;} +main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW); + if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord"); + if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); } + +EOF +if { (eval echo $progname:2362: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +then + lt_cv_dlopen_self=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + lt_cv_dlopen_self=no +fi +rm -fr conftest* +fi + +fi + +echo "$ac_t""$lt_cv_dlopen_self" 1>&6 + + if test "$lt_cv_dlopen_self" = yes; then + LDFLAGS="$LDFLAGS $link_static_flag" + echo $ac_n "checking whether a statically linked program can dlopen itself""... 
$ac_c" 1>&6 +echo "$progname:2381: checking whether a statically linked program can dlopen itself" >&5 +if test "X${lt_cv_dlopen_self_static+set}" = Xset; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + lt_cv_dlopen_self_static=cross + else + cat > conftest.c <<EOF +#line 2389 "ltconfig" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LTDL_GLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LTDL_GLOBAL DL_GLOBAL +# else +# define LTDL_GLOBAL 0 +# endif +#endif + +/* We may have to define LTDL_LAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LTDL_LAZY_OR_NOW +# ifdef RTLD_LAZY +# define LTDL_LAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LTDL_LAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LTDL_LAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LTDL_LAZY_OR_NOW DL_NOW +# else +# define LTDL_LAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +fnord() { int i=42;} +main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW); + if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord"); + if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); } + +EOF +if { (eval echo $progname:2435: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +then + lt_cv_dlopen_self_static=yes +else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -fr conftest* + lt_cv_dlopen_self_static=no +fi +rm -fr conftest* +fi + +fi + +echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6 +fi + ;; + esac + + case "$lt_cv_dlopen_self" in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case "$lt_cv_dlopen_self_static" in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + +# Copy echo and quote the copy, instead of the original, 
because it is +# used later. +ltecho="$echo" +if test "X$ltecho" = "X$CONFIG_SHELL $0 --fallback-echo"; then + ltecho="$CONFIG_SHELL \$0 --fallback-echo" +fi +LTSHELL="$SHELL" + +LTCONFIG_VERSION="$VERSION" + +# Only quote variables if we're using ltmain.sh. +case "$ltmain" in +*.sh) + # Now quote all the things that may contain metacharacters. + for var in ltecho old_AR old_CC old_CFLAGS old_CPPFLAGS \ + old_MAGIC old_LD old_LDFLAGS old_LIBS \ + old_LN_S old_NM old_RANLIB old_STRIP \ + old_AS old_DLLTOOL old_OBJDUMP \ + old_OBJEXT old_EXEEXT old_reload_flag \ + old_deplibs_check_method old_file_magic_cmd \ + AR CC LD LN_S NM LTSHELL LTCONFIG_VERSION \ + reload_flag reload_cmds wl \ + pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \ + thread_safe_flag_spec whole_archive_flag_spec libname_spec \ + library_names_spec soname_spec \ + RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ + old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \ + postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \ + old_striplib striplib file_magic_cmd export_symbols_cmds \ + deplibs_check_method allow_undefined_flag no_undefined_flag \ + finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ + hardcode_libdir_flag_spec hardcode_libdir_separator \ + sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ + compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do + + case "$var" in + reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ + old_postinstall_cmds | old_postuninstall_cmds | \ + export_symbols_cmds | archive_cmds | archive_expsym_cmds | \ + extract_expsyms_cmds | old_archive_from_expsyms_cmds | \ + postinstall_cmds | postuninstall_cmds | \ + finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) + # Double-quote double-evaled strings. 
+ eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ### testsuite: skip nested quoting test + ;; + *) + eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ### testsuite: skip nested quoting test + ;; + esac + done + + case "$ltecho" in + *'\$0 --fallback-echo"') + ltecho=`$echo "X$ltecho" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` + ;; + esac + + trap "$rm \"$ofile\"; exit 1" 1 2 15 + echo "creating $ofile" + $rm "$ofile" + cat <<EOF > "$ofile" +#! $SHELL + +# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. +# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) +# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh. +# +# Copyright (C) 1996-2000 Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. 
+ +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="sed -e s/^X//" + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi + +### BEGIN LIBTOOL CONFIG +EOF + cfgfile="$ofile" + ;; + +*) + # Double-quote the value of each variable that needs it (for aesthetics). + for var in old_AR old_CC old_CFLAGS old_CPPFLAGS \ + old_MAGIC old_LD old_LDFLAGS old_LIBS \ + old_LN_S old_NM old_RANLIB old_STRIP \ + old_AS old_DLLTOOL old_OBJDUMP \ + old_OBJEXT old_EXEEXT old_reload_flag \ + old_deplibs_check_method old_file_magic_cmd; do + eval "$var=\\\"\$$var\\\"" + done + + # Just create a config file. + cfgfile="$ofile.cfg" + trap "$rm \"$cfgfile\"; exit 1" 1 2 15 + echo "creating $cfgfile" + $rm "$cfgfile" + cat <<EOF > "$cfgfile" +# `$echo "$cfgfile" | sed 's%^.*/%%'` - Libtool configuration file. +# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) +EOF + ;; +esac + +cat <<EOF >> "$cfgfile" +# Libtool was configured as follows, on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# +# AR=$old_AR CC=$old_CC CFLAGS=$old_CFLAGS CPPFLAGS=$old_CPPFLAGS \\ +# MAGIC=$old_MAGIC LD=$old_LD LDFLAGS=$old_LDFLAGS LIBS=$old_LIBS \\ +# LN_S=$old_LN_S NM=$old_NM RANLIB=$old_RANLIB STRIP=$old_STRIP \\ +# AS=$old_AS DLLTOOL=$old_DLLTOOL OBJDUMP=$old_OBJDUMP \\ +# objext=$old_OBJEXT exeext=$old_EXEEXT reload_flag=$old_reload_flag \\ +# deplibs_check_method=$old_deplibs_check_method file_magic_cmd=$old_file_magic_cmd \\ +# $0$ltconfig_args +# +# Compiler and other test output produced by $progname, useful for +# debugging $progname, is in ./config.log if it exists. +# The version of $progname that generated this script. +LTCONFIG_VERSION=$LTCONFIG_VERSION + +# Shell to use when invoking shell scripts. +SHELL=$LTSHELL + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. 
+build_old_libs=$enable_static + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# The host system. +host_alias=$host_alias +host=$host + +# An echo program that does not interpret backslashes. +echo=$ltecho + +# The archiver. +AR=$AR + +# The default C compiler. +CC=$CC + +# The linker used to build libraries. +LD=$LD + +# Whether we need hard or soft links. +LN_S=$LN_S + +# A BSD-compatible nm program. +NM=$NM + +# A symbol stripping program +STRIP=$STRIP + +# Used to examine libraries when file_magic_cmd begins "file" +MAGIC=$MAGIC + +# Used on cygwin: DLL creation program. +DLLTOOL="$DLLTOOL" + +# Used on cygwin: object dumper. +OBJDUMP="$OBJDUMP" + +# Used on cygwin: assembler. +AS="$AS" + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# How to create reloadable object files. +reload_flag=$reload_flag +reload_cmds=$reload_cmds + +# How to pass a linker flag through the compiler. +wl=$wl + +# Object file suffix (normally "o"). +objext="$objext" + +# Old archive suffix (normally "a"). +libext="$libext" + +# Executable file suffix (normally ""). +exeext="$exeext" + +# Additional compiler flags for building library objects. +pic_flag=$pic_flag +pic_mode=$pic_mode + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$compiler_c_o + +# Can we write directly to a .lo ? +compiler_o_lo=$compiler_o_lo + +# Must we lock files when doing compilation ? +need_locks=$need_locks + +# Do we need the lib prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Compiler flag to prevent dynamic linking. 
+link_static_flag=$link_static_flag + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$no_builtin_flag + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$whole_archive_flag_spec + +# Compiler flag to generate thread-safe objects. +thread_safe_flag_spec=$thread_safe_flag_spec + +# Library versioning type. +version_type=$version_type + +# Format of library name prefix. +libname_spec=$libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME. +library_names_spec=$library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$soname_spec + +# Commands used to build and install an old-style archive. +RANLIB=$RANLIB +old_archive_cmds=$old_archive_cmds +old_postinstall_cmds=$old_postinstall_cmds +old_postuninstall_cmds=$old_postuninstall_cmds + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$old_archive_from_expsyms_cmds + +# Commands used to build and install a shared archive. +archive_cmds=$archive_cmds +archive_expsym_cmds=$archive_expsym_cmds +postinstall_cmds=$postinstall_cmds +postuninstall_cmds=$postuninstall_cmds + +# Commands to strip libraries. +old_striplib=$old_striplib +striplib=$striplib + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$deplibs_check_method + +# Command to use when deplibs_check_method == file_magic. +file_magic_cmd=$file_magic_cmd + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$allow_undefined_flag + +# Flag that forces no undefined symbols. 
+no_undefined_flag=$no_undefined_flag + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$finish_cmds + +# Same as above, but a single script fragment to be evaled but not shown. +finish_eval=$finish_eval + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$global_symbol_pipe + +# Transform the output of nm in a proper C declaration +global_symbol_to_cdecl=$global_symbol_to_cdecl + +# This is the shared library runtime path variable. +runpath_var=$runpath_var + +# This is the shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist. +hardcode_libdir_flag_spec=$hardcode_libdir_flag_spec + +# Whether we need a single -rpath flag with a separated argument. +hardcode_libdir_separator=$hardcode_libdir_separator + +# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the +# resulting binary. +hardcode_direct=$hardcode_direct + +# Set to yes if using the -LDIR flag during linking hardcodes DIR into the +# resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into +# the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Whether libtool must link a program against all its dependency libraries. 
+link_all_deplibs=$link_all_deplibs + +# Compile-time system search path for libraries +sys_lib_search_path_spec=$sys_lib_search_path_spec + +# Run-time system search path for libraries +sys_lib_dlsearch_path_spec=$sys_lib_dlsearch_path_spec + +# Fix the shell variable \$srcfile for the compiler. +fix_srcfile_path="$fix_srcfile_path" + +# Set to yes if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$export_symbols_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$extract_expsyms_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$include_expsyms + +EOF + +case "$ltmain" in +*.sh) + echo '### END LIBTOOL CONFIG' >> "$ofile" + echo >> "$ofile" + case "$host_os" in + aix3*) + cat <<\EOF >> "$ofile" + +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. 
+if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +EOF + ;; + esac + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2*) + cat <<'EOF' >> "$ofile" + # This is a source program that is used to create dlls on Windows + # Don't remove nor modify the starting and closing comments +# /* ltdll.c starts here */ +# #define WIN32_LEAN_AND_MEAN +# #include <windows.h> +# #undef WIN32_LEAN_AND_MEAN +# #include <stdio.h> +# +# #ifndef __CYGWIN__ +# # ifdef __CYGWIN32__ +# # define __CYGWIN__ __CYGWIN32__ +# # endif +# #endif +# +# #ifdef __cplusplus +# extern "C" { +# #endif +# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved); +# #ifdef __cplusplus +# } +# #endif +# +# #ifdef __CYGWIN__ +# #include <cygwin/cygwin_dll.h> +# DECLARE_CYGWIN_DLL( DllMain ); +# #endif +# HINSTANCE __hDllInstance_base; +# +# BOOL APIENTRY +# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved) +# { +# __hDllInstance_base = hInst; +# return TRUE; +# } +# /* ltdll.c ends here */ + # This is a source program that is used to create import libraries + # on Windows for dlls which lack them. Don't remove nor modify the + # starting and closing comments +# /* impgen.c starts here */ +# /* Copyright (C) 1999-2000 Free Software Foundation, Inc. +# +# This file is part of GNU libtool. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# */ +# +# #include <stdio.h> /* for printf() */ +# #include <unistd.h> /* for open(), lseek(), read() */ +# #include <fcntl.h> /* for O_RDONLY, O_BINARY */ +# #include <string.h> /* for strdup() */ +# +# /* O_BINARY isn't required (or even defined sometimes) under Unix */ +# #ifndef O_BINARY +# #define O_BINARY 0 +# #endif +# +# static unsigned int +# pe_get16 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[2]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 2); +# return b[0] + (b[1]<<8); +# } +# +# static unsigned int +# pe_get32 (fd, offset) +# int fd; +# int offset; +# { +# unsigned char b[4]; +# lseek (fd, offset, SEEK_SET); +# read (fd, b, 4); +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# static unsigned int +# pe_as32 (ptr) +# void *ptr; +# { +# unsigned char *b = ptr; +# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); +# } +# +# int +# main (argc, argv) +# int argc; +# char *argv[]; +# { +# int dll; +# unsigned long pe_header_offset, opthdr_ofs, num_entries, i; +# unsigned long export_rva, export_size, nsections, secptr, expptr; +# unsigned long name_rvas, nexp; +# unsigned char *expdata, *erva; +# char *filename, *dll_name; +# +# filename = argv[1]; +# +# dll = open(filename, O_RDONLY|O_BINARY); +# if (!dll) +# return 1; +# +# dll_name = filename; +# +# for (i=0; filename[i]; i++) +# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':') +# dll_name = filename + i +1; +# +# pe_header_offset = pe_get32 (dll, 0x3c); +# opthdr_ofs = pe_header_offset + 4 + 20; +# num_entries = pe_get32 (dll, opthdr_ofs + 92); +# +# if (num_entries < 1) /* no exports */ +# return 1; +# +# export_rva = pe_get32 (dll, opthdr_ofs + 96); +# export_size = pe_get32 (dll, opthdr_ofs + 100); +# nsections = pe_get16 (dll, 
pe_header_offset + 4 +2); +# secptr = (pe_header_offset + 4 + 20 + +# pe_get16 (dll, pe_header_offset + 4 + 16)); +# +# expptr = 0; +# for (i = 0; i < nsections; i++) +# { +# char sname[8]; +# unsigned long secptr1 = secptr + 40 * i; +# unsigned long vaddr = pe_get32 (dll, secptr1 + 12); +# unsigned long vsize = pe_get32 (dll, secptr1 + 16); +# unsigned long fptr = pe_get32 (dll, secptr1 + 20); +# lseek(dll, secptr1, SEEK_SET); +# read(dll, sname, 8); +# if (vaddr <= export_rva && vaddr+vsize > export_rva) +# { +# expptr = fptr + (export_rva - vaddr); +# if (export_rva + export_size > vaddr + vsize) +# export_size = vsize - (export_rva - vaddr); +# break; +# } +# } +# +# expdata = (unsigned char*)malloc(export_size); +# lseek (dll, expptr, SEEK_SET); +# read (dll, expdata, export_size); +# erva = expdata - export_rva; +# +# nexp = pe_as32 (expdata+24); +# name_rvas = pe_as32 (expdata+32); +# +# printf ("EXPORTS\n"); +# for (i = 0; i<nexp; i++) +# { +# unsigned long name_rva = pe_as32 (erva+name_rvas+i*4); +# printf ("\t%s @ %ld ;\n", erva+name_rva, 1+ i); +# } +# +# return 0; +# } +# /* impgen.c ends here */ + +EOF + ;; + esac + + + # Append the ltmain.sh script. + sed '$q' "$ltmain" >> "$ofile" || (rm -f "$ofile"; exit 1) + # We use sed instead of cat because bash on DJGPP gets confused if + # it finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + + chmod +x "$ofile" + ;; + +*) + # Compile the libtool program. + echo "FIXME: would compile $ltmain" + ;; +esac + +test -n "$cache_file" || exit 0 + +# AC_CACHE_SAVE +trap '' 1 2 15 +cat > confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. 
+# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. +# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. +# +EOF +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +(set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote substitution + # turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + -e "s/'/'\\\\''/g" \ + -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' + ;; + esac >> confcache +if cmp -s $cache_file confcache; then + : +else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache > $cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +exit 0 + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/rts/gmp/ltmain.sh b/rts/gmp/ltmain.sh new file mode 100644 index 0000000000..d81d89f878 --- /dev/null +++ b/rts/gmp/ltmain.sh @@ -0,0 +1,4692 @@ +# ltmain.sh - Provide generalized library-building support services. +# NOTE: Changing this file will not affect anything until you rerun ltconfig. 
+# +# Copyright (C) 1996-2000 Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Check that we have a working $echo. +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell, and then maybe $echo will work. + exec $SHELL "$0" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat <<EOF +$* +EOF + exit 0 +fi + +# The name of this program. +progname=`$echo "$0" | sed 's%^.*/%%'` +modename="$progname" + +# Constants. +PROGRAM=ltmain.sh +PACKAGE=libtool +VERSION=1.3c +TIMESTAMP=" (1.696 2000/03/14 20:22:42)" + +default_mode= +help="Try \`$progname --help' for more information." 
+magic="%%%MAGIC variable%%%" +mkdir="mkdir" +mv="mv -f" +rm="rm -f" + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e 1s/^X//' +sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g' +SP2NL='tr \040 \012' +NL2SP='tr \015\012 \040\040' + +# NLS nuisances. +# Only set LANG and LC_ALL to C if already set. +# These must not be set unconditionally because not all systems understand +# e.g. LANG=C (notably SCO). +# We save the old values to restore during execute mode. +if test "${LC_ALL+set}" = set; then + save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL +fi +if test "${LANG+set}" = set; then + save_LANG="$LANG"; LANG=C; export LANG +fi + +if test "$LTCONFIG_VERSION" != "$VERSION"; then + echo "$modename: ltconfig version \`$LTCONFIG_VERSION' does not match $PROGRAM version \`$VERSION'" 1>&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 +fi + +if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then + echo "$modename: not configured to build any kind of library" 1>&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 +fi + +# Global variables. +mode=$default_mode +nonopt= +prev= +prevopt= +run= +show="$echo" +show_help= +execute_dlfiles= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" + +# Parse our command line options once, thoroughly. +while test $# -gt 0 +do + arg="$1" + shift + + case "$arg" in + -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case "$prev" in + execute_dlfiles) + eval "$prev=\"\$$prev \$arg\"" + ;; + *) + eval "$prev=\$arg" + ;; + esac + + prev= + prevopt= + continue + fi + + # Have we seen a non-optional argument yet? 
+ case "$arg" in + --help) + show_help=yes + ;; + + --version) + echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" + exit 0 + ;; + + --config) + sed -e '1,/^### BEGIN LIBTOOL CONFIG/d' -e '/^### END LIBTOOL CONFIG/,$d' $0 + exit 0 + ;; + + --debug) + echo "$progname: enabling shell trace mode" + set -x + ;; + + --dry-run | -n) + run=: + ;; + + --features) + echo "host: $host" + if test "$build_libtool_libs" = yes; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + exit 0 + ;; + + --finish) mode="finish" ;; + + --mode) prevopt="--mode" prev=mode ;; + --mode=*) mode="$optarg" ;; + + --quiet | --silent) + show=: + ;; + + -dlopen) + prevopt="-dlopen" + prev=execute_dlfiles + ;; + + -*) + $echo "$modename: unrecognized option \`$arg'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + + *) + nonopt="$arg" + break + ;; + esac +done + +if test -n "$prevopt"; then + $echo "$modename: option \`$prevopt' requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 +fi + +if test -z "$show_help"; then + + # Infer the operation mode. + if test -z "$mode"; then + case "$nonopt" in + *cc | *++ | gcc* | *-gcc*) + mode=link + for arg + do + case "$arg" in + -c) + mode=compile + break + ;; + esac + done + ;; + *db | *dbx | *strace | *truss) + mode=execute + ;; + *install*|cp|mv) + mode=install + ;; + *rm) + mode=uninstall + ;; + *) + # If we have no mode, but dlfiles were specified, then do execute mode. + test -n "$execute_dlfiles" && mode=execute + + # Just use the default operation mode. + if test -z "$mode"; then + if test -n "$nonopt"; then + $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 + else + $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 + fi + fi + ;; + esac + fi + + # Only execute mode is allowed to have -dlopen flags. 
+ if test -n "$execute_dlfiles" && test "$mode" != execute; then + $echo "$modename: unrecognized option \`-dlopen'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$modename --help --mode=$mode' for more information." + + # These modes are in order of execution frequency so that they run quickly. + case "$mode" in + # libtool compile mode + compile) + modename="$modename: compile" + # Get the compilation command and the source file. + base_compile= + prev= + lastarg= + srcfile="$nonopt" + suppress_output= + + user_target=no + for arg + do + case "$prev" in + "") ;; + xcompiler) + # Aesthetically quote the previous argument. + prev= + lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + + case "$arg" in + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + + # Add the previous argument to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + + # Accept any command-line options. + case "$arg" in + -o) + if test "$user_target" != "no"; then + $echo "$modename: you cannot specify \`-o' more than once" 1>&2 + exit 1 + fi + user_target=next + ;; + + -static) + build_old_libs=yes + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` + lastarg= + IFS="${IFS= }"; save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. 
+ case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + lastarg="$lastarg $arg" + done + IFS="$save_ifs" + lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` + + # Add the arguments to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + continue + ;; + esac + + case "$user_target" in + next) + # The next one is the -o target name + user_target=yes + continue + ;; + yes) + # We got the output file + user_target=set + libobj="$arg" + continue + ;; + esac + + # Accept the current argument as the source file. + lastarg="$srcfile" + srcfile="$arg" + + # Aesthetically quote the previous argument. + + # Backslashify any backslashes, double quotes, and dollar signs. + # These are the only characters that are still specially + # interpreted inside of double-quoted strings. + lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + case "$lastarg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + lastarg="\"$lastarg\"" + ;; + esac + + # Add the previous argument to base_compile. + if test -z "$base_compile"; then + base_compile="$lastarg" + else + base_compile="$base_compile $lastarg" + fi + done + + case "$user_target" in + set) + ;; + no) + # Get the name of the library object. + libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` + ;; + *) + $echo "$modename: you must specify a target with \`-o'" 1>&2 + exit 1 + ;; + esac + + # Recognize several different file suffixes. 
+ # If the user specifies -o file.o, it is replaced with file.lo + xform='[cCFSfmso]' + case "$libobj" in + *.ada) xform=ada ;; + *.adb) xform=adb ;; + *.ads) xform=ads ;; + *.asm) xform=asm ;; + *.c++) xform=c++ ;; + *.cc) xform=cc ;; + *.cpp) xform=cpp ;; + *.cxx) xform=cxx ;; + *.f90) xform=f90 ;; + *.for) xform=for ;; + esac + + libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` + + case "$libobj" in + *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; + *) + $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 + exit 1 + ;; + esac + + if test -z "$base_compile"; then + $echo "$modename: you must specify a compilation command" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Delete any leftover library objects. + if test "$build_old_libs" = yes; then + removelist="$obj $libobj" + else + removelist="$libobj" + fi + + $run $rm $removelist + trap "$run $rm $removelist; exit 1" 1 2 15 + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\..*$%%'`.${objext} + lockfile="$output_obj.lock" + removelist="$removelist $output_obj $lockfile" + trap "$run $rm $removelist; exit 1" 1 2 15 + else + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until ln "$0" "$lockfile" 2>/dev/null; do + $show "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + echo "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. 
If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + echo $srcfile > "$lockfile" + fi + + if test -n "$fix_srcfile_path"; then + eval srcfile=\"$fix_srcfile_path\" + fi + + # Only build a PIC object if we are building libtool libraries. + if test "$build_libtool_libs" = yes; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test "$pic_mode" != no; then + # All platforms use -DPIC, to notify preprocessed assembler code. + command="$base_compile $srcfile $pic_flag -DPIC" + else + # Don't build PIC code + command="$base_compile $srcfile" + fi + if test "$build_old_libs" = yes; then + lo_libobj="$libobj" + dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$libobj"; then + dir="$objdir" + else + dir="$dir/$objdir" + fi + libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` + + if test -d "$dir"; then + $show "$rm $libobj" + $run $rm $libobj + else + $show "$mkdir $dir" + $run $mkdir $dir + status=$? + if test $status -ne 0 && test ! -d $dir; then + exit $status + fi + fi + fi + if test "$compiler_o_lo" = yes; then + output_obj="$libobj" + command="$command -o $output_obj" + elif test "$compiler_c_o" = yes; then + output_obj="$obj" + command="$command -o $output_obj" + fi + + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + test -n "$output_obj" && $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. 
If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed, then go on to compile the next one + if test x"$output_obj" != x"$libobj"; then + $show "$mv $output_obj $libobj" + if $run $mv $output_obj $libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # If we have no pic_flag, then copy the object into place and finish. + if (test -z "$pic_flag" || test "$pic_mode" != default) && + test "$build_old_libs" = yes; then + # Rename the .lo from within objdir to obj + if test -f $obj; then + $show $rm $obj + $run $rm $obj + fi + + $show "$mv $libobj $obj" + if $run $mv $libobj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"` + libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + # Now arrange that obj and lo_libobj become the same file + $show "(cd $xdir && $LN_S $baseobj $libobj)" + if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then + exit 0 + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Allow error messages only from the first compilation. + suppress_output=' >/dev/null 2>&1' + fi + + # Only build a position-dependent object if we build old libraries. + if test "$build_old_libs" = yes; then + if test "$pic_mode" != yes; then + # Don't build PIC code + command="$base_compile $srcfile" + else + # All platforms use -DPIC, to notify preprocessed assembler code. + command="$base_compile $srcfile $pic_flag -DPIC" + fi + if test "$compiler_c_o" = yes; then + command="$command -o $obj" + output_obj="$obj" + fi + + # Suppress compiler output if we already did a PIC compilation. 
+ command="$command$suppress_output" + $run $rm "$output_obj" + $show "$command" + if $run eval "$command"; then : + else + $run $rm $removelist + exit 1 + fi + + if test "$need_locks" = warn && + test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then + echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit 1 + fi + + # Just move the object if needed + if test x"$output_obj" != x"$obj"; then + $show "$mv $output_obj $obj" + if $run $mv $output_obj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Create an invalid libtool object if no PIC, so that we do not + # accidentally link it into a program. + if test "$build_libtool_libs" != yes; then + $show "echo timestamp > $libobj" + $run eval "echo timestamp > \$libobj" || exit $? + else + # Move the .lo from within objdir + $show "$mv $libobj $lo_libobj" + if $run $mv $libobj $lo_libobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + fi + + # Unlock the critical section if it was locked + if test "$need_locks" != no; then + $rm "$lockfile" + fi + + exit 0 + ;; + + # libtool link mode + link | relink) + modename="$modename: link" + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # which system we are compiling for in order to pass an extra + # flag for every libtool invocation. 
+ # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll which has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. + allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args="$nonopt" + compile_command="$nonopt" + finalize_command="$nonopt" + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + + avoid_version=no + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + prefer_static_libs=no + preload=no + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + + # We need to know -static, to get the right output filenames. + for arg + do + case "$arg" in + -all-static | -static) + if test "X$arg" = "X-all-static"; then + if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then + $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2 + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + else + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + fi + build_libtool_libs=no + build_old_libs=yes + prefer_static_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. + test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. 
+ while test $# -gt 0; do + arg="$1" + shift + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test + ;; + *) qarg=$arg ;; + esac + libtool_args="$libtool_args $qarg" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case "$prev" in + output) + compile_command="$compile_command @OUTPUT@" + finalize_command="$finalize_command @OUTPUT@" + ;; + esac + + case "$prev" in + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. + compile_command="$compile_command @SYMFILE@" + finalize_command="$finalize_command @SYMFILE@" + preload=yes + fi + case "$arg" in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + dlfiles="$dlfiles $arg" + else + dlprefiles="$dlprefiles $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + if test ! -f "$arg"; then + $echo "$modename: symbol file \`$arg' does not exist" + exit 1 + fi + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. 
+ case "$arg" in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) rpath="$rpath $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) xrpath="$xrpath $arg" ;; + esac + fi + prev= + continue + ;; + xcompiler) + compiler_flags="$compiler_flags $qarg" + prev= + compile_command="$compile_command $qarg" + finalize_command="$finalize_command $qarg" + continue + ;; + xlinker) + linker_flags="$linker_flags $qarg" + compiler_flags="$compiler_flags $wl$qarg" + prev= + compile_command="$compile_command $wl$qarg" + finalize_command="$finalize_command $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi + + prevarg="$arg" + + case "$arg" in + -all-static) + if test -n "$link_static_flag"; then + compile_command="$compile_command $link_static_flag" + finalize_command="$finalize_command $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 + continue + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: not more than one -exported-symbols argument allowed" + exit 1 + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -L*) + dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` + # We need an absolute path. 
+ case "$dir" in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 + exit 1 + fi + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "*) ;; + *) + deplibs="$deplibs -L$dir" + lib_search_path="$lib_search_path $dir" + ;; + esac + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2*) + case ":$dllsearchpath:" in + *":$dir:"*) ;; + *) dllsearchpath="$dllsearchpath:$dir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "$arg" = "-lc"; then + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) + # These systems don't actually have c library (as such) + continue + ;; + esac + elif test "$arg" = "-lm"; then + case "$host" in + *-*-cygwin* | *-*-beos*) + # These systems don't actually have math library (as such) + continue + ;; + esac + fi + deplibs="$deplibs $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2*) + # The PATH hackery in wrapper scripts is required on Windows + # in order for the loader to find any dlls it needs. + $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2 + $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2 + fast_install=no + ;; + *) + no_install=yes + ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -o) prev=output ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` + # We need an absolute path. 
+ case "$dir" in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit 1 + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + continue + ;; + + -static) + # If we have no pic_flag, then this is the same as -all-static. + if test -z "$pic_flag" && test -n "$link_static_flag"; then + compile_command="$compile_command $link_static_flag" + finalize_command="$finalize_command $link_static_flag" + fi + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'` + arg= + IFS="${IFS= }"; save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case "$flag" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Wl,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'` + arg= + IFS="${IFS= }"; save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case "$flag" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $wl$flag" + linker_flags="$linker_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + # Some other compiler flag. + -* | +*) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + + *.$objext) + # A standard object. + objs="$objs $arg" + ;; + + *.lo) + # A library object. 
+ if test "$prev" = dlfiles; then + # This file was specified with -dlopen. + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + dlfiles="$dlfiles $arg" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + if test "$prev" = dlprefiles; then + # Preload the old-style object. + dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"` + prev= + else + libobjs="$libobjs $arg" + fi + ;; + + *.$libext) + # An archive. + deplibs="$deplibs $arg" + old_deplibs="$old_deplibs $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + dlfiles="$dlfiles $arg" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + dlprefiles="$dlprefiles $arg" + prev= + else + deplibs="$deplibs $arg" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + esac + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + done + + if test -n "$prev"; then + $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$echo \"X \${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + lib_search_path="$lib_search_path $sys_lib_search_path $shlib_search_path" + + output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` + if test "X$output_objdir" = "X$output"; then + output_objdir="$objdir" + else + output_objdir="$output_objdir/$objdir" + fi + # Create the object directory. + if test ! -d $output_objdir; then + $show "$mkdir $output_objdir" + $run $mkdir $output_objdir + status=$? + if test $status -ne 0 && test ! -d $output_objdir; then + exit $status + fi + fi + + case "$output" in + "") + $echo "$modename: you must specify an output file" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + *.$libext) + linkmode=oldlib ;; + *.lo | *.$objext) + linkmode=obj ;; + *.la) + linkmode=lib ;; + *) # Anything else should be a program. + linkmode=prog ;; + esac + + specialdeplibs= + libs= + # Find all interdependent deplibs that + # are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + case "$libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + libs="$libs $deplib" + done + deplibs= + newdependency_libs= + uninst_path= # paths that contain uninstalled libtool libraries + new_lib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + case $linkmode in + lib) + passes="link" + for file in $dlfiles $dlprefiles; do + case "$file" in + *.la) ;; + *) + $echo "$modename: libraries can \`-dlopen' only libtool libraries" 1>&2 + exit 1 + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + link_against_libtool_libs= + passes="scan dlopen dlpreopen link" + ;; + *) passes="link" + ;; + esac + for pass in $passes; do + if test $linkmode = prog; then + case $pass in + dlopen) libs="$dlfiles" ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test $pass = dlopen; then + # Collect dlpreopened libraries + save_deplibs="$deplibs" + deplibs= + fi + for deplib in $libs; do + lib= + found=no + case "$deplib" in + -l*) + if test $linkmode != lib && test $linkmode != prog; then + $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2 + continue + fi + name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` + for searchdir in $lib_search_path; do + # Search the libtool library + lib="$searchdir/lib${name}.la" + if test -f "$lib"; then + found=yes + break + fi + done + if test "$found" != yes; then + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + newdependency_libs="$deplib $newdependency_libs" + new_lib_search_path="$new_lib_search_path "`$echo 
"X$deplib" | $Xsed -e 's/^-L//'` + ;; + prog) + if test $pass = scan; then + deplibs="$deplib $deplibs" + new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + ;; + *) + $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2 + ;; + esac + continue + ;; + -R*) + if test "$linkmode,$pass" = "prog,link"; then + dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + fi + continue + ;; + *.la) lib="$deplib" ;; + *.$libext) + case $linkmode in + lib) + if test "$deplibs_check_method" != pass_all; then + echo + echo "*** Warning: This library needs some functionality provided by $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + else + echo + echo "*** Warning: Linking the shared library $output against the" + echo "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + fi + continue + ;; + prog) + if test $pass != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac + ;; + *.lo | *.$objext) + if test $linkmode = prog; then + if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. 
+ newdlprefiles="$newdlprefiles $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + newdlfiles="$newdlfiles $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac + if test $found = yes || test -f "$lib"; then : + else + $echo "$modename: cannot find the library \`$lib'" 1>&2 + exit 1 + fi + + # Check to see that this really is a libtool archive. + if (sed -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + + ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` + test "X$ladir" = "X$lib" && ladir="." + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + # If the library was installed with an old release of libtool, + # it will not redefine variable installed. + installed=yes + + # Read the .la file + case "$lib" in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + + if test $linkmode = lib || test "$linkmode,$pass" = "prog,scan"; then + test -n "$dlopen" && dlfiles="$dlfiles $dlopen" + test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" + fi + + if test $linkmode != lib && test $linkmode != prog; then + # only check for convenience libraries + if test -z "$old_library"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + if test -n "$libdir"; then + $echo "$modename: \`$lib' is not a convenience library" 1>&2 + exit 1 + fi + # It is a libtool convenience library, so add in its objects. + convenience="$convenience $ladir/$objdir/$old_library" + old_convenience="$old_convenience $ladir/$objdir/$old_library" + continue + fi + + # Get the name of the library we link against. 
+ linklib= + for l in $old_library $library_names; do + linklib="$l" + done + if test -z "$linklib"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit 1 + fi + + # This library was specified with -dlopen. + if test $pass = dlopen; then + if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking statically, + # we need to preload. + dlprefiles="$dlprefiles $lib" + else + newdlfiles="$newdlfiles $lib" + fi + continue + fi + + # We need an absolute path. + case "$ladir" in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 + $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 + abs_ladir="$ladir" + fi + ;; + esac + laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + $echo "$modename: warning: library \`$lib' was moved." 1>&2 + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$libdir" + absdir="$libdir" + fi + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + uninst_path="$uninst_path $abs_ladir" + fi + name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + + # This library was specified with -dlpreopen. + if test $pass = dlpreopen; then + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). 
+ if test -n "$old_library"; then + newdlprefiles="$newdlprefiles $dir/$old_library" + else + newdlprefiles="$newdlprefiles $dir/$linklib" + fi + fi + + if test $linkmode = prog && test $pass != link; then + new_lib_search_path="$new_lib_search_path $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test "$fast_install" != no || \ + test "$build_libtool_libs" = no || test -z "$library_names"; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case "$deplib" in + -L*) new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test + esac + # Need to link against all dependency_libs? + if test $linkalldeplibs = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + tmp_libs="$tmp_libs $deplib" + done + continue + fi + + if test -z "$libdir"; then + # It is a libtool convenience library, so add in its objects. 
+ convenience="$convenience $dir/$old_library" + old_convenience="$old_convenience $dir/$old_library" + if test $linkmode = lib; then + deplibs="$dir/$old_library $deplibs" + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + tmp_libs="$tmp_libs $deplib" + done + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + fi + continue + fi + + if test "$linkmode,$pass" = "prog,link"; then + if test -n "$library_names" && + { test "$hardcode_into_libs" != all || test "$alldeplibs" != yes; } && + { test "$prefer_static_libs" = no || test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var"; then + # Make sure the rpath contains only unique directories. + case "$temp_rpath " in + *" $dir "*) ;; + *" $absdir "*) ;; + *) temp_rpath="$temp_rpath $dir" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + fi + + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # Do we only need to link against static libraries? 
+ continue + fi + fi + + link_static=no # Whether this library is linked statically + if test -n "$library_names" && + { test "$prefer_static_libs" = no || test -z "$old_library"; }; then + link_against_libtool_libs="$link_against_libtool_libs $lib" + # The .la file said installed=no: note that a relink is needed. + # Both operands carry the same "X" guard so the comparison can match. + test "X$installed" = Xno && need_relink=yes + # This is a shared library + if test $linkmode = lib && test "$hardcode_into_libs" = all; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + realname="$2" + shift; shift + libname=`eval \\$echo \"$libname_spec\"` + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + newlib="libimp-`echo $soname | sed 's/^lib//;s/\.dll$//'`.a" + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + $show "extracting exported symbol list from \`$soname'" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + eval cmds=\"$extract_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + $show "generating import library for \`$soname'" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + eval cmds=\"$old_archive_from_expsyms_cmds\" + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $?
+ done + IFS="$save_ifs" + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi + + if test $linkmode = prog || test "$mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case "$hardcode_action" in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = no; then + case "$host" in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + $echo "$modename: configuration error: unsupported hardcode properties" + exit 1 + fi + + if test -n "$add_shlibpath"; then + case ":$compile_shlibpath:" in + *":$add_shlibpath:"*) ;; + *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; + esac + fi + if test $linkmode = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && \ + test "$hardcode_minus_L" != yes && \ + test "$hardcode_shlibpath_var" = yes; then + case ":$finalize_shlibpath:" in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + fi + fi + fi + + if test $linkmode = prog || test "$mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test "$hardcode_direct" = yes; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case ":$finalize_shlibpath:" in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + add="-l$name" + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + add="-l$name" + fi + + if test $linkmode = prog; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + # Prepend to the accumulated $deplibs list; do not replace it. + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test $linkmode = prog; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test "$hardcode_direct" != unsupported; then + test -n "$old_library" && linklib="$old_library" + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test "$build_libtool_libs" = yes; then + # Not a shared library + if test "$deplibs_check_method" != pass_all; then + # We're trying to link a shared library against a static one + # but the system doesn't support it. + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + echo "*** Warning: This library needs some functionality provided by $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have."
+ else + convenience="$convenience $dir/$old_library" + old_convenience="$old_convenience $dir/$old_library" + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi + + if test $linkmode = lib; then + if test -n "$dependency_libs" && + { test "$hardcode_into_libs" = no || test $build_old_libs = yes || + test $link_static = yes; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case "$libdir" in + -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'` + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) xrpath="$xrpath $temp_xrpath";; + esac;; + *) temp_deplibs="$temp_deplibs $libdir";; + esac + done + dependency_libs="$temp_deplibs" + fi + + new_lib_search_path="$new_lib_search_path $absdir" + # Link against this library + test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + tmp_libs="$tmp_libs $deplib" + done + + if test $link_all_deplibs != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + case "$deplib" in + -L*) path="$deplib" ;; + *.la) + dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$deplib" && dir="." + # We need an absolute path. 
+ case "$dir" in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 + absdir="$dir" + fi + ;; + esac + if grep "^installed=no" $deplib > /dev/null; then + path="-L$absdir/$objdir" + else + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + if test "$absdir" != "$libdir"; then + $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 + fi + path="-L$absdir" + fi + ;; + *) continue ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$deplibs $path" ;; + esac + done + fi + fi + done + dependency_libs="$newdependency_libs" + if test $pass = dlpreopen; then + # Link the dlpreopened libraries before other libraries + deplibs="$deplibs $save_deplibs" + elif test $pass != dlopen; then + # Make sure lib_search_path contains only unique directories. 
+ lib_search_path= + for dir in $new_lib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) lib_search_path="$lib_search_path $dir" ;; + esac + done + lib_search_path="$lib_search_path $sys_lib_search_path" + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Make sure that $var contains only unique libraries + # and add them in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + case "$deplib" in + -L*) new_libs="$deplib $new_libs" ;; + *) + case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case "$deplib" in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done + fi + done + if test $linkmode = prog; then + dlfiles="$newdlfiles" + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$deplibs"; then + $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2 + fi + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 + fi + + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: warning: \`-export-symbols' is 
+ ignored for archives" 1>&2 + fi + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs="$output" + objs="$objs$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. + case "$outputname" in + lib*) + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + eval libname=\"$libname_spec\" + ;; + *) + if test "$module" = no; then + $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + eval libname=\"$libname_spec\" + else + libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + # Fatal diagnostic: send it to stderr, not stdout. + $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 1>&2 + exit 1 + else + echo + echo "*** Warning: Linking the shared library $output against the non-libtool" + echo "*** objects $objs is not portable!" + libobjs="$libobjs $objs" + fi + fi + + if test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 + fi + + set dummy $rpath + if test $# -gt 2; then + $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 + fi + install_libdir="$2" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + libext=al + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 + fi + else + + # Parse the version information argument.
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + IFS="$save_ifs" + + if test -n "$8"; then + $echo "$modename: too many parameters to \`-version-info'" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + current="$2" + revision="$3" + age="$4" + + # Check that each of the things are valid numbers. + case "$current" in + 0 | [1-9] | [1-9][0-9]*) ;; + *) + $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case "$revision" in + 0 | [1-9] | [1-9][0-9]*) ;; + *) + $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + case "$age" in + 0 | [1-9] | [1-9][0-9]*) ;; + *) + $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + ;; + esac + + if test $age -gt $current; then + $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit 1 + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case "$version_type" in + none) ;; + + irix) + major=`expr $current - $age + 1` + versuffix="$major.$revision" + verstring="sgi$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test $loop != 0; do + iface=`expr $revision - $loop` + loop=`expr $loop - 1` + verstring="sgi$major.$iface:$verstring" + done + ;; + + linux) + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + ;; + + osf) + major=`expr $current - $age` + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. 
+ loop=$age + while test $loop != 0; do + iface=`expr $current - $loop` + loop=`expr $loop - 1` + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + verstring="$verstring:${current}.0" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current"; + ;; + + windows) + # Like Linux, but with '-' rather than '.', since we only + # want one extension on Windows 95. + major=`expr $current - $age` + versuffix="-$major-$age-$revision" + ;; + + *) + $echo "$modename: unknown library version type \`$version_type'" 1>&2 + echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit 1 + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + verstring="0.0" + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + fi + + if test "$mode" != relink; then + # Remove our outputs. + $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*" + $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.* + fi + + # Now set the variables for building old libraries. 
+ if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + oldlibs="$oldlibs $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + # The sed scripts are double-quoted so that $path is expanded by the shell. + for path in $uninst_path; do + lib_search_path=`echo "$lib_search_path " | sed -e "s% $path % %g"` + deplibs=`echo "$deplibs " | sed -e "s% -L$path % %g"` + dependency_libs=`echo "$dependency_libs " | sed -e "s% -L$path % %g"` + done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. + temp_xrpath= + for libdir in $xrpath; do + temp_xrpath="$temp_xrpath -R$libdir" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + if test "$hardcode_into_libs" = no || test $build_old_libs = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) dlfiles="$dlfiles $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) dlprefiles="$dlprefiles $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) + # these systems don't actually have a c library (as such)! + ;; + *) + # Add libc to deplibs on all other systems. + deplibs="$deplibs -lc" + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly.
I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case "$deplibs_check_method" in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behaviour. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $rm conftest.c + cat > conftest.c <<EOF + int main() { return 0; } +EOF + $rm conftest + $CC -o conftest conftest.c $deplibs + if test $? -eq 0 ; then + ldd_output=`ldd conftest` + for i in $deplibs; do + name="`expr $i : '-l\(.*\)'`" + # If $name is empty we are operating on a -L argument. + if test "$name" != "" ; then + libname=`eval \\$echo \"$libname_spec\"` + deplib_matches=`eval \\$echo \"$library_names_spec\"` + set dummy $deplib_matches + deplib_match=$2 + if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then + newdeplibs="$newdeplibs $i" + else + droppeddeps=yes + echo + echo "*** Warning: This library needs some functionality provided by $i." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + fi + else + newdeplibs="$newdeplibs $i" + fi + done + else + # Error occurred in the first compile. Let's try to salvage the situation: + # Compile a separate program for each library.
+ for i in $deplibs; do + name="`expr $i : '-l\(.*\)'`" + # If $name is empty we are operating on a -L argument. + if test "$name" != "" ; then + $rm conftest + $CC -o conftest conftest.c $i + # Did it work? + if test $? -eq 0 ; then + ldd_output=`ldd conftest` + libname=`eval \\$echo \"$libname_spec\"` + deplib_matches=`eval \\$echo \"$library_names_spec\"` + set dummy $deplib_matches + deplib_match=$2 + if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then + newdeplibs="$newdeplibs $i" + else + droppeddeps=yes + echo + echo "*** Warning: This library needs some functionality provided by $i." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + fi + else + droppeddeps=yes + echo + echo "*** Warning! Library $i is needed by this library but I was not able to" + echo "*** make it link in! You will probably need to install it or some" + echo "*** library that it depends on before this library will be fully" + echo "*** functional. Installing it before continuing would be even better." + fi + else + newdeplibs="$newdeplibs $i" + fi + done + fi + ;; + file_magic*) + set dummy $deplibs_check_method + file_magic_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` + for a_deplib in $deplibs; do + name="`expr $a_deplib : '-l\(.*\)'`" + # If $name is empty we are operating on a -L argument. + if test "$name" != "" ; then + libname=`eval \\$echo \"$libname_spec\"` + for i in $lib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null \ + | grep " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. 
+ # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | sed 's/.* -> //'` + case "$potliblink" in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ + | sed 10q \ + | egrep "$file_magic_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + if test -n "$a_deplib" ; then + droppeddeps=yes + echo + echo "*** Warning: This library needs some functionality provided by $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. + ;; + none | unknown | *) + newdeplibs="" + if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ + -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' | + grep . >/dev/null; then + echo + if test "X$deplibs_check_method" = "Xnone"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + fi + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + echo "*** dependencies of module $libname. 
Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** \`nm' from GNU binutils and a full rebuild may help." + fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + + # All the library-specific variables (install_libdir is set above). + library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + if test "$hardcode_into_libs" != no; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. 
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + dep_rpath="$dep_rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval dep_rpath=\"$hardcode_libdir_flag_spec\" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval library_names=\"$library_names_spec\" + set dummy $library_names + realname="$2" + shift; shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + lib="$output_objdir/$realname" + for link + do + linknames="$linknames $link" + done + + # Ensure that we have .o objects for linkers which dislike .lo + # (e.g. aix) in case we are running --disable-static + for obj in $libobjs; do + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + if test ! 
-f $xdir/$oldobj; then + $show "(cd $xdir && ${LN_S} $baseobj $oldobj)" + $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $? + fi + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + $show "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $run $rm $export_symbols + eval cmds=\"$export_symbols_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + if test -n "$export_symbols_regex"; then + $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" + $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + $show "$mv \"${export_symbols}T\" \"$export_symbols\"" + $run eval '$mv "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' + fi + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. + case "$xlib" in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? 
+ if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + linker_flags="$linker_flags $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$mode" = relink && test "$hardcode_into_libs" = all; then + $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval cmds=\"$archive_expsym_cmds\" + else + eval cmds=\"$archive_cmds\" + fi + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$mode" = relink && test "$hardcode_into_libs" = all; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? + exit 0 + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. 
+ dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$deplibs"; then + $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 + fi + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 + fi + + case "$output" in + *.lo) + if test -n "$objs$old_deplibs"; then + $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 + exit 1 + fi + libobj="$output" + obj=`$echo "X$output" | $Xsed -e "$lo2o"` + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete the old objects. + $run $rm $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec + wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${obj}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + for xlib in $convenience; do + # Extract the objects. 
+ case "$xlib" in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` + done + fi + fi + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + eval cmds=\"$reload_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + exit 0 + fi + + if test "$build_libtool_libs" != yes; then + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + $show "echo timestamp > $libobj" + $run eval "echo timestamp > $libobj" || exit $? + exit 0 + fi + + if test -n "$pic_flag" || test "$pic_mode" != default; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output="$libobj" + eval cmds=\"$reload_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + else + # Just create a symlink. 
+ $show $rm $libobj + $run $rm $libobj + xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$libobj"; then + xdir="." + else + xdir="$xdir" + fi + baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` + oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` + $show "(cd $xdir && $LN_S $oldobj $baseobj)" + $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $? + fi + + if test -n "$gentop"; then + $show "${rm}r $gentop" + $run ${rm}r $gentop + fi + + exit 0 + ;; + + prog) + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 + fi + + if test "$preload" = yes; then + if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown && + test "$dlopen_self_static" = unknown; then + # Warnings go to stderr, consistent with the other diagnostics. + $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." 1>&2 + fi + fi + + compile_command="$compile_command $compile_deplibs" + finalize_command="$finalize_command $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + case "$host" in + *-*-cygwin* | *-*-mingw* | *-*-os2*) + case ":$dllsearchpath:" in + *":$libdir:"*) ;; + *) dllsearchpath="$dllsearchpath:$libdir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + fi + + dlsyms= + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + dlsyms="${outputname}S.c" + else + $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 + fi + fi + + if test -n "$dlsyms"; then + case "$dlsyms" in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${outputname}.nm" + + $show "$rm $nlist ${nlist}S ${nlist}T" + $run $rm "$nlist" "${nlist}S" "${nlist}T" + + # Parse the name list into a source file. + $show "creating $output_objdir/$dlsyms" + + test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ +/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ +/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +/* Prevent the only kind of declaration conflicts we can make. */ +#define lt_preloaded_symbols some_other_symbol + +/* External symbol declarations for the compiler. */\ +" + + if test "$dlself" = yes; then + $show "generating symbol list for \`$output'" + + test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + for arg in $progfiles; do + $show "extracting global C symbols from \`$arg'" + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + if test -n "$export_symbols_regex"; then + $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T' + $run eval '$mv "$nlist"T "$nlist"' + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols="$output_objdir/$output.exp" + $run $rm $export_symbols + $run eval "sed -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + else + $run eval "sed -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"' + $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T' + $run eval 'mv "$nlist"T "$nlist"' + fi + fi + + for arg in $dlprefiles; do + $show "extracting global C symbols from \`$arg'" + name=`echo "$arg" | sed -e 's%^.*/%%'` + $run eval 'echo ": $name " >> "$nlist"' + $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" + done + + if test -z "$run"; then + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $mv "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if grep -v "^: " < "$nlist" | sort +2 | uniq > "$nlist"S; then + : + else + grep -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$dlsyms" + fi + + $echo >> "$output_objdir/$dlsyms" "\ + +#undef lt_preloaded_symbols + +#if defined (__STDC__) && __STDC__ +# define lt_ptr_t void * +#else +# define lt_ptr_t char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr_t address; +} +lt_preloaded_symbols[] = +{\ +" + + sed -n -e 's/^: \([^ ]*\) $/ {\"\1\", (lt_ptr_t) 0},/p' \ + -e 's/^. \([^ ]*\) \([^ ]*\)$/ {"\2", (lt_ptr_t) \&\2},/p' \ + < "$nlist" >> "$output_objdir/$dlsyms" + + $echo >> "$output_objdir/$dlsyms" "\ + {0, (lt_ptr_t) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + fi + + pic_flag_for_symtable= + case "$host" in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";; + esac;; + *-*-hpux*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DPIC";; + esac + esac + + # Now compile the dynamic symbol file. + $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" + $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? + + # Clean up the generated files. 
+ $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" + $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" + + # Transform the symbol file into the correct name. + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + ;; + *) + $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 + exit 1 + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` + fi + + if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then + # Replace the output file specification. + compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. + $show "$link_command" + $run eval "$link_command" + status=$? + + # Delete the generated files. + if test -n "$dlsyms"; then + $show "$rm $output_objdir/${outputname}S.${objext}" + $run $rm "$output_objdir/${outputname}S.${objext}" + fi + + exit $status + fi + + if test -n "$shlibpath_var"; then + # We should set the shlibpath_var + rpath= + for dir in $temp_rpath; do + case "$dir" in + [\\/]* | [A-Za-z]:[\\/]*) + # Absolute path. + rpath="$rpath$dir:" + ;; + *) + # Relative path: add a thisdir entry. 
+ rpath="$rpath\$thisdir/$dir:" + ;; + esac + done + temp_rpath="$rpath" + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + rpath="$rpath$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $run $rm $output + # Link the executable and exit + $show "$link_command" + $run eval "$link_command" || exit $? 
+ exit 0 + fi + + if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 + $echo "$modename: \`$output' will be relinked during installation" 1>&2 + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname + + $show "$link_command" + $run eval "$link_command" || exit $? + + # Now create the wrapper script. + $show "creating $output" + + # Quote the relink command for shipping. + if test -n "$relink_command"; then + relink_command="cd `pwd`; $relink_command" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + fi + + # Quote $echo for shipping. + if test "X$echo" = "X$SHELL $0 --fallback-echo"; then + case "$0" in + [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";; + *) qecho="$SHELL `pwd`/$0 --fallback-echo";; + esac + qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` + else + qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` + fi + + # Only actually do things if our run command is non-null. 
+ if test -z "$run"; then + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) output=`echo $output|sed 's,.exe$,,'` ;; + esac + $rm $output + trap "$rm $output; exit 1" 1 2 15 + + $echo > $output "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e 1s/^X//' +sed_quote_subst='$sed_quote_subst' + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variable: + link_against_libtool_libs='$link_against_libtool_libs' +else + # When we are sourced in execute mode, \$file and \$echo are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + echo=\"$qecho\" + file=\"\$0\" + # Make sure echo works. + if test \"X\$1\" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift + elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then + # Yippee, \$echo works! + : + else + # Restart under the correct shell, and then maybe \$echo will work. + exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} + fi + fi\ +" + $echo >> $output "\ + + # Find the directory that this script lives in. + thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. 
+ + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\/]* | [A-Za-z]:[\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\` + done + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + echo >> $output "\ + program=lt-'$outputname' + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || \\ + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | sed 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! -d \"\$progdir\"; then + $mkdir \"\$progdir\" + else + $rm \"\$progdir/\$file\" + fi" + + echo >> $output "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if (eval \$relink_command); then : + else + $rm \"\$progdir/\$file\" + exit 1 + fi + fi + + $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $rm \"\$progdir/\$program\"; + $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $rm \"\$progdir/\$file\" + fi" + else + echo >> $output "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + echo >> $output "\ + + if test -f \"\$progdir/\$program\"; then" + + # Export our shlibpath_var if we have one. 
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $echo >> $output "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` + + export $shlibpath_var +" + fi + + # fixup the dll searchpath if we need to. + if test -n "$dllsearchpath"; then + $echo >> $output "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + $echo >> $output "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. +" + case $host in + *-*-cygwin* | *-*-mingw | *-*-os2*) + # win32 systems need to use the prog path for dll + # lookup to work + $echo >> $output "\ + exec \$progdir\\\\\$program \${1+\"\$@\"} +" + ;; + *) + $echo >> $output "\ + # Export the path to the program. + PATH=\"\$progdir:\$PATH\" + export PATH + + exec \$program \${1+\"\$@\"} +" + ;; + esac + $echo >> $output "\ + \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" + exit 1 + fi + else + # The program doesn't exist. + \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2 + \$echo \"This script is just a wrapper for \$program.\" 1>&2 + echo \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" + chmod +x $output + fi + exit 0 + ;; + esac + + # See if we need to build an old-fashioned archive. 
+ for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP` + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "mkdir $gentop" + $run mkdir "$gentop" + status=$? + if test $status -ne 0 && test ! -d "$gentop"; then + exit $status + fi + generated="$generated $gentop" + + # Add in members from convenience archives. + for xlib in $addlibs; do + # Extract the objects. + case "$xlib" in + [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; + *) xabs=`pwd`"/$xlib" ;; + esac + xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` + xdir="$gentop/$xlib" + + $show "${rm}r $xdir" + $run ${rm}r "$xdir" + $show "mkdir $xdir" + $run mkdir "$xdir" + status=$? + if test $status -ne 0 && test ! -d "$xdir"; then + exit $status + fi + $show "(cd $xdir && $AR x $xabs)" + $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? + + oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP` + done + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + eval cmds=\"$old_archive_from_new_cmds\" + else + # Ensure that we have .o objects in place in case we decided + # not to build a shared library, and have fallen back to building + # static libs even though --disable-static was passed! + for oldobj in $oldobjs; do + if test ! -f $oldobj; then + xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$oldobj"; then + xdir="." 
+ else + xdir="$xdir" + fi + baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'` + obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` + $show "(cd $xdir && ${LN_S} $obj $baseobj)" + $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $? + fi + done + + eval cmds=\"$old_archive_cmds\" + fi + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$generated"; then + $show "${rm}r$generated" + $run ${rm}r$generated + fi + + # Now create the libtool archive. + case "$output" in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + $show "creating $output" + + # Quote the link command for shipping. + relink_command="cd `pwd`; $SHELL $0 --mode=relink $libtool_args" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + + # Only create the output if not a dry run. + if test -z "$run"; then + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case "$deplib" in + *.la) + name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdependency_libs="$newdependency_libs $libdir/$name" + ;; + *) newdependency_libs="$newdependency_libs $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + for lib in $dlfiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdlfiles="$newdlfiles $libdir/$name" + done + dlfiles="$newdlfiles" + 
newdlprefiles= + for lib in $dlprefiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit 1 + fi + newdlprefiles="$newdlprefiles $libdir/$name" + done + dlprefiles="$newdlprefiles" + fi + $rm $output + $echo > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$dlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no; then + $echo >> $output "\ +relink_command=\"$relink_command\"" + fi + done + fi + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" + $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? + ;; + esac + exit 0 + ;; + + # libtool install mode + install) + modename="$modename: install" + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). + if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh; then + # Aesthetically quote it. 
+ arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$arg " + arg="$1" + shift + else + install_prog= + arg="$nonopt" + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog$arg" + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + for arg + do + if test -n "$dest"; then + files="$files $dest" + dest="$arg" + continue + fi + + case "$arg" in + -d) isdir=yes ;; + -f) prev="-f" ;; + -g) prev="-g" ;; + -m) prev="-m" ;; + -o) prev="-o" ;; + -s) + stripme=" -s" + continue + ;; + -*) ;; + + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + prev= + else + dest="$arg" + continue + fi + ;; + esac + + # Aesthetically quote the argument. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case "$arg" in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog $arg" + done + + if test -z "$install_prog"; then + $echo "$modename: you must specify an install program" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test -n "$prev"; then + $echo "$modename: the \`$prev' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + if test -z "$files"; then + if test -z "$dest"; then + $echo "$modename: no file or destination specified" 1>&2 + else + $echo "$modename: you must specify a destination" 1>&2 + fi + $echo "$help" 1>&2 + exit 1 + fi + + # Strip any trailing slash from the destination. + dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` + + # Check to see that the destination is a directory. 
+ test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` + test "X$destdir" = "X$dest" && destdir=. + destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` + + # Not a directory, so check to see that there is only one file specified. + set dummy $files + if test $# -gt 2; then + $echo "$modename: \`$dest' is not a directory" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + fi + case "$destdir" in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case "$file" in + *.lo) ;; + *) + $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case "$file" in + *.$libext) + # Do the static libraries later. + staticlibs="$staticlibs $file" + ;; + + *.la) + # Check to see that this really is a libtool archive. + if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + library_names= + old_library= + relink_command= + # If there is no directory component, then add one. + case "$file" in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) current_libdirs="$current_libdirs $libdir" ;; + esac + else + # Note the libdir as a future libdir. 
+ case "$future_libdirs " in + *" $libdir "*) ;; + *) future_libdirs="$future_libdirs $libdir" ;; + esac + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ + test "X$dir" = "X$file/" && dir= + dir="$dir$objdir" + + if test "$hardcode_into_libs" = all; then + if test -z "$relink_command"; then + $echo "$modename: invalid libtool pseudo library \`$file'" 1>&2 + exit 1 + fi + $echo "$modename: warning: relinking \`$file'" 1>&2 + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + continue + fi + fi + + # See the names of the shared library. + set dummy $library_names + if test -n "$2"; then + realname="$2" + shift + shift + + srcname="$realname" + test "$hardcode_into_libs" = all && srcname="$realname"T + + # Install the shared library and build the symlinks. + $show "$install_prog $dir/$srcname $destdir/$realname" + $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? + if test -n "$stripme" && test -n "$striplib"; then + $show "$striplib $destdir/$realname" + $run eval "$striplib $destdir/$realname" || exit $? + fi + + if test $# -gt 0; then + # Delete the old symlinks, and create new ones. + for linkname + do + if test "$linkname" != "$realname"; then + $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" + fi + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + eval cmds=\"$postinstall_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Install the pseudo-library for information purposes. + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + instname="$dir/$name"i + $show "$install_prog $instname $destdir/$name" + $run eval "$install_prog $instname $destdir/$name" || exit $? 
+ + # Maybe install the static library, too. + test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. + case "$destfile" in + *.lo) + staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; + esac + + # Install the libtool object if requested. + if test -n "$destfile"; then + $show "$install_prog $file $destfile" + $run eval "$install_prog $file $destfile" || exit $? + fi + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` + + $show "$install_prog $staticobj $staticdest" + $run eval "$install_prog \$staticobj \$staticdest" || exit $? + fi + exit 0 + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Do a test to see if this is really a libtool program. + if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + link_against_libtool_libs= + relink_command= + + # If there is no directory component, then add one. + case "$file" in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Check the variables that should have been set. 
+ if test -z "$link_against_libtool_libs"; then + $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2 + exit 1 + fi + + finalize=yes + for lib in $link_against_libtool_libs; do + # Check to see that each library is installed. + libdir= + if test -f "$lib"; then + # If there is no directory component, then add one. + case "$lib" in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + fi + libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! -f "$libfile"; then + $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 + finalize=no + fi + done + + relink_command= + # If there is no directory component, then add one. + case "$file" in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + if test "$finalize" = yes && test -z "$run"; then + tmpdir="/tmp" + test -n "$TMPDIR" && tmpdir="$TMPDIR" + tmpdir="$tmpdir/libtool-$$" + if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then : + else + $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2 + continue + fi + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` + + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + ${rm}r "$tmpdir" + continue + fi + file="$outputname" + else + $echo "$modename: warning: cannot relink \`$file'" 1>&2 + fi + else + # Install the binary that we compiled earlier. + file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + $show "$install_prog$stripme $file $destfile" + $run eval "$install_prog\$stripme \$file \$destfile" || exit $? 
+ test -n "$outputname" && ${rm}r "$tmpdir" + ;; + esac + done + + # Install the static archives queued in $staticlibs. + for file in $staticlibs; do + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + + # Set up the ranlib parameters. + oldlib="$destdir/$name" + + $show "$install_prog $file $oldlib" + $run eval "$install_prog \$file \$oldlib" || exit $? + + # The command run here is $old_striplib, so guard on that variable; + # testing $striplib could run an empty command against the archive. + if test -n "$stripme" && test -n "$old_striplib"; then + $show "$old_striplib $oldlib" + $run eval "$old_striplib $oldlib" || exit $? + fi + + # Do each command in the postinstall commands. + eval cmds=\"$old_postinstall_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$future_libdirs"; then + $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 + fi + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + test -n "$run" && current_libdirs=" -n$current_libdirs" + exec $SHELL $0 --finish$current_libdirs + exit 1 + fi + + exit 0 + ;; + + # libtool finish mode + finish) + modename="$modename: finish" + libdirs="$nonopt" + admincmds= + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for dir + do + libdirs="$libdirs $dir" + done + + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + eval cmds=\"$finish_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || admincmds="$admincmds + $cmd" + done + IFS="$save_ifs" + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $run eval "$cmds" || admincmds="$admincmds + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode.
+ test "$show" = : && exit 0 + + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + echo " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use \`-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the \`$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the \`$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + echo " - use the \`$flag' linker flag" + fi + if test -n "$admincmds"; then + echo " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" + fi + echo + echo "See any operating system documentation about shared libraries for" + echo "more information, such as the ld(1) and ld.so(8) manual pages." + echo "----------------------------------------------------------------------" + exit 0 + ;; + + # libtool execute mode + execute) + modename="$modename: execute" + + # The first argument is the command name. + cmd="$nonopt" + if test -z "$cmd"; then + $echo "$modename: you must specify a COMMAND" 1>&2 + $echo "$help" + exit 1 + fi + + # Handle -dlopen flags immediately. + for file in $execute_dlfiles; do + if test ! -f "$file"; then + $echo "$modename: \`$file' is not a file" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + dir= + case "$file" in + *.la) + # Check to see that this really is a libtool archive. 
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Read the libtool library. + dlname= + library_names= + + # If there is no directory component, then add one. + case "$file" in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'" + continue + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$file" && dir=. + + if test -f "$dir/$objdir/$dlname"; then + dir="$dir/$objdir" + else + $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2 + exit 1 + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$file" && dir=. + ;; + + *) + $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir="$absdir" + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case "$file" in + -*) ;; + *) + # Do a test to see if this is really a libtool program. + if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + # If there is no directory component, then add one. + case "$file" in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Transform arg to wrapped name. 
+ file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` + args="$args \"$file\"" + done + + if test -z "$run"; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + if test "${save_LC_ALL+set}" = set; then + LC_ALL="$save_LC_ALL"; export LC_ALL + fi + if test "${save_LANG+set}" = set; then + LANG="$save_LANG"; export LANG + fi + + # Now actually exec the command. + eval "exec \$cmd$args" + + # Reached only if the exec did not replace this process. The command + # name is escaped above for eval only; here it must expand so the + # diagnostic names the real command, and it belongs on stderr. + $echo "$modename: cannot exec $cmd$args" 1>&2 + exit 1 + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" + $echo "export $shlibpath_var" + fi + $echo "$cmd$args" + exit 0 + fi + ;; + + # libtool clean and uninstall mode + clean | uninstall) + modename="$modename: $mode" + rm="$nonopt" + files= + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case "$arg" in + -*) rm="$rm $arg" ;; + *) files="$files $arg" ;; + esac + done + + if test -z "$rm"; then + $echo "$modename: you must specify an RM program" 1>&2 + $echo "$help" 1>&2 + exit 1 + fi + + # Remember the configured objdir: it is recomputed per file below and + # must not accumulate the directory prefixes of earlier files. + origobjdir="$objdir" + + for file in $files; do + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$file"; then + dir=. + objdir="$origobjdir" + else + objdir="$dir/$origobjdir" + fi + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + test $mode = uninstall && objdir="$dir" + + rmfiles="$file" + + case "$name" in + *.la) + # Possibly a libtool archive, so verify it. + if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + . $dir/$name + + # Delete the libtool libraries and symlinks.
+ for n in $library_names; do + rmfiles="$rmfiles $objdir/$n" + done + test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" + test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" + + if test $mode = uninstall; then + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + eval cmds=\"$postuninstall_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + done + IFS="$save_ifs" + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + eval cmds=\"$old_postuninstall_cmds\" + IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" + done + IFS="$save_ifs" + fi + # FIXME: should reinstall the best remaining shared library. + fi + fi + ;; + + *.lo) + if test "$build_old_libs" = yes; then + oldobj=`$echo "X$name" | $Xsed -e "$lo2o"` + rmfiles="$rmfiles $dir/$oldobj" + fi + ;; + + *) + # Do a test to see if this is a libtool program. + if test $mode = clean && + (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + relink_command= + # Source the wrapper via $dir/$name: $file may already carry its + # directory component, so $dir/$file would double it. + . $dir/$name + + rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + rmfiles="$rmfiles $objdir/lt-$name" + fi + fi + ;; + esac + $show "$rm $rmfiles" + $run $rm $rmfiles + done + exit 0 + ;; + + "") + $echo "$modename: you must specify a MODE" 1>&2 + $echo "$generic_help" 1>&2 + exit 1 + ;; + esac + + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$generic_help" 1>&2 + exit 1 +fi # test -z "$show_help" + +# We need to display help for each of the modes. +case "$mode" in +"") $echo \ +"Usage: $modename [OPTION]... [MODE-ARG]... + +Provide generalized library-building support services.
+ + --config show all configuration variables + --debug enable verbose shell tracing +-n, --dry-run display commands without modifying any files + --features display basic configuration information and exit + --finish same as \`--mode=finish' + --help display this help message and exit + --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] + --quiet same as \`--silent' + --silent don't print informational messages + --version print version information + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for +a more detailed description of MODE." + exit 0 + ;; + +clean) + $echo \ +"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + +compile) + $echo \ +"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -static always build a \`.o' file suitable for static linking + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. 
+ +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + +execute) + $echo \ +"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." + ;; + +finish) + $echo \ +"Usage: $modename [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + +install) + $echo \ +"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + +link) + $echo \ +"Usage: $modename [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -static do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. 
+ +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + +uninstall) + $echo \ +"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + +*) + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$help" 1>&2 + exit 1 + ;; +esac + +echo +$echo "Try \`$modename --help' for more information about other modes." + +exit 0 + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/rts/gmp/mdate-sh b/rts/gmp/mdate-sh new file mode 100644 index 0000000000..37171f21fb --- /dev/null +++ b/rts/gmp/mdate-sh @@ -0,0 +1,92 @@ +#!/bin/sh +# Get modification time of a file or directory and pretty-print it. +# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +# written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# Prevent date giving response in another language. 
+LANG=C +export LANG +LC_ALL=C +export LC_ALL +LC_TIME=C +export LC_TIME + +# Get the extended ls output of the file or directory. +# On HPUX /bin/sh, "set" interprets "-rw-r--r--" as options, so the "x" below. +if ls -L /dev/null 1>/dev/null 2>&1; then + set - x`ls -L -l -d $1` +else + set - x`ls -l -d $1` +fi +# The month is at least the fourth argument +# (3 shifts here, the next inside the loop). +shift +shift +shift + +# Find the month. Next argument is day, followed by the year or time. +month= +until test $month +do + shift + case $1 in + Jan) month=January; nummonth=1;; + Feb) month=February; nummonth=2;; + Mar) month=March; nummonth=3;; + Apr) month=April; nummonth=4;; + May) month=May; nummonth=5;; + Jun) month=June; nummonth=6;; + Jul) month=July; nummonth=7;; + Aug) month=August; nummonth=8;; + Sep) month=September; nummonth=9;; + Oct) month=October; nummonth=10;; + Nov) month=November; nummonth=11;; + Dec) month=December; nummonth=12;; + esac +done + +day=$2 + +# Here we have to deal with the problem that the ls output gives either +# the time of day or the year. +case $3 in + *:*) set `date`; eval year=\$$# + case $2 in + Jan) nummonthtod=1;; + Feb) nummonthtod=2;; + Mar) nummonthtod=3;; + Apr) nummonthtod=4;; + May) nummonthtod=5;; + Jun) nummonthtod=6;; + Jul) nummonthtod=7;; + Aug) nummonthtod=8;; + Sep) nummonthtod=9;; + Oct) nummonthtod=10;; + Nov) nummonthtod=11;; + Dec) nummonthtod=12;; + esac + # For the first six months of the year the time notation can also + # be used for files modified in the last year; a file month later than + # the current month therefore means the previous year. + if (expr $nummonth \> $nummonthtod) > /dev/null; + then + year=`expr $year - 1` + fi;; + *) year=$3;; +esac + +# The result. +echo $day $month $year diff --git a/rts/gmp/memory.c b/rts/gmp/memory.c new file mode 100644 index 0000000000..9df440ce22 --- /dev/null +++ b/rts/gmp/memory.c @@ -0,0 +1,160 @@ +/* Memory allocation routines. + +Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> +#include <stdlib.h> /* for malloc, realloc, free */ + +#include "gmp.h" +#include "gmp-impl.h" + +#ifdef __NeXT__ +#define static +#endif + + +void * (*_mp_allocate_func) _PROTO ((size_t)) = _mp_default_allocate; +void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t)) + = _mp_default_reallocate; +void (*_mp_free_func) _PROTO ((void *, size_t)) = _mp_default_free; + + +/* Default allocation functions. In case of failure to allocate/reallocate + an error message is written to stderr and the program aborts. 
*/ +/* Allocate SIZE bytes with malloc; on failure report through perror and + abort. Under DEBUG the request is enlarged by two limbs, the returned + pointer is advanced by one limb, a guard word is stored in the limb just + before it and, when SIZE is a multiple of BYTES_PER_MP_LIMB, the + complement of that guard word is stored in the limb just after the block. */ +void * +#if __STDC__ +_mp_default_allocate (size_t size) +#else +_mp_default_allocate (size) + size_t size; +#endif +{ + void *ret; +#ifdef DEBUG + size_t req_size = size; + size += 2 * BYTES_PER_MP_LIMB; +#endif + ret = malloc (size); + if (ret == 0) + { + perror ("cannot allocate in gmp"); + abort (); + } + +#ifdef DEBUG + { + mp_ptr p = ret; + p++; + p[-1] = (0xdeadbeef << 31) + 0xdeafdeed; + if (req_size % BYTES_PER_MP_LIMB == 0) + p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed); + ret = p; + } +#endif + return ret; +} +/* Resize the block at OLDPTR from OLD_SIZE to NEW_SIZE bytes with realloc; + on failure report through perror and abort. Under DEBUG, when OLD_SIZE + is non-zero the guard words around the old block are checked (aborting + with a message on stderr if either was overwritten) and OLDPTR is stepped + back one limb before the realloc call; fresh guard words are then written + around the new block exactly as in _mp_default_allocate. */ +void * +#if __STDC__ +_mp_default_reallocate (void *oldptr, size_t old_size, size_t new_size) +#else +_mp_default_reallocate (oldptr, old_size, new_size) + void *oldptr; + size_t old_size; + size_t new_size; +#endif +{ + void *ret; + +#ifdef DEBUG + size_t req_size = new_size; + + if (old_size != 0) + { + mp_ptr p = oldptr; + if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed) + { + fprintf (stderr, "gmp: (realloc) data clobbered before allocation block\n"); + abort (); + } + if (old_size % BYTES_PER_MP_LIMB == 0) + if (p[old_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed)) + { + fprintf (stderr, "gmp: (realloc) data clobbered after allocation block\n"); + abort (); + } + oldptr = p - 1; + } + + new_size += 2 * BYTES_PER_MP_LIMB; +#endif + + ret = realloc (oldptr, new_size); + if (ret == 0) + { + perror ("cannot allocate in gmp"); + abort (); + } + +#ifdef DEBUG + { + mp_ptr p = ret; + p++; + p[-1] = (0xdeadbeef << 31) + 0xdeafdeed; + if (req_size % BYTES_PER_MP_LIMB == 0) + p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed); + ret = p; + } +#endif + return ret; +} +/* Release BLK_PTR with free. Under DEBUG, when BLK_SIZE is non-zero the + guard words around the block are verified first (aborting with a message + on stderr if either was overwritten), and the pointer is always stepped + back one limb before being handed to free. */ +void +#if __STDC__ +_mp_default_free (void *blk_ptr, size_t blk_size) +#else +_mp_default_free (blk_ptr, blk_size) + void *blk_ptr; + size_t blk_size; +#endif +{ +#ifdef DEBUG + { + mp_ptr p = blk_ptr; + if (blk_size != 0) + { + if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed) + { + fprintf (stderr, "gmp: (free) data clobbered
before allocation block\n"); + abort (); + } + if (blk_size % BYTES_PER_MP_LIMB == 0) + if (p[blk_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed)) + { + fprintf (stderr, "gmp: (free) data clobbered after allocation block\n"); + abort (); + } + } + blk_ptr = p - 1; + } +#endif + free (blk_ptr); +} diff --git a/rts/gmp/missing b/rts/gmp/missing new file mode 100644 index 0000000000..c60e9d772f --- /dev/null +++ b/rts/gmp/missing @@ -0,0 +1,244 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. +# Copyright (C) 1996, 1997, 1999 Free Software Foundation, Inc. +# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +run=: + +case "$1" in +--run) + # Try to run requested program, and just exit if it succeeds. + run= + shift + "$@" && exit 0 + ;; +esac + +# If it does not exist, or fails to run (possibly an outdated version), +# try to emulate it. +case "$1" in + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. 
+ +Options: + -h, --help display this help and exit + -v, --version output version information and exit + --run try to run the given command, and emulate it if it fails + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + tar try tar, gnutar, gtar, then tar without non-portable flags + yacc create \`y.tab.[ch]', if possible, from existing .[ch]" + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing 0.2 - GNU automake" + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + + aclocal) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`acinclude.m4' or \`configure.in'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`configure.in'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`acconfig.h' or \`configure.in'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." 
+ files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case "$f" in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if [ ! -f y.tab.h ]; then + echo >y.tab.h + fi + if [ ! -f y.tab.c ]; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if [ ! 
-f lex.yy.c ]; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is missing on your system. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` + fi + touch $file + ;; + + tar) + shift + if test -n "$run"; then + echo 1>&2 "ERROR: \`tar' requires --run" + exit 1 + fi + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. + if (gnutar --version > /dev/null 2>&1); then + gnutar ${1+"$@"} && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar ${1+"$@"} && exit 0 + fi + firstarg="$1" + if shift; then + case "$firstarg" in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" ${1+"$@"} && exit 0 + ;; + esac + case "$firstarg" in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" ${1+"$@"} && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. + You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and you do not seem to have it handy on your + system. You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequirements for installing + this package. 
You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 diff --git a/rts/gmp/mkinstalldirs b/rts/gmp/mkinstalldirs new file mode 100644 index 0000000000..5e17cd39fb --- /dev/null +++ b/rts/gmp/mkinstalldirs @@ -0,0 +1,38 @@ +#! /bin/sh +# mkinstalldirs --- make directory hierarchy +# Author: Noah Friedman <friedman@prep.ai.mit.edu> +# Created: 1993-05-16 +# Public domain + +errstatus=0 + +for file +do + set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` + shift + + pathcomp= + for d + do + pathcomp="$pathcomp$d" + case "$pathcomp" in + -* ) pathcomp=./$pathcomp ;; + esac + + if test ! -d "$pathcomp"; then + echo "mkdir $pathcomp" + + mkdir "$pathcomp" || lasterr=$? + + if test ! -d "$pathcomp"; then + errstatus=$lasterr + fi + fi + + pathcomp="$pathcomp/" + done +done + +exit $errstatus + +# mkinstalldirs ends here diff --git a/rts/gmp/mp.h b/rts/gmp/mp.h new file mode 100644 index 0000000000..ffab4cba82 --- /dev/null +++ b/rts/gmp/mp.h @@ -0,0 +1,124 @@ +/* mp.h -- Definitions for Berkeley compatible multiple precision functions. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#ifndef __MP_H__ + +#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */ +#define __GNU_MP__ 3 +#define __need_size_t +#include <stddef.h> +#undef __need_size_t + +#if defined (__STDC__) || defined (__cplusplus) +#define __gmp_const const +#else +#define __gmp_const +#endif + +#if defined (__GNUC__) +#define __gmp_inline __inline__ +#else +#define __gmp_inline +#endif + +#ifndef _EXTERN_INLINE +#ifdef __GNUC__ +#define _EXTERN_INLINE extern __inline__ +#else +#define _EXTERN_INLINE static +#endif +#endif + +#ifdef _SHORT_LIMB +typedef unsigned int mp_limb_t; +typedef int mp_limb_signed_t; +#else +#ifdef _LONG_LONG_LIMB +typedef unsigned long long int mp_limb_t; +typedef long long int mp_limb_signed_t; +#else +typedef unsigned long int mp_limb_t; +typedef long int mp_limb_signed_t; +#endif +#endif + +typedef mp_limb_t * mp_ptr; +typedef __gmp_const mp_limb_t * mp_srcptr; +typedef int mp_size_t; +typedef long int mp_exp_t; + +typedef struct +{ + int _mp_alloc; /* Number of *limbs* allocated and pointed + to by the D field. */ + int _mp_size; /* abs(SIZE) is the number of limbs + the last field points to. If SIZE + is negative this is a negative + number. */ + mp_limb_t *_mp_d; /* Pointer to the limbs. */ +} __mpz_struct; +#endif /* __GNU_MP__ */ + +/* User-visible types. 
*/ +typedef __mpz_struct MINT; + + +#ifndef _PROTO +#if (__STDC__-0) || defined (__cplusplus) +#define _PROTO(x) x +#else +#define _PROTO(x) () +#endif +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +#define mp_set_memory_functions __gmp_set_memory_functions +void mp_set_memory_functions _PROTO ((void *(*) (size_t), + void *(*) (void *, size_t, size_t), + void (*) (void *, size_t))); +MINT *itom _PROTO ((signed short int)); +MINT *xtom _PROTO ((const char *)); +void move _PROTO ((const MINT *, MINT *)); +void madd _PROTO ((const MINT *, const MINT *, MINT *)); +void msub _PROTO ((const MINT *, const MINT *, MINT *)); +void mult _PROTO ((const MINT *, const MINT *, MINT *)); +void mdiv _PROTO ((const MINT *, const MINT *, MINT *, MINT *)); +void sdiv _PROTO ((const MINT *, signed short int, MINT *, signed short int *)); +void msqrt _PROTO ((const MINT *, MINT *, MINT *)); +void pow _PROTO ((const MINT *, const MINT *, const MINT *, MINT *)); +void rpow _PROTO ((const MINT *, signed short int, MINT *)); +void gcd _PROTO ((const MINT *, const MINT *, MINT *)); +int mcmp _PROTO ((const MINT *, const MINT *)); +void min _PROTO ((MINT *)); +void mout _PROTO ((const MINT *)); +char *mtox _PROTO ((const MINT *)); +void mfree _PROTO ((MINT *)); + +#if defined (__cplusplus) +} +#endif + +#define __MP_H__ +#endif /* __MP_H__ */ diff --git a/rts/gmp/mp_bpl.c b/rts/gmp/mp_bpl.c new file mode 100644 index 0000000000..df8b03e5ab --- /dev/null +++ b/rts/gmp/mp_bpl.c @@ -0,0 +1,27 @@ +/* +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include "gmp.h" +#include "gmp-impl.h" + +const int mp_bits_per_limb = BITS_PER_MP_LIMB; +const int __gmp_0 = 0; +int __gmp_junk; diff --git a/rts/gmp/mp_clz_tab.c b/rts/gmp/mp_clz_tab.c new file mode 100644 index 0000000000..1bbd1d6a66 --- /dev/null +++ b/rts/gmp/mp_clz_tab.c @@ -0,0 +1,36 @@ +/* __clz_tab -- support for longlong.h + +Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +/* __clz_tab[i], for i in 0..255, is the number of significant bits in i: + 0 for i == 0, otherwise the position of the highest set bit counted + from 1 (so 1 -> 1, 2..3 -> 2, 4..7 -> 3, ..., 128..255 -> 8). */ +const +unsigned char __clz_tab[] = +{ + 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +}; diff --git a/rts/gmp/mp_minv_tab.c b/rts/gmp/mp_minv_tab.c new file mode 100644 index 0000000000..4afff85cfc --- /dev/null +++ b/rts/gmp/mp_minv_tab.c @@ -0,0 +1,50 @@ +/* A table of data supporting modlimb_invert(). + + THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE + INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE. */ + +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + + +/* modlimb_invert_table[i] is the multiplicative inverse of 2*i+1 mod 256, + ie.
(modlimb_invert_table[i] * (2*i+1)) % 256 == 1 */ + +const unsigned char modlimb_invert_table[128] = { + 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF, + 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF, + 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF, + 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF, + 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF, + 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F, + 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F, + 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F, + 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F, + 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F, + 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F, + 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F, + 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F, + 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F, + 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F, + 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF +}; diff --git a/rts/gmp/mp_set_fns.c b/rts/gmp/mp_set_fns.c new file mode 100644 index 0000000000..55d4d9d6e4 --- /dev/null +++ b/rts/gmp/mp_set_fns.c @@ -0,0 +1,48 @@ +/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions + for use by the mp package. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +/* Install ALLOC_FUNC, REALLOC_FUNC and FREE_FUNC as the library's + _mp_allocate_func, _mp_reallocate_func and _mp_free_func hooks. + Any argument passed as 0 selects the corresponding _mp_default_* + routine instead, so passing three zeros restores the defaults. */ +void +#if __STDC__ +mp_set_memory_functions (void *(*alloc_func) (size_t), + void *(*realloc_func) (void *, size_t, size_t), + void (*free_func) (void *, size_t)) +#else +mp_set_memory_functions (alloc_func, realloc_func, free_func) + void *(*alloc_func) (); + void *(*realloc_func) (); + void (*free_func) (); +#endif +{ + if (alloc_func == 0) + alloc_func = _mp_default_allocate; + if (realloc_func == 0) + realloc_func = _mp_default_reallocate; + if (free_func == 0) + free_func = _mp_default_free; + + _mp_allocate_func = alloc_func; + _mp_reallocate_func = realloc_func; + _mp_free_func = free_func; +} diff --git a/rts/gmp/mpn/Makefile.am b/rts/gmp/mpn/Makefile.am new file mode 100644 index 0000000000..1c49ccda25 --- /dev/null +++ b/rts/gmp/mpn/Makefile.am @@ -0,0 +1,94 @@ +## Process this file with automake to generate Makefile.in + +# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
# Automake input for the mpn directory.  It builds the non-installed libtool
# library libmpn.la from mp_bases.c plus the configure-substituted
# @mpn_objects@ list, using the hand-written suffix rules below for .s, .S
# and .asm sources.

AUTOMAKE_OPTIONS = gnu no-dependencies
# NOTE(review): the Makefile.in shipped next to this file has an empty
# SUBDIRS; confirm whether tests/ is meant to be built before regenerating.
SUBDIRS = tests

CPP = @CPP@

# -DOPERATION_$* tells multi-function files which function to produce.
INCLUDES = -I$(top_srcdir) -DOPERATION_$*

GENERIC_SOURCES = mp_bases.c
OFILES = @mpn_objects@

noinst_LTLIBRARIES = libmpn.la
libmpn_la_SOURCES = $(GENERIC_SOURCES)
libmpn_la_LIBADD = $(OFILES)
libmpn_la_DEPENDENCIES = $(OFILES)

# Per-architecture source directories shipped in the distribution.
TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \
  mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \
  sparc64 thumb vax x86 z8000 z8000x

EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST)

# COMPILE minus CC.  FIXME: Really pass *_CFLAGS to CPP?
COMPILE_FLAGS = \
	$(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)

SUFFIXES = .s .S .asm

# *.s are not preprocessed at all.
.s.o:
	$(CCAS) $(COMPILE_FLAGS) $<
.s.obj:
	$(CCAS) $(COMPILE_FLAGS) `cygpath -w $<`
.s.lo:
	$(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $<

# *.S are preprocessed with CPP.  The grep drops every output line that
# begins with '#' (presumably the preprocessor's line markers) before the
# temporary tmp-$*.s file is assembled and then removed.
.S.o:
	$(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	rm -f tmp-$*.s
.S.obj:
	$(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	rm -f tmp-$*.s

# We have to rebuild the static object file without passing -DPIC to the
# preprocessor.  The overhead cost is one extra assembly pass.  FIXME:
# Teach libtool how to assemble with a preprocessor pass (CPP or m4).
# The first two commands produce the libtool object ($@) from the -DPIC
# expansion; the next two overwrite $*.o with the non-PIC expansion.

.S.lo:
	$(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s
	$(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	$(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
	rm -f tmp-$*.s

# *.asm are preprocessed with m4.  As for .S.lo above, the .asm.lo rule
# assembles the source twice: once with -DPIC for the libtool object and
# once without it for $*.o.
.asm.o:
	$(M4) -DOPERATION_$* $< >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	rm -f tmp-$*.s
.asm.obj:
	$(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	rm -f tmp-$*.s
.asm.lo:
	$(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s
	$(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
	$(M4) -DOPERATION_$* $< >tmp-$*.s
	$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
	rm -f tmp-$*.s
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_FLAG = +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : + +@SET_MAKE@ +build_alias = @build_alias@ +build_triplet = @build@ +host_alias = @host_alias@ +host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ +AMDEP = @AMDEP@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AWK = @AWK@ +CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@ +CC = @CC@ +CCAS = @CCAS@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +EXEEXT = @EXEEXT@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +M4 = @M4@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@ +STRIP = @STRIP@ +U = @U@ +VERSION = @VERSION@ +gmp_srclinks = @gmp_srclinks@ +install_sh = @install_sh@ +mpn_objects = @mpn_objects@ +mpn_objs_in_libgmp = @mpn_objs_in_libgmp@ + +# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +AUTOMAKE_OPTIONS = gnu no-dependencies +SUBDIRS = + +CPP = @CPP@ + +# -DOPERATION_$* tells multi-function files which function to produce. +INCLUDES = -I$(top_srcdir) -DOPERATION_$* + +GENERIC_SOURCES = mp_bases.c +OFILES = @mpn_objects@ + +noinst_LTLIBRARIES = libmpn.la +libmpn_la_SOURCES = $(GENERIC_SOURCES) +libmpn_la_LIBADD = $(OFILES) +libmpn_la_DEPENDENCIES = $(OFILES) + +TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \ + mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \ + sparc64 thumb vax x86 z8000 z8000x + + +EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST) + +# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP? +COMPILE_FLAGS = \ + $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) + + +SUFFIXES = .s .S .asm +subdir = mpn +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I.. 
+CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +libmpn_la_LDFLAGS = +am_libmpn_la_OBJECTS = mp_bases.lo +libmpn_la_OBJECTS = $(am_libmpn_la_OBJECTS) +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CFLAGS = @CFLAGS@ +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(libmpn_la_SOURCES) +DIST_COMMON = README Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +GZIP_ENV = --best +depcomp = +SOURCES = $(libmpn_la_SOURCES) +OBJECTS = $(am_libmpn_la_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .asm .c .lo .o .obj .s +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu mpn/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLTLIBRARIES: + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + +distclean-noinstLTLIBRARIES: + +maintainer-clean-noinstLTLIBRARIES: + +mostlyclean-compile: + -rm -f *.o core *.core + -rm -f *.$(OBJEXT) + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + +maintainer-clean-libtool: + +libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) + $(LINK) $(libmpn_la_LDFLAGS) $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS) +.c.o: + $(COMPILE) -c $< +.c.obj: + $(COMPILE) -c `cygpath -w $<` +.c.lo: + $(LTCOMPILE) -c -o $@ $< + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. 
+# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. + +all-recursive install-data-recursive install-exec-recursive \ +installdirs-recursive install-recursive uninstall-recursive \ +check-recursive installcheck-recursive info-recursive dvi-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ + rev="$$subdir $$rev"; \ + if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \ + done; \ + test "$$dot_seen" = "no" && rev=". $$rev"; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -f$$here/ID $$unique $(LISP) + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || etags $(ETAGS_ARGS) $$tags $$unique $(LISP) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pR $$d/$$file $(distdir); \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + for subdir in $(SUBDIRS); do \ + if test "$$subdir" = .; then :; else \ + test -d $(distdir)/$$subdir \ + || mkdir $(distdir)/$$subdir \ + || exit 1; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \ + || exit 1; \ + fi; \ + done +info-am: +info: info-recursive +dvi-am: +dvi: dvi-recursive +check-am: all-am +check: check-recursive +installcheck-am: +installcheck: installcheck-recursive +install-exec-am: +install-exec: install-exec-recursive + +install-data-am: +install-data: install-data-recursive + 
+install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-recursive +uninstall-am: +uninstall: uninstall-recursive +all-am: Makefile $(LTLIBRARIES) +all-redirect: all-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install +installdirs: installdirs-recursive +installdirs-am: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: + -rm -f Makefile.in +mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \ + mostlyclean-libtool mostlyclean-tags \ + mostlyclean-generic + +mostlyclean: mostlyclean-recursive + +clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \ + clean-tags clean-generic mostlyclean-am + +clean: clean-recursive + +distclean-am: distclean-noinstLTLIBRARIES distclean-compile \ + distclean-libtool distclean-tags distclean-generic \ + clean-am + -rm -f libtool + +distclean: distclean-recursive + +maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \ + maintainer-clean-compile maintainer-clean-libtool \ + maintainer-clean-tags maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-recursive + +.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \ +clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile mostlyclean-libtool distclean-libtool \ +clean-libtool maintainer-clean-libtool install-recursive \ +uninstall-recursive install-data-recursive uninstall-data-recursive \ +install-exec-recursive uninstall-exec-recursive installdirs-recursive \ +uninstalldirs-recursive all-recursive check-recursive \ +installcheck-recursive info-recursive dvi-recursive \ +mostlyclean-recursive distclean-recursive clean-recursive \ +maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ +distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ +dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ +install-exec install-data-am install-data install-am install \ +uninstall-am uninstall all-redirect all-am all install-strip \ +installdirs-am installdirs mostlyclean-generic distclean-generic \ +clean-generic maintainer-clean-generic clean mostlyclean distclean \ +maintainer-clean + + +# *.s are not preprocessed at all. +.s.o: + $(CCAS) $(COMPILE_FLAGS) $< +.s.obj: + $(CCAS) $(COMPILE_FLAGS) `cygpath -w $<` +.s.lo: + $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $< + +# *.S are preprocessed with CPP. +.S.o: + $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + rm -f tmp-$*.s +.S.obj: + $(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + rm -f tmp-$*.s + +# We have to rebuild the static object file without passing -DPIC to +# preprocessor. The overhead cost is one extra assemblation. FIXME: +# Teach libtool how to assemble with a preprocessor pass (CPP or m4). 
+ +.S.lo: + $(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s + $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o + rm -f tmp-$*.s + +# *.m4 are preprocessed with m4. +.asm.o: + $(M4) -DOPERATION_$* $< >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + rm -f tmp-$*.s +.asm.obj: + $(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + rm -f tmp-$*.s +.asm.lo: + $(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s + $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@ + $(M4) -DOPERATION_$* $< >tmp-$*.s + $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o + rm -f tmp-$*.s + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/rts/gmp/mpn/README b/rts/gmp/mpn/README new file mode 100644 index 0000000000..7453c9d03e --- /dev/null +++ b/rts/gmp/mpn/README @@ -0,0 +1,13 @@ +This directory contains all code for the mpn layer of GMP. + +Most subdirectories contain machine-dependent code, written in assembly or C. +The `generic' subdirectory contains default code, used when there is no +machine-dependent replacement for a particular machine. + +There is one subdirectory for each ISA family. Note that e.g., 32-bit SPARC +and 64-bit SPARC are very different ISAs, and thus cannot share any code. + +A particular compile will only use code from one subdirectory, and the +`generic' subdirectory. The ISA-specific subdirectories contain hierarchies of +directories for various architecture variants and implementations; the +top-most level contains code that runs correctly on all variants.
; 29000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 2.1 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.


; INPUT PARAMETERS
; res_ptr	lr2
; s1_ptr	lr3
; s2_ptr	lr4
; size		lr5

; The result (the carry out of the most significant limb) is left in gr96.

; We use the loadm/storem instructions and operate on chunks of 8
; limbs per iteration, until fewer than 8 limbs remain.

; The 29k has no addition or subtraction instructions that don't
; affect carry, so we need to save and restore that as soon as we
; adjust the pointers. gr116 is used for this purpose. Note that
; gr116==0 means that carry should be set.

; The instruction written directly after each jump is relied upon to execute
; as well (branch delay slot); e.g. the return value is computed after
; `jmpi lr0'. See the Am29000 manual for the exact jump semantics.

	.sect .lit,lit
	.text
	.align 4
	.global ___gmpn_add_n
	.word 0x60000
___gmpn_add_n:
	srl	gr117,lr5,3		; gr117 = size / 8 = number of full chunks
	sub	gr118,gr117,1		; negative only when there is no full chunk
	jmpt	gr118,Ltail		; no full chunk: go straight to the tail
	constn	gr116,-1		; init cy reg
	sub	gr117,gr117,2		; count for jmpfdec

; Main loop working 8 limbs/iteration.
Loop:	mtsrim	cr,(8-1)
	loadm	0,0,gr96,lr3		; gr96..gr103 = next 8 limbs of s1
	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (4 bytes each)
	mtsrim	cr,(8-1)
	loadm	0,0,gr104,lr4		; gr104..gr111 = next 8 limbs of s2
	add	lr4,lr4,32

	subr	gr116,gr116,0		; restore carry
	addc	gr96,gr96,gr104
	addc	gr97,gr97,gr105
	addc	gr98,gr98,gr106
	addc	gr99,gr99,gr107
	addc	gr100,gr100,gr108
	addc	gr101,gr101,gr109
	addc	gr102,gr102,gr110
	addc	gr103,gr103,gr111
	subc	gr116,gr116,gr116	; gr116 = not(cy)

	mtsrim	cr,(8-1)
	storem	0,0,gr96,lr2		; write the 8 sum limbs
	jmpfdec	gr117,Loop
	add	lr2,lr2,32

; Code for the last up-to-7 limbs.
; This code might look very strange, but it's hard to write it
; differently without major slowdown.
; All remaining limbs of both operands are loaded at once; the jmpfdec
; ladder then adds one more limb pair per step and leaves for Lstore when
; the count in gr117 runs out. gr118 (limbs - 1) is reloaded into CR on the
; way out so that storem writes exactly the remaining limbs.

	and	lr5,lr5,(8-1)		; limbs left over after the full chunks
Ltail:	sub	gr118,lr5,1		; count for CR
	jmpt	gr118,Lend		; nothing left: return
	sub	gr117,lr5,2		; count for jmpfdec

	mtsr	cr,gr118
	loadm	0,0,gr96,lr3
	mtsr	cr,gr118
	loadm	0,0,gr104,lr4

	subr	gr116,gr116,0		; restore carry

	jmpfdec	gr117,L1
	addc	gr96,gr96,gr104
	jmp	Lstore
	mtsr	cr,gr118
L1:	jmpfdec	gr117,L2
	addc	gr97,gr97,gr105
	jmp	Lstore
	mtsr	cr,gr118
L2:	jmpfdec	gr117,L3
	addc	gr98,gr98,gr106
	jmp	Lstore
	mtsr	cr,gr118
L3:	jmpfdec	gr117,L4
	addc	gr99,gr99,gr107
	jmp	Lstore
	mtsr	cr,gr118
L4:	jmpfdec	gr117,L5
	addc	gr100,gr100,gr108
	jmp	Lstore
	mtsr	cr,gr118
L5:	jmpfdec	gr117,L6
	addc	gr101,gr101,gr109
	jmp	Lstore
	mtsr	cr,gr118
L6:	addc	gr102,gr102,gr110

Lstore:	storem	0,0,gr96,lr2
	subc	gr116,gr116,gr116	; gr116 = not(cy)

; gr116 is 0 when carry is set and -1 otherwise, so gr116 + 1 is the carry.
Lend:	jmpi	lr0
	add	gr96,gr116,1
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 2.1 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.


; ___gmpn_addmul_1: multiply the limb vector at s1_ptr by s2_limb and add the
; product into the limb vector at res_ptr. The final carry limb is returned
; in gr96.

; INPUT PARAMETERS
; res_ptr	lr2
; s1_ptr	lr3
; size		lr4
; s2_limb	lr5

; gr120 holds the carry limb passed from one limb (or chunk) to the next.
; The instruction written directly after each jump is relied upon to execute
; as well (branch delay slot); see the Am29000 manual.

	.cputype 29050
	.sect .lit,lit
	.text
	.align 4
	.global ___gmpn_addmul_1
	.word 0x60000
___gmpn_addmul_1:
	sub	lr4,lr4,8		; negative when fewer than 8 limbs
	jmpt	lr4,Ltail
	const	gr120,0			; init cylimb reg

	srl	gr117,lr4,3		; divide by 8
	sub	gr117,gr117,1		; count for jmpfdec

; Main loop, 8 limbs per iteration. For each source limb the product's low
; word goes to gr104..gr111 (multiplu) and the high word replaces the source
; limb in gr96..gr103 (multmu).
Loop:	mtsrim	cr,(8-1)
	loadm	0,0,gr96,lr3
	add	lr3,lr3,32

	multiplu gr104,gr96,lr5
	multmu	gr96,gr96,lr5
	multiplu gr105,gr97,lr5
	multmu	gr97,gr97,lr5
	multiplu gr106,gr98,lr5
	multmu	gr98,gr98,lr5
	multiplu gr107,gr99,lr5
	multmu	gr99,gr99,lr5
	multiplu gr108,gr100,lr5
	multmu	gr100,gr100,lr5
	multiplu gr109,gr101,lr5
	multmu	gr101,gr101,lr5
	multiplu gr110,gr102,lr5
	multmu	gr102,gr102,lr5
	multiplu gr111,gr103,lr5
	multmu	gr103,gr103,lr5

; Fold the incoming carry limb and each high word into the next low word.
	add	gr104,gr104,gr120
	addc	gr105,gr105,gr96
	addc	gr106,gr106,gr97
	addc	gr107,gr107,gr98
	addc	gr108,gr108,gr99
	addc	gr109,gr109,gr100
	addc	gr110,gr110,gr101
	addc	gr111,gr111,gr102
	addc	gr120,gr103,0		; carry limb = last high word + carry

; Add the 8 product limbs to the 8 limbs currently at res_ptr.
	mtsrim	cr,(8-1)
	loadm	0,0,gr96,lr2

	add	gr104,gr96,gr104
	addc	gr105,gr97,gr105
	addc	gr106,gr98,gr106
	addc	gr107,gr99,gr107
	addc	gr108,gr100,gr108
	addc	gr109,gr101,gr109
	addc	gr110,gr102,gr110
	addc	gr111,gr103,gr111
	addc	gr120,gr120,0		; absorb the carry out of this addition

	mtsrim	cr,(8-1)
	storem	0,0,gr104,lr2
	jmpfdec	gr117,Loop
	add	lr2,lr2,32

; Remaining 0..7 limbs, one limb per iteration.
Ltail:	and	lr4,lr4,(8-1)		; limbs left = size mod 8
	sub	gr118,lr4,1		; negative when no limbs are left
	jmpt	gr118,Lend
	sub	lr4,lr4,2		; count for jmpfdec
	sub	lr2,lr2,4		; offset res_ptr by one limb

Loop2:	load	0,0,gr116,lr3
	add	lr3,lr3,4
	multiplu gr117,gr116,lr5	; low word of the product
	multmu	gr118,gr116,lr5		; high word of the product
	add	lr2,lr2,4
	load	0,0,gr119,lr2		; current result limb
	add	gr117,gr117,gr120	; add incoming carry limb
	addc	gr118,gr118,0
	add	gr117,gr117,gr119	; add result limb
	store	0,0,gr117,lr2
	jmpfdec	lr4,Loop2
	addc	gr120,gr118,0		; next carry limb

Lend:	jmpi	lr0
	or	gr96,gr120,0		; copy
; ___gmpn_lshift: shift a limb vector left by a bit count and store the
; result in a second limb vector, working from the most significant limb
; downwards.

; Register usage as read from the code below:
; res_ptr	lr2
; s1_ptr	lr3
; size		lr4	(limb count; scaled by 4 to step the pointers)
; cnt		lr5	(shift count; loaded into the funnel count, FC)
; The bits shifted out of the most significant limb are returned in gr96.

; The instruction written directly after each jump is relied upon to execute
; as well (branch delay slot). Each `extract' combines two adjacent limbs
; using the funnel count set by `mtsr fc'; see the Am29000 manual for the
; exact EXTRACT semantics.

	.sect .lit,lit
	.text
	.align 4
	.global ___gmpn_lshift
	.word 0x60000
___gmpn_lshift:
	sll	gr116,lr4,2		; size in bytes
	add	lr3,gr116,lr3		; point s1_ptr past the last limb
	add	lr2,gr116,lr2		; point res_ptr past the last limb
	sub	lr3,lr3,4
	load	0,0,gr119,lr3		; gr119 = most significant source limb

	subr	gr116,lr5,32		; 32 - cnt
	srl	gr96,gr119,gr116	; return value
	sub	lr4,lr4,1		; actual loop count is SIZE - 1

	srl	gr117,lr4,3		; chunk count = (actual count) / 8
	cpeq	gr118,gr117,0
	jmpt	gr118,Ltail		; no full chunk
	mtsr	fc,lr5

	sub	gr117,gr117,2		; count for jmpfdec

; Main loop working 8 limbs/iteration.
; gr119 always holds the limb just above the ones being loaded.
Loop:	sub	lr3,lr3,32
	mtsrim	cr,(8-1)
	loadm	0,0,gr100,lr3		; gr100..gr107 = next 8 lower limbs

	extract	gr109,gr119,gr107
	extract	gr108,gr107,gr106
	extract	gr107,gr106,gr105
	extract	gr106,gr105,gr104
	extract	gr105,gr104,gr103
	extract	gr104,gr103,gr102
	extract	gr103,gr102,gr101
	extract	gr102,gr101,gr100

	sub	lr2,lr2,32
	mtsrim	cr,(8-1)
	storem	0,0,gr102,lr2		; write result limbs gr102..gr109
	jmpfdec	gr117,Loop
	or	gr119,gr100,0		; lowest loaded limb feeds the next round

; Code for the last up-to-7 limbs.

	and	lr4,lr4,(8-1)
Ltail:	cpeq	gr118,lr4,0
	jmpt	gr118,Lend
	sub	lr4,lr4,2		; count for jmpfdec

Loop2:	sub	lr3,lr3,4
	load	0,0,gr116,lr3
	extract	gr117,gr119,gr116
	sub	lr2,lr2,4
	store	0,0,gr117,lr2
	jmpfdec	lr4,Loop2
	or	gr119,gr116,0

; Least significant result limb: the last source limb combined with zero.
Lend:	extract	gr117,gr119,0
	sub	lr2,lr2,4
	jmpi	lr0
	store	0,0,gr117,lr2
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.


; ___gmpn_mul_1: multiply the limb vector at s1_ptr by s2_limb and store the
; product at res_ptr. The final carry limb is returned in gr96.

; INPUT PARAMETERS
; res_ptr	lr2
; s1_ptr	lr3
; size		lr4
; s2_limb	lr5

; gr120 holds the carry limb passed from one limb (or chunk) to the next.
; The instruction written directly after each jump is relied upon to execute
; as well (branch delay slot); see the Am29000 manual.

	.cputype 29050
	.sect .lit,lit
	.text
	.align 4
	.global ___gmpn_mul_1
	.word 0x60000
___gmpn_mul_1:
	sub	lr4,lr4,8		; negative when fewer than 8 limbs
	jmpt	lr4,Ltail
	const	gr120,0			; init cylimb reg

	srl	gr117,lr4,3		; divide by 8
	sub	gr117,gr117,1		; count for jmpfdec

; Main loop, 8 limbs per iteration. For each source limb the product's low
; word goes to gr104..gr111 (multiplu) and the high word replaces the source
; limb in gr96..gr103 (multmu).
Loop:	mtsrim	cr,(8-1)
	loadm	0,0,gr96,lr3
	add	lr3,lr3,32

	multiplu gr104,gr96,lr5
	multmu	gr96,gr96,lr5
	multiplu gr105,gr97,lr5
	multmu	gr97,gr97,lr5
	multiplu gr106,gr98,lr5
	multmu	gr98,gr98,lr5
	multiplu gr107,gr99,lr5
	multmu	gr99,gr99,lr5
	multiplu gr108,gr100,lr5
	multmu	gr100,gr100,lr5
	multiplu gr109,gr101,lr5
	multmu	gr101,gr101,lr5
	multiplu gr110,gr102,lr5
	multmu	gr102,gr102,lr5
	multiplu gr111,gr103,lr5
	multmu	gr103,gr103,lr5

; Fold the incoming carry limb and each high word into the next low word.
	add	gr104,gr104,gr120
	addc	gr105,gr105,gr96
	addc	gr106,gr106,gr97
	addc	gr107,gr107,gr98
	addc	gr108,gr108,gr99
	addc	gr109,gr109,gr100
	addc	gr110,gr110,gr101
	addc	gr111,gr111,gr102
	addc	gr120,gr103,0		; carry limb = last high word + carry

	mtsrim	cr,(8-1)
	storem	0,0,gr104,lr2
	jmpfdec	gr117,Loop
	add	lr2,lr2,32

; Remaining 0..7 limbs, one limb per iteration.
Ltail:	and	lr4,lr4,(8-1)		; limbs left = size mod 8
	sub	gr118,lr4,1		; negative when no limbs are left
	jmpt	gr118,Lend
	sub	lr4,lr4,2		; count for jmpfdec
	sub	lr2,lr2,4		; offset res_ptr by one limb

Loop2:	load	0,0,gr116,lr3
	add	lr3,lr3,4
	multiplu gr117,gr116,lr5	; low word of the product
	multmu	gr118,gr116,lr5		; high word of the product
	add	lr2,lr2,4
	add	gr117,gr117,gr120	; add incoming carry limb
	store	0,0,gr117,lr2
	jmpfdec	lr4,Loop2
	addc	gr120,gr118,0		; next carry limb

Lend:	jmpi	lr0
	or	gr96,gr120,0		; copy
; 29000 __gmpn_rshift -- Shift a limb vector right by a bit count and store
; the result in a second limb vector.

; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 2.1 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
; MA 02111-1307, USA.


; INPUT PARAMETERS (as used by the code below)
; res_ptr	lr2
; s1_ptr	lr3
; size		lr4	(limb count)
; cnt		lr5	(shift count)
; The bits shifted out of the least significant limb are returned in gr96.

; We use the loadm/storem instructions and operate on chunks of 8
; limbs per iteration, until fewer than 8 limbs remain.

; The instruction written directly after each jump is relied upon to execute
; as well (branch delay slot). Each `extract' combines two adjacent limbs
; using the funnel count set by `mtsr fc'; see the Am29000 manual for the
; exact EXTRACT semantics.

	.sect .lit,lit
	.text
	.align 4
	.global ___gmpn_rshift
	.word 0x60000
___gmpn_rshift:
	load	0,0,gr119,lr3		; gr119 = least significant source limb
	add	lr3,lr3,4

	subr	gr116,lr5,32		; 32 - cnt
	sll	gr96,gr119,gr116	; return value
	sub	lr4,lr4,1		; actual loop count is SIZE - 1

	srl	gr117,lr4,3		; chunk count = (actual count) / 8
	cpeq	gr118,gr117,0
	jmpt	gr118,Ltail		; no full chunk
	mtsr	fc,gr116		; funnel count = 32 - cnt

	sub	gr117,gr117,2		; count for jmpfdec

; Main loop working 8 limbs/iteration.
; gr119 always holds the limb just below the ones being loaded.
Loop:	mtsrim	cr,(8-1)
	loadm	0,0,gr100,lr3		; gr100..gr107 = next 8 higher limbs
	add	lr3,lr3,32

	extract	gr98,gr100,gr119
	extract	gr99,gr101,gr100
	extract	gr100,gr102,gr101
	extract	gr101,gr103,gr102
	extract	gr102,gr104,gr103
	extract	gr103,gr105,gr104
	extract	gr104,gr106,gr105
	extract	gr105,gr107,gr106

	mtsrim	cr,(8-1)
	storem	0,0,gr98,lr2		; write result limbs gr98..gr105
	add	lr2,lr2,32
	jmpfdec	gr117,Loop
	or	gr119,gr107,0		; highest loaded limb feeds the next round

; Code for the last up-to-7 limbs.

	and	lr4,lr4,(8-1)
Ltail:	cpeq	gr118,lr4,0
	jmpt	gr118,Lend
	sub	lr4,lr4,2		; count for jmpfdec

Loop2:	load	0,0,gr100,lr3
	add	lr3,lr3,4
	extract	gr117,gr100,gr119
	store	0,0,gr117,lr2
	add	lr2,lr2,4
	jmpfdec	lr4,Loop2
	or	gr119,gr100,0

; Most significant result limb: the last source limb shifted right by cnt.
Lend:	srl	gr117,gr119,lr5
	jmpi	lr0
	store	0,0,gr117,lr2
+ + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; s2_ptr lr4 +; size lr5 + +; We use the loadm/storem instructions and operate on chunks of 8 +; limbs per iteration, until fewer than 8 limbs remain. + +; The 29k has no addition or subtraction instructions that don't +; affect carry, so we need to save and restore it as soon as we +; adjust the pointers. gr116 is used for this purpose. Note that +; gr116==0 means that carry should be set. + + .sect .lit,lit + .text + .align 4 + .global ___gmpn_sub_n + .word 0x60000 +___gmpn_sub_n: + srl gr117,lr5,3 + sub gr118,gr117,1 + jmpt gr118,Ltail + constn gr116,-1 ; init cy reg + sub gr117,gr117,2 ; count for jmpfdec + +; Main loop working 8 limbs/iteration. +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + mtsrim cr,(8-1) + loadm 0,0,gr104,lr4 + add lr4,lr4,32 + + subr gr116,gr116,0 ; restore carry + subc gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + subc gr116,gr116,gr116 ; gr116 = not(cy) + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +; Code for the last up-to-7 limbs. +; This code might look very strange, but it's hard to write it +; differently without major slowdown.
+ + and lr5,lr5,(8-1) +Ltail: sub gr118,lr5,1 ; count for CR + jmpt gr118,Lend + sub gr117,lr5,2 ; count for jmpfdec + + mtsr cr,gr118 + loadm 0,0,gr96,lr3 + mtsr cr,gr118 + loadm 0,0,gr104,lr4 + + subr gr116,gr116,0 ; restore carry + + jmpfdec gr117,L1 + subc gr96,gr96,gr104 + jmp Lstore + mtsr cr,gr118 +L1: jmpfdec gr117,L2 + subc gr97,gr97,gr105 + jmp Lstore + mtsr cr,gr118 +L2: jmpfdec gr117,L3 + subc gr98,gr98,gr106 + jmp Lstore + mtsr cr,gr118 +L3: jmpfdec gr117,L4 + subc gr99,gr99,gr107 + jmp Lstore + mtsr cr,gr118 +L4: jmpfdec gr117,L5 + subc gr100,gr100,gr108 + jmp Lstore + mtsr cr,gr118 +L5: jmpfdec gr117,L6 + subc gr101,gr101,gr109 + jmp Lstore + mtsr cr,gr118 +L6: subc gr102,gr102,gr110 + +Lstore: storem 0,0,gr96,lr2 + subc gr116,gr116,gr116 ; gr116 = not(cy) + +Lend: jmpi lr0 + add gr96,gr116,1 diff --git a/rts/gmp/mpn/a29k/submul_1.s b/rts/gmp/mpn/a29k/submul_1.s new file mode 100644 index 0000000000..ef97d8d4e5 --- /dev/null +++ b/rts/gmp/mpn/a29k/submul_1.s @@ -0,0 +1,116 @@ +; 29000 __gmpn_submul_1 -- Multiply a limb vector with a single limb and +; subtract the product from a second limb vector. + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr lr2 +; s1_ptr lr3 +; size lr4 +; s2_limb lr5 + + .cputype 29050 + .sect .lit,lit + .text + .align 4 + .global ___gmpn_submul_1 + .word 0x60000 +___gmpn_submul_1: + sub lr4,lr4,8 + jmpt lr4,Ltail + const gr120,0 ; init cylimb reg + + srl gr117,lr4,3 ; divide by 8 + sub gr117,gr117,1 ; count for jmpfdec + +Loop: mtsrim cr,(8-1) + loadm 0,0,gr96,lr3 + add lr3,lr3,32 + + multiplu gr104,gr96,lr5 + multmu gr96,gr96,lr5 + multiplu gr105,gr97,lr5 + multmu gr97,gr97,lr5 + multiplu gr106,gr98,lr5 + multmu gr98,gr98,lr5 + multiplu gr107,gr99,lr5 + multmu gr99,gr99,lr5 + multiplu gr108,gr100,lr5 + multmu gr100,gr100,lr5 + multiplu gr109,gr101,lr5 + multmu gr101,gr101,lr5 + multiplu gr110,gr102,lr5 + multmu gr102,gr102,lr5 + multiplu gr111,gr103,lr5 + multmu gr103,gr103,lr5 + + add gr104,gr104,gr120 + addc gr105,gr105,gr96 + addc gr106,gr106,gr97 + addc gr107,gr107,gr98 + addc gr108,gr108,gr99 + addc gr109,gr109,gr100 + addc gr110,gr110,gr101 + addc gr111,gr111,gr102 + addc gr120,gr103,0 + + mtsrim cr,(8-1) + loadm 0,0,gr96,lr2 + + sub gr96,gr96,gr104 + subc gr97,gr97,gr105 + subc gr98,gr98,gr106 + subc gr99,gr99,gr107 + subc gr100,gr100,gr108 + subc gr101,gr101,gr109 + subc gr102,gr102,gr110 + subc gr103,gr103,gr111 + + add gr104,gr103,gr111 ; invert carry from previus sub + addc gr120,gr120,0 + + mtsrim cr,(8-1) + storem 0,0,gr96,lr2 + jmpfdec gr117,Loop + add lr2,lr2,32 + +Ltail: and lr4,lr4,(8-1) + sub gr118,lr4,1 ; count for CR + jmpt gr118,Lend + sub lr4,lr4,2 + sub lr2,lr2,4 ; offset res_ptr by one limb + +Loop2: load 0,0,gr116,lr3 + add lr3,lr3,4 + multiplu gr117,gr116,lr5 + multmu gr118,gr116,lr5 + add lr2,lr2,4 + load 0,0,gr119,lr2 + add gr117,gr117,gr120 + addc gr118,gr118,0 + sub gr119,gr119,gr117 + add gr104,gr119,gr117 ; invert carry from previus sub + store 0,0,gr119,lr2 + jmpfdec lr4,Loop2 + addc gr120,gr118,0 + 
+Lend: jmpi lr0 + or gr96,gr120,0 ; copy diff --git a/rts/gmp/mpn/a29k/udiv.s b/rts/gmp/mpn/a29k/udiv.s new file mode 100644 index 0000000000..fdd53a9a88 --- /dev/null +++ b/rts/gmp/mpn/a29k/udiv.s @@ -0,0 +1,30 @@ +; Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + .sect .lit,lit + .text + .align 4 + .global ___udiv_qrnnd + .word 0x60000 +___udiv_qrnnd: + mtsr q,lr3 + dividu gr96,lr4,lr5 + mfsr gr116,q + jmpi lr0 + store 0,0,gr116,lr2 diff --git a/rts/gmp/mpn/a29k/umul.s b/rts/gmp/mpn/a29k/umul.s new file mode 100644 index 0000000000..7741981167 --- /dev/null +++ b/rts/gmp/mpn/a29k/umul.s @@ -0,0 +1,29 @@ +; Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + .sect .lit,lit + .text + .align 4 + .global ___umul_ppmm + .word 0x50000 +___umul_ppmm: + multiplu gr116,lr3,lr4 + multmu gr96,lr3,lr4 + jmpi lr0 + store 0,0,gr116,lr2 diff --git a/rts/gmp/mpn/alpha/README b/rts/gmp/mpn/alpha/README new file mode 100644 index 0000000000..744260c7c5 --- /dev/null +++ b/rts/gmp/mpn/alpha/README @@ -0,0 +1,224 @@ +This directory contains mpn functions optimized for DEC Alpha processors. + +ALPHA ASSEMBLY RULES AND REGULATIONS + +The `.prologue N' pseudo op marks the end of the instructions that need +special handling by unwinding. It also says whether $27 is really +needed for computing the gp. The `.mask M' pseudo op says which +registers are saved on the stack, and at what offset in the frame. + +Cray code is very very different... + + +RELEVANT OPTIMIZATION ISSUES + +EV4 + +1. This chip has very limited store bandwidth. The on-chip L1 cache is + write-through, and a cache line is transferred from the store buffer to + the off-chip L2 in as much as 15 cycles on most systems. This delay hurts + mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift. + +2. Pairing is possible between memory instructions and integer arithmetic + instructions. + +3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of + these cycles are pipelined. Thus, multiply instructions can be issued at + a rate of one each 21st cycle. + +EV5 + +1. The memory bandwidth of this chip seems excellent, both for loads and + stores.
Even when the working set is larger than the on-chip L1 and L2 + caches, the performance remains almost unaffected. + +2. mulq has a latency of 12 cycles and an issue rate of 1 each 8th cycle. + umulh has a measured latency of 14 cycles and an issue rate of 1 each + 10th cycle. But the exact timing is somewhat confusing. + +3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12 + are memory operations. This will take at least + ceil(37/2) [dual issue] + 1 [taken branch] = 19 cycles + We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data + cache cycles, which should be completely hidden in the 19 issue cycles. + The computation is inherently serial, with these dependencies: + + ldq ldq + \ /\ + (or) addq | + |\ / \ | + | addq cmpult + \ | | + cmpult | + \ / + or + + I.e., 3 operations are needed between carry-in and carry-out, making 12 + cycles the absolute minimum for the 4 limbs. We could replace the `or' + with a cmoveq/cmovne, which could issue one cycle earlier than the `or', + but that might waste a cycle on EV4. The total depth remains unaffected, + since cmov has a latency of 2 cycles. + + addq + / \ + addq cmpult + | \ + cmpult -> cmovne + +Montgomery has a slightly different way of computing carry that requires one +less instruction, but has depth 4 (instead of the current 3). Since the +code is currently instruction issue bound, Montgomery's idea should save us +1/2 cycle per limb, or bring us down to a total of 17 cycles or 4.25 +cycles/limb. Unfortunately, this method will not be good for the EV6. + +EV6 + +Here we have a really parallel pipeline, capable of issuing up to 4 integer +instructions per cycle. One integer multiply instruction can issue each +cycle. To get optimal speed, we need to pretend we are vectorizing the code, +i.e., minimize the iterative dependencies. + +There are two dependencies to watch out for. 1) Address arithmetic +dependencies, and 2) carry propagation dependencies.
+ +We can avoid serializing due to address arithmetic by unrolling the loop, so +that addresses don't depend heavily on an index variable. Avoiding +serializing because of carry propagation is trickier; the ultimate performance +of the code will be determined by the number of latency cycles it takes from +accepting carry-in to a vector point until we can generate carry-out. + +Most integer instructions can execute in either the L0, U0, L1, or U1 +pipelines. Shifts only execute in U0 and U1, and multiply only in U1. + +CMOV instructions split into two internal instructions, CMOV1 and CMOV2, but +they execute efficiently. But CMOV splits the mapping process (see pg 2-26 in +cmpwrgd.pdf), suggesting the CMOV should always be placed as the last +instruction of an aligned 4 instruction block (?). + +Perhaps the most important issue is the latency between the L0/U0 and L1/U1 +clusters; a result obtained on either cluster has an extra cycle of latency +for consumers in the opposite cluster. Because of the dynamic nature of the +implementation, it is hard to predict where an instruction will execute. + +The shift loops need (per limb): + 1 load (Lx pipes) + 1 store (Lx pipes) + 2 shift (Ux pipes) + 1 iaddlog (Lx pipes, Ux pipes) +Obviously, since the pipes are very equally loaded, we should get 4 insn/cycle, or 1.25 cycles/limb. + +For mpn_add_n, we currently have + 2 load (Lx pipes) + 1 store (Lx pipes) + 5 iaddlog (Lx pipes, Ux pipes) + +Again, we have a perfect balance and will be limited by carry propagation +delays, currently three cycles. The superoptimizer indicates that there +might be sequences that--using a final cmov--have a carry propagation delay +of just two. Montgomery's subtraction sequence could perhaps be used, by +complementing some operands. All in all, we should get down to 2 cycles +without much trouble.
+ +For mpn_mul_1, we could do, just like for mpn_add_n: + not newlo,notnewlo + addq cylimb,newlo,newlo || cmpult cylimb,notnewlo,cyout + addq cyout,newhi,cylimb +and get 2-cycle carry propagation. The instructions needed will be + 1 ld (Lx pipes) + 1 st (Lx pipes) + 2 mul (U1 pipe) + 4 iaddlog (Lx pipes, Ux pipes) +issue1: addq not mul ld +issue2: cmpult addq mul st +Conclusion: no cluster delays and 2-cycle carry delays will give us 2 cycles/limb! + +Last, we have mpn_addmul_1. Almost certainly, we will get down to 3 +cycles/limb, which would be absolutely awesome. + +Old, perhaps obsolete addmul_1 dependency diagram (needs 175 columns wide screen): + + i + s + s i + u n + e s + d t + r + i u +l n c +i s t +v t i +e r o + u n +v c +a t t +l i y +u o p +e n e +s s s + issue + in + cycle + -1 ldq + / \ + 0 | \ + | \ + 1 | | + | | + 2 | | ldq + | | / \ + 3 | mulq | \ + | \ | \ + 4 umulh \ | | + | | | | + 5 | | | | ldq + | | | | / \ + 4calm 6 | | ldq | mulq | \ + | | / | \ | \ + 4casm 7 | | / umulh \ | | +6 | || | | | | + 3aal 8 | || | | | | ldq +7 | || | | | | / \ + 4calm 9 | || | | ldq | mulq | \ +9 | || | | / | \ | \ + 4casm 10 | || | | / umulh \ | | +9 | || | || | | | | + 3aal 11 | addq | || | | | | ldq +9 | // \ | || | | | | / \ + 4calm 12 \ cmpult addq<-cy | || | | ldq | mulq | \ +13 \ / // \ | || | | / | \ | \ + 4casm 13 addq cmpult stq | || | | / umulh \ | | +11 \ / | || | || | | | | + 3aal 14 addq | addq | || | | | | ldq +10 \ | // \ | || | | | | / \ + 4calm 15 cy ----> \ cmpult addq<-cy | || | | ldq | mulq | \ +13 \ / // \ | || | | / | \ | \ + 4casm 16 addq cmpult stq | || | | / umulh \ | | +11 \ / | || | || | | | | + 3aal 17 addq | addq | || | | | | +10 \ | // \ | || | | | | + 4calm 18 cy ----> \ cmpult addq<-cy | || | | ldq | mulq +13 \ / // \ | || | | / | \ + 4casm 19 addq cmpult stq | || | | / umulh \ +11 \ / | || | || | | + 3aal 20 addq | addq | || | | +10 \ | // \ | || | | + 4calm 21 cy ----> \ cmpult addq<-cy | || | | ldq + \ / // \ | || | | / + 22 
addq cmpult stq | || | | / + \ / | || | || + 23 addq | addq | || + \ | // \ | || + 24 cy ----> \ cmpult addq<-cy | || + \ / // \ | || + 25 addq cmpult stq | || + \ / | || + 26 addq | addq + \ | // \ + 27 cy ----> \ cmpult addq<-cy + \ / // \ + 28 addq cmpult stq + \ / +As many as 6 consecutive points will be under execution simultaneously, or if we addq +schedule loads even further away, maybe 7 or 8. But the number of live quantities \ +is reasonable, and can easily be satisfied. cy ----> diff --git a/rts/gmp/mpn/alpha/add_n.asm b/rts/gmp/mpn/alpha/add_n.asm new file mode 100644 index 0000000000..08d6a9f7b8 --- /dev/null +++ b/rts/gmp/mpn/alpha/add_n.asm @@ -0,0 +1,114 @@ +dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and +dnl store sum in a third limb vector. + +dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_add_n) + ldq r3,0(r17) + ldq r4,0(r18) + + subq r19,1,r19 + and r19,4-1,r2 C number of limbs in first loop + bis r31,r31,r0 + beq r2,$L0 C if multiple of 4 limbs, skip first loop + + subq r19,r2,r19 + +$Loop0: subq r2,1,r2 + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 + + addq r17,8,r17 + addq r18,8,r18 + bis r5,r5,r3 + bis r6,r6,r4 + addq r16,8,r16 + bne r2,$Loop0 + +$L0: beq r19,$Lend + + ALIGN(8) +$Loop: subq r19,4,r19 + + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 + + ldq r3,16(r17) + addq r6,r0,r6 + ldq r4,16(r18) + cmpult r6,r0,r1 + addq r5,r6,r6 + cmpult r6,r5,r0 + stq r6,8(r16) + bis r0,r1,r0 + + ldq r5,24(r17) + addq r4,r0,r4 + ldq r6,24(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,16(r16) + bis r0,r1,r0 + + ldq r3,32(r17) + addq r6,r0,r6 + ldq r4,32(r18) + cmpult r6,r0,r1 + addq r5,r6,r6 + cmpult r6,r5,r0 + stq r6,24(r16) + bis r0,r1,r0 + + addq r17,32,r17 + addq r18,32,r18 + addq r16,32,r16 + bne r19,$Loop + +$Lend: addq r4,r0,r4 + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 + ret r31,(r26),1 +EPILOGUE(mpn_add_n) +ASM_END() diff --git a/rts/gmp/mpn/alpha/addmul_1.asm b/rts/gmp/mpn/alpha/addmul_1.asm new file mode 100644 index 0000000000..4ea900be6b --- /dev/null +++ b/rts/gmp/mpn/alpha/addmul_1.asm @@ -0,0 +1,87 @@ +dnl Alpha __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +dnl the result to a second limb vector. + +dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl s2_limb r19 + +dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7 +dnl cycles/limb on EV6. 
+ +ASM_START() +PROLOGUE(mpn_addmul_1) + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + subq r18,1,r18 C size-- + mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + umulh r2,r19,r0 C r0 = prod_high + beq r18,$Lend1 C jump if size was == 1 + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + subq r18,1,r18 C size-- + addq r5,r3,r3 + cmpult r3,r5,r4 + stq r3,0(r16) + addq r16,8,r16 C res_ptr++ + beq r18,$Lend2 C jump if size was == 2 + + ALIGN(8) +$Loop: mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + subq r18,1,r18 C size-- + umulh r2,r19,r4 C r4 = cy_limb + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + addq r3,r0,r3 C r3 = cy_limb + prod_low + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + addq r5,r3,r3 + cmpult r3,r5,r5 + stq r3,0(r16) + addq r16,8,r16 C res_ptr++ + addq r5,r0,r0 C combine carries + bne r18,$Loop + +$Lend2: mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + umulh r2,r19,r4 C r4 = cy_limb + addq r3,r0,r3 C r3 = cy_limb + prod_low + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + addq r5,r3,r3 + cmpult r3,r5,r5 + stq r3,0(r16) + addq r5,r0,r0 C combine carries + addq r4,r0,r0 C cy_limb = prod_high + cy + ret r31,(r26),1 +$Lend1: addq r5,r3,r3 + cmpult r3,r5,r5 + stq r3,0(r16) + addq r0,r5,r0 + ret r31,(r26),1 +EPILOGUE(mpn_addmul_1) +ASM_END() diff --git a/rts/gmp/mpn/alpha/cntlz.asm b/rts/gmp/mpn/alpha/cntlz.asm new file mode 100644 index 0000000000..febb3b70d9 --- /dev/null +++ b/rts/gmp/mpn/alpha/cntlz.asm @@ -0,0 +1,68 @@ +dnl Alpha auxiliary for longlong.h's count_leading_zeros + +dnl Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl DISCUSSION: + +dnl Other methods have been tried, and using a 128-entry table actually trims +dnl about 10% of the execution time (on a 21164) when the table is in the L1 +dnl cache. But under non-benchmarking conditions, the table will hardly be in +dnl the L1 cache. Tricky bit-fiddling methods with multiplies and magic tables +dnl are also possible, but they require many more instructions than the current +dnl code. (But for count_trailing_zeros, such tricks are beneficial.) +dnl Finally, converting to floating-point and extracting the exponent is much +dnl slower. 
+ +ASM_START() +PROLOGUE(MPN(count_leading_zeros)) + bis r31,63,r0 C initialize partial result count + + srl r16,32,r1 C shift down 32 steps -> r1 + cmovne r1,r1,r16 C select r1 if non-zero + cmovne r1,31,r0 C if r1 is nonzero choose smaller count + + srl r16,16,r1 C shift down 16 steps -> r1 + subq r0,16,r2 C generate new partial result count + cmovne r1,r1,r16 C choose new r1 if non-zero + cmovne r1,r2,r0 C choose new count if r1 was non-zero + + srl r16,8,r1 + subq r0,8,r2 + cmovne r1,r1,r16 + cmovne r1,r2,r0 + + srl r16,4,r1 + subq r0,4,r2 + cmovne r1,r1,r16 + cmovne r1,r2,r0 + + srl r16,2,r1 + subq r0,2,r2 + cmovne r1,r1,r16 + cmovne r1,r2,r0 + + srl r16,1,r1 C extract bit 1 + subq r0,r1,r0 C subtract it from partial result + + ret r31,(r26),1 +EPILOGUE(MPN(count_leading_zeros)) +ASM_END() diff --git a/rts/gmp/mpn/alpha/default.m4 b/rts/gmp/mpn/alpha/default.m4 new file mode 100644 index 0000000000..5f4c48dc73 --- /dev/null +++ b/rts/gmp/mpn/alpha/default.m4 @@ -0,0 +1,77 @@ +divert(-1) + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. 
+ + +define(`ASM_START', + ` + .set noreorder + .set noat') + +define(`X',`0x$1') +define(`FLOAT64', + ` + .align 3 +$1: .t_floating $2') + +define(`PROLOGUE', + ` + .text + .align 3 + .globl $1 + .ent $1 +$1: + .frame r30,0,r26 + .prologue 0') + +define(`PROLOGUE_GP', + ` + .text + .align 3 + .globl $1 + .ent $1 +$1: + ldgp r29,0(r27) + .frame r30,0,r26 + .prologue 1') + +define(`EPILOGUE', + ` + .end $1') + +dnl Map register names r0, r1, etc, to `$0', `$1', etc. +dnl This is needed on all systems but Unicos +forloop(i,0,31, +`define(`r'i,``$''i)' +) +forloop(i,0,31, +`define(`f'i,``$f''i)' +) + +define(`DATASTART', + `dnl + DATA +$1:') +define(`DATAEND',`dnl') + +define(`ASM_END',`dnl') + +divert diff --git a/rts/gmp/mpn/alpha/ev5/add_n.asm b/rts/gmp/mpn/alpha/ev5/add_n.asm new file mode 100644 index 0000000000..716d6404ae --- /dev/null +++ b/rts/gmp/mpn/alpha/ev5/add_n.asm @@ -0,0 +1,143 @@ +dnl Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and +dnl store sum in a third limb vector. + +dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_add_n) + bis r31,r31,r25 C clear cy + subq r19,4,r19 C decr loop cnt + blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop +C Start software pipeline for 1st loop + ldq r0,0(r18) + ldq r4,0(r17) + ldq r1,8(r18) + ldq r5,8(r17) + addq r17,32,r17 C update s1_ptr + ldq r2,16(r18) + addq r0,r4,r20 C 1st main add + ldq r3,24(r18) + subq r19,4,r19 C decr loop cnt + ldq r6,-16(r17) + cmpult r20,r0,r25 C compute cy from last add + ldq r7,-8(r17) + addq r1,r5,r28 C 2nd main add + addq r18,32,r18 C update s2_ptr + addq r28,r25,r21 C 2nd carry add + cmpult r28,r5,r8 C compute cy from last add + blt r19,$Lend1 C if less than 4 limbs remain, jump +C 1st loop handles groups of 4 limbs in a software pipeline + ALIGN(16) +$Loop: cmpult r21,r28,r25 C compute cy from last add + ldq r0,0(r18) + bis r8,r25,r25 C combine cy from the two adds + ldq r1,8(r18) + addq r2,r6,r28 C 3rd main add + ldq r4,0(r17) + addq r28,r25,r22 C 3rd carry add + ldq r5,8(r17) + cmpult r28,r6,r8 C compute cy from last add + cmpult r22,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + stq r21,8(r16) + addq r3,r7,r28 C 4th main add + addq r28,r25,r23 C 4th carry add + cmpult r28,r7,r8 C compute cy from last add + cmpult r23,r28,r25 C compute cy from last add + addq r17,32,r17 C update s1_ptr + bis r8,r25,r25 C combine cy from the two adds + addq r16,32,r16 C update res_ptr + addq r0,r4,r28 C 1st main add + ldq r2,16(r18) + addq r25,r28,r20 C 1st carry add + ldq r3,24(r18) + cmpult r28,r4,r8 C compute cy from last add + ldq r6,-16(r17) + cmpult r20,r28,r25 C compute cy from last add + ldq r7,-8(r17) + bis r8,r25,r25 C combine cy from the two adds + subq r19,4,r19 C decr loop cnt + stq r22,-16(r16) + addq r1,r5,r28 C 2nd main add + stq r23,-8(r16) + addq r25,r28,r21 C 2nd carry add + addq r18,32,r18 C update s2_ptr + 
cmpult r28,r5,r8 C compute cy from last add + bge r19,$Loop +C Finish software pipeline for 1st loop +$Lend1: cmpult r21,r28,r25 C compute cy from last add + bis r8,r25,r25 C combine cy from the two adds + addq r2,r6,r28 C 3rd main add + addq r28,r25,r22 C 3rd carry add + cmpult r28,r6,r8 C compute cy from last add + cmpult r22,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + stq r21,8(r16) + addq r3,r7,r28 C 4th main add + addq r28,r25,r23 C 4th carry add + cmpult r28,r7,r8 C compute cy from last add + cmpult r23,r28,r25 C compute cy from last add + bis r8,r25,r25 C combine cy from the two adds + addq r16,32,r16 C update res_ptr + stq r22,-16(r16) + stq r23,-8(r16) +$Lend2: addq r19,4,r19 C restore loop cnt + beq r19,$Lret +C Start software pipeline for 2nd loop + ldq r0,0(r18) + ldq r4,0(r17) + subq r19,1,r19 + beq r19,$Lend0 +C 2nd loop handles remaining 1-3 limbs + ALIGN(16) +$Loop0: addq r0,r4,r28 C main add + ldq r0,8(r18) + cmpult r28,r4,r8 C compute cy from last add + ldq r4,8(r17) + addq r28,r25,r20 C carry add + addq r18,8,r18 + addq r17,8,r17 + stq r20,0(r16) + cmpult r20,r28,r25 C compute cy from last add + subq r19,1,r19 C decr loop cnt + bis r8,r25,r25 C combine cy from the two adds + addq r16,8,r16 + bne r19,$Loop0 +$Lend0: addq r0,r4,r28 C main add + addq r28,r25,r20 C carry add + cmpult r28,r4,r8 C compute cy from last add + cmpult r20,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + +$Lret: bis r25,r31,r0 C return cy + ret r31,(r26),1 +EPILOGUE(mpn_add_n) +ASM_END() diff --git a/rts/gmp/mpn/alpha/ev5/lshift.asm b/rts/gmp/mpn/alpha/ev5/lshift.asm new file mode 100644 index 0000000000..cb181dda66 --- /dev/null +++ b/rts/gmp/mpn/alpha/ev5/lshift.asm @@ -0,0 +1,169 @@ +dnl Alpha EV5 __gmpn_lshift -- Shift a number left. + +dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl cnt r19 + +dnl This code runs at 3.25 cycles/limb on the EV5. Limbs are handled from the top of the vectors downwards (note the negative offsets). + +ASM_START() +PROLOGUE(mpn_lshift) + s8addq r18,r17,r17 C make r17 point at end of s1 + ldq r4,-8(r17) C load first limb + subq r31,r19,r20 C r20 = 0 - cnt, count for the complementary right shifts + s8addq r18,r16,r16 C make r16 point at end of RES + subq r18,1,r18 C r18 = size - 1 + and r18,4-1,r28 C number of limbs in first loop + srl r4,r20,r0 C compute function result + + beq r28,$L0 + subq r18,r28,r18 C r18 = limbs still to load, now a multiple of 4 + + ALIGN(8) +$Loop0: ldq r3,-16(r17) + subq r16,8,r16 + sll r4,r19,r5 + subq r17,8,r17 + subq r28,1,r28 + srl r3,r20,r6 + bis r3,r3,r4 C r4 = r3, the upper limb of the next pair + bis r5,r6,r8 C merge the bits of the two neighbouring limbs + stq r8,0(r16) + bne r28,$Loop0 + +$L0: sll r4,r19,r24 C r24 = shifted limb still waiting for its low bits + beq r18,$Lend +C warm up phase 1 + ldq r1,-16(r17) + subq r18,4,r18 + ldq r2,-24(r17) + ldq r3,-32(r17) + ldq r4,-40(r17) + beq r18,$Lend1 +C warm up phase 2 + srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + ldq r1,-48(r17) + sll r2,r19,r22 + ldq r2,-56(r17) + srl r3,r20,r5 + bis r7,r24,r7 + sll r3,r19,r23 + bis r8,r21,r8 + srl r4,r20,r6 + ldq r3,-64(r17) + sll r4,r19,r24 + ldq r4,-72(r17) + subq r18,4,r18 + beq r18,$Lend2 + ALIGN(16) +C main loop +$Loop: stq r7,-8(r16) + 
bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + + srl r1,r20,r7 + subq r18,4,r18 + sll r1,r19,r21 + unop C ldq r31,-96(r17) + + srl r2,r20,r8 + ldq r1,-80(r17) + sll r2,r19,r22 + ldq r2,-88(r17) + + stq r5,-24(r16) + bis r7,r24,r7 + stq r6,-32(r16) + bis r8,r21,r8 + + srl r3,r20,r5 + unop C ldq r31,-96(r17) + sll r3,r19,r23 + subq r16,32,r16 + + srl r4,r20,r6 + ldq r3,-96(r17) + sll r4,r19,r24 + ldq r4,-104(r17) + + subq r17,32,r17 + bne r18,$Loop +C cool down phase 2/1 +$Lend2: stq r7,-8(r16) + bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + sll r2,r19,r22 + stq r5,-24(r16) + bis r7,r24,r7 + stq r6,-32(r16) + bis r8,r21,r8 + srl r3,r20,r5 + sll r3,r19,r23 + srl r4,r20,r6 + sll r4,r19,r24 +C cool down phase 2/2 + stq r7,-40(r16) + bis r5,r22,r5 + stq r8,-48(r16) + bis r6,r23,r6 + stq r5,-56(r16) + stq r6,-64(r16) +C cool down phase 2/3 + stq r24,-72(r16) + ret r31,(r26),1 + +C cool down phase 1/1 +$Lend1: srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + sll r2,r19,r22 + srl r3,r20,r5 + bis r7,r24,r7 + sll r3,r19,r23 + bis r8,r21,r8 + srl r4,r20,r6 + sll r4,r19,r24 +C cool down phase 1/2 + stq r7,-8(r16) + bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + stq r5,-24(r16) + stq r6,-32(r16) + stq r24,-40(r16) + ret r31,(r26),1 + +$Lend: stq r24,-8(r16) C lowest result limb, no lower limb to merge in + ret r31,(r26),1 +EPILOGUE(mpn_lshift) +ASM_END() diff --git a/rts/gmp/mpn/alpha/ev5/rshift.asm b/rts/gmp/mpn/alpha/ev5/rshift.asm new file mode 100644 index 0000000000..9940d83fad --- /dev/null +++ b/rts/gmp/mpn/alpha/ev5/rshift.asm @@ -0,0 +1,167 @@ +dnl Alpha EV5 __gmpn_rshift -- Shift a number right. + +dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl cnt r19 + +dnl This code runs at 3.25 cycles/limb on the EV5. Limbs are handled from the bottom of the vectors upwards. + +ASM_START() +PROLOGUE(mpn_rshift) + ldq r4,0(r17) C load first limb + subq r31,r19,r20 C r20 = 0 - cnt, count for the complementary left shifts + subq r18,1,r18 C r18 = size - 1 + and r18,4-1,r28 C number of limbs in first loop + sll r4,r20,r0 C compute function result + + beq r28,$L0 + subq r18,r28,r18 C r18 = limbs still to load, now a multiple of 4 + + ALIGN(8) +$Loop0: ldq r3,8(r17) + addq r16,8,r16 + srl r4,r19,r5 + addq r17,8,r17 + subq r28,1,r28 + sll r3,r20,r6 + bis r3,r3,r4 C r4 = r3, the lower limb of the next pair + bis r5,r6,r8 C merge the bits of the two neighbouring limbs + stq r8,-8(r16) + bne r28,$Loop0 + +$L0: srl r4,r19,r24 C r24 = shifted limb still waiting for its high bits + beq r18,$Lend +C warm up phase 1 + ldq r1,8(r17) + subq r18,4,r18 + ldq r2,16(r17) + ldq r3,24(r17) + ldq r4,32(r17) + beq r18,$Lend1 +C warm up phase 2 + sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + ldq r1,40(r17) + srl r2,r19,r22 + ldq r2,48(r17) + sll r3,r20,r5 + bis r7,r24,r7 + srl r3,r19,r23 + bis r8,r21,r8 + sll r4,r20,r6 + ldq r3,56(r17) + srl r4,r19,r24 + ldq r4,64(r17) + subq r18,4,r18 + beq r18,$Lend2 + ALIGN(16) +C main loop +$Loop: stq r7,0(r16) + bis r5,r22,r5 + stq r8,8(r16) + bis r6,r23,r6 + + sll r1,r20,r7 + subq r18,4,r18 + srl r1,r19,r21 + unop C ldq 
r31,-96(r17) (disabled load; NOTE(review): negative offset looks inherited from lshift -- confirm) + + sll r2,r20,r8 + ldq r1,72(r17) + srl r2,r19,r22 + ldq r2,80(r17) + + stq r5,16(r16) + bis r7,r24,r7 + stq r6,24(r16) + bis r8,r21,r8 + + sll r3,r20,r5 + unop C ldq r31,-96(r17) + srl r3,r19,r23 + addq r16,32,r16 + + sll r4,r20,r6 + ldq r3,88(r17) + srl r4,r19,r24 + ldq r4,96(r17) + + addq r17,32,r17 + bne r18,$Loop +C cool down phase 2/1 +$Lend2: stq r7,0(r16) + bis r5,r22,r5 + stq r8,8(r16) + bis r6,r23,r6 + sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + srl r2,r19,r22 + stq r5,16(r16) + bis r7,r24,r7 + stq r6,24(r16) + bis r8,r21,r8 + sll r3,r20,r5 + srl r3,r19,r23 + sll r4,r20,r6 + srl r4,r19,r24 +C cool down phase 2/2 + stq r7,32(r16) + bis r5,r22,r5 + stq r8,40(r16) + bis r6,r23,r6 + stq r5,48(r16) + stq r6,56(r16) +C cool down phase 2/3 + stq r24,64(r16) + ret r31,(r26),1 + +C cool down phase 1/1 +$Lend1: sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + srl r2,r19,r22 + sll r3,r20,r5 + bis r7,r24,r7 + srl r3,r19,r23 + bis r8,r21,r8 + sll r4,r20,r6 + srl r4,r19,r24 +C cool down phase 1/2 + stq r7,0(r16) + bis r5,r22,r5 + stq r8,8(r16) + bis r6,r23,r6 + stq r5,16(r16) + stq r6,24(r16) + stq r24,32(r16) + ret r31,(r26),1 + +$Lend: stq r24,0(r16) C highest result limb, no higher limb to merge in + ret r31,(r26),1 +EPILOGUE(mpn_rshift) +ASM_END() diff --git a/rts/gmp/mpn/alpha/ev5/sub_n.asm b/rts/gmp/mpn/alpha/ev5/sub_n.asm new file mode 100644 index 0000000000..5248a2aa38 --- /dev/null +++ b/rts/gmp/mpn/alpha/ev5/sub_n.asm @@ -0,0 +1,143 @@ +dnl Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 +dnl and store difference in a third limb vector. + +dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_sub_n) + bis r31,r31,r25 C clear cy (r25 carries the borrow between limbs) + subq r19,4,r19 C decr loop cnt + blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop +C Start software pipeline for 1st loop + ldq r0,0(r18) C r0-r3 hold s2 limbs (subtrahend) + ldq r4,0(r17) C r4-r7 hold s1 limbs (minuend) + ldq r1,8(r18) + ldq r5,8(r17) + addq r17,32,r17 C update s1_ptr + ldq r2,16(r18) + subq r4,r0,r20 C 1st main subtract + ldq r3,24(r18) + subq r19,4,r19 C decr loop cnt + ldq r6,-16(r17) + cmpult r4,r0,r25 C compute cy from last subtract + ldq r7,-8(r17) + subq r5,r1,r28 C 2nd main subtract + addq r18,32,r18 C update s2_ptr + subq r28,r25,r21 C 2nd carry subtract + cmpult r5,r1,r8 C compute cy from last subtract + blt r19,$Lend1 C if less than 4 limbs remain, jump +C 1st loop handles groups of 4 limbs in a software pipeline + ALIGN(16) +$Loop: cmpult r28,r25,r25 C compute cy from last subtract + ldq r0,0(r18) + bis r8,r25,r25 C combine cy from the two subtracts + ldq r1,8(r18) + subq r6,r2,r28 C 3rd main subtract + ldq r4,0(r17) + subq r28,r25,r22 C 3rd carry subtract + ldq r5,8(r17) + cmpult r6,r2,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + stq r21,8(r16) + subq r7,r3,r28 C 4th main subtract + subq r28,r25,r23 C 4th carry subtract + cmpult r7,r3,r8 C compute cy from last subtract + cmpult 
r28,r25,r25 C compute cy from last subtract + addq r17,32,r17 C update s1_ptr + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,32,r16 C update res_ptr + subq r4,r0,r28 C 1st main subtract + ldq r2,16(r18) + subq r28,r25,r20 C 1st carry subtract + ldq r3,24(r18) + cmpult r4,r0,r8 C compute cy from last subtract + ldq r6,-16(r17) + cmpult r28,r25,r25 C compute cy from last subtract + ldq r7,-8(r17) + bis r8,r25,r25 C combine cy from the two subtracts + subq r19,4,r19 C decr loop cnt + stq r22,-16(r16) + subq r5,r1,r28 C 2nd main subtract + stq r23,-8(r16) + subq r28,r25,r21 C 2nd carry subtract + addq r18,32,r18 C update s2_ptr + cmpult r5,r1,r8 C compute cy from last subtract + bge r19,$Loop +C Finish software pipeline for 1st loop +$Lend1: cmpult r28,r25,r25 C compute cy from last subtract + bis r8,r25,r25 C combine cy from the two subtracts + subq r6,r2,r28 C 3rd main subtract + subq r28,r25,r22 C 3rd carry subtract + cmpult r6,r2,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + stq r21,8(r16) + subq r7,r3,r28 C 4th main subtract + subq r28,r25,r23 C 4th carry subtract + cmpult r7,r3,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,32,r16 C update res_ptr + stq r22,-16(r16) + stq r23,-8(r16) +$Lend2: addq r19,4,r19 C restore loop cnt + beq r19,$Lret +C Start software pipeline for 2nd loop + ldq r0,0(r18) + ldq r4,0(r17) + subq r19,1,r19 + beq r19,$Lend0 +C 2nd loop handles remaining 1-3 limbs + ALIGN(16) +$Loop0: subq r4,r0,r28 C main subtract + cmpult r4,r0,r8 C compute cy from last subtract + ldq r0,8(r18) + ldq r4,8(r17) + subq r28,r25,r20 C carry subtract + addq r18,8,r18 + addq r17,8,r17 + stq r20,0(r16) + cmpult r28,r25,r25 C compute cy from last subtract + subq r19,1,r19 C decr loop cnt + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,8,r16 
+ bne r19,$Loop0 +$Lend0: subq r4,r0,r28 C main subtract + subq r28,r25,r20 C carry subtract + cmpult r4,r0,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + +$Lret: bis r25,r31,r0 C return cy (the final borrow, 0 or 1) + ret r31,(r26),1 +EPILOGUE(mpn_sub_n) +ASM_END() diff --git a/rts/gmp/mpn/alpha/ev6/addmul_1.asm b/rts/gmp/mpn/alpha/ev6/addmul_1.asm new file mode 100644 index 0000000000..2f588626a5 --- /dev/null +++ b/rts/gmp/mpn/alpha/ev6/addmul_1.asm @@ -0,0 +1,474 @@ +dnl Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add +dnl the result to a second limb vector. + +dnl Copyright (C) 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl s2_limb r19 + +dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and +dnl exactly 3.625 cycles/limb on EV6... + +dnl This code was written in close cooperation with ev6 pipeline expert +dnl Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though. 
+dnl +dnl Register usages for unrolled loop: +dnl 0-3 mul's +dnl 4-7 acc's +dnl 8-15 mul results +dnl 20,21 carry's +dnl 22,23 save for stores + +dnl Sustains 8 mul-adds in 29 cycles in the unrolled inner loop. + +dnl The stores can issue a cycle late so we have paired no-op's to 'catch' +dnl them, so that further disturbance to the schedule is damped. + +dnl We couldn't pair the loads, because the entangled schedule of the +dnl carries has to happen on one side {0} of the machine. Note the total +dnl use of U0, and the total use of L0 (after attending to the stores), +dnl which is part of the reason why. + +dnl This is a great schedule for the d_cache, a poor schedule for the +dnl b_cache. The lockup on U0 means that any stall can't be recovered +dnl from. Consider a ldq in L1. Say that load gets stalled because it +dnl collides with a fill from the b_cache. On the next cycle, this load +dnl gets priority. It first looks at L0, and goes there. The instruction +dnl we intended for L0 gets to look at L1, which is NOT where we want +dnl it. It either stalls 1, because it can't go in L0, or goes there, and +dnl causes a further instruction to stall. + +dnl So for b_cache, we're likely going to want to put one or more cycles +dnl back into the code! And, of course, put in prefetches. For the +dnl accumulator, lds, intent to modify. For the multiplier, you might +dnl want ldq, evict next, if you're not wanting to use it again soon. Use +dnl 256 ahead of present pointer value. At a place where we have an mt +dnl followed by a bookkeeping, put the bookkeeping in upper, and the +dnl prefetch into lower. + +dnl Note, the usage of physical registers per cycle is smoothed off, as +dnl much as possible. + +dnl Note, the ldq's and stq's are at the end of the quadpacks. Note, we'd +dnl like not to have a ldq or stq precede a conditional branch in a +dnl quadpack. The conditional branch moves the retire pointer one cycle +dnl later. 
+ +dnl Optimization notes: +dnl Callee-saves regs: r9 r10 r11 r12 r13 r14 r15 r26 ?r27? +dnl Reserved regs: r29 r30 r31 +dnl Free caller-saves regs in unrolled code: r24 r25 r28 +dnl We should swap some of the callee-saves regs for some of the free +dnl caller-saves regs, saving some overhead cycles. +dnl Most importantly, we should write fast code for the 0-7 case. +dnl The code we use there is for the 21164, and runs at 7 cycles/limb +dnl on the 21264. Should not be hard, if we write specialized code for +dnl 1-7 limbs (the one for 0 limbs should be straightforward). We then just +dnl need a jump table indexed by the low 3 bits of the count argument. + + +ASM_START() +PROLOGUE(mpn_addmul_1) + cmpult r18, 8, r1 C r1 = (size < 8) + beq r1, $Large C 8 or more limbs: use the unrolled code + + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + subq r18, 1, r18 C size-- + mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + umulh r2, r19, r0 C r0 = prod_high + beq r18, $Lend0b C jump if size was == 1 + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + subq r18, 1, r18 C size-- + addq r5, r3, r3 C add prod_low to the limb read from res_ptr + cmpult r3, r5, r4 C r4 = carry out of that add + stq r3, 0(r16) + addq r16, 8, r16 C res_ptr++ + beq r18, $Lend0a C jump if size was == 2 + + ALIGN(8) +$Loop0: mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + addq r4, r0, r0 C cy_limb = cy_limb + 'cy' + subq r18, 1, r18 C size-- + umulh r2, r19, r4 C r4 = cy_limb + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + addq r3, r0, r3 C r3 = cy_limb + prod_low + cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low) + addq r5, r3, r3 + cmpult r3, r5, r5 + stq r3, 0(r16) + addq r16, 8, r16 C res_ptr++ + addq r5, r0, r0 C combine carries + bne r18, $Loop0 +$Lend0a: + mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + addq r4, r0, r0 C cy_limb = cy_limb + 'cy' + umulh r2, r19, r4 C r4 = cy_limb + addq r3, r0, r3 C r3 = cy_limb + prod_low + cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low) + addq r5, r3, r3 + cmpult r3, r5, r5 
+ stq r3, 0(r16) + addq r5, r0, r0 C combine carries + addq r4, r0, r0 C cy_limb = prod_high + cy + ret r31, (r26), 1 +$Lend0b: + addq r5, r3, r3 + cmpult r3, r5, r5 + stq r3, 0(r16) + addq r0, r5, r0 C return prod_high plus the carry of the add + ret r31, (r26), 1 + +$Large: + lda $30, -240($30) C allocate a 240-byte stack frame + stq $9, 8($30) C save $9-$15, which the unrolled code overwrites + stq $10, 16($30) + stq $11, 24($30) + stq $12, 32($30) + stq $13, 40($30) + stq $14, 48($30) + stq $15, 56($30) + + and r18, 7, r20 C count for the first loop, 0-7 + srl r18, 3, r18 C count for unrolled loop + bis r31, r31, r0 C carry limb = 0 in case the first loop is skipped + beq r20, $Lunroll + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + subq r20, 1, r20 C size-- + mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + umulh r2, r19, r0 C r0 = prod_high + beq r20, $Lend1b C jump if size was == 1 + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + subq r20, 1, r20 C size-- + addq r5, r3, r3 + cmpult r3, r5, r4 + stq r3, 0(r16) + addq r16, 8, r16 C res_ptr++ + beq r20, $Lend1a C jump if size was == 2 + + ALIGN(8) +$Loop1: mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + addq r4, r0, r0 C cy_limb = cy_limb + 'cy' + subq r20, 1, r20 C size-- + umulh r2, r19, r4 C r4 = cy_limb + ldq r2, 0(r17) C r2 = s1_limb + addq r17, 8, r17 C s1_ptr++ + addq r3, r0, r3 C r3 = cy_limb + prod_low + cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low) + addq r5, r3, r3 + cmpult r3, r5, r5 + stq r3, 0(r16) + addq r16, 8, r16 C res_ptr++ + addq r5, r0, r0 C combine carries + bne r20, $Loop1 + +$Lend1a: + mulq r2, r19, r3 C r3 = prod_low + ldq r5, 0(r16) C r5 = *res_ptr + addq r4, r0, r0 C cy_limb = cy_limb + 'cy' + umulh r2, r19, r4 C r4 = cy_limb + addq r3, r0, r3 C r3 = cy_limb + prod_low + cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low) + addq r5, r3, r3 + cmpult r3, r5, r5 + stq r3, 0(r16) + addq r16, 8, r16 C res_ptr++ + addq r5, r0, r0 C combine carries + addq r4, r0, r0 C cy_limb = prod_high + cy + br r31, $Lunroll +$Lend1b: + addq r5, r3, r3 + cmpult r3, r5, r5 + stq r3, 0(r16) + addq 
r16, 8, r16 C res_ptr++ + addq r0, r5, r0 + +$Lunroll: + lda r17, -16(r17) C L1 bookkeeping + lda r16, -16(r16) C L1 bookkeeping + bis r0, r31, r12 C r12 = carry limb coming out of the first loop + +C ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____ + + ldq r2, 16(r17) C L1 + ldq r3, 24(r17) C L1 + lda r18, -1(r18) C L1 bookkeeping + ldq r6, 16(r16) C L1 + ldq r7, 24(r16) C L1 + ldq r0, 32(r17) C L1 + mulq r19, r2, r13 C U1 + ldq r1, 40(r17) C L1 + umulh r19, r2, r14 C U1 + mulq r19, r3, r15 C U1 + lda r17, 64(r17) C L1 bookkeeping + ldq r4, 32(r16) C L1 + ldq r5, 40(r16) C L1 + umulh r19, r3, r8 C U1 + ldq r2, -16(r17) C L1 + mulq r19, r0, r9 C U1 + ldq r3, -8(r17) C L1 + umulh r19, r0, r10 C U1 + addq r6, r13, r6 C L0 lo + acc + mulq r19, r1, r11 C U1 + cmpult r6, r13, r20 C L0 lo add => carry + lda r16, 64(r16) C L1 bookkeeping + addq r6, r12, r22 C U0 hi add => answer + cmpult r22, r12, r21 C L0 hi add => carry + addq r14, r20, r14 C U0 hi mul + carry + ldq r6, -16(r16) C L1 + addq r7, r15, r23 C L0 lo + acc + addq r14, r21, r14 C U0 hi mul + carry + ldq r7, -8(r16) C L1 + umulh r19, r1, r12 C U1 + cmpult r23, r15, r20 C L0 lo add => carry + addq r23, r14, r23 C U0 hi add => answer + ldq r0, 0(r17) C L1 + mulq r19, r2, r13 C U1 + cmpult r23, r14, r21 C L0 hi add => carry + addq r8, r20, r8 C U0 hi mul + carry + ldq r1, 8(r17) C L1 + umulh r19, r2, r14 C U1 + addq r4, r9, r4 C L0 lo + acc + stq r22, -48(r16) C L0 + stq r23, -40(r16) C L1 + mulq r19, r3, r15 C U1 + addq r8, r21, r8 C U0 hi mul + carry + cmpult r4, r9, r20 C L0 lo add => carry + addq r4, r8, r22 C U0 hi add => answer + ble r18, $Lend C U1 bookkeeping + +C ____ MAIN UNROLLED LOOP ____ + ALIGN(16) +$Loop: + bis r31, r31, r31 C U1 mt + cmpult r22, r8, r21 C L0 hi add => carry + addq r10, r20, r10 C U0 hi mul + carry + ldq r4, 0(r16) C L1 + + bis r31, r31, r31 C U1 mt + addq r5, r11, r23 C L0 lo + acc + addq r10, r21, r10 C L0 hi mul + carry + ldq r5, 8(r16) C L1 + + umulh r19, r3, r8 C U1 + cmpult r23, r11, r20 C L0 lo add => carry + addq r23, 
r10, r23 C U0 hi add => answer + ldq r2, 16(r17) C L1 + + mulq r19, r0, r9 C U1 + cmpult r23, r10, r21 C L0 hi add => carry + addq r12, r20, r12 C U0 hi mul + carry + ldq r3, 24(r17) C L1 + + umulh r19, r0, r10 C U1 + addq r6, r13, r6 C L0 lo + acc + stq r22, -32(r16) C L0 + stq r23, -24(r16) C L1 + + bis r31, r31, r31 C L0 st slosh + mulq r19, r1, r11 C U1 + bis r31, r31, r31 C L1 st slosh + addq r12, r21, r12 C U0 hi mul + carry + + cmpult r6, r13, r20 C L0 lo add => carry + bis r31, r31, r31 C U1 mt + lda r18, -1(r18) C L1 bookkeeping + addq r6, r12, r22 C U0 hi add => answer + + bis r31, r31, r31 C U1 mt + cmpult r22, r12, r21 C L0 hi add => carry + addq r14, r20, r14 C U0 hi mul + carry + ldq r6, 16(r16) C L1 + + bis r31, r31, r31 C U1 mt + addq r7, r15, r23 C L0 lo + acc + addq r14, r21, r14 C U0 hi mul + carry + ldq r7, 24(r16) C L1 + + umulh r19, r1, r12 C U1 + cmpult r23, r15, r20 C L0 lo add => carry + addq r23, r14, r23 C U0 hi add => answer + ldq r0, 32(r17) C L1 + + mulq r19, r2, r13 C U1 + cmpult r23, r14, r21 C L0 hi add => carry + addq r8, r20, r8 C U0 hi mul + carry + ldq r1, 40(r17) C L1 + + umulh r19, r2, r14 C U1 + addq r4, r9, r4 C U0 lo + acc + stq r22, -16(r16) C L0 + stq r23, -8(r16) C L1 + + bis r31, r31, r31 C L0 st slosh + mulq r19, r3, r15 C U1 + bis r31, r31, r31 C L1 st slosh + addq r8, r21, r8 C L0 hi mul + carry + + cmpult r4, r9, r20 C L0 lo add => carry + bis r31, r31, r31 C U1 mt + lda r17, 64(r17) C L1 bookkeeping + addq r4, r8, r22 C U0 hi add => answer + + bis r31, r31, r31 C U1 mt + cmpult r22, r8, r21 C L0 hi add => carry + addq r10, r20, r10 C U0 hi mul + carry + ldq r4, 32(r16) C L1 + + bis r31, r31, r31 C U1 mt + addq r5, r11, r23 C L0 lo + acc + addq r10, r21, r10 C L0 hi mul + carry + ldq r5, 40(r16) C L1 + + umulh r19, r3, r8 C U1 + cmpult r23, r11, r20 C L0 lo add => carry + addq r23, r10, r23 C U0 hi add => answer + ldq r2, -16(r17) C L1 + + mulq r19, r0, r9 C U1 + cmpult r23, r10, r21 C L0 hi add => carry + addq r12, 
r20, r12 C U0 hi mul + carry + ldq r3, -8(r17) C L1 + + umulh r19, r0, r10 C U1 + addq r6, r13, r6 C L0 lo + acc + stq r22, 0(r16) C L0 + stq r23, 8(r16) C L1 + + bis r31, r31, r31 C L0 st slosh + mulq r19, r1, r11 C U1 + bis r31, r31, r31 C L1 st slosh + addq r12, r21, r12 C U0 hi mul + carry + + cmpult r6, r13, r20 C L0 lo add => carry + bis r31, r31, r31 C U1 mt + lda r16, 64(r16) C L1 bookkeeping + addq r6, r12, r22 C U0 hi add => answer + + bis r31, r31, r31 C U1 mt + cmpult r22, r12, r21 C L0 hi add => carry + addq r14, r20, r14 C U0 hi mul + carry + ldq r6, -16(r16) C L1 + + bis r31, r31, r31 C U1 mt + addq r7, r15, r23 C L0 lo + acc + addq r14, r21, r14 C U0 hi mul + carry + ldq r7, -8(r16) C L1 + + umulh r19, r1, r12 C U1 + cmpult r23, r15, r20 C L0 lo add => carry + addq r23, r14, r23 C U0 hi add => answer + ldq r0, 0(r17) C L1 + + mulq r19, r2, r13 C U1 + cmpult r23, r14, r21 C L0 hi add => carry + addq r8, r20, r8 C U0 hi mul + carry + ldq r1, 8(r17) C L1 + + umulh r19, r2, r14 C U1 + addq r4, r9, r4 C L0 lo + acc + stq r22, -48(r16) C L0 + stq r23, -40(r16) C L1 + + bis r31, r31, r31 C L0 st slosh + mulq r19, r3, r15 C U1 + bis r31, r31, r31 C L1 st slosh + addq r8, r21, r8 C U0 hi mul + carry + + cmpult r4, r9, r20 C L0 lo add => carry + addq r4, r8, r22 C U0 hi add => answer + bis r31, r31, r31 C L1 mt + bgt r18, $Loop C U1 bookkeeping + +C ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____ +$Lend: + cmpult r22, r8, r21 C L0 hi add => carry + addq r10, r20, r10 C U0 hi mul + carry + ldq r4, 0(r16) C L1 + addq r5, r11, r23 C L0 lo + acc + addq r10, r21, r10 C L0 hi mul + carry + ldq r5, 8(r16) C L1 + umulh r19, r3, r8 C U1 + cmpult r23, r11, r20 C L0 lo add => carry + addq r23, r10, r23 C U0 hi add => answer + mulq r19, r0, r9 C U1 + cmpult r23, r10, r21 C L0 hi add => carry + addq r12, r20, r12 C U0 hi mul + carry + umulh r19, r0, r10 C U1 + addq r6, r13, r6 C L0 lo + acc + stq r22, -32(r16) C L0 + stq r23, -24(r16) C L1 + mulq r19, r1, r11 C U1 + addq 
r12, r21, r12 C U0 hi mul + carry + cmpult r6, r13, r20 C L0 lo add => carry + addq r6, r12, r22 C U0 hi add => answer + cmpult r22, r12, r21 C L0 hi add => carry + addq r14, r20, r14 C U0 hi mul + carry + addq r7, r15, r23 C L0 lo + acc + addq r14, r21, r14 C U0 hi mul + carry + umulh r19, r1, r12 C U1 + cmpult r23, r15, r20 C L0 lo add => carry + addq r23, r14, r23 C U0 hi add => answer + cmpult r23, r14, r21 C L0 hi add => carry + addq r8, r20, r8 C U0 hi mul + carry + addq r4, r9, r4 C U0 lo + acc + stq r22, -16(r16) C L0 + stq r23, -8(r16) C L1 + bis r31, r31, r31 C L0 st slosh + addq r8, r21, r8 C L0 hi mul + carry + cmpult r4, r9, r20 C L0 lo add => carry + addq r4, r8, r22 C U0 hi add => answer + cmpult r22, r8, r21 C L0 hi add => carry + addq r10, r20, r10 C U0 hi mul + carry + addq r5, r11, r23 C L0 lo + acc + addq r10, r21, r10 C L0 hi mul + carry + cmpult r23, r11, r20 C L0 lo add => carry + addq r23, r10, r23 C U0 hi add => answer + cmpult r23, r10, r21 C L0 hi add => carry + addq r12, r20, r12 C U0 hi mul + carry + stq r22, 0(r16) C L0 + stq r23, 8(r16) C L1 + addq r12, r21, r0 C U0 hi mul + carry (r0 = returned carry limb) + + ldq $9, 8($30) C restore $9-$15 + ldq $10, 16($30) + ldq $11, 24($30) + ldq $12, 32($30) + ldq $13, 40($30) + ldq $14, 48($30) + ldq $15, 56($30) + lda $30, 240($30) C release the stack frame + ret r31, (r26), 1 +EPILOGUE(mpn_addmul_1) +ASM_END() diff --git a/rts/gmp/mpn/alpha/ev6/gmp-mparam.h b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h new file mode 100644 index 0000000000..7ea20577f8 --- /dev/null +++ b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h @@ -0,0 +1,62 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 /* equals BYTES_PER_MP_LIMB * BITS_PER_CHAR */ +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* Generated by tuneup.c, 2000-08-02. Each threshold below is only defined here if it has not been defined already, so an earlier definition takes precedence. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 47 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 70 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 94 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 101 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 33 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 70 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 29 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 46 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 33 +#endif diff --git a/rts/gmp/mpn/alpha/gmp-mparam.h b/rts/gmp/mpn/alpha/gmp-mparam.h new file mode 100644 index 0000000000..054ff2fe5f --- /dev/null +++ b/rts/gmp/mpn/alpha/gmp-mparam.h @@ -0,0 +1,64 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 /* equals BYTES_PER_MP_LIMB * BITS_PER_CHAR */ +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* These values are for the 21164 family. The 21264 will require + different values, since it has such quick multiplication. */ +/* Generated by tuneup.c, 2000-07-19. Each threshold below is only defined here if it has not been defined already, so an earlier definition takes precedence. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 22 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 53 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 31 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 47 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 64 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 98 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 17 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 4 +#endif diff --git a/rts/gmp/mpn/alpha/invert_limb.asm b/rts/gmp/mpn/alpha/invert_limb.asm new file mode 100644 index 0000000000..a921b32b3f --- /dev/null +++ b/rts/gmp/mpn/alpha/invert_limb.asm @@ -0,0 +1,345 @@ +dnl Alpha mpn_invert_limb -- Invert a normalized limb. + +dnl Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +dnl +dnl This is based on sophie:/gmp-stuff/dbg-inv-limb.c. +dnl The ideas are due to Peter L. Montgomery +dnl +dnl The table below uses 4096 bytes. The file mentioned above has an +dnl alternative function that doesn't require the table, but it runs 50% +dnl slower than this. 
+ +include(`../config.m4') + +ASM_START() + +FLOAT64($C36,9223372036854775808.0) C 2^63 + +PROLOGUE_GP(mpn_invert_limb) + lda r30,-16(r30) C 16 bytes of stack scratch, used to pass values between integer and FP registers + addq r16,r16,r1 C r1 = 2*d, zero only if d is 0 or 2^63 + bne r1,$73 + lda r0,-1 C in that case return all ones + br r31,$Lend +$73: + srl r16,1,r1 + stq r1,0(r30) + ldt f11,0(r30) + cvtqt f11,f1 + lda r1,$C36 + ldt f10,0(r1) + divt f10,f1,f10 C f10 = 2^63 / (d >> 1) + lda r2,$invtab-4096 C NOTE(review): -4096 bias presumably because a normalized d gives index >= 2048 -- confirm + srl r16,52,r1 C r1 = top 12 bits of d (table index) + addq r1,r1,r1 C scale by 2, table entries are 16-bit words + addq r1,r2,r1 + bic r1,6,r2 + ldq r2,0(r2) + bic r1,1,r1 + extwl r2,r1,r2 C pick the 16-bit entry out of the loaded quadword + sll r2,48,r0 C table value becomes the top 16 bits of the first estimate + umulh r16,r0,r1 + addq r16,r1,r3 + stq r3,0(r30) + ldt f11,0(r30) + cvtqt f11,f1 + mult f1,f10,f1 + cvttqc f1,f1 + stt f1,0(r30) + ldq r4,0(r30) + subq r0,r4,r0 C refine the estimate with the FP-computed correction + umulh r16,r0,r1 + mulq r16,r0,r2 + addq r16,r1,r3 + bge r3,$Loop2 +$Loop1: addq r2,r16,r2 C Loop1: add d to r3:r2 and increment r0 while r3 is negative + cmpult r2,r16,r1 + addq r3,r1,r3 + addq r0,1,r0 + blt r3,$Loop1 +$Loop2: cmpult r2,r16,r1 C Loop2: subtract d from r3:r2 and decrement r0 while r3 stays non-negative + subq r0,1,r0 + subq r3,r1,r3 + subq r2,r16,r2 + bge r3,$Loop2 +$Lend: + lda r30,16(r30) C release the stack scratch + ret r31,(r26),1 +EPILOGUE(mpn_invert_limb) +DATASTART(`$invtab',4) + .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41 + .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46 + .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50 + .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d + .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e + .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483 + .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c + .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8 + .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8 + .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb + .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22 + .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d + .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b + .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad + .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2 + .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a + .word 
0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056 + .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95 + .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7 + .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d + .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965 + .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1 + .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600 + .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452 + .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7 + .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100 + .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b + .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9 + .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a + .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e + .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5 + .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f + .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb + .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a + .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c + .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111 + .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89 + .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03 + .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80 + .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff + .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981 + .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806 + .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d + .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516 + .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2 + .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231 + .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2 + .word 
0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55 + .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb + .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83 + .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e + .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb + .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a + .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb + .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f + .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445 + .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed + .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197 + .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044 + .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2 + .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3 + .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56 + .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b + .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2 + .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b + .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736 + .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3 + .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3 + .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374 + .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237 + .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc + .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3 + .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b + .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56 + .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23 + .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1 + .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1 + .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893 + .word 
0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767 + .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d + .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514 + .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee + .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9 + .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5 + .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084 + .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64 + .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45 + .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29 + .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e + .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5 + .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd + .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7 + .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2 + .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0 + .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e + .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e + .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370 + .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264 + .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158 + .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f + .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46 + .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40 + .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a + .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37 + .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34 + .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33 + .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934 + .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836 + .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739 + .word 
0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e + .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544 + .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b + .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354 + .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e + .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169 + .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076 + .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84 + .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93 + .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4 + .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6 + .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9 + .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add + .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3 + .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a + .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822 + .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b + .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656 + .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571 + .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e + .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac + .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb + .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec + .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d + .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030 + .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54 + .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79 + .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f + .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6 + .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee + .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18 + .word 
0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42 + .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e + .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a + .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8 + .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6 + .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626 + .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557 + .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489 + .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc + .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef + .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224 + .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a + .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091 + .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9 + .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01 + .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b + .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76 + .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1 + .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee + .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b + .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a + .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9 + .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea + .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b + .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d + .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0 + .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4 + .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539 + .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e + .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5 + .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c + .word 
0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255 + .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e + .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8 + .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033 + .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e + .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb + .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18 + .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66 + .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5 + .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05 + .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56 + .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7 + .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9 + .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c + .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0 + .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5 + .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a + .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0 + .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7 + .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e + .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7 + .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400 + .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a + .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4 + .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210 + .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c + .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8 + .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026 + .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84 + .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3 + .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42 + .word 
0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3 + .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04 + .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65 + .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8 + .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b + .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f + .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3 + .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958 + .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be + .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824 + .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b + .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3 + .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b + .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4 + .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e + .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498 + .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403 + .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f + .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db + .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247 + .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5 + .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123 + .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091 + .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001 + .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70 + .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1 + .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52 + .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3 + .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35 + .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8 + .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c + .word 
0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f + .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04 + .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79 + .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee + .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965 + .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db + .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853 + .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca + .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743 + .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc + .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635 + .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af + .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a + .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5 + .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420 + .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c + .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319 + .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296 + .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214 + .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192 + .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111 + .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090 + .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010 +DATAEND() +ASM_END() diff --git a/rts/gmp/mpn/alpha/lshift.asm b/rts/gmp/mpn/alpha/lshift.asm new file mode 100644 index 0000000000..87c46f6fe7 --- /dev/null +++ b/rts/gmp/mpn/alpha/lshift.asm @@ -0,0 +1,104 @@ +dnl Alpha mpn_lshift -- Shift a number left. + +dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl cnt r19 + +dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, +dnl it would take 4 cycles/limb. It should be possible to get down to 3 +dnl cycles/limb since both ldq and stq can be paired with the other used +dnl instructions. But there are many restrictions in the 21064 pipeline that +dnl make it hard, if not impossible, to get down to 3 cycles/limb: + +dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. +dnl 2. Only aligned instruction pairs can be paired. +dnl 3. The store buffer or silo might not be able to deal with the bandwidth. 
+ +ASM_START() +PROLOGUE(mpn_lshift) + s8addq r18,r17,r17 C make r17 point at end of s1 + ldq r4,-8(r17) C load first limb + subq r17,8,r17 C r17 now points at the limb just loaded + subq r31,r19,r7 C r7 = -cnt, count for the opposing srl (presumably taken mod 64 - confirm) + s8addq r18,r16,r16 C make r16 point at end of RES + subq r18,1,r18 C r18 = size - 1 + and r18,4-1,r20 C number of limbs in first loop + srl r4,r7,r0 C compute function result + + beq r20,$L0 C no leftover limbs, skip the first loop + subq r18,r20,r18 C r18 = limbs left for the unrolled loop + + ALIGN(8) +$Loop0: + ldq r3,-8(r17) C r3 = next lower limb + subq r16,8,r16 + subq r17,8,r17 + subq r20,1,r20 + sll r4,r19,r5 C upper bits come from the current limb + srl r3,r7,r6 C lower bits come from the next lower limb + bis r3,r3,r4 C r4 = r3, next limb becomes current + bis r5,r6,r8 C merge the two parts + stq r8,0(r16) C store result limb + bne r20,$Loop0 + +$L0: beq r18,$Lend C skip the unrolled loop when no limbs are left for it + + ALIGN(8) +$Loop: ldq r3,-8(r17) C four limbs per pass, r3 and r4 alternate as current limb + subq r16,32,r16 + subq r18,4,r18 + sll r4,r19,r5 + srl r3,r7,r6 + + ldq r4,-16(r17) + sll r3,r19,r1 + bis r5,r6,r8 + stq r8,24(r16) + srl r4,r7,r2 + + ldq r3,-24(r17) + sll r4,r19,r5 + bis r1,r2,r8 + stq r8,16(r16) + srl r3,r7,r6 + + ldq r4,-32(r17) + sll r3,r19,r1 + bis r5,r6,r8 + stq r8,8(r16) + srl r4,r7,r2 + + subq r17,32,r17 + bis r1,r2,r8 + stq r8,0(r16) + + bgt r18,$Loop + +$Lend: sll r4,r19,r8 C last limb: only the left-shifted part, no lower limb to merge + stq r8,-8(r16) + ret r31,(r26),1 C r0 holds the function result computed at entry +EPILOGUE(mpn_lshift) +ASM_END() diff --git a/rts/gmp/mpn/alpha/mul_1.asm b/rts/gmp/mpn/alpha/mul_1.asm new file mode 100644 index 0000000000..46b8df34f5 --- /dev/null +++ b/rts/gmp/mpn/alpha/mul_1.asm @@ -0,0 +1,71 @@ +dnl Alpha __gmpn_mul_1 -- Multiply a limb vector with a limb and store +dnl the result in a second limb vector. + +dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl s2_limb r19 + +dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7 +dnl cycles/limb on EV6. + +ASM_START() +PROLOGUE(mpn_mul_1) + ldq r2,0(r17) C r2 = s1_limb + subq r18,1,r18 C size-- + mulq r2,r19,r3 C r3 = prod_low + bic r31,r31,r4 C clear cy_limb + umulh r2,r19,r0 C r0 = prod_high + beq r18,$Lend1 C jump if size was == 1 + ldq r2,8(r17) C r2 = s1_limb + subq r18,1,r18 C size-- + stq r3,0(r16) C store low half of the first product + beq r18,$Lend2 C jump if size was == 2 + + ALIGN(8) +$Loop: mulq r2,r19,r3 C r3 = prod_low + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + subq r18,1,r18 C size-- + umulh r2,r19,r4 C r4 = cy_limb + ldq r2,16(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + addq r3,r0,r3 C r3 = cy_limb + prod_low + stq r3,8(r16) C store result limb + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + addq r16,8,r16 C res_ptr++ + bne r18,$Loop C loop while limbs remain + +$Lend2: mulq r2,r19,r3 C r3 = prod_low + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + umulh r2,r19,r4 C r4 = cy_limb + addq r3,r0,r3 C r3 = cy_limb + prod_low + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + stq r3,8(r16) C store the last result limb + addq r4,r0,r0 C cy_limb = prod_high + cy + ret r31,(r26),1 C return with the final cy_limb in r0 +$Lend1: stq r3,0(r16) C size 1: store the only result limb + ret r31,(r26),1 C r0 still holds prod_high +EPILOGUE(mpn_mul_1) +ASM_END() diff --git a/rts/gmp/mpn/alpha/rshift.asm b/rts/gmp/mpn/alpha/rshift.asm new file mode 100644 index 0000000000..aa25eda54e --- /dev/null +++ b/rts/gmp/mpn/alpha/rshift.asm @@ -0,0 +1,102 @@ +dnl Alpha mpn_rshift -- Shift a number right. + +dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl cnt r19 + +dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, +dnl it would take 4 cycles/limb. It should be possible to get down to 3 +dnl cycles/limb since both ldq and stq can be paired with the other used +dnl instructions. But there are many restrictions in the 21064 pipeline that +dnl make it hard, if not impossible, to get down to 3 cycles/limb: + +dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. +dnl 2. Only aligned instruction pairs can be paired. +dnl 3. The store buffer or silo might not be able to deal with the bandwidth. 
+ +ASM_START() +PROLOGUE(mpn_rshift) + ldq r4,0(r17) C load first limb + addq r17,8,r17 C r17 now points at the second limb + subq r31,r19,r7 C r7 = -cnt, count for the opposing sll (presumably taken mod 64 - confirm) + subq r18,1,r18 C r18 = size - 1 + and r18,4-1,r20 C number of limbs in first loop + sll r4,r7,r0 C compute function result + + beq r20,$L0 C no leftover limbs, skip the first loop + subq r18,r20,r18 C r18 = limbs left for the unrolled loop + + ALIGN(8) +$Loop0: + ldq r3,0(r17) C r3 = next higher limb + addq r16,8,r16 + addq r17,8,r17 + subq r20,1,r20 + srl r4,r19,r5 C lower bits come from the current limb + sll r3,r7,r6 C upper bits come from the next higher limb + bis r3,r3,r4 C r4 = r3, next limb becomes current + bis r5,r6,r8 C merge the two parts + stq r8,-8(r16) C store result limb + bne r20,$Loop0 + +$L0: beq r18,$Lend C skip the unrolled loop when no limbs are left for it + + ALIGN(8) +$Loop: ldq r3,0(r17) C four limbs per pass, r3 and r4 alternate as current limb + addq r16,32,r16 + subq r18,4,r18 + srl r4,r19,r5 + sll r3,r7,r6 + + ldq r4,8(r17) + srl r3,r19,r1 + bis r5,r6,r8 + stq r8,-32(r16) + sll r4,r7,r2 + + ldq r3,16(r17) + srl r4,r19,r5 + bis r1,r2,r8 + stq r8,-24(r16) + sll r3,r7,r6 + + ldq r4,24(r17) + srl r3,r19,r1 + bis r5,r6,r8 + stq r8,-16(r16) + sll r4,r7,r2 + + addq r17,32,r17 + bis r1,r2,r8 + stq r8,-8(r16) + + bgt r18,$Loop + +$Lend: srl r4,r19,r8 C last limb: only the right-shifted part, no higher limb to merge + stq r8,0(r16) + ret r31,(r26),1 C r0 holds the function result computed at entry +EPILOGUE(mpn_rshift) +ASM_END() diff --git a/rts/gmp/mpn/alpha/sub_n.asm b/rts/gmp/mpn/alpha/sub_n.asm new file mode 100644 index 0000000000..718f657141 --- /dev/null +++ b/rts/gmp/mpn/alpha/sub_n.asm @@ -0,0 +1,114 @@ +dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +dnl store difference in a third limb vector. + +dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. 
+ +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_sub_n) + ldq r3,0(r17) C r3 = first s1 limb + ldq r4,0(r18) C r4 = first s2 limb + + subq r19,1,r19 C r19 = size - 1 + and r19,4-1,r2 C number of limbs in first loop + bis r31,r31,r0 C clear the running borrow + beq r2,$L0 C if multiple of 4 limbs, skip first loop + + subq r19,r2,r19 C r19 = limbs left for the unrolled loop + +$Loop0: subq r2,1,r2 + ldq r5,8(r17) C preload next s1 limb + addq r4,r0,r4 C s2 limb + incoming borrow + ldq r6,8(r18) C preload next s2 limb + cmpult r4,r0,r1 C r1 = carry out of that addition + subq r3,r4,r4 C r4 = s1 limb - (s2 limb + borrow) + cmpult r3,r4,r0 C r0 = borrow out of the subtraction + stq r4,0(r16) C store difference limb + bis r0,r1,r0 C merge both into the outgoing borrow + + addq r17,8,r17 + addq r18,8,r18 + bis r5,r5,r3 C move the preloaded limbs into place + bis r6,r6,r4 + addq r16,8,r16 + bne r2,$Loop0 + +$L0: beq r19,$Lend C only the final limb is left + + ALIGN(8) +$Loop: subq r19,4,r19 C four limbs per pass, same steps as Loop0 with r3/r4 and r5/r6 alternating + + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,0(r16) + bis r0,r1,r0 + + ldq r3,16(r17) + addq r6,r0,r6 + ldq r4,16(r18) + cmpult r6,r0,r1 + subq r5,r6,r6 + cmpult r5,r6,r0 + stq r6,8(r16) + bis r0,r1,r0 + + ldq r5,24(r17) + addq r4,r0,r4 + ldq r6,24(r18) + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,16(r16) + bis r0,r1,r0 + + ldq r3,32(r17) + addq r6,r0,r6 + ldq r4,32(r18) + cmpult r6,r0,r1 + subq r5,r6,r6 + cmpult r5,r6,r0 + stq r6,24(r16) + bis r0,r1,r0 + + addq r17,32,r17 + addq r18,32,r18 + addq r16,32,r16 + bne r19,$Loop + +$Lend: addq r4,r0,r4 C final limb, same borrow-propagating step without preloads + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,0(r16) + bis r0,r1,r0 C final borrow is left in r0 + ret r31,(r26),1 +EPILOGUE(mpn_sub_n) +ASM_END() diff --git a/rts/gmp/mpn/alpha/submul_1.asm b/rts/gmp/mpn/alpha/submul_1.asm new file mode 100644 index 0000000000..caec1a720b --- /dev/null +++ b/rts/gmp/mpn/alpha/submul_1.asm @@ -0,0 +1,87 @@ +dnl Alpha __gmpn_submul_1 -- Multiply a limb vector with a limb and +dnl subtract the result from a second limb 
vector. + +dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl size r18 +dnl s2_limb r19 + +dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7 +dnl cycles/limb on EV6. 
+ +ASM_START() +PROLOGUE(mpn_submul_1) + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + subq r18,1,r18 C size-- + mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + umulh r2,r19,r0 C r0 = prod_high + beq r18,$Lend1 C jump if size was == 1 + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + subq r18,1,r18 C size-- + subq r5,r3,r3 C r3 = *res_ptr - prod_low + cmpult r5,r3,r4 C r4 = borrow from that subtraction + stq r3,0(r16) C store result limb + addq r16,8,r16 C res_ptr++ + beq r18,$Lend2 C jump if size was == 2 + + ALIGN(8) +$Loop: mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + subq r18,1,r18 C size-- + umulh r2,r19,r4 C r4 = cy_limb + ldq r2,0(r17) C r2 = s1_limb + addq r17,8,r17 C s1_ptr++ + addq r3,r0,r3 C r3 = cy_limb + prod_low + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + subq r5,r3,r3 C r3 = *res_ptr - (cy_limb + prod_low) + cmpult r5,r3,r5 C r5 = borrow from that subtraction + stq r3,0(r16) C store result limb + addq r16,8,r16 C res_ptr++ + addq r5,r0,r0 C combine carries + bne r18,$Loop + +$Lend2: mulq r2,r19,r3 C r3 = prod_low + ldq r5,0(r16) C r5 = *res_ptr + addq r4,r0,r0 C cy_limb = cy_limb + 'cy' + umulh r2,r19,r4 C r4 = cy_limb + addq r3,r0,r3 C r3 = cy_limb + prod_low + cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low) + subq r5,r3,r3 C r3 = *res_ptr - (cy_limb + prod_low) + cmpult r5,r3,r5 C r5 = borrow from that subtraction + stq r3,0(r16) C store the last result limb + addq r5,r0,r0 C combine carries + addq r4,r0,r0 C cy_limb = prod_high + cy + ret r31,(r26),1 C return with the final cy_limb in r0 +$Lend1: subq r5,r3,r3 C size 1: r3 = *res_ptr - prod_low + cmpult r5,r3,r5 C r5 = borrow from that subtraction + stq r3,0(r16) C store the only result limb + addq r0,r5,r0 C r0 = prod_high + borrow + ret r31,(r26),1 +EPILOGUE(mpn_submul_1) +ASM_END() diff --git a/rts/gmp/mpn/alpha/udiv_qrnnd.S b/rts/gmp/mpn/alpha/udiv_qrnnd.S new file mode 100644 index 0000000000..53814bbcb0 --- /dev/null +++ b/rts/gmp/mpn/alpha/udiv_qrnnd.S @@ -0,0 +1,151 @@ + # Alpha 21064 __udiv_qrnnd + + # Copyright (C) 1992, 1994, 1995, 1997, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + .set noreorder + .set noat +.text + .align 3 + .globl __gmpn_udiv_qrnnd + .ent __gmpn_udiv_qrnnd +__gmpn_udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 + + ldiq cnt,16 # 16 passes of a 4-step unrolled loop = 64 quotient bits + blt d,.Largedivisor # top bit of the divisor set: use the halved-divisor path + +.Loop1: cmplt n0,0,tmp # tmp = top bit of the low word + addq n1,n1,n1 # shift the high word left by one + bis n1,tmp,n1 # and bring that bit in at the bottom + addq n0,n0,n0 # shift the low word left by one + cmpule d,n1,qb # qb = 1 when the divisor fits in the high word + subq n1,d,tmp + cmovne qb,tmp,n1 # if qb, subtract the divisor from the high word + bis n0,qb,n0 # quotient bit enters the low word at the bottom + cmplt n0,0,tmp # same step repeated three more times + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop1 + stq n1,0(rem_ptr) # high word now holds the remainder: store it + bis $31,n0,$0 # low word now holds the quotient: return it in $0 + ret $31,($26),1 + +.Largedivisor: + and n0,1,$4 # $4 = low bit of the dividend, re-added after the loop + + srl n0,1,n0 # shift the two-word dividend right by one + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 # $6 = low bit of the divisor + srl d,1,$5 + addq $5,$6,$5 # $5 = divisor / 2 rounded up + +.Loop2: cmplt n0,0,tmp # same shift-and-subtract step as Loop1, dividing by $5 + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + 
bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop2 + + addq n1,n1,n1 # undo the halving: remainder = 2 * remainder + saved low bit + addq $4,n1,n1 + bne $6,.LOdd # odd divisor was rounded up: correct quotient and remainder + stq n1,0(rem_ptr) # even divisor: store remainder, return quotient + bis $31,n0,$0 + ret $31,($26),1 + +.LOdd: + /* q' in n0. r' in n1 */ + addq n1,n0,n1 + cmpult n1,n0,tmp # tmp := carry from addq + beq tmp,.LLp6 + addq n0,1,n0 # on carry: bump the quotient and take one divisor off the remainder + subq n1,d,n1 +.LLp6: cmpult n1,d,tmp # remainder already below the divisor? + bne tmp,.LLp7 # yes: no further correction + addq n0,1,n0 # no: bump the quotient and take one divisor off the remainder + subq n1,d,n1 +.LLp7: + stq n1,0(rem_ptr) # store remainder, return quotient in $0 + bis $31,n0,$0 + ret $31,($26),1 + + .end __gmpn_udiv_qrnnd diff --git a/rts/gmp/mpn/alpha/umul.asm b/rts/gmp/mpn/alpha/umul.asm new file mode 100644 index 0000000000..44428ed5f5 --- /dev/null +++ b/rts/gmp/mpn/alpha/umul.asm @@ -0,0 +1,39 @@ +dnl Currently unused. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ + .set noreorder + .set volatile + .set noat + +.text + .align 3 + .globl __umul_ppmm + .ent __umul_ppmm +__umul_ppmm: +__umul_ppmm..ng: + .frame $30,0,$26,0 + .prologue 0 + mulq $17,$18,$1 + umulh $17,$18,$0 + stq $1,0($16) + ret $31,($26),1 + .end __umul_ppmm diff --git a/rts/gmp/mpn/alpha/unicos.m4 b/rts/gmp/mpn/alpha/unicos.m4 new file mode 100644 index 0000000000..7ff26c090c --- /dev/null +++ b/rts/gmp/mpn/alpha/unicos.m4 @@ -0,0 +1,63 @@ +divert(-1) + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +define(`ASM_START', + `.ident dummy') + +define(`X',`^X$1') +define(`FLOAT64', + `dnl + .psect $1@crud,data +$1: .t_floating $2 + .endp') + +define(`PROLOGUE', + `dnl + .stack 192 ; What does this mean? Only Cray knows. 
+ .psect $1@code,code,cache +$1::') +define(`PROLOGUE_GP', `PROLOGUE($1)') + +define(`EPILOGUE', + `dnl + .endp') + +define(`DATASTART', + `dnl + .psect $1@crud,data +$1:') +define(`DATAEND', + `dnl + .endp') + +define(`ASM_END', + `dnl + .end') + +define(`unop',`bis r31,r31,r31') ; Unicos assembler lacks unop +define(`cvttqc',`cvttq/c') + +define(`ALIGN',`') ; Unicos assembler seems to align using garbage + +divert + diff --git a/rts/gmp/mpn/arm/add_n.S b/rts/gmp/mpn/arm/add_n.S new file mode 100644 index 0000000000..fb3f8f703b --- /dev/null +++ b/rts/gmp/mpn/arm/add_n.S @@ -0,0 +1,77 @@ +@ ARM mpn_add -- Add two limb vectors of the same length > 0 and store sum in +@ a third limb vector. +@ Contributed by Robert Harley. + +@ Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. + +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +@ License for more details. + +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. 
+ +#define s r0 +#define a r1 +#define b r2 +#define n r3 + +#define sl r10 +#define fp r11 +#define ip r12 +#define sp r13 +#define lr r14 +#define pc r15 + +.text + .align 0 + .global __gmpn_add_n + .type __gmpn_add_n,%function +__gmpn_add_n: + stmfd sp!, { r8, r9, lr } + movs n, n, lsr #1 + bcc skip1 + ldr ip, [a], #4 + ldr lr, [b], #4 + adds ip, ip, lr + str ip, [s], #4 +skip1: + tst n, #1 + beq skip2 + ldmia a!, { r8, r9 } + ldmia b!, { ip, lr } + adcs r8, r8, ip + adcs r9, r9, lr + stmia s!, { r8, r9 } +skip2: + bics n, n, #1 + beq return + stmfd sp!, { r4, r5, r6, r7 } +add_n_loop: + ldmia a!, { r4, r5, r6, r7 } + ldmia b!, { r8, r9, ip, lr } + adcs r4, r4, r8 + ldr r8, [s] /* Bring stuff into cache. */ + adcs r5, r5, r9 + adcs r6, r6, ip + adcs r7, r7, lr + stmia s!, { r4, r5, r6, r7 } + sub n, n, #2 + teq n, #0 + bne add_n_loop + ldmfd sp!, { r4, r5, r6, r7 } +return: + adc r0, n, #0 + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_add_n, end - __gmpn_add_n diff --git a/rts/gmp/mpn/arm/addmul_1.S b/rts/gmp/mpn/arm/addmul_1.S new file mode 100644 index 0000000000..396fff77a3 --- /dev/null +++ b/rts/gmp/mpn/arm/addmul_1.S @@ -0,0 +1,89 @@ +@ ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result to a +@ second limb vector. +@ Contributed by Robert Harley. + +@ Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. + +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +@ License for more details.
+ +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. + +#define p r0 +#define a r1 +#define n r2 +#define w r3 + +#define z r11 + +#define ip r12 +#define sp r13 +#define lr r14 +#define pc r15 + +.text + .align 0 + .global __gmpn_addmul_1 + .type __gmpn_addmul_1,%function +__gmpn_addmul_1: /* In: p = r0, a = r1, n = r2, w = r3. Multiplies the n limbs at a by w, adds the products into the limbs at p, and returns the final carry limb in r0. */ + stmfd sp!, { r8-r11, lr } + mov z, #0 /* z stays 0; the adc instructions below use it to capture a carry */ + mov ip, #0 /* ip holds the carry limb handed from one step to the next */ + movs n, n, lsr #1 /* n >>= 1, C = old bit 0 */ + bcc skip1 /* bit 0 clear: no single leading limb to do */ + ldr lr, [a], #4 + ldr r9, [p] + umlal r9, ip, w, lr /* ip:r9 += w * lr */ + str r9, [p], #4 +skip1: + movs n, n, lsr #1 /* n >>= 1 again, C = bit 1 of the original count */ + bcc skip2 /* bit 1 clear: no pair of limbs to do */ + ldmia p, { r9, r10 } + adds r8, ip, r9 /* r8 = carry-in + first limb of p ... */ + adc r9, z, #0 /* ... and r9 = the carry out of that addition */ + ldmia a!, { ip, lr } + umlal r8, r9, w, ip + adds r9, r9, r10 + adc ip, z, #0 + umlal r9, ip, w, lr + stmia p!, { r8, r9 } +skip2: + teq n, #0 /* n now counts the remaining groups of four limbs */ + beq return + stmfd sp!, { r4-r7 } +addmul_loop: + ldmia p, { r5, r6, r7, r8 } + adds r4, ip, r5 + adc r5, z, #0 + ldmia a!, { r9, r10, ip, lr } + umlal r4, r5, w, r9 + adds r5, r5, r6 + adc r6, z, #0 + umlal r5, r6, w, r10 + adds r6, r6, r7 + adc r7, z, #0 + umlal r6, r7, w, ip + adds r7, r7, r8 + adc ip, z, #0 + umlal r7, ip, w, lr /* ip ends up as the carry limb for the next group */ + subs n, n, #1 /* one group of four limbs done */ + stmia p!, { r4, r5, r6, r7 } + bne addmul_loop + ldmfd sp!, { r4-r7 } +return: + mov r0, ip /* return value: the last carry limb */ + ldmfd sp!, { r8-r11, pc } +end: + .size __gmpn_addmul_1, end - __gmpn_addmul_1 diff --git a/rts/gmp/mpn/arm/gmp-mparam.h b/rts/gmp/mpn/arm/gmp-mparam.h new file mode 100644 index 0000000000..a35b0c7b66 --- /dev/null +++ b/rts/gmp/mpn/arm/gmp-mparam.h @@ -0,0 +1,34 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 21 +#endif +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 48 +#endif diff --git a/rts/gmp/mpn/arm/mul_1.S b/rts/gmp/mpn/arm/mul_1.S new file mode 100644 index 0000000000..bae526a0f0 --- /dev/null +++ b/rts/gmp/mpn/arm/mul_1.S @@ -0,0 +1,81 @@ +@ ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result +@ in a second limb vector. +@ Contributed by Robert Harley. + +@ Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. + +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +@ License for more details. + +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. + +#define p r0 +#define a r1 +#define n r2 +#define w r3 + +#define sl r10 +#define fp r11 +#define ip r12 +#define sp r13 +#define lr r14 +#define pc r15 + +.text + .align 0 + .global __gmpn_mul_1 + .type __gmpn_mul_1,%function +__gmpn_mul_1: + stmfd sp!, { r8, r9, lr } + ands ip, n, #1 + beq skip1 + ldr lr, [a], #4 + umull r9, ip, w, lr + str r9, [p], #4 +skip1: + tst n, #2 + beq skip2 + mov r8, ip + ldmia a!, { ip, lr } + mov r9, #0 + umlal r8, r9, w, ip + mov ip, #0 + umlal r9, ip, w, lr + stmia p!, { r8, r9 } +skip2: + bics n, n, #3 + beq return + stmfd sp!, { r6, r7 } +mul_1_loop: + mov r6, ip + ldmia a!, { r8, r9, ip, lr } + ldr r7, [p] /* Bring stuff into cache. */ + mov r7, #0 + umlal r6, r7, w, r8 + mov r8, #0 + umlal r7, r8, w, r9 + mov r9, #0 + umlal r8, r9, w, ip + mov ip, #0 + umlal r9, ip, w, lr + subs n, n, #4 + stmia p!, { r6, r7, r8, r9 } + bne mul_1_loop + ldmfd sp!, { r6, r7 } +return: + mov r0, ip + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_mul_1, end - __gmpn_mul_1 diff --git a/rts/gmp/mpn/arm/sub_n.S b/rts/gmp/mpn/arm/sub_n.S new file mode 100644 index 0000000000..856505fe21 --- /dev/null +++ b/rts/gmp/mpn/arm/sub_n.S @@ -0,0 +1,79 @@ +@ ARM mpn_sub -- Subtract two limb vectors of the same length > 0 and store +@ difference in a third limb vector. +@ Contributed by Robert Harley. + +@ Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. 
+ +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +@ License for more details. + +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. + +#define d r0 +#define a r1 +#define b r2 +#define n r3 + +#define sl r10 +#define fp r11 +#define ip r12 +#define sp r13 +#define lr r14 +#define pc r15 + +.text + .align 0 + .global __gmpn_sub_n + .type __gmpn_sub_n,%function +__gmpn_sub_n: + stmfd sp!, { r8, r9, lr } + subs ip, ip, ip + tst n, #1 + beq skip1 + ldr ip, [a], #4 + ldr lr, [b], #4 + subs ip, ip, lr + str ip, [d], #4 +skip1: + tst n, #2 + beq skip2 + ldmia a!, { r8, r9 } + ldmia b!, { ip, lr } + sbcs r8, r8, ip + sbcs r9, r9, lr + stmia d!, { r8, r9 } +skip2: + bics n, n, #3 + beq return + stmfd sp!, { r4, r5, r6, r7 } +sub_n_loop: + ldmia a!, { r4, r5, r6, r7 } + ldmia b!, { r8, r9, ip, lr } + sbcs r4, r4, r8 + ldr r8, [d] /* Bring stuff into cache. 
*/ + sbcs r5, r5, r9 + sbcs r6, r6, ip + sbcs r7, r7, lr + stmia d!, { r4, r5, r6, r7 } + sub n, n, #4 + teq n, #0 + bne sub_n_loop + ldmfd sp!, { r4, r5, r6, r7 } +return: + sbc r0, r0, r0 + and r0, r0, #1 + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_sub_n, end - __gmpn_sub_n diff --git a/rts/gmp/mpn/asm-defs.m4 b/rts/gmp/mpn/asm-defs.m4 new file mode 100644 index 0000000000..aa2024138b --- /dev/null +++ b/rts/gmp/mpn/asm-defs.m4 @@ -0,0 +1,1182 @@ +divert(-1) +dnl +dnl m4 macros for gmp assembly code, shared by all CPUs. +dnl +dnl These macros are designed for use with any m4 and have been used on +dnl GNU, FreeBSD, OpenBSD and SysV. +dnl +dnl GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error +dnl messages. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl Macros: +dnl +dnl Most new m4 specific macros have an "m4_" prefix to emphasise they're +dnl m4 expansions. But new defining things like deflit() and defreg() are +dnl named like the builtin define(), and forloop() is named following the +dnl GNU m4 example on which it's based. 
+dnl +dnl GNU m4 with the -P option uses "m4_" as a prefix for builtins, but that +dnl option isn't going to be used, so there's no conflict or confusion. +dnl +dnl +dnl Comments in output: +dnl +dnl The m4 comment delimiters are left at # and \n, the normal assembler +dnl commenting for most CPUs. m4 passes comment text through without +dnl expanding macros in it, which is generally a good thing since it stops +dnl unexpected expansions and possible resultant errors. +dnl +dnl But note that when a quoted string is being read, a # isn't special, so +dnl apostrophes in comments in quoted strings must be avoided or they'll be +dnl interpreted as a closing quote mark. But when the quoted text is +dnl re-read # will still act like a normal comment, suppressing macro +dnl expansion. +dnl +dnl For example, +dnl +dnl # apostrophes in comments that're outside quotes are ok +dnl # and using macro names like PROLOGUE is ok too +dnl ... +dnl ifdef(`PIC',` +dnl # but apostrophes aren't ok inside quotes +dnl # ^--wrong +dnl ... +dnl # though macro names like PROLOGUE are still ok +dnl ... +dnl ') +dnl +dnl If macro expansion in a comment is wanted, use `#' in the .asm (ie. a +dnl quoted hash symbol), which will turn into # in the .s but get +dnl expansions done on that line. This can make the .s more readable to +dnl humans, but it won't make a blind bit of difference to the assembler. +dnl +dnl All the above applies, mutatis mutandis, when changecom() is used to +dnl select @ ! ; or whatever other commenting. +dnl +dnl +dnl Variations in m4 affecting gmp: +dnl +dnl $# - When a macro is called as "foo" with no brackets, BSD m4 sets $# +dnl to 1, whereas GNU or SysV m4 set it to 0. In all cases though +dnl "foo()" sets $# to 1. This is worked around in various places. +dnl +dnl len() - When "len()" is given an empty argument, BSD m4 evaluates to +dnl nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0. +dnl See m4_length() below which works around this.
+dnl +dnl translit() - GNU m4 accepts character ranges like A-Z, and the new +dnl OpenBSD m4 does under option -g, but basic BSD and SysV don't. +dnl +dnl popdef() - in BSD and SysV m4 popdef() takes multiple arguments and +dnl pops each, but GNU m4 only takes one argument. +dnl +dnl push back - BSD m4 has some limits on the amount of text that can be +dnl pushed back. The limit is reasonably big and so long as macros +dnl don't gratuitously duplicate big arguments it isn't a problem. +dnl Normally an error message is given, but sometimes it just hangs. +dnl +dnl eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available, +dnl but BSD m4 doesn't (contrary to what the man page suggests) and +dnl instead ^ is exponentiation. +dnl +dnl eval() ?: - The C ternary operator "?:" is available in BSD m4, but not +dnl in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5). +dnl +dnl eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31 +dnl (ie. -2147483648) gives "-(". Using -2147483648 within an +dnl expression is ok, it just can't be a final result. "-(" will of +dnl course upset parsing, with all sorts of strange effects. +dnl +dnl eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on +dnl SunOS 5.7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't). See +dnl m4_lshift() and m4_rshift() below for workarounds. +dnl +dnl m4wrap() - in BSD m4, m4wrap() replaces any previous m4wrap() string, +dnl in SysV m4 it appends to it, and in GNU m4 it prepends. See +dnl m4wrap_prepend() below which brings uniformity to this. +dnl +dnl __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and +dnl they're used here to make error messages more informative. GNU m4 +dnl gives an unhelpful "NONE 0" in an m4wrap(), but that's worked +dnl around. 
+dnl +dnl __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the +dnl filename in __file__, so care should be taken that no macro has +dnl the same name as a file, or an unwanted expansion will occur when +dnl printing an error or warning. +dnl +dnl OpenBSD 2.6 m4 - this m4 rejects decimal constants containing an 8 or 9 +dnl in eval(), making it pretty much unusable. This bug is confined +dnl to version 2.6 (it's not in 2.5, and has been fixed in 2.7). +dnl +dnl SunOS /usr/bin/m4 - this m4 lacks a number of desired features, +dnl including $# and $@, defn(), m4exit(), m4wrap(), pushdef(), +dnl popdef(). /usr/5bin/m4 is a SysV style m4 which should always be +dnl available, and "configure" will reject /usr/bin/m4 in favour of +dnl /usr/5bin/m4 (if necessary). +dnl +dnl The sparc code actually has modest m4 requirements currently and +dnl could manage with /usr/bin/m4, but there's no reason to put our +dnl macros through contortions when /usr/5bin/m4 is available or GNU +dnl m4 can be installed. + + +ifdef(`__ASM_DEFS_M4_INCLUDED__', +`m4_error(`asm-defs.m4 already included, dont include it twice +')m4exit(1)') +define(`__ASM_DEFS_M4_INCLUDED__') + + +dnl Detect and give a message about the unsuitable OpenBSD 2.6 m4. + +ifelse(eval(89),89,, +`errprint( +`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable. +This is probably OpenBSD 2.6 m4 (September 1999). Upgrade to OpenBSD 2.7, +or get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4. Dont forget +to configure with M4=/wherever/m4 if you install one of these in a directory +not in $PATH. +')m4exit(1)') + + +dnl Detect and give a message about the unsuitable SunOS /usr/bin/m4. +dnl +dnl Unfortunately this test doesn't work when m4 is run in the normal way +dnl from mpn/Makefile with "m4 -DOPERATION_foo foo.asm", since the bad m4 +dnl takes "-" in "-D..." to mean read stdin, so it will look like it just +dnl hangs. But running "m4 asm-defs.m4" to try it out will work. 
+dnl +dnl We'd like to abort immediately on finding a problem, but unfortunately +dnl the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill +dnl it. Unexpanded $#'s in some m4_assert_numargs() later on will comment +dnl out some closing parentheses and kill it with "m4: arg stack overflow". + +define(m4_dollarhash_works_test,``$#'') +ifelse(m4_dollarhash_works_test(x),1,, +`errprint( +`This m4 doesnt support $# and cant be used for GMP asm processing. +If this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that +or can get it, otherwise install GNU m4. Dont forget to configure with +M4=/wherever/m4 if you install in a directory not in $PATH. +')') +undefine(`m4_dollarhash_works_test') + + +dnl -------------------------------------------------------------------------- +dnl Basic error handling things. + + +dnl Usage: m4_dollarhash_1_if_noparen_p +dnl +dnl Expand to 1 if a call "foo" gives $# set to 1 (as opposed to 0 like GNU +dnl and SysV m4 give). + +define(m4_dollarhash_1_if_noparen_test,`$#') +define(m4_dollarhash_1_if_noparen_p, +eval(m4_dollarhash_1_if_noparen_test==1)) +undefine(`m4_dollarhash_1_if_noparen_test') + + +dnl Usage: m4wrap_prepend(string) +dnl +dnl Prepend the given string to what will be expanded under m4wrap at the +dnl end of input. +dnl +dnl This macro exists to work around variations in m4wrap() behaviour in +dnl the various m4s (notes at the start of this file). Don't use m4wrap() +dnl directly since it will interfere with this scheme. + +define(m4wrap_prepend, +m4_assert_numargs(1) +`define(`m4wrap_string',`$1'defn(`m4wrap_string'))') + +m4wrap(`m4wrap_string') +define(m4wrap_string,`') + + +dnl Usage: m4_file_and_line +dnl +dnl Expand to the current file and line number, if the GNU m4 extensions +dnl __file__ and __line__ are available. +dnl +dnl In GNU m4 1.4 at the end of input when m4wrap text is expanded, +dnl __file__ is NONE and __line__ is 0, which is not a helpful thing to +dnl print.
If m4_file_seen() has been called to note the last file seen, +dnl then that file at a big line number is used, otherwise "end of input" +dnl is used (although "end of input" won't parse as an error message). + +define(m4_file_and_line, +`ifdef(`__file__', +`ifelse(__file__`'__line__,`NONE0', +`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')', +`__file__: __line__: ')')') + + +dnl Usage: m4_errprint_commas(arg,...) +dnl +dnl The same as errprint(), but commas are printed between arguments +dnl instead of spaces. + +define(m4_errprint_commas, +`errprint(`$1')dnl +ifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')') + + +dnl Usage: m4_error(args...) +dnl m4_warning(args...) +dnl +dnl Print an error message, using m4_errprint_commas, prefixed with the +dnl current filename and line number (if available). m4_error sets up to +dnl give an error exit at the end of processing, m4_warning just prints. +dnl These macros are the recommended way to print errors. +dnl +dnl The arguments here should be quoted in the usual way to prevent them +dnl being expanded when the macro call is read. (m4_error takes care not +dnl to do any further expansion.) +dnl +dnl For example, +dnl +dnl m4_error(`some error message +dnl ') +dnl +dnl which prints +dnl +dnl foo.asm:123: some error message +dnl +dnl or if __file__ and __line__ aren't available +dnl +dnl some error message +dnl +dnl The "file:line:" format is a basic style, used by gcc and GNU m4, so +dnl emacs and other editors will recognise it in their normal error message +dnl parsing. + +define(m4_warning, +`m4_errprint_commas(m4_file_and_line`'$@)') + +define(m4_error, +`define(`m4_error_occurred',1)m4_warning($@)') + +define(`m4_error_occurred',0) + +dnl This m4wrap_prepend() is first, so it'll be executed last. 
+m4wrap_prepend( +`ifelse(m4_error_occurred,1, +`m4_error(`Errors occurred during m4 processing +')m4exit(1)')') + + +dnl Usage: m4_assert_numargs(num) +dnl +dnl Put this unquoted on a line on its own at the start of a macro +dnl definition to add some code to check that num many arguments get passed +dnl to the macro. For example, +dnl +dnl define(foo, +dnl m4_assert_numargs(2) +dnl `something `$1' and `$2' blah blah') +dnl +dnl Then a call like foo(one,two,three) will provoke an error like +dnl +dnl file:10: foo expected 2 arguments, got 3 arguments +dnl +dnl Here are some calls and how many arguments they're interpreted as passing. +dnl +dnl foo(abc,def) 2 +dnl foo(xyz) 1 +dnl foo() 0 +dnl foo -1 +dnl +dnl The -1 for no parentheses at all means a macro that's meant to be used +dnl that way can be checked with m4_assert_numargs(-1). For example, +dnl +dnl define(SPECIAL_SUFFIX, +dnl m4_assert_numargs(-1) +dnl `ifdef(`FOO',`_foo',`_bar')') +dnl +dnl But as an alternative see also deflit() below where parenthesized +dnl expressions following a macro are passed through to the output. +dnl +dnl Note that in BSD m4 there's no way to differentiate calls "foo" and +dnl "foo()", so in BSD m4 the distinction between the two isn't enforced. +dnl (In GNU and SysV m4 it can be checked, and is.) + + +dnl m4_assert_numargs is able to check its own arguments by calling +dnl assert_numargs_internal directly. +dnl +dnl m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand +dnl to `$`'0' and do the wrong thing, and likewise for $1. The same is +dnl done in other assert macros. +dnl +dnl $`#' leaves $# in the new macro being defined, and stops # being +dnl interpreted as a comment character. +dnl +dnl `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is +dnl used. The space means that if there is a dnl it'll still work. 
+ +dnl Usage: m4_doublequote(x) expands to ``x'' +define(m4_doublequote, +`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''') + +define(m4_assert_numargs, +`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl +`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '') + +dnl Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1')) +define(m4_assert_numargs_internal, +`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))') + +dnl Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs) +dnl +dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it +dnl should be -1. If wantargs is -1 but gotargs is 0 and the two can't be +dnl distinguished then it's allowed to pass. +dnl +define(m4_assert_numargs_internal_check, +`ifelse(eval($2 == $3 + || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0, +`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3') +)')') + +dnl Called: m4_numargs_count($#,len(`$1')) +dnl If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise +dnl $# args. +define(m4_numargs_count, +`ifelse($1,0, -1, +`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')') + +dnl Usage: m4_Narguments(N) +dnl "$1 argument" or "$1 arguments" with the plural according to $1. +define(m4_Narguments, +`$1 argument`'ifelse(`$1',1,,s)') + + +dnl -------------------------------------------------------------------------- +dnl Additional error checking things. + + +dnl Usage: m4_file_seen() +dnl +dnl Record __file__ for the benefit of m4_file_and_line in m4wrap text. +dnl The basic __file__ macro comes out quoted, like `foo.asm', and +dnl m4_file_seen_last is defined like that too. +dnl +dnl This only needs to be used with something that could generate an error +dnl message in m4wrap text. The x86 PROLOGUE is the only such at the +dnl moment (at end of input its m4wrap checks for missing EPILOGUE). 
A few +dnl include()s can easily trick this scheme, but you'd expect an EPILOGUE +dnl in the same file as the PROLOGUE. + +define(m4_file_seen, +m4_assert_numargs(0) +`ifelse(__file__,`NONE',, +`define(`m4_file_seen_last',m4_doublequote(__file__))')') + + +dnl Usage: m4_assert_onearg() +dnl +dnl Put this, unquoted, at the start of a macro definition to add some code +dnl to check that one argument is passed to the macro, but with that +dnl argument allowed to be empty. For example, +dnl +dnl define(foo, +dnl m4_assert_onearg() +dnl `blah blah $1 blah blah') +dnl +dnl Calls "foo(xyz)" or "foo()" are accepted. A call "foo(xyz,abc)" fails. +dnl A call "foo" fails too, but BSD m4 can't detect this case (GNU and SysV +dnl m4 can). + +define(m4_assert_onearg, +m4_assert_numargs(0) +`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ') + +dnl Called: m4_assert_onearg_internal(`macroname',$#) +define(m4_assert_onearg_internal, +`ifelse($2,1,, +`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2') +)')') + + +dnl Usage: m4_assert_numargs_range(low,high) +dnl +dnl Put this, unquoted, at the start of a macro definition to add some code +dnl to check that between low and high many arguments get passed to the +dnl macro. For example, +dnl +dnl define(foo, +dnl m4_assert_numargs_range(3,5) +dnl `mandatory $1 $2 $3 optional $4 $5 end') +dnl +dnl See m4_assert_numargs() for more info. + +define(m4_assert_numargs_range, +m4_assert_numargs(2) +``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '') + +dnl Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1')) +define(m4_assert_numargs_range_internal, +m4_assert_numargs(5) +`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))') + +dnl Called: m4_assert_numargs_range_check(`name',low,high,gotargs) +dnl +dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it +dnl should be -1.
To ensure a `high' of -1 works, a fudge is applied to +dnl gotargs if it's 0 and the 0 and -1 cases can't be distinguished. +dnl +define(m4_assert_numargs_range_check, +m4_assert_numargs(4) +`ifelse(eval($2 <= $4 && + ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0, +`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4') +)')') + + +dnl Usage: m4_assert_defined(symbol) +dnl +dnl Put this unquoted on a line of its own at the start of a macro +dnl definition to add some code to check that the given symbol is defined +dnl when the macro is used. For example, +dnl +dnl define(foo, +dnl m4_assert_defined(`FOO_PREFIX') +dnl `FOO_PREFIX whatever') +dnl +dnl This is a convenient way to check that the user or ./configure or +dnl whatever has defined the things needed by a macro, as opposed to +dnl silently generating garbage. + +define(m4_assert_defined, +m4_assert_numargs(1) +``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '') + +dnl Called: m4_assert_defined_internal(`macroname',`define_required') +define(m4_assert_defined_internal, +m4_assert_numargs(2) +`ifdef(`$2',, +`m4_error(`$1 needs $2 defined +')')') + + +dnl Usage: m4_not_for_expansion(`SYMBOL') +dnl define_not_for_expansion(`SYMBOL') +dnl +dnl m4_not_for_expansion turns SYMBOL, if defined, into something which +dnl will give an error if expanded. For example, +dnl +dnl m4_not_for_expansion(`PIC') +dnl +dnl define_not_for_expansion is the same, but always makes a definition. +dnl +dnl These are for symbols that should be tested with ifdef(`FOO',...) +dnl rather than be expanded as such. They guard against accidentally +dnl omitting the quotes, as in ifdef(FOO,...). Note though that they only +dnl catches this when FOO is defined, so be sure to test code both with and +dnl without each definition. 
+ +define(m4_not_for_expansion, +m4_assert_numargs(1) +`ifdef(`$1',`define_not_for_expansion(`$1')')') + +define(define_not_for_expansion, +m4_assert_numargs(1) +`ifelse(defn(`$1'),,, +`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion() +')')dnl +define(`$1',`m4_not_for_expansion_internal(`$1')')') + +define(m4_not_for_expansion_internal, +`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)' +')') + + +dnl -------------------------------------------------------------------------- +dnl Various generic m4 things. + + +dnl Usage: m4_ifdef_anyof_p(`symbol',...) +dnl +dnl Expand to 1 if any of the symbols in the argument list are defined, or +dnl to 0 if not. + +define(m4_ifdef_anyof_p, +`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0, +`ifdef(`$1', 1, +`m4_ifdef_anyof_p(shift($@))')')') + + +dnl Usage: m4_length(string) +dnl +dnl Determine the length of a string. This is the same as len(), but +dnl always expands to a number, working around the BSD len() which +dnl evaluates to nothing given an empty argument. + +define(m4_length, +m4_assert_onearg() +`eval(len(`$1')-0)') + + +dnl Usage: m4_stringequal_p(x,y) +dnl +dnl Expand to 1 or 0 according as strings x and y are equal or not. + +define(m4_stringequal_p, +`ifelse(`$1',`$2',1,0)') + + +dnl Usage: m4_incr_or_decr(n,last) +dnl +dnl Do an incr(n) or decr(n), whichever is in the direction of "last". +dnl Both n and last must be numbers of course. + +define(m4_incr_or_decr, +m4_assert_numargs(2) +`ifelse(eval($1<$2),1,incr($1),decr($1))') + + +dnl Usage: forloop(i, first, last, statement) +dnl +dnl Based on GNU m4 examples/forloop.m4, but extended. +dnl +dnl statement is expanded repeatedly, with i successively defined as +dnl +dnl first, first+1, ..., last-1, last +dnl +dnl Or if first > last, then it's +dnl +dnl first, first-1, ..., last+1, last +dnl +dnl If first == last, then one expansion is done. 
+dnl +dnl A pushdef/popdef of i is done to preserve any previous definition (or +dnl lack of definition). first and last are eval()ed and so can be +dnl expressions. +dnl +dnl forloop_first is defined to 1 on the first iteration, 0 on the rest. +dnl forloop_last is defined to 1 on the last iteration, 0 on the others. +dnl Nested forloops are allowed, in which case forloop_first and +dnl forloop_last apply to the innermost loop that's open. +dnl +dnl A simple example, +dnl +dnl forloop(i, 1, 2*2+1, `dnl +dnl iteration number i ... ifelse(forloop_first,1,FIRST) +dnl ') + + +dnl "i" and "statement" are carefully quoted, but "first" and "last" are +dnl just plain numbers once eval()ed. + +define(`forloop', +m4_assert_numargs(4) +`pushdef(`$1',eval(`$2'))dnl +pushdef(`forloop_first',1)dnl +pushdef(`forloop_last',0)dnl +forloop_internal(`$1',eval(`$3'),`$4')`'dnl +popdef(`forloop_first')dnl +popdef(`forloop_last')dnl +popdef(`$1')') + +dnl Called: forloop_internal(`var',last,statement) +define(`forloop_internal', +m4_assert_numargs(3) +`ifelse($1,$2, +`define(`forloop_last',1)$3', +`$3`'dnl +define(`forloop_first',0)dnl +define(`$1',m4_incr_or_decr($1,$2))dnl +forloop_internal(`$1',$2,`$3')')') + + +dnl Usage: m4_toupper(x) +dnl m4_tolower(x) +dnl +dnl Convert the argument string to upper or lower case, respectively. +dnl Only one argument accepted. +dnl +dnl BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet +dnl is written out. + +define(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz') +define(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ') + +define(m4_toupper, +m4_assert_onearg() +`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)') + +define(m4_tolower, +m4_assert_onearg() +`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)') + + +dnl Usage: m4_empty_if_zero(x) +dnl +dnl Evaluate to x, or to nothing if x is 0. x is eval()ed and so can be an +dnl expression. 
+dnl +dnl This is useful for x86 addressing mode displacements since forms like +dnl (%ebx) are one byte shorter than 0(%ebx). A macro `foo' for use as +dnl foo(%ebx) could be defined with the following so it'll be empty if the +dnl expression comes out zero. +dnl +dnl deflit(`foo', `m4_empty_if_zero(a+b*4-c)') +dnl +dnl Naturally this shouldn't be done if, say, a computed jump depends on +dnl the code being a particular size. + +define(m4_empty_if_zero, +m4_assert_onearg() +`ifelse(eval($1),0,,eval($1))') + + +dnl Usage: m4_log2(x) +dnl +dnl Calculate a logarithm to base 2. +dnl x must be an integral power of 2, between 2**0 and 2**30. +dnl x is eval()ed, so it can be an expression. +dnl An error results if x is invalid. +dnl +dnl 2**31 isn't supported, because an unsigned 2147483648 is out of range +dnl of a 32-bit signed int. Also, the bug in BSD m4 where an eval() +dnl resulting in 2147483648 (or -2147483648 as the case may be) gives `-(' +dnl means tests like eval(1<<31==(x)) would be necessary, but that then +dnl gives an unattractive explosion of eval() error messages if x isn't +dnl numeric. + +define(m4_log2, +m4_assert_numargs(1) +`m4_log2_internal(0,1,eval(`$1'))') + +dnl Called: m4_log2_internal(n,2**n,target) +define(m4_log2_internal, +m4_assert_numargs(3) +`ifelse($2,$3,$1, +`ifelse($1,30, +`m4_error(`m4_log2() argument too big or not a power of two: $3 +')', +`m4_log2_internal(incr($1),eval(2*$2),$3)')')') + + +dnl Usage: m4_div2_towards_zero +dnl +dnl m4 division is probably whatever a C signed division is, and C doesn't +dnl specify what rounding gets used on negatives, so this expression forces +dnl a rounding towards zero. + +define(m4_div2_towards_zero, +m4_assert_numargs(1) +`eval((($1) + ((($1)<0) & ($1))) / 2)') + + +dnl Usage: m4_lshift(n,count) +dnl m4_rshift(n,count) +dnl +dnl Calculate n shifted left or right by count many bits. Both n and count +dnl are eval()ed and so can be expressions. 
+dnl +dnl Negative counts are allowed and mean a shift in the opposite direction. +dnl Negative n is allowed and right shifts will be arithmetic (meaning +dnl divide by 2**count, rounding towards zero, also meaning the sign bit is +dnl duplicated). +dnl +dnl Use these macros instead of << and >> in eval() since the basic ccs +dnl SysV m4 doesn't have those operators. + +define(m4_rshift, +m4_assert_numargs(2) +`m4_lshift(`$1',-(`$2'))') + +define(m4_lshift, +m4_assert_numargs(2) +`m4_lshift_internal(eval(`$1'),eval(`$2'))') + +define(m4_lshift_internal, +m4_assert_numargs(2) +`ifelse(eval($2-0==0),1,$1, +`ifelse(eval($2>0),1, +`m4_lshift_internal(eval($1*2),decr($2))', +`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')') + + +dnl Usage: deflit(name,value) +dnl +dnl Like define(), but "name" expands like a literal, rather than taking +dnl arguments. For example "name(%eax)" expands to "value(%eax)". +dnl +dnl Limitations: +dnl +dnl $ characters in the value part must have quotes to stop them looking +dnl like macro parameters. For example, deflit(reg,`123+$`'4+567'). See +dnl defreg() below for handling simple register definitions like $7 etc. +dnl +dnl "name()" is turned into "name", unfortunately. In GNU and SysV m4 an +dnl error is generated when this happens, but in BSD m4 it will happen +dnl silently. The problem is that in BSD m4 $# is 1 in both "name" or +dnl "name()", so there's no way to differentiate them. Because we want +dnl plain "name" to turn into plain "value", we end up with "name()" +dnl turning into plain "value" too. +dnl +dnl "name(foo)" will lose any whitespace after commas in "foo", for example +dnl "disp(%eax, %ecx)" would become "128(%eax,%ecx)". +dnl +dnl These parentheses oddities shouldn't matter in assembler text, but if +dnl they do the suggested workaround is to write "name ()" or "name (foo)" +dnl to stop the parentheses looking like a macro argument list. 
If a space +dnl isn't acceptable in the output, then write "name`'()" or "name`'(foo)". +dnl The `' is stripped when read, but again stops the parentheses looking +dnl like parameters. + +dnl Quoting for deflit_emptyargcheck is similar to m4_assert_numargs. The +dnl stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro +dnl created, not in deflit. +define(deflit, +m4_assert_numargs(2) +`define(`$1', +`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl +$2`'dnl +ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')') + +dnl Called: deflit_emptyargcheck(macroname,$#,`$1') +define(deflit_emptyargcheck, +`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1, +`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information) +')')') + + +dnl Usage: m4_assert(`expr') +dnl +dnl Test a compile-time requirement with an m4 expression. The expression +dnl should be quoted, and will be eval()ed and expected to give 1, so it +dnl should be a comparison or logical test rather than an arbitrary +dnl non-zero value. +dnl For example, +dnl +dnl m4_assert(`FOO*2+6 < 14') + +define(m4_assert, +m4_assert_numargs(1) +`ifelse(eval($1),1,, +`m4_error(`assertion failed: $1 +')')') + + +dnl -------------------------------------------------------------------------- +dnl Various assembler things, not specific to any particular CPU. +dnl + + +dnl Usage: include_mpn(`filename') +dnl +dnl Like include(), but adds a path to the mpn source directory. For +dnl example, +dnl +dnl include_mpn(`sparc64/addmul_1h.asm') + +define(include_mpn, +m4_assert_numargs(1) +m4_assert_defined(`CONFIG_TOP_SRCDIR') +`include(CONFIG_TOP_SRCDIR`/mpn/$1')') + + +dnl Usage: C comment ... +dnl +dnl "C" works like a FORTRAN-style comment character. This can be used for +dnl comments to the right of assembly instructions, where just dnl would +dnl remove the linefeed, and concatenate adjacent lines.
+dnl +dnl "C" and/or "dnl" are useful when an assembler doesn't support comments, +dnl or where different assemblers for a particular CPU have different +dnl comment styles. The intermediate ".s" files will end up with no +dnl comments, just code. +dnl +dnl Using "C" is not intended to cause offence to anyone who doesn't like +dnl FORTRAN; but if that happens it's an unexpected bonus. + +define(C, ` +dnl') + + +dnl Various possible defines passed from the Makefile that are to be tested +dnl with ifdef() rather than be expanded. + +m4_not_for_expansion(`PIC') + +dnl aors_n +m4_not_for_expansion(`OPERATION_add_n') +m4_not_for_expansion(`OPERATION_sub_n') + +dnl aorsmul_n +m4_not_for_expansion(`OPERATION_addmul_1') +m4_not_for_expansion(`OPERATION_submul_1') + +dnl logops_n +m4_not_for_expansion(`OPERATION_and_n') +m4_not_for_expansion(`OPERATION_andn_n') +m4_not_for_expansion(`OPERATION_nand_n') +m4_not_for_expansion(`OPERATION_ior_n') +m4_not_for_expansion(`OPERATION_iorn_n') +m4_not_for_expansion(`OPERATION_nior_n') +m4_not_for_expansion(`OPERATION_xor_n') +m4_not_for_expansion(`OPERATION_xnor_n') + +dnl popham +m4_not_for_expansion(`OPERATION_popcount') +m4_not_for_expansion(`OPERATION_hamdist') + + +dnl Usage: m4_config_gmp_mparam(`symbol') +dnl +dnl Check that `symbol' is defined. If it isn't, issue an error and +dnl terminate immediately. The error message explains that the symbol +dnl should be in config.m4, copied from gmp-mparam.h. +dnl +dnl Processing is terminated immediately since missing something like +dnl KARATSUBA_SQR_THRESHOLD can lead to infinite loops with endless error +dnl messages. + +define(m4_config_gmp_mparam, +m4_assert_numargs(1) +`ifdef(`$1',, +`m4_error(`$1 is not defined. + "configure" should have extracted this from gmp-mparam.h and put it + in config.m4, but somehow this has failed. +')m4exit(1)')') + + +dnl Usage: defreg(name,reg) +dnl +dnl Give a name to a $ style register. 
For example, +dnl +dnl defreg(foo,$12) +dnl +dnl defreg() inserts an extra pair of quotes after the $ so that it's not +dnl interpreted as an m4 macro parameter, ie. foo is actually $`'12. m4 +dnl strips those quotes when foo is expanded. +dnl +dnl deflit() is used to make the new definition, so it will expand +dnl literally even if followed by parentheses ie. foo(99) will become +dnl $12(99). (But there's nowhere that would be used is there?) +dnl +dnl When making further definitions from existing defreg() macros, remember +dnl to use defreg() again to protect the $ in the new definitions too. For +dnl example, +dnl +dnl defreg(a0,$4) +dnl defreg(a1,$5) +dnl ... +dnl +dnl defreg(PARAM_DST,a0) +dnl +dnl This is only because a0 is expanding at the time the PARAM_DST +dnl definition is made, leaving a literal $4 that must be re-quoted. On +dnl the other hand in something like the following ra is only expanded when +dnl ret is used and its $`'31 protection will have its desired effect at +dnl that time. +dnl +dnl defreg(ra,$31) +dnl ... +dnl define(ret,`j ra') +dnl +dnl Note that only $n forms are meant to be used here, and something like +dnl 128($30) doesn't get protected and will come out wrong. + +define(defreg, +m4_assert_numargs(2) +`deflit(`$1', +substr(`$2',0,1)``''substr(`$2',1))') + + +dnl Usage: m4_instruction_wrapper() +dnl +dnl Put this, unquoted, on a line on its own, at the start of a macro +dnl that's a wrapper around an assembler instruction. It adds code to give +dnl a descriptive error message if the macro is invoked without arguments. +dnl +dnl For example, suppose jmp needs to be wrapped, +dnl +dnl define(jmp, +dnl m4_instruction_wrapper() +dnl m4_assert_numargs(1) +dnl `.byte 0x42 +dnl .long $1 +dnl nop') +dnl +dnl The point of m4_instruction_wrapper is to get a better error message +dnl than m4_assert_numargs would give if jmp is accidentally used as plain +dnl "jmp foo" instead of the intended "jmp( foo)".
"jmp()" with no +dnl argument also provokes the error message. +dnl +dnl m4_instruction_wrapper should only be used with wrapped instructions +dnl that take arguments, since obviously something meant to be used as +dnl plain "ret", say, doesn't want to give an error when used that way. + +define(m4_instruction_wrapper, +m4_assert_numargs(0) +``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl +m4_doublequote(ifdef(`__file__',__file__,`the m4 sources')),dnl +$`#',m4_doublequote($`'1))`dnl'') + +dnl Called: m4_instruction_wrapper_internal($0,`filename',$#,$1) +define(m4_instruction_wrapper_internal, +`ifelse(eval($3<=1 && m4_length(`$4')==0),1, +`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details +')')') + + +dnl Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES +dnl CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES +dnl +dnl When code supports a variable amount of loop unrolling, the convention +dnl is to define UNROLL_COUNT to the number of limbs processed per loop. +dnl When testing code this can be varied to see how much the loop overhead +dnl is costing. For example, +dnl +dnl deflit(UNROLL_COUNT, 32) +dnl +dnl If the forloop() generating the unrolled loop has a pattern processing +dnl more than one limb, the convention is to express this with CHUNK_COUNT. +dnl For example, +dnl +dnl deflit(CHUNK_COUNT, 2) +dnl +dnl The LOG2, MASK and BYTES definitions below are derived from these COUNT +dnl definitions. If COUNT is redefined, the LOG2, MASK and BYTES follow +dnl the new definition automatically. +dnl +dnl LOG2 is the log base 2 of COUNT. MASK is COUNT-1, which can be used as +dnl a bit mask. BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes +dnl processed in each unrolled loop. +dnl +dnl BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file. It +dnl exists only so the BYTES definitions here can be common to all CPUs. 
+dnl In the actual code for a given CPU, an explicit 4 or 8 may as well be +dnl used because the code is only for a particular CPU, it doesn't need to +dnl be general. +dnl +dnl Note that none of these macros do anything except give conventional +dnl names to commonly used things. You still have to write your own +dnl expressions for a forloop() and the resulting address displacements. +dnl Something like the following would be typical for 4 bytes per limb. +dnl +dnl forloop(`i',0,UNROLL_COUNT-1,` +dnl deflit(`disp',eval(i*4)) +dnl ... +dnl ') +dnl +dnl Or when using CHUNK_COUNT, +dnl +dnl forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,` +dnl deflit(`disp0',eval(i*CHUNK_COUNT*4)) +dnl deflit(`disp1',eval(disp0+4)) +dnl ... +dnl ') +dnl +dnl Clearly `i' can be run starting from 1, or from high to low or whatever +dnl best suits. + +deflit(UNROLL_LOG2, +m4_assert_defined(`UNROLL_COUNT') +`m4_log2(UNROLL_COUNT)') + +deflit(UNROLL_MASK, +m4_assert_defined(`UNROLL_COUNT') +`eval(UNROLL_COUNT-1)') + +deflit(UNROLL_BYTES, +m4_assert_defined(`UNROLL_COUNT') +m4_assert_defined(`BYTES_PER_MP_LIMB') +`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)') + +deflit(CHUNK_LOG2, +m4_assert_defined(`CHUNK_COUNT') +`m4_log2(CHUNK_COUNT)') + +deflit(CHUNK_MASK, +m4_assert_defined(`CHUNK_COUNT') +`eval(CHUNK_COUNT-1)') + +deflit(CHUNK_BYTES, +m4_assert_defined(`CHUNK_COUNT') +m4_assert_defined(`BYTES_PER_MP_LIMB') +`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)') + + +dnl Usage: MPN(name) +dnl +dnl Add MPN_PREFIX to a name. +dnl MPN_PREFIX defaults to "__gmpn_" if not defined. + +ifdef(`MPN_PREFIX',, +`define(`MPN_PREFIX',`__gmpn_')') + +define(MPN, +m4_assert_numargs(1) +`MPN_PREFIX`'$1') + + +dnl Usage: mpn_add_n, etc +dnl +dnl Convenience definitions using MPN(), like the #defines in gmp.h. Each +dnl function that might be implemented in assembler is here. 
+ +define(define_mpn, +m4_assert_numargs(1) +`define(`mpn_$1',`MPN(`$1')')') + +define_mpn(add) +define_mpn(add_1) +define_mpn(add_n) +define_mpn(add_nc) +define_mpn(addmul_1) +define_mpn(addmul_1c) +define_mpn(addsub_n) +define_mpn(addsub_nc) +define_mpn(and_n) +define_mpn(andn_n) +define_mpn(bdivmod) +define_mpn(cmp) +define_mpn(com_n) +define_mpn(copyd) +define_mpn(copyi) +define_mpn(divexact_by3c) +define_mpn(divrem) +define_mpn(divrem_1) +define_mpn(divrem_1c) +define_mpn(divrem_2) +define_mpn(divrem_classic) +define_mpn(divrem_newton) +define_mpn(dump) +define_mpn(gcd) +define_mpn(gcd_1) +define_mpn(gcdext) +define_mpn(get_str) +define_mpn(hamdist) +define_mpn(invert_limb) +define_mpn(ior_n) +define_mpn(iorn_n) +define_mpn(kara_mul_n) +define_mpn(kara_sqr_n) +define_mpn(lshift) +define_mpn(lshiftc) +define_mpn(mod_1) +define_mpn(mod_1c) +define_mpn(mul) +define_mpn(mul_1) +define_mpn(mul_1c) +define_mpn(mul_basecase) +define_mpn(mul_n) +define_mpn(perfect_square_p) +define_mpn(popcount) +define_mpn(preinv_mod_1) +define_mpn(nand_n) +define_mpn(nior_n) +define_mpn(random) +define_mpn(random2) +define_mpn(rshift) +define_mpn(rshiftc) +define_mpn(scan0) +define_mpn(scan1) +define_mpn(set_str) +define_mpn(sqr_basecase) +define_mpn(sub_n) +define_mpn(sqrtrem) +define_mpn(sub) +define_mpn(sub_1) +define_mpn(sub_n) +define_mpn(sub_nc) +define_mpn(submul_1) +define_mpn(submul_1c) +define_mpn(toom3_mul_n) +define_mpn(toom3_sqr_n) +define_mpn(umul_ppmm) +define_mpn(udiv_qrnnd) +define_mpn(xnor_n) +define_mpn(xor_n) + +define(`ASM_START', + `') + +define(`PROLOGUE', + ` + TEXT + ALIGN(4) + GLOBL GSYM_PREFIX`$1' + TYPE(GSYM_PREFIX`$1',`function') +GSYM_PREFIX`$1':') + +define(`EPILOGUE', + ` + SIZE(GSYM_PREFIX`$1',.-GSYM_PREFIX`$1')') + +dnl LSYM_PREFIX might be L$, so defn() must be used to quote it or the L +dnl will expand as the L macro, an infinite recursion. 
+define(`L',`defn(`LSYM_PREFIX')$1') + +define(`INT32', + ` + ALIGN(4) +$1: + W32 $2 + ') + +define(`INT64', + ` + ALIGN(8) +$1: + W32 $2 + W32 $3 + ') + + +dnl Usage: ALIGN(bytes) +dnl +dnl Emit a ".align" directive. The alignment is specified in bytes, and +dnl will normally need to be a power of 2. The actual ".align" generated +dnl is either bytes or logarithmic according to what ./configure detects. +dnl +dnl ALIGN_FILL_0x90, if defined and equal to "yes", means a ", 0x90" should +dnl be appended (this is for x86). + +define(ALIGN, +m4_assert_numargs(1) +m4_assert_defined(`ALIGN_LOGARITHMIC') +`.align ifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl +ifelse(ALIGN_FILL_0x90,yes,`, 0x90')') + + +dnl Usage: MULFUNC_PROLOGUE(function function...) +dnl +dnl A dummy macro which is grepped for by ./configure to know what +dnl functions a multi-function file is providing. Use this if there aren't +dnl explicit PROLOGUE()s for each possible function. +dnl +dnl Multiple MULFUNC_PROLOGUEs can be used, or just one with the function +dnl names separated by spaces. + +define(`MULFUNC_PROLOGUE', +m4_assert_numargs(1) +`') + + +divert`'dnl diff --git a/rts/gmp/mpn/clipper/add_n.s b/rts/gmp/mpn/clipper/add_n.s new file mode 100644 index 0000000000..538a1caed0 --- /dev/null +++ b/rts/gmp/mpn/clipper/add_n.s @@ -0,0 +1,48 @@ +; Clipper __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +.text + .align 16 +.globl ___gmpn_add_n +___gmpn_add_n: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry-save register + +.Loop: loadw (r1),r4 + loadw (r2),r5 + addwc r6,r6 ; restore carry from r6 + addwc r5,r4 + storw r4,(r0) + subwc r6,r6 ; save carry in r6 + addq $4,r0 + addq $4,r1 + addq $4,r2 + subq $1,r3 + brne .Loop + + negw r6,r0 + loadw (sp),r6 + addq $8,sp + ret sp diff --git a/rts/gmp/mpn/clipper/mul_1.s b/rts/gmp/mpn/clipper/mul_1.s new file mode 100644 index 0000000000..c0c756488c --- /dev/null +++ b/rts/gmp/mpn/clipper/mul_1.s @@ -0,0 +1,47 @@ +; Clipper __gmpn_mul_1 -- Multiply a limb vector with a limb and store +; the result in a second limb vector. + +; Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. 
+ +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +.text + .align 16 +.globl ___gmpn_mul_1 +___gmpn_mul_1: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry limb + +.Loop: loadw (r1),r4 + mulwux r3,r4 + addw r6,r4 ; add old carry limb into low product limb + loadq $0,r6 + addwc r5,r6 ; propagate cy into high product limb + storw r4,(r0) + addq $4,r0 + addq $4,r1 + subq $1,r2 + brne .Loop + + movw r6,r0 + loadw 0(sp),r6 + addq $8,sp + ret sp diff --git a/rts/gmp/mpn/clipper/sub_n.s b/rts/gmp/mpn/clipper/sub_n.s new file mode 100644 index 0000000000..44d8797289 --- /dev/null +++ b/rts/gmp/mpn/clipper/sub_n.s @@ -0,0 +1,48 @@ +; Clipper __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +.text + .align 16 +.globl ___gmpn_sub_n +___gmpn_sub_n: + subq $8,sp + storw r6,(sp) + loadw 12(sp),r2 + loadw 16(sp),r3 + loadq $0,r6 ; clear carry-save register + +.Loop: loadw (r1),r4 + loadw (r2),r5 + addwc r6,r6 ; restore carry from r6 + subwc r5,r4 + storw r4,(r0) + subwc r6,r6 ; save carry in r6 + addq $4,r0 + addq $4,r1 + addq $4,r2 + subq $1,r3 + brne .Loop + + negw r6,r0 + loadw (sp),r6 + addq $8,sp + ret sp diff --git a/rts/gmp/mpn/cray/README b/rts/gmp/mpn/cray/README new file mode 100644 index 0000000000..8195c67e21 --- /dev/null +++ b/rts/gmp/mpn/cray/README @@ -0,0 +1,14 @@ +The (poorly optimized) code in this directory was originally written for a +j90 system, but finished on a c90. It should work on all Cray vector +computers. For the T3E and T3D systems, the `alpha' subdirectory at the +same level as the directory containing this file, is much better. + +* `+' seems to be faster than `|' when combining carries. + +* It is possible that the best multiply performance would be achieved by + storing only 24 bits per element, and using lazy carry propagation. Before + calling i24mult, full carry propagation would be needed. + +* Supply tasking versions of the C loops. + + diff --git a/rts/gmp/mpn/cray/add_n.c b/rts/gmp/mpn/cray/add_n.c new file mode 100644 index 0000000000..1fdb394993 --- /dev/null +++ b/rts/gmp/mpn/cray/add_n.c @@ -0,0 +1,96 @@ +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. + For Cray vector processors. + + Copyright (C) 1996, 2000 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version.
+ + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +mpn_add_n (c, a, b, n) + mp_ptr c; + mp_srcptr a, b; + mp_size_t n; +{ + mp_size_t i; + mp_size_t nm1 = n - 1; + int more_carries = 0; + int carry_out; + + /* For small operands the non-vector code is faster. */ + if (n < 16) + goto sequential; + + if (a == c || b == c) + { + TMP_DECL (marker); + TMP_MARK (marker); + if (c == a) + { + /* allocate temp space for a */ + mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + MPN_COPY (ax, a, n); + a = (mp_srcptr) ax; + } + if (c == b) + { + /* allocate temp space for b */ + mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + MPN_COPY (bx, b, n); + b = (mp_srcptr) bx; + } + carry_out = mpn_add_n (c, a, b, n); + TMP_FREE (marker); + return carry_out; + } + + carry_out = a[nm1] + b[nm1] < a[nm1]; + +#pragma _CRI ivdep /* Cray PVP systems */ + for (i = nm1; i > 0; i--) + { + int cy_in; + cy_in = a[i - 1] + b[i - 1] < a[i - 1]; + c[i] = a[i] + b[i] + cy_in; + more_carries += c[i] < cy_in; + } + c[0] = a[0] + b[0]; + + if (more_carries) + { + /* This won't vectorize, but we should come here rarely. 
*/ + int cy; + sequential: + cy = 0; + for (i = 0; i < n; i++) + { + mp_limb_t ai, ci, t; + ai = a[i]; + t = b[i] + cy; + cy = t < cy; + ci = ai + t; + cy += ci < ai; + c[i] = ci; + } + carry_out = cy; + } + + return carry_out; +} diff --git a/rts/gmp/mpn/cray/addmul_1.c b/rts/gmp/mpn/cray/addmul_1.c new file mode 100644 index 0000000000..031b4e8e8d --- /dev/null +++ b/rts/gmp/mpn/cray/addmul_1.c @@ -0,0 +1,46 @@ +/* mpn_addmul_1 for Cray PVP. + +Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb) +{ + mp_ptr p0, p1, tp; + mp_limb_t cy_limb; + TMP_DECL (marker); + TMP_MARK (marker); + + p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + + GMPN_MULWW (p1, p0, up, &n, &limb); + cy_limb = mpn_add_n (tp, rp, p0, n); + rp[0] = tp[0]; + cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1); + cy_limb += p1[n - 1]; + + TMP_FREE (marker); + return cy_limb; +} diff --git a/rts/gmp/mpn/cray/gmp-mparam.h b/rts/gmp/mpn/cray/gmp-mparam.h new file mode 100644 index 0000000000..14f7b8e05b --- /dev/null +++ b/rts/gmp/mpn/cray/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 64 +#define BITS_PER_SHORTINT 32 +#define BITS_PER_CHAR 8 diff --git a/rts/gmp/mpn/cray/mul_1.c b/rts/gmp/mpn/cray/mul_1.c new file mode 100644 index 0000000000..0c8750b4ac --- /dev/null +++ b/rts/gmp/mpn/cray/mul_1.c @@ -0,0 +1,44 @@ +/* mpn_mul_1 for Cray PVP. + +Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb) +{ + mp_ptr p0, p1; + mp_limb_t cy_limb; + TMP_DECL (marker); + TMP_MARK (marker); + + p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + + GMPN_MULWW (p1, p0, up, &n, &limb); + rp[0] = p0[0]; + cy_limb = mpn_add_n (rp + 1, p0 + 1, p1, n - 1); + cy_limb += p1[n - 1]; + + TMP_FREE (marker); + return cy_limb; +} diff --git a/rts/gmp/mpn/cray/mulww.f b/rts/gmp/mpn/cray/mulww.f new file mode 100644 index 0000000000..99507c1e44 --- /dev/null +++ b/rts/gmp/mpn/cray/mulww.f @@ -0,0 +1,54 @@ +c Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP. 
+ +c Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +c This file is part of the GNU MP Library. + +c The GNU MP Library is free software; you can redistribute it and/or +c modify it under the terms of the GNU Lesser General Public License as +c published by the Free Software Foundation; either version 2.1 of the +c License, or (at your option) any later version. + +c The GNU MP Library is distributed in the hope that it will be useful, +c but WITHOUT ANY WARRANTY; without even the implied warranty of +c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +c Lesser General Public License for more details. + +c You should have received a copy of the GNU Lesser General Public +c License along with the GNU MP Library; see the file COPYING.LIB. If +c not, write to the Free Software Foundation, Inc., 59 Temple Place - +c Suite 330, Boston, MA 02111-1307, USA. + +c p1[] = hi(a[]*s); the upper limbs of each product +c p0[] = low(a[]*s); the corresponding lower limbs +c n is number of limbs in the vectors + + subroutine gmpn_mulww(p1,p0,a,n,s) + integer*8 p1(0:*),p0(0:*),a(0:*),s + integer n + + integer*8 a0,a1,a2,s0,s1,s2,c + integer*8 ai,t0,t1,t2,t3,t4 + + s0 = shiftl(and(s,4194303),24) + s1 = shiftl(and(shiftr(s,22),4194303),24) + s2 = shiftl(and(shiftr(s,44),4194303),24) + + do i = 0,n-1 + ai = a(i) + a0 = shiftl(and(ai,4194303),24) + a1 = shiftl(and(shiftr(ai,22),4194303),24) + a2 = shiftl(and(shiftr(ai,44),4194303),24) + + t0 = i24mult(a0,s0) + t1 = i24mult(a0,s1)+i24mult(a1,s0) + t2 = i24mult(a0,s2)+i24mult(a1,s1)+i24mult(a2,s0) + t3 = i24mult(a1,s2)+i24mult(a2,s1) + t4 = i24mult(a2,s2) + + p0(i)=shiftl(t2,44)+shiftl(t1,22)+t0 + c=shiftr(shiftr(t0,22)+and(t1,4398046511103)+ + $ shiftl(and(t2,1048575),22),42) + p1(i)=shiftl(t4,24)+shiftl(t3,2)+shiftr(t2,20)+shiftr(t1,42)+c + end do + end diff --git a/rts/gmp/mpn/cray/mulww.s b/rts/gmp/mpn/cray/mulww.s new file mode 100644 index 0000000000..890cdcf94d --- /dev/null +++ b/rts/gmp/mpn/cray/mulww.s 
@@ -0,0 +1,245 @@ +* Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP. + +* Copyright (C) 1996, 2000 Free Software Foundation, Inc. +* This file is generated from mulww.f in this same directory. + +* This file is part of the GNU MP Library. + +* The GNU MP Library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 2.1 of the +* License, or (at your option) any later version. + +* The GNU MP Library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. + +* You should have received a copy of the GNU Lesser General Public +* License along with the GNU MP Library; see the file COPYING.LIB. If +* not, write to the Free Software Foundation, Inc., 59 Temple Place - +* Suite 330, Boston, MA 02111-1307, USA. 
+ + IDENT GMPN_MULWW +********************************************** +* Assemble with Cal Version 2.0 * +* * +* Generated by CFT77 6.0.4.19 * +* on 06/27/00 at 04:34:13 * +* * +********************************************** +* ALLOW UNDERSCORES IN IDENTIFIERS + EDIT OFF + FORMAT NEW +@DATA SECTION DATA,CM +@DATA = W.* + CON O'0000000000040000000000 + CON O'0435152404713723252514 ;GMPN_MUL 1 + CON O'0535270000000000000000 ;WW 1 + CON O'0000000000000001200012 ;trbk tbl 1 + VWD 32/0,32/P.GMPN_MULWW ;trbk tbl 1 + CON O'0014003000000000001416 ;trbk tbl 1 + CON O'0000000000000000000011 ;trbk tbl 1 + CON O'0000000000000000000215 ;trbk tbl 1 + BSSZ 1 ;trbk tbl 1 +@CODE SECTION CODE +@CODE = P.* +L3 = P.* ; 1 + A0 A6 ;arg base 1 + A5 6 ;num Darg 1 + B03,A5 0,A0 ;load DAs 1 + A0 A1+A2 ; 1 + A5 1 ;num Ts 1 + 0,A0 T00,A5 ; 1 + B02 A2 ;new base 1 + B66 A3 ;stk top 1 + B01 A6 ;arg base 1 + A7 P.L4 ;ofrn rtn 1 + B00 A7 ;return 1 + A6 @DATA ; 1 + J $STKOFEN ;$STKOFEN 1 +GMPN_MULWW = P.* ; 1 + A0 @DATA+3 ;(trbk) 1 + B77 A0 ;(trbk) 1 + A1 13 ;num Bs 1 + A0 B66 ;stk top 1 + A2 B66 ;stk tmp 1 + A4 B67 ;stk limt 1 + 0,A0 B77,A1 ; 1 + A7 782 ;stk size 1 + A3 A2+A7 ; 1 + A0 A4-A3 ; 1 + JAM L3 ;overflow 1 + A0 A6 ;arg base 1 + A5 6 ;num Darg 1 + B03,A5 0,A0 ;load DAs 1 + A0 A1+A2 ; 1 + A5 1 ;num Ts 1 + 0,A0 T00,A5 ; 1 + B02 A2 ;new base 1 + B66 A3 ;new top 1 + B01 A6 ;arg base 1 +L4 = P.* ;ofrn rtn 1 + A7 B07 ;regs 14 + S7 0,A7 ; 14 + A6 B10 ;regs 9 + S6 0,A6 ; 9 + S5 1 ; 14 + S4 <22 ; 9 + S7 S7-S5 ; 14 + S5 #S7 ; 14 + T00 S6 ;regs 10 + S6 S6>22 ; 10 + S7 T00 ;regs 11 + S7 S7>44 ; 11 + S3 T00 ;regs 9 + S3 S3&S4 ; 9 + S6 S6&S4 ; 10 + S7 S7&S4 ; 11 + S3 S3<24 ; 9 + S6 S6<24 ; 10 + S7 S7<24 ; 11 + S0 S5 ;regs 14 + S4 S5 ;regs 14 + S1 S6 ;regs 14 + S2 S3 ;regs 14 + S3 S7 ;regs 14 + JSP L5 ; 14 +L6 = P.* ; 14 + S7 -S4 ; 14 + A2 S7 ;regs 14 + VL A2 ;regs 14 + A3 B06 ;s_bt_sp 14 + A5 B05 ;s_bt_sp 14 + A4 B04 ;s_bt_sp 14 + A1 VL ; 14 + A2 S4 ;regs 14 +L7 = P.* ; 14 + A0 A3 ;regs 15 + VL A1 ;regs 
15 + V7 ,A0,1 ; 15 + B11 A5 ;s_bt_sp 15 + A7 22 ; 17 + B12 A4 ;s_bt_sp 17 + V6 V7>A7 ; 17 + B13 A3 ;s_bt_sp 17 + S7 <22 ; 17 + A3 B02 ;s_bt_sp 17 + V5 S7&V6 ; 17 + A6 24 ; 17 + V4 V5<A6 ; 17 + V3 S1*FV4 ; 22 + V2 S7&V7 ; 16 + V1 V2<A6 ; 16 + V0 S3*FV1 ; 22 + V6 V0+V3 ; 22 + A5 44 ; 18 + V5 V7>A5 ; 18 + V2 S1*FV1 ; 21 + V3 S7&V5 ; 18 + A0 14 ; 34 + B77 A0 ;regs 34 + A4 B77 ;regs 34 + A0 A4+A3 ; 34 + ,A0,1 V2 ;v_ld_str 34 + V0 V3<A6 ; 18 + V7 S2*FV1 ; 20 + A4 142 ; 34 + A0 A4+A3 ; 34 + ,A0,1 V7 ;v_ld_str 34 + V5 V7>A7 ; 28 + V2 S2*FV0 ; 22 + V3 V6+V2 ; 22 + S7 <20 ; 28 + V1 S7&V3 ; 28 + A4 270 ; 34 + A0 A4+A3 ; 34 + ,A0,1 V0 ;v_ld_str 34 + A4 14 ; 34 + A0 A4+A3 ; 34 + V7 ,A0,1 ;v_ld_str 34 + V6 V1<A7 ; 28 + V2 S2*FV4 ; 21 + V0 V7+V2 ; 21 + S7 <42 ; 28 + V1 S7&V0 ; 28 + A4 398 ; 34 + A0 A4+A3 ; 34 + ,A0,1 V0 ;v_ld_str 34 + V7 S3*FV4 ; 23 + V2 V5+V1 ; 28 + V0 V3<A5 ; 26 + A5 526 ; 34 + A0 A5+A3 ; 34 + ,A0,1 V0 ;v_ld_str 34 + A5 270 ; 34 + A0 A5+A3 ; 34 + V4 ,A0,1 ;v_ld_str 34 + V5 V2+V6 ; 28 + A5 20 ; 32 + V1 V3>A5 ; 32 + V0 S1*FV4 ; 23 + A5 654 ; 34 + A0 A5+A3 ; 34 + ,A0,1 V1 ;v_ld_str 34 + V6 V7+V0 ; 23 + A5 2 ; 32 + V2 V6<A5 ; 32 + V3 S3*FV4 ; 24 + A5 142 ; 34 + A0 A5+A3 ; 34 + V1 ,A0,1 ;v_ld_str 34 + A5 526 ; 34 + A0 A5+A3 ; 34 + V7 ,A0,1 ;v_ld_str 34 + V0 V1+V7 ; 26 + V6 V3<A6 ; 32 + V4 V6+V2 ; 32 + A6 42 ; 28 + V7 V5>A6 ; 28 + A5 654 ; 34 + CPW ;cmr_vrsp 34 + A0 A5+A3 ; 34 + V1 ,A0,1 ;v_ld_str 34 + A5 398 ; 34 + A0 A5+A3 ; 34 + V3 ,A0,1 ;v_ld_str 34 + V6 V4+V1 ; 32 + V2 V3>A6 ; 32 + V5 V6+V2 ; 32 + A6 B12 ;s_bt_sp 32 + V4 V3<A7 ; 26 + A7 B13 ;regs 34 + A3 A7+A1 ; 34 + A7 B11 ;regs 34 + A5 A7+A1 ; 34 + A4 A6+A1 ; 34 + A7 A2+A1 ; 34 + A0 A2+A1 ; 34 + A2 128 ; 34 + B13 A0 ;s_bt_sp 34 + V1 V0+V4 ; 26 + A0 B11 ;regs 31 + ,A0,1 V1 ; 31 + V6 V5+V7 ; 33 + A0 A6 ;regs 33 + ,A0,1 V6 ; 33 + A0 B13 ;regs 34 + A1 A2 ;regs 34 + A2 A7 ;regs 34 + JAN L7 ; 34 +L8 = P.* ; 34 +L5 = P.* ; 34 + S1 0 ; 35 + A0 B02 ; 35 + A2 B02 ; 35 + A1 13 ;num Bs 35 + B66 A0 ; 35 + B77,A1 0,A0 ; 35 
+ A0 A2+A1 ; 35 + A1 1 ;num Ts 35 + T00,A1 0,A0 ; 35 + J B00 ; 35 + EXT $STKOFEN:p + ENTRY GMPN_MULWW + END diff --git a/rts/gmp/mpn/cray/sub_n.c b/rts/gmp/mpn/cray/sub_n.c new file mode 100644 index 0000000000..902e07a727 --- /dev/null +++ b/rts/gmp/mpn/cray/sub_n.c @@ -0,0 +1,97 @@ +/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + For Cray vector processors. + + Copyright (C) 1996, 2000 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +mpn_sub_n (c, a, b, n) + mp_ptr c; + mp_srcptr a, b; + mp_size_t n; +{ + mp_size_t i; + mp_size_t nm1 = n - 1; + int more_carries = 0; + int carry_out; + + /* For small operands the non-vector code is faster. 
*/ + if (n < 16) + goto sequential; + + if (a == c || b == c) + { + TMP_DECL (marker); + TMP_MARK (marker); + if (c == a) + { + /* allocate temp space for a */ + mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + MPN_COPY (ax, a, n); + a = (mp_srcptr) ax; + } + if (c == b) + { + /* allocate temp space for b */ + mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + MPN_COPY (bx, b, n); + b = (mp_srcptr) bx; + } + carry_out = mpn_sub_n (c, a, b, n); + TMP_FREE (marker); + return carry_out; + } + + carry_out = a[nm1] < b[nm1]; + +#pragma _CRI ivdep /* Cray PVP systems */ + for (i = nm1; i > 0; i--) + { + int cy_in; mp_limb_t t; + cy_in = a[i - 1] < b[i - 1]; + t = a[i] - b[i]; + more_carries += t < cy_in; + c[i] = t - cy_in; + } + c[0] = a[0] - b[0]; + + if (more_carries) + { + /* This won't vectorize, but we should come here rarely. */ + int cy; + sequential: + cy = 0; + for (i = 0; i < n; i++) + { + mp_limb_t ai, ci, t; + ai = a[i]; + t = b[i] + cy; + cy = t < cy; + ci = ai - t; + cy += ci > ai; + c[i] = ci; + } + carry_out = cy; + } + + return carry_out; +} diff --git a/rts/gmp/mpn/cray/submul_1.c b/rts/gmp/mpn/cray/submul_1.c new file mode 100644 index 0000000000..4d2fb13c62 --- /dev/null +++ b/rts/gmp/mpn/cray/submul_1.c @@ -0,0 +1,46 @@ +/* mpn_submul_1 for Cray PVP. + +Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb) +{ + mp_ptr p0, p1, tp; + mp_limb_t cy_limb; + TMP_DECL (marker); + TMP_MARK (marker); + + p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB); + + GMPN_MULWW (p1, p0, up, &n, &limb); + cy_limb = mpn_sub_n (tp, rp, p0, n); + rp[0] = tp[0]; + cy_limb += mpn_sub_n (rp + 1, tp + 1, p1, n - 1); + cy_limb += p1[n - 1]; + + TMP_FREE (marker); + return cy_limb; +} diff --git a/rts/gmp/mpn/generic/add_n.c b/rts/gmp/mpn/generic/add_n.c new file mode 100644 index 0000000000..5fcb7e4835 --- /dev/null +++ b/rts/gmp/mpn/generic/add_n.c @@ -0,0 +1,62 @@ +/* mpn_add_n -- Add two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_add_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = (y < cy); /* get out carry from that addition */ + y = x + y; /* add other addend */ + cy = (y < x) + cy; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while (++j != 0); + + return cy; +} diff --git a/rts/gmp/mpn/generic/addmul_1.c b/rts/gmp/mpn/generic/addmul_1.c new file mode 100644 index 0000000000..746ae31307 --- /dev/null +++ b/rts/gmp/mpn/generic/addmul_1.c @@ -0,0 +1,65 @@ +/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, add the S1_SIZE least significant limbs of the product to the + limb vector pointed to by RES_PTR. Return the most significant limb of + the product, adjusted for carry-out from the addition. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += (prod_low < x); + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/rts/gmp/mpn/generic/addsub_n.c b/rts/gmp/mpn/generic/addsub_n.c new file mode 100644 index 0000000000..c9bab3ef60 --- /dev/null +++ b/rts/gmp/mpn/generic/addsub_n.c @@ -0,0 +1,167 @@ +/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef L1_CACHE_SIZE +#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */ +#endif + +#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6) + + +/* mpn_addsub_n. + r1[] = s1[] + s2[] + r2[] = s1[] - s2[] + All operands have n limbs. + In-place operations allowed. */ +mp_limb_t +#if __STDC__ +mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n) +#else +mpn_addsub_n (r1p, r2p, s1p, s2p, n) + mp_ptr r1p, r2p; + mp_srcptr s1p, s2p; + mp_size_t n; +#endif +{ + mp_limb_t acyn, acyo; /* carry for add */ + mp_limb_t scyn, scyo; /* carry for subtract */ + mp_size_t off; /* offset in operands */ + mp_size_t this_n; /* size of current chunk */ + + /* We alternatingly add and subtract in chunks that fit into the (L1) + cache. Since the chunks are several hundred limbs, the function call + overhead is insignificant, but we get much better locality. */ + + /* We have three variant of the inner loop, the proper loop is chosen + depending on whether r1 or r2 are the same operand as s1 or s2. */ + + if (r1p != s1p && r1p != s2p) + { + /* r1 is not identical to either input operand. We can therefore write + to r1 directly, without using temporary storage. 
*/ + acyo = 0; + scyo = 0; + for (off = 0; off < n; off += PART_SIZE) + { + this_n = MIN (n - off, PART_SIZE); +#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n + acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo); +#else + acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n); + acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo); +#endif +#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n + scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo); +#else + scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n); + scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo); +#endif + } + } + else if (r2p != s1p && r2p != s2p) + { + /* r2 is not identical to either input operand. We can therefore write + to r2 directly, without using temporary storage. */ + acyo = 0; + scyo = 0; + for (off = 0; off < n; off += PART_SIZE) + { + this_n = MIN (n - off, PART_SIZE); +#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n + scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo); +#else + scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n); + scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo); +#endif +#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n + acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo); +#else + acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n); + acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo); +#endif + } + } + else + { + /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2=s2 or vice versa) + Need temporary storage. 
*/ + mp_limb_t tp[PART_SIZE]; + acyo = 0; + scyo = 0; + for (off = 0; off < n; off += PART_SIZE) + { + this_n = MIN (n - off, PART_SIZE); +#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n + acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo); +#else + acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n); + acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo); +#endif +#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n + scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo); +#else + scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n); + scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo); +#endif + MPN_COPY (r1p + off, tp, this_n); + } + } + + return 2 * acyo + scyo; +} + +#ifdef MAIN +#include <stdlib.h> +#include <stdio.h> +#include "timing.h" + +long cputime (); + +int +main (int argc, char **argv) +{ + mp_ptr r1p, r2p, s1p, s2p; + double t; + mp_size_t n; + + n = strtol (argv[1], 0, 0); + + r1p = malloc (n * BYTES_PER_MP_LIMB); + r2p = malloc (n * BYTES_PER_MP_LIMB); + s1p = malloc (n * BYTES_PER_MP_LIMB); + s2p = malloc (n * BYTES_PER_MP_LIMB); + TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n))); + printf (" separate add and sub: %.3f\n", t); + TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n)); + printf ("combined addsub separate variables: %.3f\n", t); + TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n)); + printf (" combined addsub r1 overlap: %.3f\n", t); + TIME (t,mpn_addsub_n(r1p,r2p,r2p,s2p,n)); /* r2 aliases s1 */ + printf (" combined addsub r2 overlap: %.3f\n", t); + TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n)); + printf (" combined addsub in-place: %.3f\n", t); + + return 0; +} +#endif diff --git a/rts/gmp/mpn/generic/bdivmod.c b/rts/gmp/mpn/generic/bdivmod.c new file mode 100644 index 0000000000..c4bcb414e6 --- /dev/null +++ b/rts/gmp/mpn/generic/bdivmod.c @@ -0,0 +1,120 @@ +/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 1999, 2000 Free Software +Foundation, Inc.
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d). + + Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and + returns the high d%BITS_PER_MP_LIMB bits of Q as the result. + + Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the + low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows + the limb vectors at qp to overwrite the low limbs at up, provided qp <= up. + + Preconditions: + 1. V is odd. + 2. usize * BITS_PER_MP_LIMB >= d. + 3. If Q and U overlap, qp <= up. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting researcher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + References: + T. Jebelean, An algorithm for exact division, Journal of Symbolic + Computation, v. 15, 1993, pp. 169-180. + + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122.
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize, unsigned long int d) +#else +mpn_bdivmod (qp, up, usize, vp, vsize, d) + mp_ptr qp; + mp_ptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; + unsigned long int d; +#endif +{ + mp_limb_t v_inv; + + /* 1/V mod 2^BITS_PER_MP_LIMB. */ + modlimb_invert (v_inv, vp[0]); + + /* Fast code for two cases previously used by the accel part of mpn_gcd. + (Could probably remove this now it's inlined there.) */ + if (usize == 2 && vsize == 2 && + (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB)) + { + mp_limb_t hi, lo; + mp_limb_t q = up[0] * v_inv; + umul_ppmm (hi, lo, q, vp[0]); + up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q; + if (d == 2*BITS_PER_MP_LIMB) + q = up[1] * v_inv, up[1] = 0, qp[1] = q; + return 0; + } + + /* Main loop: each pass stores one full quotient limb and advances up by one limb. */ + while (d >= BITS_PER_MP_LIMB) + { + mp_limb_t q = up[0] * v_inv; + mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + d -= BITS_PER_MP_LIMB; + up += 1, usize -= 1; + *qp++ = q; + } + + if (d) /* fewer than BITS_PER_MP_LIMB quotient bits remain; they are returned, not stored */ + { + mp_limb_t b; + mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1); + if (q <= 1) + { + if (q == 0) + return 0; + else + b = mpn_sub_n (up, up, vp, MIN (usize, vsize)); + } + else + b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); + + if (usize > vsize) + mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b); + return q; + } + + return 0; +} diff --git a/rts/gmp/mpn/generic/bz_divrem_n.c b/rts/gmp/mpn/generic/bz_divrem_n.c new file mode 100644 index 0000000000..d234b22af5 --- /dev/null +++ b/rts/gmp/mpn/generic/bz_divrem_n.c @@ -0,0 +1,153 @@ +/* mpn_bz_divrem_n and auxiliary routines. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE + INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A + FUTURE GNU MP RELEASE. + + +Copyright (C) 2000 Free Software Foundation, Inc. +Contributed by Paul Zimmermann. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* +[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler, + Technical report MPI-I-98-1-022, october 1998. + http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz +*/ + +static mp_limb_t mpn_bz_div_3_halves_by_2 + _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)); + + +/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than + div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way, + i.e. 
n/2 >= KARATSUBA_MUL_THRESHOLD */ +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) +#endif + +#if 0 +static +unused_mpn_divrem (qp, qxn, np, nn, dp, dn) + mp_ptr qp; + mp_size_t qxn; + mp_ptr np; + mp_size_t nn; + mp_srcptr dp; + mp_size_t dn; +{ + /* This might be useful: */ + if (qxn != 0) + { + mp_limb_t c; + mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB); + MPN_COPY (tp + qxn - nn, np, nn); + MPN_ZERO (tp, qxn); + c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn); + /* Maybe copy proper part of tp to np? Documentation is unclear about + the returned np value when qxn != 0 */ + return c; + } +} +#endif + + +/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n) + by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n). + Returns most significant limb of the quotient, which is 0 or 1. + Requires that the most significant bit of the divisor is set. */ + +mp_limb_t +#if __STDC__ +mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n) +#else +mpn_bz_divrem_n (qp, np, dp, n) + mp_ptr qp; + mp_ptr np; + mp_srcptr dp; + mp_size_t n; +#endif +{ + mp_limb_t qhl, cc; + + if (n % 2 != 0) + { + qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1); + cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]); + cc = mpn_sub_1 (np + n, np + n, 1, cc); + if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]); + while (cc) + { + qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1); + cc -= mpn_add_n (np + 1, np + 1, dp, n); + } + qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, + mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); + } + else + { + mp_size_t n2 = n/2; + qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2); + qhl += mpn_add_1 (qp + n2, qp + n2, n2, + mpn_bz_div_3_halves_by_2 (qp, np, dp, n2)); + } + return qhl; +} + + +/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n), + the remainder in (np, 2n) */ + +static mp_limb_t +#if __STDC__ +mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, 
mp_size_t n) +#else +mpn_bz_div_3_halves_by_2 (qp, np, dp, n) + mp_ptr qp; + mp_ptr np; + mp_srcptr dp; + mp_size_t n; +#endif +{ + mp_size_t twon = n + n; + mp_limb_t qhl, cc; + mp_ptr tmp; + TMP_DECL (marker); + + TMP_MARK (marker); + if (n < BZ_THRESHOLD) + qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n); + else + qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n); + tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB); + mpn_mul_n (tmp, qp, dp, n); + cc = mpn_sub_n (np, np, tmp, twon); + TMP_FREE (marker); + if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n); + while (cc) + { + qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1); + cc -= mpn_add_n (np, np, dp, twon); + } + return qhl; +} diff --git a/rts/gmp/mpn/generic/cmp.c b/rts/gmp/mpn/generic/cmp.c new file mode 100644 index 0000000000..8e9792f54e --- /dev/null +++ b/rts/gmp/mpn/generic/cmp.c @@ -0,0 +1,56 @@ +/* mpn_cmp -- Compare two low-level natural-number integers. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + There are no restrictions on the relative sizes of + the two arguments. 
+ Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */ + +int +#if __STDC__ +mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size) +#else +mpn_cmp (op1_ptr, op2_ptr, size) + mp_srcptr op1_ptr; + mp_srcptr op2_ptr; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) + { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + diff: + /* This can *not* be simplified to + op1_word - op2_word + since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/rts/gmp/mpn/generic/diveby3.c b/rts/gmp/mpn/generic/diveby3.c new file mode 100644 index 0000000000..a2fb552bfa --- /dev/null +++ b/rts/gmp/mpn/generic/diveby3.c @@ -0,0 +1,77 @@ +/* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */ + +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + + +#include "gmp.h" +#include "gmp-impl.h" + + +/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB. + 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
*/ +#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1) + + +/* The "c += ..."s are adding the high limb of 3*l to c. That high limb + will be 0, 1 or 2. Doing two separate "+="s seems to turn out better + code on gcc (as of 2.95.2 at least). + + When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a + limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3 + gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of + only 0 or 1 respectively. Hence the carry out of each stage and for the + return value is always only 0, 1 or 2. */ + +mp_limb_t +#if __STDC__ +mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c) +#else +mpn_divexact_by3c (dst, src, size, c) + mp_ptr dst; + mp_srcptr src; + mp_size_t size; + mp_limb_t c; +#endif +{ + mp_size_t i; + + ASSERT (size >= 1); + + i = 0; + do + { + mp_limb_t l, s; + + s = src[i]; + l = s - c; + c = (l > s); + + l *= INVERSE_3; + dst[i] = l; + + c += (l > MP_LIMB_T_MAX/3); + c += (l > (MP_LIMB_T_MAX/3)*2); + } + while (++i < size); + + return c; +} diff --git a/rts/gmp/mpn/generic/divrem.c b/rts/gmp/mpn/generic/divrem.c new file mode 100644 index 0000000000..30673e76d9 --- /dev/null +++ b/rts/gmp/mpn/generic/divrem.c @@ -0,0 +1,101 @@ +/* mpn_divrem -- Divide natural numbers, producing both remainder and + quotient. This is now just a middle layer for calling the new + internal mpn_tdiv_qr. + +Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +#if __STDC__ +mpn_divrem (mp_ptr qp, mp_size_t qxn, + mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn) +#else +mpn_divrem (qp, qxn, np, nn, dp, dn) + mp_ptr qp; + mp_size_t qxn; + mp_ptr np; + mp_size_t nn; + mp_srcptr dp; + mp_size_t dn; +#endif +{ + if (dn == 1) + { + mp_limb_t ret; + mp_ptr q2p; + mp_size_t qn; + TMP_DECL (marker); + + TMP_MARK (marker); + q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB); + + np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]); + qn = nn + qxn - 1; + MPN_COPY (qp, q2p, qn); + ret = q2p[qn]; + + TMP_FREE (marker); + return ret; + } + else if (dn == 2) + { + return mpn_divrem_2 (qp, qxn, np, nn, dp); + } + else + { + mp_ptr rp, q2p; + mp_limb_t qhl; + mp_size_t qn; + TMP_DECL (marker); + + TMP_MARK (marker); + if (qxn != 0) + { + mp_ptr n2p; + n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB); + MPN_ZERO (n2p, qxn); + MPN_COPY (n2p + qxn, np, nn); + q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB); + rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn); + MPN_COPY (np, rp, dn); + qn = nn - dn + qxn; + MPN_COPY (qp, q2p, qn); + qhl = q2p[qn]; + } + else + { + q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB); + rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn); + MPN_COPY (np, rp, dn); /* overwrite np area with remainder */ + 
qn = nn - dn; + MPN_COPY (qp, q2p, qn); + qhl = q2p[qn]; + } + TMP_FREE (marker); + return qhl; + } +} diff --git a/rts/gmp/mpn/generic/divrem_1.c b/rts/gmp/mpn/generic/divrem_1.c new file mode 100644 index 0000000000..e93f241c9d --- /dev/null +++ b/rts/gmp/mpn/generic/divrem_1.c @@ -0,0 +1,248 @@ +/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. + +Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + + +/* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb) + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + + QUOT_PTR and DIVIDEND_PTR might point to the same limb. 
*/
+/* Cost-model fallbacks: when no timings are supplied, a multiply costs 1 and
+   a divide costs the same as a multiply.  With these values the test
+   UDIV_TIME > 2 * UMUL_TIME + 6 used below is false, so the pre-inverted
+   code paths are not taken.  */
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+/* Schoolbook division of {dividend_ptr, dividend_size} by one limb, working
+   from the most significant limb down.  The running remainder R is the high
+   limb of each two-limb by one-limb step.  There are three variants of the
+   same loop: pre-inverted divisor (with or without normalization shift),
+   plain udiv_qrnnd with a normalization shift, and plain udiv_qrnnd without
+   one.  udiv_qrnnd, udiv_qrnnd_preinv, invert_limb and count_leading_zeros
+   come from longlong.h / gmp-impl.h and are not defined in this file.  */
+static mp_limb_t
+#if __STDC__
+__gmpn_divmod_1_internal (mp_ptr quot_ptr,
+                          mp_srcptr dividend_ptr, mp_size_t dividend_size,
+                          mp_limb_t divisor_limb)
+#else
+__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+     mp_ptr quot_ptr;
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t n1, n0, r;
+  int dummy;    /* Not referenced by any statement written in this function.  */
+
+  /* ??? Should this be handled at all?  Rely on callers?  */
+  if (dividend_size == 0)
+    return 0;
+
+  /* If multiplication is much faster than division, and the
+     dividend is large, pre-invert the divisor, and use
+     only multiplications in the inner loop.  */
+
+  /* This test should be read:
+       Does it ever help to use udiv_qrnnd_preinv?
+         && Does what we save compensate for the inversion overhead?  */
+  if (UDIV_TIME > (2 * UMUL_TIME + 6)
+      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+    {
+      int normalization_steps;
+
+      count_leading_zeros (normalization_steps, divisor_limb);
+      if (normalization_steps != 0)
+        {
+          mp_limb_t divisor_limb_inverted;
+
+          /* Shift the divisor left until its top bit is set.  The dividend
+             is shifted by the same amount on the fly inside the loop.  */
+          divisor_limb <<= normalization_steps;
+          invert_limb (divisor_limb_inverted, divisor_limb);
+
+          /* The bits shifted out of the top dividend limb seed the
+             remainder.  */
+          n1 = dividend_ptr[dividend_size - 1];
+          r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+          /* Possible optimization:
+             if (r == 0
+             && divisor_limb > ((n1 << normalization_steps)
+                             | (dividend_ptr[dividend_size - 2] >> ...)))
+             ...one division less...  */
+
+          /* Each step divides R and the shifted limb assembled from N1 and
+             N0.  The quotient limb lands one position above I.  */
+          for (i = dividend_size - 2; i >= 0; i--)
+            {
+              n0 = dividend_ptr[i];
+              udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
+                                 ((n1 << normalization_steps)
+                                  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+                                 divisor_limb, divisor_limb_inverted);
+              n1 = n0;
+            }
+          /* Final step: the low limb has nothing below it to shift in.  */
+          udiv_qrnnd_preinv (quot_ptr[0], r, r,
+                             n1 << normalization_steps,
+                             divisor_limb, divisor_limb_inverted);
+          /* Undo the normalization shift on the remainder.  */
+          return r >> normalization_steps;
+        }
+      else
+        {
+          mp_limb_t divisor_limb_inverted;
+
+          invert_limb (divisor_limb_inverted, divisor_limb);
+
+          i = dividend_size - 1;
+          r = dividend_ptr[i];
+
+          /* If the top limb is smaller than the divisor, its quotient limb
+             is 0 and the limb itself becomes the starting remainder.
+             Otherwise start from remainder 0 and divide the top limb in the
+             loop.  */
+          if (r >= divisor_limb)
+            r = 0;
+          else
+            {
+              quot_ptr[i] = 0;
+              i--;
+            }
+
+          for (; i >= 0; i--)
+            {
+              n0 = dividend_ptr[i];
+              udiv_qrnnd_preinv (quot_ptr[i], r, r,
+                                 n0, divisor_limb, divisor_limb_inverted);
+            }
+          return r;
+        }
+    }
+  else
+    {
+      if (UDIV_NEEDS_NORMALIZATION)
+        {
+          int normalization_steps;
+
+          count_leading_zeros (normalization_steps, divisor_limb);
+          if (normalization_steps != 0)
+            {
+              /* Same shifted scheme as the pre-inverted case above, using
+                 udiv_qrnnd directly.  */
+              divisor_limb <<= normalization_steps;
+
+              n1 = dividend_ptr[dividend_size - 1];
+              r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+              /* Possible optimization:
+                 if (r == 0
+                 && divisor_limb > ((n1 << normalization_steps)
+                                 | (dividend_ptr[dividend_size - 2] >> ...)))
+                 ...one division less...  */
+
+              for (i = dividend_size - 2; i >= 0; i--)
+                {
+                  n0 = dividend_ptr[i];
+                  udiv_qrnnd (quot_ptr[i + 1], r, r,
+                              ((n1 << normalization_steps)
+                               | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+                              divisor_limb);
+                  n1 = n0;
+                }
+              udiv_qrnnd (quot_ptr[0], r, r,
+                          n1 << normalization_steps,
+                          divisor_limb);
+              return r >> normalization_steps;
+            }
+        }
+      /* No normalization needed, either because udiv_qrnnd doesn't require
+         it, or because DIVISOR_LIMB is already normalized.  */
+
+      i = dividend_size - 1;
+      r = dividend_ptr[i];
+
+      /* Same top-limb shortcut as in the pre-inverted, unshifted case.  */
+      if (r >= divisor_limb)
+        r = 0;
+      else
+        {
+          quot_ptr[i] = 0;
+          i--;
+        }
+
+      for (; i >= 0; i--)
+        {
+          n0 = dividend_ptr[i];
+          udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
+        }
+      return r;
+    }
+}
+
+
+/* Divide {np,nn} by the limb D.  The NN integer quotient limbs are stored at
+   qp[qxn] and up, and QXN further fraction limbs, developed by continuing
+   the division with zero limbs, are stored at qp[0] .. qp[qxn-1].  Returns
+   the remainder left after the last fraction limb.  */
+mp_limb_t
+#if __STDC__
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+              mp_srcptr np, mp_size_t nn,
+              mp_limb_t d)
+#else
+mpn_divrem_1 (qp, qxn, np, nn, d)
+     mp_ptr qp;
+     mp_size_t qxn;
+     mp_srcptr np;
+     mp_size_t nn;
+     mp_limb_t d;
+#endif
+{
+  mp_limb_t rlimb;
+  mp_size_t i;
+
+  /* Develop integer part of quotient.  */
+  rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);
+
+  /* Develop fraction part of quotient.  This is not as fast as it should;
+     the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
+     too.  */
+  if (UDIV_NEEDS_NORMALIZATION)
+    {
+      int normalization_steps;
+
+      count_leading_zeros (normalization_steps, d);
+      if (normalization_steps != 0)
+        {
+          /* Scale divisor and remainder by the same shift so the divisor
+             has its top bit set, then shift the final remainder back.  */
+          d <<= normalization_steps;
+          rlimb <<= normalization_steps;
+
+          for (i = qxn - 1; i >= 0; i--)
+            udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+          return rlimb >> normalization_steps;
+        }
+      else
+        /* fall out */
+        ;
+    }
+
+  /* Each fraction limb divides the remainder followed by a zero limb.  */
+  for (i = qxn - 1; i >= 0; i--)
+    udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+  return rlimb;
+}
diff --git a/rts/gmp/mpn/generic/divrem_2.c b/rts/gmp/mpn/generic/divrem_2.c
new file mode 100644
index 0000000000..0bc31ae2e7
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem_2.c
@@ -0,0 +1,151 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright (C) 1993, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/2) and write + the NSIZE-2 least significant quotient limbs at QP + and the 2-limb remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction limbs and append them after the + other quotient limbs. + Return the most significant limb of the quotient, this is always 0 or 1. + + Preconditions: + 0. NSIZE >= 2. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + 2 >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM.) + 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. 
*/
+/* Schoolbook division by the two-limb divisor d1:d0.  One quotient limb is
+   produced per loop iteration, from the most significant position down.  */
+mp_limb_t
+#if __STDC__
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+              mp_ptr np, mp_size_t nsize,
+              mp_srcptr dp)
+#else
+mpn_divrem_2 (qp, qxn, np, nsize, dp)
+     mp_ptr qp;
+     mp_size_t qxn;
+     mp_ptr np;
+     mp_size_t nsize;
+     mp_srcptr dp;
+#endif
+{
+  mp_limb_t most_significant_q_limb = 0;
+  mp_size_t i;
+  mp_limb_t n1, n0, n2;
+  mp_limb_t d1, d0;
+  mp_limb_t d1inv;
+  int have_preinv;
+
+  /* Point NP at the two most significant numerator limbs.  n1:n0 carries
+     the running partial remainder from here on.  */
+  np += nsize - 2;
+  d1 = dp[1];
+  d0 = dp[0];
+  n1 = np[1];
+  n0 = np[0];
+
+  /* If the top two numerator limbs are not smaller than the divisor,
+     subtract it once.  That single subtraction is the 0-or-1 value
+     returned as the most significant quotient limb.  */
+  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+    {
+      sub_ddmmss (n1, n0, n1, n0, d1, d0);
+      most_significant_q_limb = 1;
+    }
+
+  /* If multiplication is much faster than division, preinvert the most
+     significant divisor limb before entering the loop.  have_preinv and
+     d1inv are assigned only under this condition; the read of have_preinv
+     in the loop is guarded by the same UDIV_TIME test.  */
+  if (UDIV_TIME > 2 * UMUL_TIME + 6)
+    {
+      have_preinv = 0;
+      if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - 2) > UDIV_TIME)
+        {
+          invert_limb (d1inv, d1);
+          have_preinv = 1;
+        }
+    }
+
+  /* Quotient positions qxn .. qxn+nsize-3 are the integer part; positions
+     below qxn are the fraction part.  */
+  for (i = qxn + nsize - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      mp_limb_t r;
+
+      /* Integer part: step down to the next numerator limb.  Fraction
+         part: stay put and feed in a zero limb, overwriting np[0].  */
+      if (i >= qxn)
+        np--;
+      else
+        np[0] = 0;
+
+      if (n1 == d1)
+        {
+          /* Q should be either 111..111 or 111..110.  Need special treatment
+             of this rare case as normal division would give overflow.  */
+          q = ~(mp_limb_t) 0;
+
+          r = n0 + d1;
+          if (r < d1)   /* Carry in the addition? */
+            {
+              add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+              qp[i] = q;
+              continue;
+            }
+          n1 = d0 - (d0 != 0);
+          n0 = -d0;
+        }
+      else
+        {
+          /* Estimate Q from the high divisor limb alone, then form the
+             two-limb product q * d0 in n1:n0 for the correction test.  */
+          if (UDIV_TIME > 2 * UMUL_TIME + 6 && have_preinv)
+            udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+          else
+            udiv_qrnnd (q, r, n1, n0, d1);
+          umul_ppmm (n1, n0, d0, q);
+        }
+
+      n2 = np[0];
+
+    q_test:
+      /* Here n1:n0 holds q * d0 and r:n2 is what remains after removing
+         q * d1.  */
+      if (n1 > r || (n1 == r && n0 > n2))
+        {
+          /* The estimated Q was too large.  */
+          q--;
+
+          sub_ddmmss (n1, n0, n1, n0, 0, d0);
+          r += d1;
+          if (r >= d1)  /* If not carry, test Q again.  */
+            goto q_test;
+        }
+
+      qp[i] = q;
+      /* New partial remainder: r:n2 minus q * d0.  */
+      sub_ddmmss (n1, n0, r, n2, n1, n0);
+    }
+  /* Store the final two-limb remainder at the current NP position.  */
+  np[1] = n1;
+  np[0] = n0;
+
+  return most_significant_q_limb;
+}
diff --git a/rts/gmp/mpn/generic/dump.c b/rts/gmp/mpn/generic/dump.c
new file mode 100644
index 0000000000..66f375c74b
--- /dev/null
+++ b/rts/gmp/mpn/generic/dump.c
@@ -0,0 +1,76 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS NOT SAFE TO
+   CALL THIS FUNCTION DIRECTLY.  IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+   FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpn_dump (mp_srcptr ptr, mp_size_t size) +#else +mpn_dump (ptr, size) + mp_srcptr ptr; + mp_size_t size; +#endif +{ + MPN_NORMALIZE (ptr, size); + + if (size == 0) + printf ("0\n"); + else + { + size--; + if (BYTES_PER_MP_LIMB > sizeof (long)) + { + if ((ptr[size] >> BITS_PER_MP_LIMB/2) != 0) + { + printf ("%lX", + (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2)); + printf ("%0*lX", (int) (BYTES_PER_MP_LIMB), + (unsigned long) ptr[size]); + } + else + printf ("%lX", (unsigned long) ptr[size]); + } + else + printf ("%lX", ptr[size]); + + while (size) + { + size--; + if (BYTES_PER_MP_LIMB > sizeof (long)) + { + printf ("%0*lX", (int) (BYTES_PER_MP_LIMB), + (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2)); + printf ("%0*lX", (int) (BYTES_PER_MP_LIMB), + (unsigned long) ptr[size]); + } + else + printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]); + } + printf ("\n"); + } +} diff --git a/rts/gmp/mpn/generic/gcd.c b/rts/gmp/mpn/generic/gcd.c new file mode 100644 index 0000000000..059e219a06 --- /dev/null +++ b/rts/gmp/mpn/generic/gcd.c @@ -0,0 +1,414 @@ +/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Integer greatest common divisor of two unsigned integers, using + the accelerated algorithm (see reference below). + + mp_size_t mpn_gcd (gp, up, usize, vp, vsize). + + Preconditions [U = (up, usize) and V = (vp, vsize)]: + + 1. V is odd. + 2. numbits(U) >= numbits(V). + + Both U and V are destroyed by the operation. The result is stored at gp, + and its size is returned. + + Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting researcher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + Refer to + K. Weber, The accelerated integer GCD algorithm, ACM Transactions on + Mathematical Software, v. 21 (March), 1995, pp. 111-122. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated + algorithm is used, otherwise the binary algorithm is used. This may be + adjusted for different architectures. */ +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 5 +#endif + +/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated + algorithm reduces using the bmod operation. Otherwise, the k-ary reduction + is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */ +enum + { + BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 + }; + + +/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. + Both U and V must be odd. 
*/
+static __gmp_inline mp_size_t
+#if __STDC__
+gcd_2 (mp_ptr vp, mp_srcptr up)
+#else
+gcd_2 (vp, up)
+     mp_ptr vp;
+     mp_srcptr up;
+#endif
+{
+  mp_limb_t u0, u1, v0, v1;
+  mp_size_t vsize;
+  /* Work on local copies of the two limbs of each operand; index 1 is the
+     high limb.  The result is written back through VP and its size in limbs
+     is returned.  */
+  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
+
+  /* Binary GCD steps on the double-limb values: subtract the operand with
+     the smaller high limb from the other, then shift out the trailing zero
+     bits of the difference.  The loop stops as soon as either the high
+     limbs or the low limbs coincide.  */
+  while (u1 != v1 && u0 != v0)
+    {
+      unsigned long int r;
+      if (u1 > v1)
+        {
+          u1 -= v1 + (u0 < v0), u0 -= v0;
+          count_trailing_zeros (r, u0);
+          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
+          u1 >>= r;
+        }
+      else  /* u1 < v1.  */
+        {
+          v1 -= u1 + (v0 < u0), v0 -= u0;
+          count_trailing_zeros (r, v0);
+          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
+          v1 >>= r;
+        }
+    }
+
+  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
+
+  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
+  if (u1 == v1 && u0 == v0)
+    return vsize;
+
+  /* Exactly one limb pair is equal here, so |U - V| is represented by the
+     absolute difference of the other pair; finish with a single-limb GCD.  */
+  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
+  vp[0] = mpn_gcd_1 (vp, vsize, v0);
+
+  return 1;
+}
+
+/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
+   0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
+   In the reference article, D was computed along with N, but it is better to
+   compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
+   the result as a twos' complement signed integer.
+
+   Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
+   article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
+   2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
+   precision.  If N2 > N1 initially, the first iteration of the while loop
+   will swap them.  In all other situations, N1 >= N2 is maintained.  */
+
+static
+#if ! defined (__i386__)
+__gmp_inline                    /* don't inline this for the x86 */
+#endif
+mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+     mp_srcptr cp;
+#endif
+{
+  unsigned long int leading_zero_bits = 0;
+
+  mp_limb_t n1_l = cp[0];       /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
+  mp_limb_t n1_h = cp[1];
+
+  mp_limb_t n2_l = -n1_l;       /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
+  mp_limb_t n2_h = ~n1_h;
+
+  /* Main loop.  */
+  while (n2_h)                  /* While N2 >= 2^BITS_PER_MP_LIMB.  */
+    {
+      /* N1 <-- N1 % N2.  The remainder is formed by shift-and-subtract:
+         N2 is shifted left to line up with the bit position tracked in
+         leading_zero_bits, then conditionally subtracted from N1 while
+         being shifted back one bit at a time.  */
+      if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
+        {
+          unsigned long int i;
+          count_leading_zeros (i, n2_h);
+          i -= leading_zero_bits, leading_zero_bits += i;
+          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+          do
+            {
+              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+              i -= 1;
+            }
+          while (i);
+        }
+      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+      /* Exchange N1 and N2 for the next step.  */
+      MP_LIMB_T_SWAP (n1_h, n2_h);
+      MP_LIMB_T_SWAP (n1_l, n2_l);
+    }
+
+  /* N2 now fits in one limb.  */
+  return n2_l;
+}
+/* Compute the GCD of {up,usize} and {vp,vsize}, copy it to GP unless it
+   already lives there, and return its size in limbs.  For vsize at or above
+   GCD_ACCEL_THRESHOLD the accelerated (bmod / k-ary) reduction runs first on
+   temporary copies, and the binary algorithm then finishes in two passes.
+   Otherwise a single binary pass is made directly on UP and VP.  */
+mp_size_t
+#if __STDC__
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, usize, vp, vsize)
+     mp_ptr gp;
+     mp_ptr up;
+     mp_size_t usize;
+     mp_ptr vp;
+     mp_size_t vsize;
+#endif
+{
+  mp_ptr orig_vp = vp;
+  mp_size_t orig_vsize = vsize;
+  int binary_gcd_ctr;           /* Number of times binary gcd will execute.  */
+  TMP_DECL (marker);
+
+  TMP_MARK (marker);
+
+  /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
+     Two EXTRA limbs for U and V are required for kary reduction.  */
+  if (vsize >= GCD_ACCEL_THRESHOLD)
+    {
+      unsigned long int vbitsize, d;
+      mp_ptr orig_up = up;
+      mp_size_t orig_usize = usize;
+      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
+
+      /* Reduce a scratch copy of U; ORIG_UP stays intact for the second
+         binary pass below.  */
+      MPN_COPY (anchor_up, orig_up, usize);
+      up = anchor_up;
+
+      /* d <-- numbits(U) - numbits(V) + 1.  */
+      count_leading_zeros (d, up[usize-1]);
+      d = usize * BITS_PER_MP_LIMB - d;
+      count_leading_zeros (vbitsize, vp[vsize-1]);
+      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+      d = d - vbitsize + 1;
+
+      /* Use bmod reduction to quickly discover whether V divides U.  */
+      up[usize++] = 0;                          /* Insert leading zero.  */
+      mpn_bdivmod (up, up, usize, vp, vsize, d);
+
+      /* Now skip U/V mod 2^d and any low zero limbs.  */
+      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+      while (usize != 0 && up[0] == 0)
+        up++, usize--;
+
+      if (usize == 0)                           /* GCD == ORIG_V.  */
+        goto done;
+
+      /* From here on V is reduced in a scratch copy as well.  */
+      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
+      MPN_COPY (vp, orig_vp, vsize);
+
+      do                                        /* Main loop.  */
+        {
+          /* mpn_com_n can't be used here because anchor_up and up may
+             partially overlap */
+          if (up[usize-1] & MP_LIMB_T_HIGHBIT)  /* U < 0; take twos' compl. */
+            {
+              mp_size_t i;
+              anchor_up[0] = -up[0];
+              for (i = 1; i < usize; i++)
+                anchor_up[i] = ~up[i];
+              up = anchor_up;
+            }
+
+          MPN_NORMALIZE_NOT_ZERO (up, usize);
+
+          if ((up[0] & 1) == 0)                 /* Result even; remove twos. */
+            {
+              unsigned int r;
+              count_trailing_zeros (r, up[0]);
+              mpn_rshift (anchor_up, up, usize, r);
+              usize -= (anchor_up[usize-1] == 0);
+            }
+          else if (anchor_up != up)
+            MPN_COPY_INCR (anchor_up, up, usize);
+
+          /* The reduced U becomes the new V, and the old V the new U.  */
+          MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
+          up = anchor_up;
+
+          if (vsize <= 2)               /* Kary can't handle < 2 limbs and  */
+            break;                      /* isn't efficient for == 2 limbs.  */
+
+          /* d <-- (previous V bit size) - (new V bit size) + 1.  */
+          d = vbitsize;
+          count_leading_zeros (vbitsize, vp[vsize-1]);
+          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+          d = d - vbitsize + 1;
+
+          if (d > BMOD_THRESHOLD)       /* Bmod reduction.  */
+            {
+              up[usize++] = 0;
+              mpn_bdivmod (up, up, usize, vp, vsize, d);
+              d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+            }
+          else                          /* Kary reduction.  */
+            {
+              mp_limb_t bp[2], cp[2];
+
+              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
+              {
+                mp_limb_t u_inv, hi, lo;
+                modlimb_invert (u_inv, up[0]);
+                cp[0] = vp[0] * u_inv;
+                umul_ppmm (hi, lo, cp[0], up[0]);
+                cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv;
+              }
+
+              /* U <-- find_a (C)  *  U.  */
+              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
+              usize++;
+
+              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
+                 bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
+                 bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2
+
+                 Like V/U above, but simplified because only the low bit of
+                 bp[1] is wanted. */
+              {
+                mp_limb_t v_inv, hi, lo;
+                modlimb_invert (v_inv, vp[0]);
+                bp[0] = up[0] * v_inv;
+                umul_ppmm (hi, lo, bp[0], vp[0]);
+                bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1;
+              }
+
+              up[usize++] = 0;
+              if (bp[1])        /* B < 0: U <-- U + (-B)  * V.  */
+                {
+                   mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
+                   mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
+                }
+              else              /* B >= 0:  U <-- U - B * V.  */
+                {
+                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
+                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+                }
+
+              up += 2, usize -= 2;  /* At least two low limbs are zero.  */
+            }
+
+          /* Must remove low zero limbs before complementing.  */
+          while (usize != 0 && up[0] == 0)
+            up++, usize--;
+        }
+      while (usize);
+
+      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
+      up = orig_up, usize = orig_usize;
+      binary_gcd_ctr = 2;
+    }
+  else
+    binary_gcd_ctr = 1;
+
+  /* Finish up with the binary algorithm.  Executes once or twice.  The loop
+     step makes the second pass use ORIG_V as U.  */
+  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
+    {
+      if (usize > 2)            /* First make U close to V in size.  */
+        {
+          unsigned long int vbitsize, d;
+          count_leading_zeros (d, up[usize-1]);
+          d = usize * BITS_PER_MP_LIMB - d;
+          count_leading_zeros (vbitsize, vp[vsize-1]);
+          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+          d = d - vbitsize - 1;
+          /* d is unsigned: equal bit sizes make it wrap to the all-ones
+             value, which is excluded explicitly.  */
+          if (d != -(unsigned long int)1 && d > 2)
+            {
+              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
+              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
+            }
+        }
+
+      /* Start binary GCD.  */
+      do
+        {
+          mp_size_t zeros;
+
+          /* Make sure U is odd.  */
+          MPN_NORMALIZE (up, usize);
+          while (up[0] == 0)
+            up += 1, usize -= 1;
+          if ((up[0] & 1) == 0)
+            {
+              unsigned int r;
+              count_trailing_zeros (r, up[0]);
+              mpn_rshift (up, up, usize, r);
+              usize -= (up[usize-1] == 0);
+            }
+
+          /* Keep usize >= vsize.  */
+          if (usize < vsize)
+            MPN_PTR_SWAP (up, usize, vp, vsize);
+
+          if (usize <= 2)                               /* Double precision. */
+            {
+              if (vsize == 1)
+                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
+              else
+                vsize = gcd_2 (vp, up);
+              break;                                    /* Binary GCD done.  */
+            }
+
+          /* Count number of low zero limbs of U - V.  */
+          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
+            continue;
+
+          /* If U < V, swap U and V; in any case, subtract V from U.  */
+          if (zeros == vsize)                           /* Subtract done.  */
+            up += zeros, usize -= zeros;
+          else if (usize == vsize)
+            {
+              /* Same length: locate the highest differing limb to decide
+                 which operand is larger and how long the difference is.  */
+              mp_size_t size = vsize;
+              do
+                size--;
+              while (up[size] == vp[size]);
+              if (up[size] < vp[size])                  /* usize == vsize.  */
+                MP_PTR_SWAP (up, vp);
+              up += zeros, usize = size + 1 - zeros;
+              mpn_sub_n (up, up, vp + zeros, usize);
+            }
+          else
+            {
+              /* U is longer than V: subtract over the limbs of V, then push
+                 any borrow up through the remaining limbs of U.  */
+              mp_size_t size = vsize - zeros;
+              up += zeros, usize -= zeros;
+              if (mpn_sub_n (up, up, vp + zeros, size))
+                {
+                  while (up[size] == 0)                 /* Propagate borrow. */
+                    up[size++] = -(mp_limb_t)1;
+                  up[size] -= 1;
+                }
+            }
+        }
+      while (usize);                                    /* End binary GCD.  */
+    }
+
+done:
+  /* The GCD is {vp,vsize}; hand it to the caller's result area.  */
+  if (vp != gp)
+    MPN_COPY (gp, vp, vsize);
+  TMP_FREE (marker);
+  return vsize;
+}
diff --git a/rts/gmp/mpn/generic/gcd_1.c b/rts/gmp/mpn/generic/gcd_1.c
new file mode 100644
index 0000000000..1832636636
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcd_1.c
@@ -0,0 +1,77 @@
+/* mpn_gcd_1 --
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Does not work for U == 0 or V == 0. It would be tough to make it work for + V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t. */ + +mp_limb_t +#if __STDC__ +mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb) +#else +mpn_gcd_1 (up, size, vlimb) + mp_srcptr up; + mp_size_t size; + mp_limb_t vlimb; +#endif +{ + mp_limb_t ulimb; + unsigned long int u_low_zero_bits, v_low_zero_bits; + + if (size > 1) + { + ulimb = mpn_mod_1 (up, size, vlimb); + if (ulimb == 0) + return vlimb; + } + else + ulimb = up[0]; + + /* Need to eliminate low zero bits. */ + count_trailing_zeros (u_low_zero_bits, ulimb); + ulimb >>= u_low_zero_bits; + + count_trailing_zeros (v_low_zero_bits, vlimb); + vlimb >>= v_low_zero_bits; + + while (ulimb != vlimb) + { + if (ulimb > vlimb) + { + ulimb -= vlimb; + do + ulimb >>= 1; + while ((ulimb & 1) == 0); + } + else /* vlimb > ulimb. */ + { + vlimb -= ulimb; + do + vlimb >>= 1; + while ((vlimb & 1) == 0); + } + } + + return ulimb << MIN (u_low_zero_bits, v_low_zero_bits); +} diff --git a/rts/gmp/mpn/generic/gcdext.c b/rts/gmp/mpn/generic/gcdext.c new file mode 100644 index 0000000000..fe22d779a6 --- /dev/null +++ b/rts/gmp/mpn/generic/gcdext.c @@ -0,0 +1,700 @@ +/* mpn_gcdext -- Extended Greatest Common Divisor. + +Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 17 +#endif + +#ifndef EXTEND +#define EXTEND 1 +#endif + +#if STAT +int arr[BITS_PER_MP_LIMB]; +#endif + + +/* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE) + + Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the + greatest common divisor at GP (unless it is 0), and the first cofactor at + SP. Write the size of the cofactor through the pointer SSIZE. Return the + size of the value at GP. Note that SP might be a negative number; this is + denoted by storing the negative of the size through SSIZE. + + {UP,USIZE} and {VP,VSIZE} are both clobbered. + + The space allocation for all four areas needs to be USIZE+1. + + Preconditions: 1) U >= V. + 2) V > 0. */ + +/* We use Lehmer's algorithm. The idea is to extract the most significant + bits of the operands, and compute the continued fraction for them. We then + apply the gathered cofactors to the full operands. + + Idea 1: After we have performed a full division, don't shift operands back, + but instead account for the extra factors-of-2 thus introduced. 
+   Idea 2: Simple generalization to use divide-and-conquer would give us an
+           algorithm that runs faster than O(n^2).
+   Idea 3: The input numbers need less space as the computation progresses,
+           while the s0 and s1 variables need more space.  To save memory, we
+           could make them share space, and have the latter variables grow
+           into the former.
+   Idea 4: We should not do double-limb arithmetic from the start.  Instead,
+           do things in single-limb arithmetic until the quotients differ,
+           and then switch to double-limb arithmetic.  */
+
+
+/* Division optimized for small quotients: divide the two-limb value n1:n0
+   by the two-limb value d1:d0 using shift-and-subtract, so the cost grows
+   with the number of quotient bits.  Returns the quotient and stores 0 in
+   *qh.  The only case reported as overflow is d1 == 0: then 1 is stored in
+   *qh, 0 is returned and no quotient is computed.  */
+static mp_limb_t
+#if __STDC__
+div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
+#else
+div2 (qh, n1, n0, d1, d0)
+     mp_limb_t *qh;
+     mp_limb_t n1;
+     mp_limb_t n0;
+     mp_limb_t d1;
+     mp_limb_t d0;
+#endif
+{
+  if (d1 == 0)
+    {
+      *qh = 1;
+      return 0;
+    }
+
+  if ((mp_limb_signed_t) n1 < 0)
+    {
+      /* The numerator has its top bit set, so the divisor cannot be shifted
+         past it.  Shift the divisor up until its own top bit is set,
+         counting one quotient bit per position plus one.  */
+      mp_limb_t q;
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
+        {
+          d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+          d0 = d0 << 1;
+        }
+
+      /* Develop CNT quotient bits: compare and subtract first, then shift
+         the divisor back down one bit.  */
+      q = 0;
+      while (cnt)
+        {
+          q <<= 1;
+          if (n1 > d1 || (n1 == d1 && n0 >= d0))
+            {
+              sub_ddmmss (n1, n0, n1, n0, d1, d0);
+              q |= 1;
+            }
+          d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+          d1 = d1 >> 1;
+          cnt--;
+        }
+
+      *qh = 0;
+      return q;
+    }
+  else
+    {
+      /* The numerator's top bit is clear.  Shift the divisor up until it
+         exceeds the numerator; CNT counts the shifts.  */
+      mp_limb_t q;
+      int cnt;
+      for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
+        {
+          d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+          d0 = d0 << 1;
+        }
+
+      /* Develop CNT quotient bits: here the divisor is shifted down one bit
+         before each compare and subtract.  */
+      q = 0;
+      while (cnt)
+        {
+          d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+          d1 = d1 >> 1;
+          q <<= 1;
+          if (n1 > d1 || (n1 == d1 && n0 >= d0))
+            {
+              sub_ddmmss (n1, n0, n1, n0, d1, d0);
+              q |= 1;
+            }
+          cnt--;
+        }
+
+      *qh = 0;
+      return q;
+    }
+}
+
+mp_size_t
+#if EXTEND
+#if __STDC__
+mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
+            mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcdext (gp, s0p, s0size, up, size, vp,
vsize) + mp_ptr gp; + mp_ptr s0p; + mp_size_t *s0size; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#else +#if __STDC__ +mpn_gcd (mp_ptr gp, + mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) +#else +mpn_gcd (gp, up, size, vp, vsize) + mp_ptr gp; + mp_ptr up; + mp_size_t size; + mp_ptr vp; + mp_size_t vsize; +#endif +#endif +{ + mp_limb_t A, B, C, D; + int cnt; + mp_ptr tp, wp; +#if RECORD + mp_limb_t max = 0; +#endif +#if EXTEND + mp_ptr s1p; + mp_ptr orig_s0p = s0p; + mp_size_t ssize; + int sign = 1; +#endif + int use_double_flag; + TMP_DECL (mark); + + TMP_MARK (mark); + + use_double_flag = (size >= GCDEXT_THRESHOLD); + + tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); + wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); +#if EXTEND + s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); + + MPN_ZERO (s0p, size); + MPN_ZERO (s1p, size); + + s0p[0] = 1; + s1p[0] = 0; + ssize = 1; +#endif + + if (size > vsize) + { + /* Normalize V (and shift up U the same amount). */ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; + size += cy != 0; + } + + mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + /* This is really what it boils down to in this case... */ + s0p[0] = 0; + s1p[0] = 1; + sign = -sign; +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + MP_PTR_SWAP (up, vp); + } + + for (;;) + { + mp_limb_t asign; + /* Figure out exact size of V. */ + vsize = size; + MPN_NORMALIZE (vp, vsize); + if (vsize <= 1) + break; + + if (use_double_flag) + { + mp_limb_t uh, vh, ul, vl; + /* Let UH,UL be the most significant limbs of U, and let VH,VL be + the corresponding bits from V. 
*/ + uh = up[size - 1]; + vh = vp[size - 1]; + ul = up[size - 2]; + vl = vp[size - 2]; + count_leading_zeros (cnt, uh); + if (cnt != 0) + { + uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt)); + vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt)); + vl <<= cnt; + ul <<= cnt; + if (size >= 3) + { + ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt)); + vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt)); + } + } + + A = 1; + B = 0; + C = 0; + D = 1; + + asign = 0; + for (;;) + { + mp_limb_t T; + mp_limb_t qh, q1, q2; + mp_limb_t nh, nl, dh, dl; + mp_limb_t t1, t0; + mp_limb_t Th, Tl; + + sub_ddmmss (dh, dl, vh, vl, 0, C); + if ((dl | dh) == 0) + break; + add_ssaaaa (nh, nl, uh, ul, 0, A); + q1 = div2 (&qh, nh, nl, dh, dl); + if (qh != 0) + break; /* could handle this */ + + add_ssaaaa (dh, dl, vh, vl, 0, D); + if ((dl | dh) == 0) + break; + sub_ddmmss (nh, nl, uh, ul, 0, B); + q2 = div2 (&qh, nh, nl, dh, dl); + if (qh != 0) + break; /* could handle this */ + + if (q1 != q2) + break; + + asign = ~asign; + + T = A + q1 * C; + A = C; + C = T; + T = B + q1 * D; + B = D; + D = T; + umul_ppmm (t1, t0, q1, vl); + t1 += q1 * vh; + sub_ddmmss (Th, Tl, uh, ul, t1, t0); + uh = vh, ul = vl; + vh = Th, vl = Tl; + + add_ssaaaa (dh, dl, vh, vl, 0, C); + sub_ddmmss (nh, nl, uh, ul, 0, A); + q1 = div2 (&qh, nh, nl, dh, dl); + if (qh != 0) + break; /* could handle this */ + + sub_ddmmss (dh, dl, vh, vl, 0, D); + if ((dl | dh) == 0) + break; + add_ssaaaa (nh, nl, uh, ul, 0, B); + q2 = div2 (&qh, nh, nl, dh, dl); + if (qh != 0) + break; /* could handle this */ + + if (q1 != q2) + break; + + asign = ~asign; + + T = A + q1 * C; + A = C; + C = T; + T = B + q1 * D; + B = D; + D = T; + umul_ppmm (t1, t0, q1, vl); + t1 += q1 * vh; + sub_ddmmss (Th, Tl, uh, ul, t1, t0); + uh = vh, ul = vl; + vh = Th, vl = Tl; + } +#if EXTEND + if (asign) + sign = -sign; +#endif + } + else /* Same, but using single-limb calculations. 
*/ + { + mp_limb_t uh, vh; + /* Make UH be the most significant limb of U, and make VH be + corresponding bits from V. */ + uh = up[size - 1]; + vh = vp[size - 1]; + count_leading_zeros (cnt, uh); + if (cnt != 0) + { + uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt)); + } + + A = 1; + B = 0; + C = 0; + D = 1; + + asign = 0; + for (;;) + { + mp_limb_t q, T; + if (vh - C == 0 || vh + D == 0) + break; + + q = (uh + A) / (vh - C); + if (q != (uh - B) / (vh + D)) + break; + + asign = ~asign; + + T = A + q * C; + A = C; + C = T; + T = B + q * D; + B = D; + D = T; + T = uh - q * vh; + uh = vh; + vh = T; + + if (vh - D == 0) + break; + + q = (uh - A) / (vh + C); + if (q != (uh + B) / (vh - D)) + break; + + asign = ~asign; + + T = A + q * C; + A = C; + C = T; + T = B + q * D; + B = D; + D = T; + T = uh - q * vh; + uh = vh; + vh = T; + } +#if EXTEND + if (asign) + sign = -sign; +#endif + } + +#if RECORD + max = MAX (A, max); max = MAX (B, max); + max = MAX (C, max); max = MAX (D, max); +#endif + + if (B == 0) + { + mp_limb_t qh; + mp_size_t i; + /* This is quite rare. I.e., optimize something else! */ + + /* Normalize V (and shift up U the same amount). 
*/ + count_leading_zeros (cnt, vp[vsize - 1]); + if (cnt != 0) + { + mp_limb_t cy; + mpn_lshift (vp, vp, vsize, cnt); + cy = mpn_lshift (up, up, size, cnt); + up[size] = cy; + size += cy != 0; + } + + qh = mpn_divmod (up + vsize, up, size, vp, vsize); +#if EXTEND + MPN_COPY (tp, s0p, ssize); + { + mp_size_t qsize; + + qsize = size - vsize; /* size of stored quotient from division */ + if (ssize < qsize) + { + MPN_ZERO (tp + ssize, qsize - ssize); + MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ + for (i = 0; i < ssize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]); + tp[qsize + i] = cy; + } + if (qh != 0) + { + mp_limb_t cy; + cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize); + if (cy != 0) + abort (); + } + } + else + { + MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ + for (i = 0; i < qsize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]); + tp[ssize + i] = cy; + } + if (qh != 0) + { + mp_limb_t cy; + cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize); + if (cy != 0) + { + tp[qsize + ssize] = cy; + s1p[qsize + ssize] = 0; + ssize++; + } + } + } + ssize += qsize; + ssize -= tp[ssize - 1] == 0; + } + + sign = -sign; + MP_PTR_SWAP (s0p, s1p); + MP_PTR_SWAP (s1p, tp); +#endif + size = vsize; + if (cnt != 0) + { + mpn_rshift (up, up, size, cnt); + mpn_rshift (vp, vp, size, cnt); + } + MP_PTR_SWAP (up, vp); + } + else + { +#if EXTEND + mp_size_t tsize, wsize; +#endif + /* T = U*A + V*B + W = U*C + V*D + U = T + V = W */ + +#if STAT + { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x); + arr[BITS_PER_MP_LIMB - cnt]++; } +#endif + if (A == 0) + { + /* B == 1 and C == 1 (D is arbitrary) */ + mp_limb_t cy; + MPN_COPY (tp, vp, size); + MPN_COPY (wp, up, size); + mpn_submul_1 (wp, vp, size, D); + MP_PTR_SWAP (tp, up); + MP_PTR_SWAP (wp, vp); +#if EXTEND + MPN_COPY (tp, s1p, ssize); + tsize = ssize; + tp[ssize] = 0; /* must zero since wp might spill below */ + MPN_COPY (wp, s0p, ssize); + 
cy = mpn_addmul_1 (wp, s1p, ssize, D); + wp[ssize] = cy; + wsize = ssize + (cy != 0); + MP_PTR_SWAP (tp, s0p); + MP_PTR_SWAP (wp, s1p); + ssize = MAX (wsize, tsize); +#endif + } + else + { + if (asign) + { + mp_limb_t cy; + mpn_mul_1 (tp, vp, size, B); + mpn_submul_1 (tp, up, size, A); + mpn_mul_1 (wp, up, size, C); + mpn_submul_1 (wp, vp, size, D); + MP_PTR_SWAP (tp, up); + MP_PTR_SWAP (wp, vp); +#if EXTEND + cy = mpn_mul_1 (tp, s1p, ssize, B); + cy += mpn_addmul_1 (tp, s0p, ssize, A); + tp[ssize] = cy; + tsize = ssize + (cy != 0); + cy = mpn_mul_1 (wp, s0p, ssize, C); + cy += mpn_addmul_1 (wp, s1p, ssize, D); + wp[ssize] = cy; + wsize = ssize + (cy != 0); + MP_PTR_SWAP (tp, s0p); + MP_PTR_SWAP (wp, s1p); + ssize = MAX (wsize, tsize); +#endif + } + else + { + mp_limb_t cy; + mpn_mul_1 (tp, up, size, A); + mpn_submul_1 (tp, vp, size, B); + mpn_mul_1 (wp, vp, size, D); + mpn_submul_1 (wp, up, size, C); + MP_PTR_SWAP (tp, up); + MP_PTR_SWAP (wp, vp); +#if EXTEND + cy = mpn_mul_1 (tp, s0p, ssize, A); + cy += mpn_addmul_1 (tp, s1p, ssize, B); + tp[ssize] = cy; + tsize = ssize + (cy != 0); + cy = mpn_mul_1 (wp, s1p, ssize, D); + cy += mpn_addmul_1 (wp, s0p, ssize, C); + wp[ssize] = cy; + wsize = ssize + (cy != 0); + MP_PTR_SWAP (tp, s0p); + MP_PTR_SWAP (wp, s1p); + ssize = MAX (wsize, tsize); +#endif + } + } + + size -= up[size - 1] == 0; + } + } + +#if RECORD + printf ("max: %lx\n", max); +#endif + +#if STAT + {int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);} +#endif + + if (vsize == 0) + { + if (gp != up && gp != 0) + MPN_COPY (gp, up, size); +#if EXTEND + MPN_NORMALIZE (s0p, ssize); + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); + *s0size = sign >= 0 ? 
ssize : -ssize; +#endif + TMP_FREE (mark); + return size; + } + else + { + mp_limb_t vl, ul, t; +#if EXTEND + mp_size_t qsize, i; +#endif + vl = vp[0]; +#if EXTEND + t = mpn_divmod_1 (wp, up, size, vl); + + MPN_COPY (tp, s0p, ssize); + + qsize = size - (wp[size - 1] == 0); /* size of quotient from division */ + if (ssize < qsize) + { + MPN_ZERO (tp + ssize, qsize - ssize); + MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ + for (i = 0; i < ssize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]); + tp[qsize + i] = cy; + } + } + else + { + MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ + for (i = 0; i < qsize; i++) + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]); + tp[ssize + i] = cy; + } + } + ssize += qsize; + ssize -= tp[ssize - 1] == 0; + + sign = -sign; + MP_PTR_SWAP (s0p, s1p); + MP_PTR_SWAP (s1p, tp); +#else + t = mpn_mod_1 (up, size, vl); +#endif + ul = vl; + vl = t; + while (vl != 0) + { + mp_limb_t t; +#if EXTEND + mp_limb_t q; + q = ul / vl; + t = ul - q * vl; + + MPN_COPY (tp, s0p, ssize); + + MPN_ZERO (s1p + ssize, 1); /* zero s1 too */ + + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp, s1p, ssize, q); + tp[ssize] = cy; + } + + ssize += 1; + ssize -= tp[ssize - 1] == 0; + + sign = -sign; + MP_PTR_SWAP (s0p, s1p); + MP_PTR_SWAP (s1p, tp); +#else + t = ul % vl; +#endif + ul = vl; + vl = t; + } + if (gp != 0) + gp[0] = ul; +#if EXTEND + MPN_NORMALIZE (s0p, ssize); + if (orig_s0p != s0p) + MPN_COPY (orig_s0p, s0p, ssize); + *s0size = sign >= 0 ? ssize : -ssize; +#endif + TMP_FREE (mark); + return 1; + } +} diff --git a/rts/gmp/mpn/generic/get_str.c b/rts/gmp/mpn/generic/get_str.c new file mode 100644 index 0000000000..a713b61825 --- /dev/null +++ b/rts/gmp/mpn/generic/get_str.c @@ -0,0 +1,216 @@ +/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR + to a printable string in STR in base BASE. + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation, +Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Convert the limb vector pointed to by MPTR and MSIZE long to a + char array, using base BASE for the result array. Store the + result in the character array STR. STR must point to an array with + space for the largest possible number represented by a MSIZE long + limb vector + 1 extra character. + + The result is NOT in Ascii, to convert it to printable format, add + '0' or 'A' depending on the base and range. + + Return the number of digits in the result string. + This may include some leading zeros. + + The limb vector pointed to by MPTR is clobbered. 
*/ + +size_t +#if __STDC__ +mpn_get_str (unsigned char *str, int base, mp_ptr mptr, mp_size_t msize) +#else +mpn_get_str (str, base, mptr, msize) + unsigned char *str; + int base; + mp_ptr mptr; + mp_size_t msize; +#endif +{ + mp_limb_t big_base; +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + int normalization_steps; +#endif +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb_t big_base_inverted; +#endif + unsigned int dig_per_u; + mp_size_t out_len; + register unsigned char *s; + + big_base = __mp_bases[base].big_base; + + s = str; + + /* Special case zero, as the code below doesn't handle it. */ + if (msize == 0) + { + s[0] = 0; /* zero is written as a single 0 digit */ + return 1; + } + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Make conversion from most + significant side. */ + mp_limb_t n1, n0; + register int bits_per_digit = big_base; /* for power-of-2 bases the big_base table field is used as the bits-per-digit count */ + register int x; + register int bit_pos; + register int i; + + n1 = mptr[msize - 1]; + count_leading_zeros (x, n1); + + /* BIT_POS should be R when input ends in least sign. nibble, + R + bits_per_digit * n when input ends in n:th least significant + nibble. */ + + { + int bits; + + bits = BITS_PER_MP_LIMB * msize - x; + x = bits % bits_per_digit; + if (x != 0) + bits += bits_per_digit - x; /* round the bit count up to a whole number of digits */ + bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB; + } + + /* Fast loop for bit output. */ + i = msize - 1; + for (;;) + { + bit_pos -= bits_per_digit; + while (bit_pos >= 0) + { + *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1); + bit_pos -= bits_per_digit; + } + i--; + if (i < 0) + break; + n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1); + n1 = mptr[i]; + bit_pos += BITS_PER_MP_LIMB; + *s++ = n0 | (n1 >> bit_pos); /* digit assembled from the low bits of one limb and the high bits of the next */ + } + + *s = 0; + + return s - str; /* number of digits written */ + } + else + { + /* General case. The base is not a power of 2. Make conversion + from least significant end.
*/ + + /* If udiv_qrnnd only handles divisors with the most significant bit + set, prepare BIG_BASE for being a divisor by shifting it to the + left exactly enough to set the most significant bit. */ +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + count_leading_zeros (normalization_steps, big_base); + big_base <<= normalization_steps; +#if UDIV_TIME > 2 * UMUL_TIME + /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS). */ + big_base_inverted = __mp_bases[base].big_base_inverted; +#endif +#endif + + dig_per_u = __mp_bases[base].chars_per_limb; + out_len = ((size_t) msize * BITS_PER_MP_LIMB + * __mp_bases[base].chars_per_bit_exactly) + 1; + s += out_len; /* digits are produced least significant first, so the buffer is filled from its end */ + + while (msize != 0) + { + int i; + mp_limb_t n0, n1; + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted BIG_BASE above, shift the dividend too, to get + the right quotient. We need to do this every loop, + since the intermediate quotients are OK, but the quotient from + one turn in the loop is going to be the dividend in the + next turn, and the dividend needs to be up-shifted. */ + if (normalization_steps != 0) + { + n0 = mpn_lshift (mptr, mptr, msize, normalization_steps); + + /* If the shifting gave a carry out limb, store it and + increase the length. */ + if (n0 != 0) + { + mptr[msize] = n0; + msize++; + } + } +#endif + + /* Divide the number at MPTR by BIG_BASE to get a quotient and a + remainder. The remainder is our new digit in base BIG_BASE. */ + i = msize - 1; + n1 = mptr[i]; + + if (n1 >= big_base) + n1 = 0; /* the top limb needs a division of its own: start from a zero high part */ + else /* the top limb is already below the divisor: use it as the initial remainder, the quotient is one limb shorter */ + { + msize--; + i--; + } + + for (; i >= 0; i--) + { + n0 = mptr[i]; +#if UDIV_TIME > 2 * UMUL_TIME + udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted); +#else + udiv_qrnnd (mptr[i], n1, n1, n0, big_base); +#endif + } + +#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME + /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests) + the remainder will be up-shifted here. Compensate.
*/ + n1 >>= normalization_steps; +#endif + + /* Convert N1 from BIG_BASE to a string of digits in BASE + using single precision operations. */ + for (i = dig_per_u - 1; i >= 0; i--) + { + *--s = n1 % base; + n1 /= base; + if (n1 == 0 && msize == 0) /* nothing left in this chunk and no limbs remain: stop early */ + break; + } + } + + while (s != str) /* zero-fill the unused leading positions */ + *--s = 0; + return out_len; /* always the precomputed length, leading zeros included */ + } +} diff --git a/rts/gmp/mpn/generic/gmp-mparam.h b/rts/gmp/mpn/generic/gmp-mparam.h new file mode 100644 index 0000000000..14bcaece83 --- /dev/null +++ b/rts/gmp/mpn/generic/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/rts/gmp/mpn/generic/hamdist.c b/rts/gmp/mpn/generic/hamdist.c new file mode 100644 index 0000000000..35c10e8450 --- /dev/null +++ b/rts/gmp/mpn/generic/hamdist.c @@ -0,0 +1,94 @@ +/* mpn_hamdist -- + +Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +/* No processor claiming to be SPARC v9 compliant seems to + implement the POPC instruction. Disable pattern for now. */ +#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Population count (number of 1 bits) of an mp_limb_t. + Adjacent bit fields are summed in parallel: first 1-bit pairs, then 2-bit, 4-bit and wider fields, until the total sits in the low byte. */ + +static inline unsigned int +#if __STDC__ +popc_limb (mp_limb_t x) +#else +popc_limb (x) + mp_limb_t x; +#endif +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go to some trouble to define these constants. + (For mp_limb_t being `long long'.)
*/ + mp_limb_t cnst; + cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2); + x -= (x & cnst) >> 1; /* each 2-bit field now holds the count of its two bits */ + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); /* 4-bit field sums */ + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; /* per-byte sums */ + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; /* the total fits in the low byte */ +#endif +#if BITS_PER_MP_LIMB == 32 + x -= (x & 0xaaaaaaaa) >> 1; /* each 2-bit field now holds the count of its two bits */ + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); /* 4-bit field sums */ + x = ((x >> 4) + x) & 0x0f0f0f0fL; /* per-byte sums */ + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; /* the total fits in the low byte */ +#endif + return x; +} +#endif + +unsigned long int +#if __STDC__ +mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size) +#else +mpn_hamdist (up, vp, size) + register mp_srcptr up; + register mp_srcptr vp; + register mp_size_t size; +#endif +{ + unsigned long int hamdist; + mp_size_t i; + + hamdist = 0; + for (i = 0; i < size; i++) + hamdist += popc_limb (up[i] ^ vp[i]); /* XOR has a 1 bit exactly where the two limbs differ */ + + return hamdist; /* number of differing bit positions over SIZE limbs */ +} diff --git a/rts/gmp/mpn/generic/inlines.c b/rts/gmp/mpn/generic/inlines.c new file mode 100644 index 0000000000..9487e58cf2 --- /dev/null +++ b/rts/gmp/mpn/generic/inlines.c @@ -0,0 +1,24 @@ +/* +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#define _FORCE_INLINES +#define _EXTERN_INLINE /* empty */ +#include "gmp.h" diff --git a/rts/gmp/mpn/generic/jacbase.c b/rts/gmp/mpn/generic/jacbase.c new file mode 100644 index 0000000000..dd437f1ac1 --- /dev/null +++ b/rts/gmp/mpn/generic/jacbase.c @@ -0,0 +1,136 @@ +/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments. + + THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO + INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +#if COUNT_TRAILING_ZEROS_TIME <= 7 +/* If count_trailing_zeros is fast, use it. + K7 at 7 cycles and P6 at 2 are good here. K6 at 12-27 and P5 at 18-42 + are not. The default 15 in longlong.h is meant to mean not good here. Both macros operate on the variables a, b and result_bit1 of the expanding function; in this variant PROCESS_TWOS_EVEN is simply an alias for PROCESS_TWOS_ANY. */ + +#define PROCESS_TWOS_ANY \ + { \ + mp_limb_t twos; \ + count_trailing_zeros (twos, a); \ + result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \ + a >>= twos; \ + } + +#define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY + +#else +/* Use a loop instead.
With "a" uniformly distributed there will usually be + only a few trailing zeros. + + Unfortunately the branch for the while loop here will be on a 50/50 + chance of a 1 or 0, which is bad for branch prediction. PROCESS_TWOS_EVEN always shifts at least once, so "a" must be even and nonzero on entry; PROCESS_TWOS_ANY tests the low bit first. NOTE(review): PROCESS_TWOS_ANY expands to an unbraced if statement, so it is only safe where no else can follow it. */ + +#define PROCESS_TWOS_EVEN \ + { \ + int two; \ + two = JACOBI_TWO_U_BIT1 (b); \ + do \ + { \ + a >>= 1; \ + result_bit1 ^= two; \ + ASSERT (a != 0); \ + } \ + while ((a & 1) == 0); \ + } + +#define PROCESS_TWOS_ANY \ + if ((a & 1) == 0) \ + PROCESS_TWOS_EVEN; + +#endif + + +/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but + with a restricted range of inputs accepted, namely b>1, b odd, and a<=b. + + The initial result_bit1 is taken as a parameter for the convenience of + mpz_kronecker_zi_ui() et al. The sign changes both here and in those + routines accumulate nicely in bit 1, see the JACOBI macros. + + The return value here is the normal +1, 0, or -1. Note that +1 and -1 + have bit 1 in the "BIT1" sense, which could be useful if the caller is + accumulating it into some extended calculation. + + Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be + possible, but a couple of tests suggest it's not a significant speedup, + and may even be a slowdown, so what's here is good enough for now. + + Future: The code doesn't demand a<=b actually, so maybe this could be + relaxed. All the places this is used currently call with a<=b though.
*/ + +int +#if __STDC__ +mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1) +#else +mpn_jacobi_base (a, b, result_bit1) + mp_limb_t a; + mp_limb_t b; + int result_bit1; +#endif +{ + ASSERT (b & 1); /* b odd */ + ASSERT (b != 1); /* together with the oddness check: b > 1 */ + ASSERT (a <= b); /* restricted argument range */ + + if (a == 0) /* the symbol is 0 when a is zero */ + return 0; + + PROCESS_TWOS_ANY; /* strip factors of two from a, folding their sign effect into result_bit1 */ + if (a == 1) + goto done; + + for (;;) + { + result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b); /* sign adjustment that goes with the swap below (reciprocity, judging by the macro name) */ + MP_LIMB_T_SWAP (a, b); + + do + { + /* working on (a/b), a,b odd, a>=b */ + ASSERT (a & 1); + ASSERT (b & 1); + ASSERT (a >= b); + + if ((a -= b) == 0) /* a was equal to b */ + return 0; + + PROCESS_TWOS_EVEN; /* a - b is even here because both were odd */ + if (a == 1) + goto done; + } + while (a >= b); /* once a < b, go back to the outer loop and swap again */ + } + + done: + return JACOBI_BIT1_TO_PN (result_bit1); /* turn the accumulated bit 1 into +1 or -1 */ +} diff --git a/rts/gmp/mpn/generic/lshift.c b/rts/gmp/mpn/generic/lshift.c new file mode 100644 index 0000000000..0b58389658 --- /dev/null +++ b/rts/gmp/mpn/generic/lshift.c @@ -0,0 +1,87 @@ +/* mpn_lshift -- Shift left low level. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + and store the USIZE least significant digits of the result at WP.
+ Return the bits shifted out from the most significant digit. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be >= UP. +*/ + +mp_limb_t +#if __STDC__ +mpn_lshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_lshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; + register unsigned sh_1, sh_2; + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) /* neither case is supported by the code below */ + abort (); +#endif + + sh_1 = cnt; +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from high end to low end, to allow specified input/output + overlapping. */ + for (i = usize - 1; i >= 0; i--) + wp[i] = up[i]; + } + return 0; + } +#endif + + wp += 1; /* bias wp so that wp[i] below is the result limb one above source index i */ + sh_2 = BITS_PER_MP_LIMB - sh_1; /* complementary right-shift count */ + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; /* bits shifted out of the most significant limb */ + high_limb = low_limb; + while (--i >= 0) /* walk from the high end down, so in-place use with wp >= up reads each limb before overwriting it */ + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; /* i is -1 here, so with the biased wp this stores the lowest result limb */ + + return retval; +} diff --git a/rts/gmp/mpn/generic/mod_1.c b/rts/gmp/mpn/generic/mod_1.c new file mode 100644 index 0000000000..168ec9df49 --- /dev/null +++ b/rts/gmp/mpn/generic/mod_1.c @@ -0,0 +1,175 @@ +/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + Return the single-limb remainder. + There are no constraints on the value of the divisor. + +Copyright (C) 1991, 1993, 1994, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +mp_limb_t +#if __STDC__ +mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb) +#else +mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; +#endif +{ + mp_size_t i; + mp_limb_t n1, n0, r; + int dummy; + + /* Botch: Should this be handled at all? Rely on callers? */ + if (dividend_size == 0) + return 0; + + /* If multiplication is much faster than division, and the + dividend is large, pre-invert the divisor, and use + only multiplications in the inner loop. */ + + /* This test should be read: + Does it ever help to use udiv_qrnnd_preinv? + && Does what we save compensate for the inversion overhead? 
*/ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + mp_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + invert_limb (divisor_limb_inverted, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... */ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + udiv_qrnnd_preinv (dummy, r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else + { + mp_limb_t divisor_limb_inverted; + + invert_limb (divisor_limb_inverted, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else + { + if (UDIV_NEEDS_NORMALIZATION) + { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if (normalization_steps != 0) + { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); + + /* Possible optimization: + if (r == 0 + && divisor_limb > ((n1 << normalization_steps) + | (dividend_ptr[dividend_size - 2] >> ...))) + ...one division less... 
*/ + + for (i = dividend_size - 2; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (dummy, r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + it, or because DIVISOR_LIMB is already normalized. */ + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + i--; + + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/rts/gmp/mpn/generic/mod_1_rs.c b/rts/gmp/mpn/generic/mod_1_rs.c new file mode 100644 index 0000000000..62aaa94b92 --- /dev/null +++ b/rts/gmp/mpn/generic/mod_1_rs.c @@ -0,0 +1,111 @@ +/* mpn_mod_1_rshift -- mpn remainder under hypothetical right shift. + + THE FUNCTION IN THIS FILE IS FOR INTERNAL USE AND HAS A MUTABLE + INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. + IT'S ALMOST GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP + RELEASE. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* When testing on a CPU with UDIV_NEEDS_NORMALIZATION equal to 0, it can be + changed to 1 temporarily to test the code under that case too. */ +#if 0 +#undef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 1 +#endif + + +/* Calculate the remainder "(ptr,size >> shift) % divisor". Note ptr,size + is unchanged, the shift is only for its effect on the remainder. + The shift doesn't even need to be considered until the last limb. + + This function has the normal size!=0 restriction, unlike the basic + mpn_mod_1. The ASSERTs in the body also require 1 <= shift < BITS_PER_MP_LIMB. */ + +mp_limb_t +#if __STDC__ +mpn_mod_1_rshift (mp_srcptr ptr, mp_size_t size, unsigned shift, + mp_limb_t divisor) +#else +mpn_mod_1_rshift (ptr, size, shift, divisor) + mp_srcptr ptr; + mp_size_t size; + unsigned shift; + mp_limb_t divisor; +#endif +{ + mp_limb_t quot, rem; /* quot is only handed to udiv_qrnnd and never read back */ + + ASSERT (shift >= 1); + ASSERT (shift < BITS_PER_MP_LIMB); + ASSERT (size >= 1); + + if (size == 1) /* single limb: shift it and reduce directly */ + return (ptr[0] >> shift) % divisor; + +#if UDIV_NEEDS_NORMALIZATION + { + int norm; + int delta; + + count_leading_zeros (norm, divisor); + divisor <<= norm; /* from here on the divisor is scaled by 2^norm */ + + delta = shift - norm; /* part of the right shift not absorbed by that scaling */ + if (delta == 0) /* shift and scaling cancel: one plain mpn_mod_1 is enough */ + return mpn_mod_1 (ptr, size, divisor) >> norm; + + if (delta > 0) + { /* reduce the limbs above ptr[0], then divide that remainder joined with ptr[0], shifted right by delta */ + rem = mpn_mod_1 (ptr+1, size-1, divisor); + udiv_qrnnd (quot, rem, + rem >> delta, + (rem << (BITS_PER_MP_LIMB-delta)) | (ptr[0] >> delta), + divisor); + return rem >> norm; /* undo the 2^norm scaling */ + } + else + { /* delta < 0: reduce everything, then divide the remainder shifted left by -delta */ + rem = mpn_mod_1 (ptr, size, divisor); + udiv_qrnnd (quot, rem, + rem >> (BITS_PER_MP_LIMB+delta), + rem << -delta, + divisor); + return rem >> norm; /* undo the 2^norm scaling */ + } + } + +#else /* !UDIV_NEEDS_NORMALIZATION */ + + rem = mpn_mod_1 (ptr+1, size-1, divisor); + udiv_qrnnd (quot, rem, + rem >> shift, + (rem << (BITS_PER_MP_LIMB-shift)) | (ptr[0] >> shift), + divisor); + return rem; + +#endif +} diff --git a/rts/gmp/mpn/generic/mul.c
b/rts/gmp/mpn/generic/mul.c new file mode 100644 index 0000000000..cecfa19ca1 --- /dev/null +++ b/rts/gmp/mpn/generic/mul.c @@ -0,0 +1,190 @@ +/* mpn_mul -- Multiply two natural numbers. + + THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul) + ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH + THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED + THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v + (pointed to by VP, with VN limbs), and store the result at PRODP. The + result is UN + VN limbs. Return the most significant limb of the result. + + NOTE: The space pointed to by PRODP is overwritten before finished with U + and V, so overlap is an error. + + Argument constraints: + 1. UN >= VN. + 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from + the multiplier and the multiplicand. 
*/ + /* mpn_sqr_n: square {up,un} into prodp (the caller below reads limb 2*un-1 of the result). The algorithm is picked from un: basecase below KARATSUBA_SQR_THRESHOLD, Karatsuba below TOOM3_SQR_THRESHOLD, otherwise Toom-3, or the FFT code from FFT_SQR_THRESHOLD upwards when WANT_FFT or TUNE_PROGRAM_BUILD is set. */ +void +#if __STDC__ +mpn_sqr_n (mp_ptr prodp, + mp_srcptr up, mp_size_t un) +#else +mpn_sqr_n (prodp, up, un) + mp_ptr prodp; + mp_srcptr up; + mp_size_t un; +#endif +{ + if (un < KARATSUBA_SQR_THRESHOLD) + { /* plain schoolbook multiplication */ + if (un == 0) /* nothing to square */ + return; + mpn_sqr_basecase (prodp, up, un); + } + else if (un < TOOM3_SQR_THRESHOLD) + { /* karatsuba multiplication */ + mp_ptr tspace; + TMP_DECL (marker); + TMP_MARK (marker); + tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); /* byte count for 2*(un+BITS_PER_MP_LIMB) limbs of scratch */ + mpn_kara_sqr_n (prodp, up, un, tspace); + TMP_FREE (marker); + } +#if WANT_FFT || TUNE_PROGRAM_BUILD + else if (un < FFT_SQR_THRESHOLD) +#else + else +#endif + { /* toom3 multiplication */ + mp_ptr tspace; + TMP_DECL (marker); + TMP_MARK (marker); + tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); /* same scratch size as the Karatsuba branch */ + mpn_toom3_sqr_n (prodp, up, un, tspace); + TMP_FREE (marker); + } +#if WANT_FFT || TUNE_PROGRAM_BUILD + else + { + /* schoenhage multiplication */ + mpn_mul_fft_full (prodp, up, un, up, un); + } +#endif +} + /* mpn_mul: multiply {up,un} by {vp,vn} into prodp and return the top limb of the product; the argument constraints are listed in the comment block that precedes mpn_sqr_n. */ +mp_limb_t +#if __STDC__ +mpn_mul (mp_ptr prodp, + mp_srcptr up, mp_size_t un, + mp_srcptr vp, mp_size_t vn) +#else +mpn_mul (prodp, up, un, vp, vn) + mp_ptr prodp; + mp_srcptr up; + mp_size_t un; + mp_srcptr vp; + mp_size_t vn; +#endif +{ + mp_size_t l; + mp_limb_t c; + + if (up == vp && un == vn) /* same operand twice: square it instead */ + { + mpn_sqr_n (prodp, up, un); + return prodp[2 * un - 1]; + } + + if (vn < KARATSUBA_MUL_THRESHOLD) + { /* long multiplication */ + mpn_mul_basecase (prodp, up, un, vp, vn); + return prodp[un + vn - 1]; + } + + mpn_mul_n (prodp, up, vp, vn); /* low vn limbs of u times all of v, stored directly */ + if (un != vn) /* the limbs of u above the first vn are still to be multiplied in */ + { mp_limb_t t; + mp_ptr ws; + TMP_DECL (marker); + TMP_MARK (marker); + + prodp += vn; /* step past the part of the product that is already final */ + l = vn; /* l = number of limbs at prodp that already hold product data */ + up += vn; + un -= vn; + + if (un < vn) + { + /* Swap u's and v's. */ + MPN_SRCPTR_SWAP (up,un, vp,vn); + } + + ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ?
vn : un) + vn) + * BYTES_PER_MP_LIMB); + + t = 0; /* t collects carry-outs of the additions below */ + while (vn >= KARATSUBA_MUL_THRESHOLD) /* peel off vn-by-vn blocks through mpn_mul_n */ + { + mpn_mul_n (ws, up, vp, vn); + if (l <= 2*vn) + { + t += mpn_add_n (prodp, prodp, ws, l); + if (l != 2*vn) + { + t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t); + l = 2*vn; + } + } + else + { + c = mpn_add_n (prodp, prodp, ws, 2*vn); + t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c); + } + prodp += vn; + l -= vn; + up += vn; + un -= vn; + if (un < vn) + { + /* Swap u's and v's. */ + MPN_SRCPTR_SWAP (up,un, vp,vn); + } + } + + if (vn) /* leftover un-by-vn piece goes through the basecase routine */ + { + mpn_mul_basecase (ws, up, un, vp, vn); + if (l <= un + vn) + { + t += mpn_add_n (prodp, prodp, ws, l); + if (l != un + vn) + t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t); + } + else + { + c = mpn_add_n (prodp, prodp, ws, un + vn); + t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c); + } + } + + TMP_FREE (marker); + } + return prodp[un + vn - 1]; /* indexed with the adjusted prodp, un and vn when the block above ran */ +} diff --git a/rts/gmp/mpn/generic/mul_1.c b/rts/gmp/mpn/generic/mul_1.c new file mode 100644 index 0000000000..1c36b5fb1f --- /dev/null +++ b/rts/gmp/mpn/generic/mul_1.c @@ -0,0 +1,59 @@ +/* mpn_mul_1 -- Multiply a limb vector with a single limb and + store the product in a second limb vector. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + /* mpn_mul_1: multiply {s1_ptr,s1_size} by s2_limb, store the low limb of each step at res_ptr and return the final carry limb. The do-while body runs before its test, so s1_size must be at least 1. */ +mp_limb_t +mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; /* add the carry from the previous limb */ + cy_limb = (prod_low < cy_limb) + prod_high; /* wraparound of that add carries into the high half */ + + res_ptr[j] = prod_low; + } + while (++j != 0); + + return cy_limb; +} diff --git a/rts/gmp/mpn/generic/mul_basecase.c b/rts/gmp/mpn/generic/mul_basecase.c new file mode 100644 index 0000000000..00c06aa5c4 --- /dev/null +++ b/rts/gmp/mpn/generic/mul_basecase.c @@ -0,0 +1,87 @@ +/* mpn_mul_basecase -- Internal routine to multiply two natural numbers + of length m and n. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. + + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details.
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Handle simple cases with traditional multiplication. + + This is the most critical code of multiplication. All multiplies rely on + this, both small and huge. Small ones arrive here immediately, huge ones + arrive here as this is the base case for Karatsuba's recursive algorithm. The product of {up,usize} and {vp,vsize} is written to prodp; vp[0] is read unconditionally, so vsize must be at least 1. */ + +void +#if __STDC__ +mpn_mul_basecase (mp_ptr prodp, + mp_srcptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize) +#else +mpn_mul_basecase (prodp, up, usize, vp, vsize) + mp_ptr prodp; + mp_srcptr up; + mp_size_t usize; + mp_srcptr vp; + mp_size_t vsize; +#endif +{ + /* We first multiply by the low order one or two limbs, as the result can + be stored, not added, to PROD. We also avoid a loop for zeroing this + way. */ +#if HAVE_NATIVE_mpn_mul_2 + if (vsize >= 2) + { + prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]); + prodp += 2, vp += 2, vsize -= 2; /* two limbs of v consumed */ + } + else + { + prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]); + return; /* v had a single limb, nothing is left to add */ + } +#else + prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]); + prodp += 1, vp += 1, vsize -= 1; /* one limb of v consumed */ +#endif + +#if HAVE_NATIVE_mpn_addmul_2 + while (vsize >= 2) + { + prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]); + prodp += 2, vp += 2, vsize -= 2; + } + if (vsize != 0) /* odd limb left over */ + prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]); +#else + /* For each iteration in the loop, multiply U with one limb from V, and + add the result to PROD.
*/ + while (vsize != 0) + { + prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]); /* the returned limb becomes the new top limb */ + prodp += 1, vp += 1, vsize -= 1; + } +#endif +} diff --git a/rts/gmp/mpn/generic/mul_fft.c b/rts/gmp/mpn/generic/mul_fft.c new file mode 100644 index 0000000000..00fd6d72de --- /dev/null +++ b/rts/gmp/mpn/generic/mul_fft.c @@ -0,0 +1,772 @@ +/* An implementation in GMP of Scho"nhage's fast multiplication algorithm + modulo 2^N+1, by Paul Zimmermann, INRIA Lorraine, February 1998. + + THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND THE FUNCTIONS HAVE + MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED + INTERFACES. IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN + A FUTURE GNU MP RELEASE. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +/* References: + + Schnelle Multiplikation grosser Zahlen, by Arnold Scho"nhage and Volker + Strassen, Computing 7, p. 281-292, 1971. + + Asymptotically fast algorithms for the numerical multiplication + and division of polynomials with complex coefficients, by Arnold Scho"nhage, + Computer Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+ + Tapes versus Pointers, a study in implementing fast algorithms, + by Arnold Scho"nhage, Bulletin of the EATCS, 30, p. 23-32, 1986. + + See also http://www.loria.fr/~zimmerma/bignum + + + Future: + + K==2 isn't needed in the current uses of this code and the bits specific + for that could be dropped. + + It might be possible to avoid a small number of MPN_COPYs by using a + rotating temporary or two. + + Multiplications of unequal sized operands can be done with this code, but + it needs a tighter test for identifying squaring (same sizes as well as + same pointers). */ + + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + + +/* Change this to "#define TRACE(x) x" for some traces. */ +#define TRACE(x) + + + +FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] = { + FFT_MUL_TABLE, + FFT_SQR_TABLE +}; + + +static void mpn_mul_fft_internal +_PROTO ((mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl, + int k, int K, + mp_limb_t **Ap, mp_limb_t **Bp, + mp_limb_t *A, mp_limb_t *B, + mp_size_t nprime, mp_size_t l, mp_size_t Mp, int **_fft_l, + mp_limb_t *T, int rec)); + + +/* Find the best k to use for a mod 2^(n*BITS_PER_MP_LIMB)+1 FFT. + sqr==0 if for a multiply, sqr==1 for a square. Scans mpn_fft_table[sqr] up to its zero terminator and returns the index of the first entry that exceeds n, offset by FFT_FIRST_K. */ +int +#if __STDC__ +mpn_fft_best_k (mp_size_t n, int sqr) +#else +mpn_fft_best_k (n, sqr) + mp_size_t n; + int sqr; +#endif +{ + mp_size_t t; /* NOTE(review): t is never used in this function */ + int i; + + for (i = 0; mpn_fft_table[sqr][i] != 0; i++) + if (n < mpn_fft_table[sqr][i]) + return i + FFT_FIRST_K; + + /* treat 4*last as one further entry */ + if (i == 0 || n < 4*mpn_fft_table[sqr][i-1]) + return i + FFT_FIRST_K; + else + return i + FFT_FIRST_K + 1; +} + + +/* Returns smallest possible number of limbs >= pl for a fft of size 2^k. + FIXME: Is this simply pl rounded up to the next multiple of 2^k ?
*/ + +mp_size_t +#if __STDC__ +mpn_fft_next_size (mp_size_t pl, int k) +#else +mpn_fft_next_size (pl, k) + mp_size_t pl; + int k; +#endif +{ + mp_size_t N, M; + int K; + + /* if (k==0) k = mpn_fft_best_k (pl, sqr); */ + N = pl*BITS_PER_MP_LIMB; /* size in bits */ + K = 1<<k; /* transform length 2^k */ + if (N%K) N=(N/K+1)*K; /* round N up to a multiple of K */ + M = N/K; /* bits per piece */ + if (M%BITS_PER_MP_LIMB) N=((M/BITS_PER_MP_LIMB)+1)*BITS_PER_MP_LIMB*K; /* make each piece a whole number of limbs */ + return (N/BITS_PER_MP_LIMB); +} + + /* mpn_fft_initl: fill rows l[0]..l[k]; row i has 2^i entries, its first half is twice row i-1 and its second half is the first half plus one, which yields 0..2^i-1 in bit-reversed order. */ +static void +#if __STDC__ +mpn_fft_initl(int **l, int k) +#else +mpn_fft_initl(l, k) + int **l; + int k; +#endif +{ + int i,j,K; + + l[0][0] = 0; + for (i=1,K=2;i<=k;i++,K*=2) { + for (j=0;j<K/2;j++) { + l[i][j] = 2*l[i-1][j]; + l[i][K/2+j] = 1+l[i][j]; + } + } +} + + +/* a <- -a mod 2^(n*BITS_PER_MP_LIMB)+1 */ +static void +#if __STDC__ +mpn_fft_neg_modF(mp_limb_t *ap, mp_size_t n) +#else +mpn_fft_neg_modF(ap, n) + mp_limb_t *ap; + mp_size_t n; +#endif +{ + mp_limb_t c; + + c = ap[n]+2; /* with N=n*BITS_PER_MP_LIMB and A the low n limbs: -A = ~A + 1 - 2^N = ~A + 2, and -ap[n]*2^N = +ap[n] (mod 2^N+1) */ + mpn_com_n (ap, ap, n); + ap[n]=0; mpn_incr_u(ap, c); +} + + +/* a <- a*2^e mod 2^(n*BITS_PER_MP_LIMB)+1. tp is scratch of n+1 limbs holding the bit-shifted copy of a. */ +static void +#if __STDC__ +mpn_fft_mul_2exp_modF(mp_limb_t *ap, int e, mp_size_t n, mp_limb_t *tp) +#else +mpn_fft_mul_2exp_modF(ap, e, n, tp) + mp_limb_t *ap; + int e; + mp_size_t n; + mp_limb_t *tp; +#endif +{ + int d, sh, i; mp_limb_t cc; + + d = e%(n*BITS_PER_MP_LIMB); /* 2^e = (+/-) 2^d */ + sh = d % BITS_PER_MP_LIMB; + if (sh) mpn_lshift(tp, ap, n+1, sh); /* no carry here */ + else MPN_COPY(tp, ap, n+1); + d /= BITS_PER_MP_LIMB; /* now shift of d limbs to the left */ + if (d) { + /* ap[d..n-1] = tp[0..n-d-1], ap[0..d-1] = -tp[n-d..n-1] */ + /* mpn_xor would be more efficient here */ + for (i=d-1;i>=0;i--) ap[i] = ~tp[n-d+i]; + cc = 1-mpn_add_1(ap, ap, d, 1); + if (cc) cc=mpn_sub_1(ap+d, tp, n-d, 1); + else MPN_COPY(ap+d, tp, n-d); + if (cc+=mpn_sub_1(ap+d, ap+d, n-d, tp[n])) + ap[n]=mpn_add_1(ap, ap, n, cc); + else ap[n]=0; + } + else if ((ap[n]=mpn_sub_1(ap, tp, n, tp[n]))) { + ap[n]=mpn_add_1(ap, ap, n, 1); + } + if ((e/(n*BITS_PER_MP_LIMB))%2) mpn_fft_neg_modF(ap, n); /* an odd multiple of N in the exponent flips the sign */
+} + + +/* a <- a+b mod 2^(n*BITS_PER_MP_LIMB)+1 */ +static void +#if __STDC__ +mpn_fft_add_modF (mp_limb_t *ap, mp_limb_t *bp, int n) /* NOTE(review): n is int here, the sibling helpers take mp_size_t */ +#else +mpn_fft_add_modF (ap, bp, n) + mp_limb_t *ap,*bp; + int n; +#endif +{ + mp_limb_t c; + + c = ap[n] + bp[n] + mpn_add_n(ap, ap, bp, n); + if (c>1) c -= 1+mpn_sub_1(ap,ap,n,1); /* top limb above 1: take one multiple of 2^N+1 back out */ + ap[n]=c; +} + + +/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where + N=n*BITS_PER_MP_LIMB + 2^omega is a primitive root mod 2^N+1 + output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */ + +static void +#if __STDC__ +mpn_fft_fft_sqr (mp_limb_t **Ap, mp_size_t K, int **ll, + mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp) +#else +mpn_fft_fft_sqr(Ap,K,ll,omega,n,inc,tp) +mp_limb_t **Ap,*tp; +mp_size_t K,omega,n,inc; +int **ll; +#endif +{ + if (K==2) { /* length-2 case: Ap[0] gets the sum, Ap[inc] the difference */ +#ifdef ADDSUB + if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1) +#else + MPN_COPY(tp, Ap[0], n+1); + mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1); + if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1)) +#endif + Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1); + } + else { + int j, inc2=2*inc; + int *lk = *ll; + mp_limb_t *tmp; + TMP_DECL(marker); + + TMP_MARK(marker); + tmp = TMP_ALLOC_LIMBS (n+1); /* scratch passed to mpn_fft_mul_2exp_modF */ + mpn_fft_fft_sqr(Ap, K/2,ll-1,2*omega,n,inc2, tp); /* entries at even multiples of inc */ + mpn_fft_fft_sqr(Ap+inc, K/2,ll-1,2*omega,n,inc2, tp); /* entries at odd multiples of inc */ + /* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc] + A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */ + for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc) { + MPN_COPY(tp, Ap[inc], n+1); + mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp); + mpn_fft_add_modF(Ap[inc], Ap[0], n); + mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp); + mpn_fft_add_modF(Ap[0], tp, n); + } + TMP_FREE(marker); + } +} + + +/* input: A[0] ...
A[inc*(K-1)] are residues mod 2^N+1 where + N=n*BITS_PER_MP_LIMB + 2^omega is a primitive root mod 2^N+1 + output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1. The same steps are applied to the Bp array alongside Ap. */ + +static void +#if __STDC__ +mpn_fft_fft (mp_limb_t **Ap, mp_limb_t **Bp, mp_size_t K, int **ll, + mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp) +#else +mpn_fft_fft(Ap,Bp,K,ll,omega,n,inc,tp) + mp_limb_t **Ap,**Bp,*tp; + mp_size_t K,omega,n,inc; + int **ll; +#endif +{ + if (K==2) { /* length-2 case, done once for Ap and once for Bp */ +#ifdef ADDSUB + if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1) +#else + MPN_COPY(tp, Ap[0], n+1); + mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1); + if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1)) +#endif + Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1); +#ifdef ADDSUB + if (mpn_addsub_n(Bp[0], Bp[inc], Bp[0], Bp[inc], n+1) & 1) +#else + MPN_COPY(tp, Bp[0], n+1); + mpn_add_n(Bp[0], Bp[0], Bp[inc],n+1); + if (mpn_sub_n(Bp[inc], tp, Bp[inc],n+1)) +#endif + Bp[inc][n] = mpn_add_1(Bp[inc], Bp[inc], n, 1); + } + else { + int j, inc2=2*inc; + int *lk=*ll; + mp_limb_t *tmp; + TMP_DECL(marker); + + TMP_MARK(marker); + tmp = TMP_ALLOC_LIMBS (n+1); /* scratch passed to mpn_fft_mul_2exp_modF */ + mpn_fft_fft(Ap, Bp, K/2,ll-1,2*omega,n,inc2, tp); /* entries at even multiples of inc */ + mpn_fft_fft(Ap+inc, Bp+inc, K/2,ll-1,2*omega,n,inc2, tp); /* entries at odd multiples of inc */ + /* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc] + A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */ + for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc,Bp+=2*inc) { + MPN_COPY(tp, Ap[inc], n+1); + mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp); + mpn_fft_add_modF(Ap[inc], Ap[0], n); + mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp); + mpn_fft_add_modF(Ap[0], tp, n); + MPN_COPY(tp, Bp[inc], n+1); + mpn_fft_mul_2exp_modF(Bp[inc], lk[1]*omega, n, tmp); + mpn_fft_add_modF(Bp[inc], Bp[0], n); + mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp); + mpn_fft_add_modF(Bp[0], tp, n); + } + TMP_FREE(marker); + } +} + + +/* a[i] <- a[i]*b[i] mod 2^(n*BITS_PER_MP_LIMB)+1 for 0 <= i < K */ +static void +#if __STDC__ +mpn_fft_mul_modF_K (mp_limb_t **ap,
mp_limb_t **bp, mp_size_t n, int K) +#else +mpn_fft_mul_modF_K(ap, bp, n, K) + mp_limb_t **ap, **bp; + mp_size_t n; + int K; +#endif +{ + int i; + int sqr = (ap == bp); /* same pointer array for both operands means squaring */ + TMP_DECL(marker); + + TMP_MARK(marker); + + if (n >= (sqr ? FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) { /* large residues: each product goes through another FFT level */ + int k, K2,nprime2,Nprime2,M2,maxLK,l,Mp2; + int **_fft_l; + mp_limb_t **Ap,**Bp,*A,*B,*T; + + k = mpn_fft_best_k (n, sqr); + K2 = 1<<k; + maxLK = (K2>BITS_PER_MP_LIMB) ? K2 : BITS_PER_MP_LIMB; + M2 = n*BITS_PER_MP_LIMB/K2; + l = n/K2; + Nprime2 = ((2*M2+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M2+k+3)/maxLK)*maxLK*/ + nprime2 = Nprime2/BITS_PER_MP_LIMB; + Mp2 = Nprime2/K2; + + Ap = TMP_ALLOC_MP_PTRS (K2); + Bp = TMP_ALLOC_MP_PTRS (K2); + A = TMP_ALLOC_LIMBS (2*K2*(nprime2+1)); + T = TMP_ALLOC_LIMBS (nprime2+1); + B = A + K2*(nprime2+1); /* B is the second half of the A allocation */ + _fft_l = TMP_ALLOC_TYPE (k+1, int*); + for (i=0;i<=k;i++) + _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int); + mpn_fft_initl(_fft_l, k); + + TRACE (printf("recurse: %dx%d limbs -> %d times %dx%d (%1.2f)\n", n, + n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2)); + + for (i=0;i<K;i++,ap++,bp++) + mpn_mul_fft_internal(*ap, *ap, *bp, n, k, K2, Ap, Bp, A, B, nprime2, + l, Mp2, _fft_l, T, 1); + } + else { /* small residues: multiply the low n limbs directly, then correct for the top limbs and reduce */ + mp_limb_t *a, *b, cc, *tp, *tpn; int n2=2*n; + tp = TMP_ALLOC_LIMBS (n2); + tpn = tp+n; /* high half of the 2n-limb product */ + TRACE (printf (" mpn_mul_n %d of %d limbs\n", K, n)); + for (i=0;i<K;i++) { + a = *ap++; b=*bp++; + if (sqr) + mpn_sqr_n(tp, a, n); + else + mpn_mul_n(tp, b, a, n); + if (a[n]) cc=mpn_add_n(tpn, tpn, b, n); else cc=0; + if (b[n]) cc += mpn_add_n(tpn, tpn, a, n) + a[n]; + if (cc) { + cc = mpn_add_1(tp, tp, n2, cc); + ASSERT_NOCARRY (mpn_add_1(tp, tp, n2, cc)); + } + a[n] = mpn_sub_n(a, tp, tpn, n) && mpn_add_1(a, a, n, 1); /* low half minus high half, since 2^N = -1 mod 2^N+1 */ + } + } + TMP_FREE(marker); +} + + +/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]] + output: K*A[0] K*A[K-1] ...
K*A[1] */ + +static void +#if __STDC__ +mpn_fft_fftinv (mp_limb_t **Ap, int K, mp_size_t omega, mp_size_t n, + mp_limb_t *tp) +#else +mpn_fft_fftinv(Ap,K,omega,n,tp) + mp_limb_t **Ap, *tp; + int K; + mp_size_t omega, n; +#endif +{ + if (K==2) { /* length-2 case: Ap[0] gets the sum, Ap[1] the difference */ +#ifdef ADDSUB + if (mpn_addsub_n(Ap[0], Ap[1], Ap[0], Ap[1], n+1) & 1) +#else + MPN_COPY(tp, Ap[0], n+1); + mpn_add_n(Ap[0], Ap[0], Ap[1], n+1); + if (mpn_sub_n(Ap[1], tp, Ap[1], n+1)) +#endif + Ap[1][n] = mpn_add_1(Ap[1], Ap[1], n, 1); + } + else { + int j, K2=K/2; mp_limb_t **Bp=Ap+K2, *tmp; /* Bp is the upper half of Ap here, not a second operand */ + TMP_DECL(marker); + + TMP_MARK(marker); + tmp = TMP_ALLOC_LIMBS (n+1); /* scratch passed to mpn_fft_mul_2exp_modF */ + mpn_fft_fftinv(Ap, K2, 2*omega, n, tp); + mpn_fft_fftinv(Bp, K2, 2*omega, n, tp); + /* A[j] <- A[j] + omega^j A[j+K/2] + A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */ + for (j=0;j<K2;j++,Ap++,Bp++) { + MPN_COPY(tp, Bp[0], n+1); + mpn_fft_mul_2exp_modF(Bp[0], (j+K2)*omega, n, tmp); + mpn_fft_add_modF(Bp[0], Ap[0], n); + mpn_fft_mul_2exp_modF(tp, j*omega, n, tmp); + mpn_fft_add_modF(Ap[0], tp, n); + } + TMP_FREE(marker); + } +} + + +/* A <- A/2^k mod 2^(n*BITS_PER_MP_LIMB)+1 */ +static void +#if __STDC__ +mpn_fft_div_2exp_modF (mp_limb_t *ap, int k, mp_size_t n, mp_limb_t *tp) +#else +mpn_fft_div_2exp_modF(ap,k,n,tp) + mp_limb_t *ap,*tp; + int k; + mp_size_t n; +#endif +{ + int i; + + i = 2*n*BITS_PER_MP_LIMB; + i = (i-k) % i; /* exponent of the equivalent multiplication */ + mpn_fft_mul_2exp_modF(ap,i,n,tp); + /* 1/2^k = 2^(2nL-k) mod 2^(n*BITS_PER_MP_LIMB)+1 */ + /* normalize so that A < 2^(n*BITS_PER_MP_LIMB)+1 */ + if (ap[n]==1) { + for (i=0;i<n && ap[i]==0;i++); + if (i<n) { /* top limb is 1 and some low limb is nonzero */ + ap[n]=0; + mpn_sub_1(ap, ap, n, 1); + } + } +} + + +/* R <- A mod 2^(n*BITS_PER_MP_LIMB)+1, n<=an<=3*n */ +static void +#if __STDC__ +mpn_fft_norm_modF(mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_size_t an) +#else +mpn_fft_norm_modF(rp, ap, n, an) + mp_limb_t *rp; + mp_limb_t *ap; + mp_size_t n; + mp_size_t an; +#endif +{ + mp_size_t l; + + if (an>2*n) { + l = n; + rp[n] = mpn_add_1(rp+an-2*n, ap+an-2*n, 3*n-an, +
mpn_add_n(rp,ap,ap+2*n,an-2*n)); + } + else { + l = an-n; + MPN_COPY(rp, ap, n); + rp[n]=0; + } + if (mpn_sub_n(rp,rp,ap+n,l)) { /* subtract the limbs that start at ap+n */ + if (mpn_sub_1(rp+l,rp+l,n+1-l,1)) + rp[n]=mpn_add_1(rp,rp,n,1); + } +} + + /* mpn_mul_fft_internal: when rec is nonzero, first split n and m into the K pieces Ap[i] and Bp[i]; then run the forward transforms, the pointwise products, the inverse transform and the per-term divisions, add the terms up in the B area and hand that sum to mpn_fft_norm_modF, which writes op. */ +static void +#if __STDC__ +mpn_mul_fft_internal(mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl, + int k, int K, + mp_limb_t **Ap, mp_limb_t **Bp, + mp_limb_t *A, mp_limb_t *B, + mp_size_t nprime, mp_size_t l, mp_size_t Mp, + int **_fft_l, + mp_limb_t *T, int rec) +#else +mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,rec) + mp_limb_t *op; + mp_srcptr n, m; + mp_limb_t **Ap,**Bp,*A,*B,*T; + mp_size_t pl,nprime; + int **_fft_l; + int k,K,l,Mp,rec; /* NOTE(review): the prototype declares l and Mp as mp_size_t, this K&R list declares them int */ +#endif +{ + int i, sqr, pla, lo, sh, j; + mp_limb_t *p; + + sqr = (n==m); /* pointer equality only, sizes are not compared here */ + + TRACE (printf ("pl=%d k=%d K=%d np=%d l=%d Mp=%d rec=%d sqr=%d\n", + pl,k,K,nprime,l,Mp,rec,sqr)); + + /* decomposition of inputs into arrays Ap[i] and Bp[i] */ + if (rec) for (i=0;i<K;i++) { + Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1); + /* store the next M bits of n into A[i] */ + /* supposes that M is a multiple of BITS_PER_MP_LIMB */ + MPN_COPY(Ap[i], n, l); n+=l; MPN_ZERO(Ap[i]+l, nprime+1-l); + /* set most significant bits of n and m (important in recursive calls) */ + if (i==K-1) Ap[i][l]=n[0]; + mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T); + if (!sqr) { + MPN_COPY(Bp[i], m, l); m+=l; MPN_ZERO(Bp[i]+l, nprime+1-l); + if (i==K-1) Bp[i][l]=m[0]; + mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T); + } + } + + /* direct fft's */ + if (sqr) mpn_fft_fft_sqr(Ap,K,_fft_l+k,2*Mp,nprime,1, T); + else mpn_fft_fft(Ap,Bp,K,_fft_l+k,2*Mp,nprime,1, T); + + /* term to term multiplications */ + mpn_fft_mul_modF_K(Ap, (sqr) ?
Ap : Bp, nprime, K); + + /* inverse fft's */ + mpn_fft_fftinv(Ap, K, 2*Mp, nprime, T); + + /* division of terms after inverse fft */ + for (i=0;i<K;i++) mpn_fft_div_2exp_modF(Ap[i],k+((K-i)%K)*Mp,nprime, T); + + /* addition of terms in result p */ + MPN_ZERO(T,nprime+1); + pla = l*(K-1)+nprime+1; /* number of required limbs for p */ + p = B; /* B has K*(n'+1) limbs, which is >= pla, i.e. enough */ + MPN_ZERO(p, pla); + sqr=0; /* will accumulate the (signed) carry at p[pla] */ + for (i=K-1,lo=l*i+nprime,sh=l*i;i>=0;i--,lo-=l,sh-=l) { + mp_ptr n = p+sh; + j = (K-i)%K; + if (mpn_add_n(n,n,Ap[j],nprime+1)) + sqr += mpn_add_1(n+nprime+1,n+nprime+1,pla-sh-nprime-1,1); + T[2*l]=i+1; /* T = (i+1)*2^(2*M) */ + if (mpn_cmp(Ap[j],T,nprime+1)>0) { /* subtract 2^N'+1 */ + sqr -= mpn_sub_1(n,n,pla-sh,1); + sqr -= mpn_sub_1(p+lo,p+lo,pla-lo,1); + } + } + if (sqr==-1) { + if ((sqr=mpn_add_1(p+pla-pl,p+pla-pl,pl,1))) { + /* p[pla-pl]...p[pla-1] are all zero */ + mpn_sub_1(p+pla-pl-1,p+pla-pl-1,pl+1,1); + mpn_sub_1(p+pla-1,p+pla-1,1,1); + } + } + else if (sqr==1) { + if (pla>=2*pl) + while ((sqr=mpn_add_1(p+pla-2*pl,p+pla-2*pl,2*pl,sqr))); + else { + sqr = mpn_sub_1(p+pla-pl,p+pla-pl,pl,sqr); + ASSERT (sqr == 0); + } + } + else + ASSERT (sqr == 0); + + /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ] + < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ] + < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */ + mpn_fft_norm_modF(op,p,pl,pla); +} + + +/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*BITS_PER_MP_LIMB + n and m have respectively nl and ml limbs + op must have space for pl+1 limbs + One must have pl = mpn_fft_next_size(pl, k).
+*/ + +void +#if __STDC__ +mpn_mul_fft (mp_ptr op, mp_size_t pl, + mp_srcptr n, mp_size_t nl, + mp_srcptr m, mp_size_t ml, + int k) +#else +mpn_mul_fft (op, pl, n, nl, m, ml, k) + mp_ptr op; + mp_size_t pl; + mp_srcptr n; + mp_size_t nl; + mp_srcptr m; + mp_size_t ml; + int k; +#endif +{ + int K,maxLK,i,j; + mp_size_t N,Nprime,nprime,M,Mp,l; + mp_limb_t **Ap,**Bp,*A,*T,*B; + int **_fft_l; + int sqr = (n==m && nl==ml); + TMP_DECL(marker); + + TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n", + pl, nl, ml, k)); + ASSERT_ALWAYS (mpn_fft_next_size(pl, k) == pl); /* pl must already be a valid FFT size for this k */ + + TMP_MARK(marker); + N = pl*BITS_PER_MP_LIMB; + _fft_l = TMP_ALLOC_TYPE (k+1, int*); + for (i=0;i<=k;i++) + _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int); + mpn_fft_initl(_fft_l, k); + K = 1<<k; + M = N/K; /* N = 2^k M */ + l = M/BITS_PER_MP_LIMB; /* limbs per input piece */ + maxLK = (K>BITS_PER_MP_LIMB) ? K : BITS_PER_MP_LIMB; + + Nprime = ((2*M+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M+k+3)/maxLK)*maxLK; */ + nprime = Nprime/BITS_PER_MP_LIMB; + TRACE (printf ("N=%d K=%d, M=%d, l=%d, maxLK=%d, Np=%d, np=%d\n", + N, K, M, l, maxLK, Nprime, nprime)); + if (nprime >= (sqr ?
FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) { + maxLK = (1<<mpn_fft_best_k(nprime,n==m))*BITS_PER_MP_LIMB; /* NOTE(review): passes n==m here rather than the sqr flag computed above */ + if (Nprime % maxLK) { + Nprime=((Nprime/maxLK)+1)*maxLK; + nprime = Nprime/BITS_PER_MP_LIMB; + } + TRACE (printf ("new maxLK=%d, Np=%d, np=%d\n", maxLK, Nprime, nprime)); + } + + T = TMP_ALLOC_LIMBS (nprime+1); + Mp = Nprime/K; + + TRACE (printf("%dx%d limbs -> %d times %dx%d limbs (%1.2f)\n", + pl,pl,K,nprime,nprime,2.0*(double)N/Nprime/K); + printf(" temp space %ld\n", 2*K*(nprime+1))); + + A = _MP_ALLOCATE_FUNC_LIMBS (2*K*(nprime+1)); /* not a TMP_ allocation: released explicitly at the end of the function */ + B = A+K*(nprime+1); /* B is the second half of the A allocation */ + Ap = TMP_ALLOC_MP_PTRS (K); + Bp = TMP_ALLOC_MP_PTRS (K); + /* special decomposition for main call */ + for (i=0;i<K;i++) { + Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1); + /* store the next M bits of n into A[i] */ + /* supposes that M is a multiple of BITS_PER_MP_LIMB */ + if (nl>0) { + j = (nl>=l) ? l : nl; /* limbs to store in Ap[i] */ + MPN_COPY(Ap[i], n, j); n+=l; MPN_ZERO(Ap[i]+j, nprime+1-j); + mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T); + } + else MPN_ZERO(Ap[i], nprime+1); + nl -= l; + if (n!=m) { /* NOTE(review): compares the possibly already advanced n against m, not the sqr flag */ + if (ml>0) { + j = (ml>=l) ? l : ml; /* limbs to store in Bp[i] */ + MPN_COPY(Bp[i], m, j); m+=l; MPN_ZERO(Bp[i]+j, nprime+1-j); + mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T); + } + else MPN_ZERO(Bp[i], nprime+1); + } + ml -= l; + } + mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,0); /* rec=0: the pieces were already split above */ + TMP_FREE(marker); + _MP_FREE_FUNC_LIMBS (A, 2*K*(nprime+1)); +} + + +#if WANT_ASSERT +static int +#if __STDC__ +mpn_zero_p (mp_ptr p, mp_size_t n) +#else + mpn_zero_p (p, n) + mp_ptr p; + mp_size_t n; +#endif +{ + mp_size_t i; + + for (i = 0; i < n; i++) + { + if (p[i] != 0) + return 0; + } + + return 1; /* all n limbs were zero (also the result for n <= 0) */ +} +#endif + + +/* Multiply {n,nl}*{m,ml} and write the result to {op,nl+ml}. + + FIXME: Duplicating the result like this is wasteful, do something better + perhaps at the norm_modF stage above.
*/ + +void +#if __STDC__ +mpn_mul_fft_full (mp_ptr op, + mp_srcptr n, mp_size_t nl, + mp_srcptr m, mp_size_t ml) +#else +mpn_mul_fft_full (op, n, nl, m, ml) + mp_ptr op; + mp_srcptr n; + mp_size_t nl; + mp_srcptr m; + mp_size_t ml; +#endif +{ + mp_ptr pad_op; + mp_size_t pl; + int k; + int sqr = (n==m && nl==ml); + + k = mpn_fft_best_k (nl+ml, sqr); /* transform size chosen for an (nl+ml)-limb result */ + pl = mpn_fft_next_size (nl+ml, k); /* padded length that mpn_mul_fft accepts for this k */ + + TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl=%ld k=%d\n", + nl, ml, pl, k)); + + pad_op = _MP_ALLOCATE_FUNC_LIMBS (pl+1); /* mpn_mul_fft needs room for pl+1 limbs */ + mpn_mul_fft (pad_op, pl, n, nl, m, ml, k); + + ASSERT (mpn_zero_p (pad_op+nl+ml, pl+1-(nl+ml))); /* everything above the nl+ml result limbs must be zero */ + MPN_COPY (op, pad_op, nl+ml); /* hand back only the nl+ml limbs of the product */ + + _MP_FREE_FUNC_LIMBS (pad_op, pl+1); +} diff --git a/rts/gmp/mpn/generic/mul_n.c b/rts/gmp/mpn/generic/mul_n.c new file mode 100644 index 0000000000..b7563be2d3 --- /dev/null +++ b/rts/gmp/mpn/generic/mul_n.c @@ -0,0 +1,1343 @@ +/* mpn_mul_n and helper function -- Multiply/square natural numbers. + + THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n) + ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH + THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED + THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details.
/*-- mpn_kara_mul_n ---------------------------------------------------------------*/

/* Multiplies using 3 half-sized mults and so on recursively.
 * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
 * No overlap of p[...] with a[...] or b[...].
 * ws is workspace.
 *
 * Outline: each operand is split into a low and a high part.  The absolute
 * differences |a_lo - a_hi| and |b_lo - b_hi| are formed in p, with `sign'
 * recording whether exactly one of the two subtractions had to be swapped.
 * Three products are then taken: the two differences (into ws), lo*lo
 * (into the low part of p) and hi*hi (into the high part of p).  Finally
 * lo*lo + hi*hi -/+ (difference product) is built in ws and added into p
 * at the offset of the split.
 *
 * n must be at least 2 (asserted as n2 > 0).
 */

void
#if __STDC__
mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
#else
mpn_kara_mul_n(p, a, b, n, ws)
     mp_ptr    p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
     mp_ptr    ws;
#endif
{
  mp_limb_t i, sign, w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  if (n & 1)
    {
      /* Odd length. */
      /* The low part has n3 = n2+1 limbs, the high part n2 limbs.  */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      /* p[0..n3-1] := |a_lo - a_hi|.  w is the top limb of a_lo; when it is
         non-zero a_lo is certainly the larger, so subtract directly and let
         the borrow come out of w.  Otherwise compare the n2-limb parts from
         the top to decide which way round to subtract.  */
      sign = 0;
      w = a[n2];
      if (w != 0)
        w -= mpn_sub_n (p, a, a + n3, n2);
      else
        {
          i = n2;
          do
            {
              --i;
              w0 = a[i];
              w1 = a[n3+i];
            }
          while (w0 == w1 && i != 0);
          if (w0 < w1)
            {
              x = a + n3;
              y = a;
              sign = 1;
            }
          else
            {
              x = a;
              y = a + n3;
            }
          mpn_sub_n (p, x, y, n2);
        }
      p[n2] = w;

      /* p[n3..n] := |b_lo - b_hi|, same scheme; sign is toggled on a swap.  */
      w = b[n2];
      if (w != 0)
        w -= mpn_sub_n (p + n3, b, b + n3, n2);
      else
        {
          i = n2;
          do
            {
              --i;
              w0 = b[i];
              w1 = b[n3+i];
            }
          while (w0 == w1 && i != 0);
          if (w0 < w1)
            {
              x = b + n3;
              y = b;
              sign ^= 1;
            }
          else
            {
              x = b;
              y = b + n3;
            }
          mpn_sub_n (p + n3, x, y, n2);
        }
      p[n] = w;

      /* Three products: differences -> ws (2*n3 = n1 limbs), a_lo*b_lo ->
         p[0..n], a_hi*b_hi -> p[n1..2n-1].  Recursive calls use ws + n1 as
         their own workspace.  The n3-limb and n2-limb products are checked
         against the threshold separately because n3 = n2 + 1.  */
      n1 = n + 1;
      if (n2 < KARATSUBA_MUL_THRESHOLD)
        {
          if (n3 < KARATSUBA_MUL_THRESHOLD)
            {
              mpn_mul_basecase (ws, p, n3, p + n3, n3);
              mpn_mul_basecase (p, a, n3, b, n3);
            }
          else
            {
              mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
              mpn_kara_mul_n (p, a, b, n3, ws + n1);
            }
          mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
        }
      else
        {
          mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
          mpn_kara_mul_n (p, a, b, n3, ws + n1);
          mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
        }

      /* ws := lo*lo -/+ difference product (added when the signs differed).  */
      if (sign)
        mpn_add_n (ws, p, ws, n1);
      else
        mpn_sub_n (ws, p, ws, n1);

      /* ws += hi*hi (nm1 = 2*n2 limbs), rippling the carry into the two
         limbs of ws above it.  */
      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))
        {
          mp_limb_t x = ws[nm1] + 1;
          ws[nm1] = x;
          if (x == 0)
            ++ws[n];
        }
      /* Add the middle term into p at offset n3 and propagate any carry
         upwards through p.  */
      if (mpn_add_n (p + n3, p + n3, ws, n1))
        {
          mp_limb_t x;
          i = n1 + n3;
          do
            {
              x = p[i] + 1;
              p[i] = x;
              ++i;
            } while (x == 0);
        }
    }
  else
    {
      /* Even length. */
      /* Both parts have n2 limbs.  */
      mp_limb_t t;

      /* p[0..n2-1] := |a_lo - a_hi|; compare from the top limb down to pick
         the subtraction order.  */
      i = n2;
      do
        {
          --i;
          w0 = a[i];
          w1 = a[n2+i];
        }
      while (w0 == w1 && i != 0);
      sign = 0;
      if (w0 < w1)
        {
          x = a + n2;
          y = a;
          sign = 1;
        }
      else
        {
          x = a;
          y = a + n2;
        }
      mpn_sub_n (p, x, y, n2);

      /* p[n2..n-1] := |b_lo - b_hi|; sign is toggled on a swap.  */
      i = n2;
      do
        {
          --i;
          w0 = b[i];
          w1 = b[n2+i];
        }
      while (w0 == w1 && i != 0);
      if (w0 < w1)
        {
          x = b + n2;
          y = b;
          sign ^= 1;
        }
      else
        {
          x = b;
          y = b + n2;
        }
      mpn_sub_n (p + n2, x, y, n2);

      /* Pointwise products. */
      /* differences -> ws[0..n-1], lo*lo -> p[0..n-1], hi*hi -> p[n..2n-1];
         recursive calls use ws + n as their workspace.  */
      if (n2 < KARATSUBA_MUL_THRESHOLD)
        {
          mpn_mul_basecase (ws, p, n2, p + n2, n2);
          mpn_mul_basecase (p, a, n2, b, n2);
          mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
        }
      else
        {
          mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
          mpn_kara_mul_n (p, a, b, n2, ws + n);
          mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
        }

      /* Interpolate. */
      /* w collects the net carry of the three n-limb additions (a borrow
         from the subtraction counts as -1).  */
      if (sign)
        w = mpn_add_n (ws, p, ws, n);
      else
        w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      /* TO DO: could put "if (w) { ... }" here.
       * Less work but badly predicted branch.
       * No measurable difference in speed on Alpha.
       */
      /* Add w at limb n + n2 and ripple any further carry upwards.  */
      i = n + n2;
      t = p[i] + w;
      p[i] = t;
      if (t < w)
        {
          do
            {
              ++i;
              w = p[i] + 1;
              p[i] = w;
            }
          while (w == 0);
        }
    }
}
*/ + mp_size_t n1, n3, nm1; + + n3 = n - n2; + + sign = 0; + w = a[n2]; + if (w != 0) + w -= mpn_sub_n (p, a, a + n3, n2); + else + { + i = n2; + do + { + --i; + w0 = a[i]; + w1 = a[n3+i]; + } + while (w0 == w1 && i != 0); + if (w0 < w1) + { + x = a + n3; + y = a; + sign = 1; + } + else + { + x = a; + y = a + n3; + } + mpn_sub_n (p, x, y, n2); + } + p[n2] = w; + + w = a[n2]; + if (w != 0) + w -= mpn_sub_n (p + n3, a, a + n3, n2); + else + { + i = n2; + do + { + --i; + w0 = a[i]; + w1 = a[n3+i]; + } + while (w0 == w1 && i != 0); + if (w0 < w1) + { + x = a + n3; + y = a; + sign ^= 1; + } + else + { + x = a; + y = a + n3; + } + mpn_sub_n (p + n3, x, y, n2); + } + p[n] = w; + + n1 = n + 1; + if (n2 < KARATSUBA_SQR_THRESHOLD) + { + if (n3 < KARATSUBA_SQR_THRESHOLD) + { + mpn_sqr_basecase (ws, p, n3); + mpn_sqr_basecase (p, a, n3); + } + else + { + mpn_kara_sqr_n (ws, p, n3, ws + n1); + mpn_kara_sqr_n (p, a, n3, ws + n1); + } + mpn_sqr_basecase (p + n1, a + n3, n2); + } + else + { + mpn_kara_sqr_n (ws, p, n3, ws + n1); + mpn_kara_sqr_n (p, a, n3, ws + n1); + mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1); + } + + if (sign) + mpn_add_n (ws, p, ws, n1); + else + mpn_sub_n (ws, p, ws, n1); + + nm1 = n - 1; + if (mpn_add_n (ws, p + n1, ws, nm1)) + { + mp_limb_t x = ws[nm1] + 1; + ws[nm1] = x; + if (x == 0) + ++ws[n]; + } + if (mpn_add_n (p + n3, p + n3, ws, n1)) + { + mp_limb_t x; + i = n1 + n3; + do + { + x = p[i] + 1; + p[i] = x; + ++i; + } while (x == 0); + } + } + else + { + /* Even length. */ + mp_limb_t t; + + i = n2; + do + { + --i; + w0 = a[i]; + w1 = a[n2+i]; + } + while (w0 == w1 && i != 0); + sign = 0; + if (w0 < w1) + { + x = a + n2; + y = a; + sign = 1; + } + else + { + x = a; + y = a + n2; + } + mpn_sub_n (p, x, y, n2); + + i = n2; + do + { + --i; + w0 = a[i]; + w1 = a[n2+i]; + } + while (w0 == w1 && i != 0); + if (w0 < w1) + { + x = a + n2; + y = a; + sign ^= 1; + } + else + { + x = a; + y = a + n2; + } + mpn_sub_n (p + n2, x, y, n2); + + /* Pointwise products. 
*/ + if (n2 < KARATSUBA_SQR_THRESHOLD) + { + mpn_sqr_basecase (ws, p, n2); + mpn_sqr_basecase (p, a, n2); + mpn_sqr_basecase (p + n, a + n2, n2); + } + else + { + mpn_kara_sqr_n (ws, p, n2, ws + n); + mpn_kara_sqr_n (p, a, n2, ws + n); + mpn_kara_sqr_n (p + n, a + n2, n2, ws + n); + } + + /* Interpolate. */ + if (sign) + w = mpn_add_n (ws, p, ws, n); + else + w = -mpn_sub_n (ws, p, ws, n); + w += mpn_add_n (ws, p + n, ws, n); + w += mpn_add_n (p + n2, p + n2, ws, n); + /* TO DO: could put "if (w) { ... }" here. + * Less work but badly predicted branch. + * No measurable difference in speed on Alpha. + */ + i = n + n2; + t = p[i] + w; + p[i] = t; + if (t < w) + { + do + { + ++i; + w = p[i] + 1; + p[i] = w; + } + while (w == 0); + } + } +} + +/*-- add2Times -------------------------------------------------------------*/ + +/* z[] = x[] + 2 * y[] + Note that z and x might point to the same vectors. */ +#ifdef USE_MORE_MPN +static inline mp_limb_t +#if __STDC__ +add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n) +#else +add2Times (z, x, y, n) + mp_ptr z; + mp_srcptr x; + mp_srcptr y; + mp_size_t n; +#endif +{ + mp_ptr t; + mp_limb_t c; + TMP_DECL (marker); + TMP_MARK (marker); + t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + c = mpn_lshift (t, y, n, 1); + c += mpn_add_n (z, x, t, n); + TMP_FREE (marker); + return c; +} +#else + +static mp_limb_t +#if __STDC__ +add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n) +#else +add2Times (z, x, y, n) + mp_ptr z; + mp_srcptr x; + mp_srcptr y; + mp_size_t n; +#endif +{ + mp_limb_t c, v, w; + + ASSERT (n > 0); + v = *x; w = *y; + c = w >> (BITS_PER_MP_LIMB - 1); + w <<= 1; + v += w; + c += v < w; + *z = v; + ++x; ++y; ++z; + while (--n) + { + v = *x; + w = *y; + v += c; + c = v < c; + c += w >> (BITS_PER_MP_LIMB - 1); + w <<= 1; + v += w; + c += v < w; + *z = v; + ++x; ++y; ++z; + } + + return c; +} +#endif + +/*-- evaluate3 -------------------------------------------------------------*/ + +/* Evaluates: + * ph 
/*-- evaluate3 -------------------------------------------------------------*/

/* Evaluates:
 *   ph := 4*A+2*B+C
 *   p1 := A+B+C
 *   p2 := A+2*B+4*C
 * where:
 *   ph[], p1[], p2[], A[] and B[] all have length len,
 *   C[] has length len2 with len-len2 = 0, 1 or 2.
 * Returns top words (overflow) at pth, pt1 and pt2 respectively.
 *
 * Two implementations follow: one built from mpn primitives (USE_MORE_MPN)
 * and one open-coded limb loop.
 */
#ifdef USE_MORE_MPN
static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
           mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len, mp_size_t len2)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
           A, B, C, len, len2)
     mp_ptr    ph;
     mp_ptr    p1;
     mp_ptr    p2;
     mp_ptr    pth;
     mp_ptr    pt1;
     mp_ptr    pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t len;
     mp_size_t len2;
#endif
{
  mp_limb_t c, d, e;

  ASSERT (len - len2 <= 2);

  /* p1 is used as scratch for 2*B until the last step; e is the bit
     shifted out of the top.  */
  e = mpn_lshift (p1, B, len, 1);

  /* ph := 4*A + 2*B + C, with c accumulating the overflow.  C is only len2
     limbs long, so its carry d is rippled through the remaining limbs.  */
  c = mpn_lshift (ph, A, len, 2);
  c += e + mpn_add_n (ph, ph, p1, len);
  d = mpn_add_n (ph, ph, C, len2);
  if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);
  ASSERT (c < 7);
  *pth = c;

  /* p2 := 4*C + 2*B + A.  When C is short, the bits shifted out of 4*C are
     stored at p2[len2] and the top limb is cleared first, so that p2 is a
     full len-limb operand for the additions below.  */
  c = mpn_lshift (p2, C, len2, 2);
#if 1
  if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; }
  c += e + mpn_add_n (p2, p2, p1, len);
#else
  d = mpn_add_n (p2, p2, p1, len2);
  c += d;
  if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);
  c += e;
#endif
  c += mpn_add_n (p2, p2, A, len);
  ASSERT (c < 7);
  *pt2 = c;

  /* p1 := A + B + C, overwriting the 2*B scratch now that it is no longer
     needed.  */
  c = mpn_add_n (p1, A, B, len);
  d = mpn_add_n (p1, p1, C, len2);
  if (len2 == len) c += d;
  else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);
  ASSERT (c < 3);
  *pt1 = c;

}

#else

static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
           mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
           A, B, C, l, ls)
     mp_ptr    ph;
     mp_ptr    p1;
     mp_ptr    p2;
     mp_ptr    pth;
     mp_ptr    pt1;
     mp_ptr    pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t l;
     mp_size_t ls;
#endif
{
  mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;

  ASSERT (l - ls <= 2);

  /* th, t1, t2 are the running carries of the three sums; each pass
     produces one limb of ph, p1 and p2.  */
  th = t1 = t2 = 0;
  for (i = 0; i < l; ++i)
    {
      a = *A;
      b = *B;
      c = i < ls ? *C : 0;      /* C is treated as zero beyond its ls limbs */

      /* TO DO: choose one of the following alternatives. */
      /* Both alternatives form vh = th + 4*a + 2*b + c, differing only in
         the order of the additions.  */
#if 0
      t = a << 2;
      vh = th + t;
      th = vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      vh += c;
      th += vh < c;
#else
      vh = th + c;
      th = vh < c;
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      t = a << 2;
      vh += t;
      th += vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
#endif

      /* v1 = t1 + a + b + c */
      v1 = t1 + a;
      t1 = v1 < a;
      v1 += b;
      t1 += v1 < b;
      v1 += c;
      t1 += v1 < c;

      /* v2 = t2 + a + 2*b + 4*c */
      v2 = t2 + a;
      t2 = v2 < a;
      t = b << 1;
      v2 += t;
      t2 += v2 < t;
      t2 += b >> (BITS_PER_MP_LIMB - 1);
      t = c << 2;
      v2 += t;
      t2 += v2 < t;
      t2 += c >> (BITS_PER_MP_LIMB - 2);

      *ph = vh;
      *p1 = v1;
      *p2 = v2;

      ++A; ++B; ++C;
      ++ph; ++p1; ++p2;
    }

  ASSERT (th < 7);
  ASSERT (t1 < 3);
  ASSERT (t2 < 7);

  /* Final carries are the overflow words handed back to the caller.  */
  *pth = th;
  *pt1 = t1;
  *pt2 = t2;
}
#endif
+ */ + +#ifdef USE_MORE_MPN +static void +#if __STDC__ +interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E, + mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len, mp_size_t len2) +#else +interpolate3 (A, B, C, D, E, + ptb, ptc, ptd, len, len2) + mp_srcptr A; + mp_ptr B; + mp_ptr C; + mp_ptr D; + mp_srcptr E; + mp_ptr ptb; + mp_ptr ptc; + mp_ptr ptd; + mp_size_t len; + mp_size_t len2; +#endif +{ + mp_ptr ws; + mp_limb_t t, tb,tc,td; + TMP_DECL (marker); + TMP_MARK (marker); + + ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4); + + /* Let x1, x2, x3 be the values to interpolate. We have: + * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e + * c = a + x1 + x2 + x3 + e + * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e + */ + + ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB); + + tb = *ptb; tc = *ptc; td = *ptd; + + + /* b := b - 16*a - e + * c := c - a - e + * d := d - a - 16*e + */ + + t = mpn_lshift (ws, A, len, 4); + tb -= t + mpn_sub_n (B, B, ws, len); + t = mpn_sub_n (B, B, E, len2); + if (len2 == len) tb -= t; + else tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t); + + tc -= mpn_sub_n (C, C, A, len); + t = mpn_sub_n (C, C, E, len2); + if (len2 == len) tc -= t; + else tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t); + + t = mpn_lshift (ws, E, len2, 4); + t += mpn_add_n (ws, ws, A, len2); +#if 1 + if (len2 != len) t = mpn_add_1 (ws+len2, A+len2, len-len2, t); + td -= t + mpn_sub_n (D, D, ws, len); +#else + t += mpn_sub_n (D, D, ws, len2); + if (len2 != len) { + t = mpn_sub_1 (D+len2, D+len2, len-len2, t); + t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2); + } /* end if/else */ + td -= t; +#endif + + + /* b, d := b + d, b - d */ + +#ifdef HAVE_MPN_ADD_SUB_N + /* #error TO DO ... 
*/ +#else + t = tb + td + mpn_add_n (ws, B, D, len); + td = tb - td - mpn_sub_n (D, B, D, len); + tb = t; + MPN_COPY (B, ws, len); +#endif + + /* b := b-8*c */ + t = 8 * tc + mpn_lshift (ws, C, len, 3); + tb -= t + mpn_sub_n (B, B, ws, len); + + /* c := 2*c - b */ + tc = 2 * tc + mpn_lshift (C, C, len, 1); + tc -= tb + mpn_sub_n (C, C, B, len); + + /* d := d/3 */ + td = (td - mpn_divexact_by3 (D, D, len)) * INVERSE_3; + + /* b, d := b + d, b - d */ +#ifdef HAVE_MPN_ADD_SUB_N + /* #error TO DO ... */ +#else + t = tb + td + mpn_add_n (ws, B, D, len); + td = tb - td - mpn_sub_n (D, B, D, len); + tb = t; + MPN_COPY (B, ws, len); +#endif + + /* Now: + * b = 4*x1 + * c = 2*x2 + * d = 4*x3 + */ + + ASSERT(!(*B & 3)); + mpn_rshift (B, B, len, 2); + B[len-1] |= tb<<(BITS_PER_MP_LIMB-2); + ASSERT((long)tb >= 0); + tb >>= 2; + + ASSERT(!(*C & 1)); + mpn_rshift (C, C, len, 1); + C[len-1] |= tc<<(BITS_PER_MP_LIMB-1); + ASSERT((long)tc >= 0); + tc >>= 1; + + ASSERT(!(*D & 3)); + mpn_rshift (D, D, len, 2); + D[len-1] |= td<<(BITS_PER_MP_LIMB-2); + ASSERT((long)td >= 0); + td >>= 2; + +#if WANT_ASSERT + ASSERT (tb < 2); + if (len == len2) + { + ASSERT (tc < 3); + ASSERT (td < 2); + } + else + { + ASSERT (tc < 2); + ASSERT (!td); + } +#endif + + *ptb = tb; + *ptc = tc; + *ptd = td; + + TMP_FREE (marker); +} + +#else + +static void +#if __STDC__ +interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E, + mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls) +#else +interpolate3 (A, B, C, D, E, + ptb, ptc, ptd, l, ls) + mp_srcptr A; + mp_ptr B; + mp_ptr C; + mp_ptr D; + mp_srcptr E; + mp_ptr ptb; + mp_ptr ptc; + mp_ptr ptd; + mp_size_t l; + mp_size_t ls; +#endif +{ + mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od; + const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1); + +#if WANT_ASSERT + t = l - ls; + ASSERT (t == 0 || t == 2 || t == 4); +#endif + + sb = sc = sd = 0; + for (i = 0; i < l; ++i) + { + mp_limb_t tb, tc, td, tt; + + a = *A; + b = 
*B; + c = *C; + d = *D; + e = i < ls ? *E : 0; + + /* Let x1, x2, x3 be the values to interpolate. We have: + * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e + * c = a + x1 + x2 + x3 + e + * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e + */ + + /* b := b - 16*a - e + * c := c - a - e + * d := d - a - 16*e + */ + t = a << 4; + tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t); + b -= t; + tb -= b < e; + b -= e; + tc = -(c < a); + c -= a; + tc -= c < e; + c -= e; + td = -(d < a); + d -= a; + t = e << 4; + td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t); + d -= t; + + /* b, d := b + d, b - d */ + t = b + d; + tt = tb + td + (t < b); + td = tb - td - (b < d); + d = b - d; + b = t; + tb = tt; + + /* b := b-8*c */ + t = c << 3; + tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t); + b -= t; + + /* c := 2*c - b */ + t = c << 1; + tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b); + c = t - b; + + /* d := d/3 */ + d *= INVERSE_3; + td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d); + td *= INVERSE_3; + + /* b, d := b + d, b - d */ + t = b + d; + tt = tb + td + (t < b); + td = tb - td - (b < d); + d = b - d; + b = t; + tb = tt; + + /* Now: + * b = 4*x1 + * c = 2*x2 + * d = 4*x3 + */ + + /* sb has period 2. */ + b += sb; + tb += b < sb; + sb &= maskOffHalf; + sb |= sb >> (BITS_PER_MP_LIMB >> 1); + sb += tb; + + /* sc has period 1. */ + c += sc; + tc += c < sc; + /* TO DO: choose one of the following alternatives. */ +#if 1 + sc = (mp_limb_t)((long)sc >> (BITS_PER_MP_LIMB - 1)); + sc += tc; +#else + sc = tc - ((long)sc < 0L); +#endif + + /* sd has period 2. */ + d += sd; + td += d < sd; + sd &= maskOffHalf; + sd |= sd >> (BITS_PER_MP_LIMB >> 1); + sd += td; + + if (i != 0) + { + B[-1] = ob | b << (BITS_PER_MP_LIMB - 2); + C[-1] = oc | c << (BITS_PER_MP_LIMB - 1); + D[-1] = od | d << (BITS_PER_MP_LIMB - 2); + } + ob = b >> 2; + oc = c >> 1; + od = d >> 2; + + ++A; ++B; ++C; ++D; ++E; + } + + /* Handle top words. 
*/ + b = *ptb; + c = *ptc; + d = *ptd; + + t = b + d; + d = b - d; + b = t; + b -= c << 3; + c = (c << 1) - b; + d *= INVERSE_3; + t = b + d; + d = b - d; + b = t; + + b += sb; + c += sc; + d += sd; + + B[-1] = ob | b << (BITS_PER_MP_LIMB - 2); + C[-1] = oc | c << (BITS_PER_MP_LIMB - 1); + D[-1] = od | d << (BITS_PER_MP_LIMB - 2); + + b >>= 2; + c >>= 1; + d >>= 2; + +#if WANT_ASSERT + ASSERT (b < 2); + if (l == ls) + { + ASSERT (c < 3); + ASSERT (d < 2); + } + else + { + ASSERT (c < 2); + ASSERT (!d); + } +#endif + + *ptb = b; + *ptc = c; + *ptd = d; +} +#endif + + +/*-- mpn_toom3_mul_n --------------------------------------------------------------*/ + +/* Multiplies using 5 mults of one third size and so on recursively. + * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1]. + * No overlap of p[...] with a[...] or b[...]. + * ws is workspace. + */ + +/* TO DO: If TOOM3_MUL_THRESHOLD is much bigger than KARATSUBA_MUL_THRESHOLD then the + * recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n() + * because the "n < KARATSUBA_MUL_THRESHOLD" test here will always be false. + */ + +#define TOOM3_MUL_REC(p, a, b, n, ws) \ + do { \ + if (n < KARATSUBA_MUL_THRESHOLD) \ + mpn_mul_basecase (p, a, n, b, n); \ + else if (n < TOOM3_MUL_THRESHOLD) \ + mpn_kara_mul_n (p, a, b, n, ws); \ + else \ + mpn_toom3_mul_n (p, a, b, n, ws); \ + } while (0) + +void +#if __STDC__ +mpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws) +#else +mpn_toom3_mul_n (p, a, b, n, ws) + mp_ptr p; + mp_srcptr a; + mp_srcptr b; + mp_size_t n; + mp_ptr ws; +#endif +{ + mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD; + mp_limb_t *A,*B,*C,*D,*E, *W; + mp_size_t l,l2,l3,l4,l5,ls; + + /* Break n words into chunks of size l, l and ls. 
+ * n = 3*k => l = k, ls = k + * n = 3*k+1 => l = k+1, ls = k-1 + * n = 3*k+2 => l = k+1, ls = k + */ + { + mp_limb_t m; + + ASSERT (n >= TOOM3_MUL_THRESHOLD); + l = ls = n / 3; + m = n - l * 3; + if (m != 0) + ++l; + if (m == 1) + --ls; + + l2 = l * 2; + l3 = l * 3; + l4 = l * 4; + l5 = l * 5; + A = p; + B = ws; + C = p + l2; + D = ws + l2; + E = p + l4; + W = ws + l4; + } + + /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/ + evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls); + evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls); + + /** Second stage: pointwise multiplies. **/ + TOOM3_MUL_REC(D, C, C + l, l, W); + tD = cD*dD; + if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD); + if (dD) tD += mpn_addmul_1 (D + l, C, l, dD); + ASSERT (tD < 49); + TOOM3_MUL_REC(C, B, B + l, l, W); + tC = cC*dC; + /* TO DO: choose one of the following alternatives. */ +#if 0 + if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC); + if (dC) tC += mpn_addmul_1 (C + l, B, l, dC); +#else + if (cC) + { + if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l); + else tC += add2Times (C + l, C + l, B + l, l); + } + if (dC) + { + if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l); + else tC += add2Times (C + l, C + l, B, l); + } +#endif + ASSERT (tC < 9); + TOOM3_MUL_REC(B, A, A + l, l, W); + tB = cB*dB; + if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB); + if (dB) tB += mpn_addmul_1 (B + l, A, l, dB); + ASSERT (tB < 49); + TOOM3_MUL_REC(A, a, b, l, W); + TOOM3_MUL_REC(E, a + l2, b + l2, ls, W); + + /** Third stage: interpolation. **/ + interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1); + + /** Final stage: add up the coefficients. 
**/ + { + mp_limb_t i, x, y; + tB += mpn_add_n (p + l, p + l, B, l2); + tD += mpn_add_n (p + l3, p + l3, D, l2); + mpn_incr_u (p + l3, tB); + mpn_incr_u (p + l4, tC); + mpn_incr_u (p + l5, tD); + } +} + +/*-- mpn_toom3_sqr_n --------------------------------------------------------------*/ + +/* Like previous function but for squaring */ + +#define TOOM3_SQR_REC(p, a, n, ws) \ + do { \ + if (n < KARATSUBA_SQR_THRESHOLD) \ + mpn_sqr_basecase (p, a, n); \ + else if (n < TOOM3_SQR_THRESHOLD) \ + mpn_kara_sqr_n (p, a, n, ws); \ + else \ + mpn_toom3_sqr_n (p, a, n, ws); \ + } while (0) + +void +#if __STDC__ +mpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws) +#else +mpn_toom3_sqr_n (p, a, n, ws) + mp_ptr p; + mp_srcptr a; + mp_size_t n; + mp_ptr ws; +#endif +{ + mp_limb_t cB,cC,cD, tB,tC,tD; + mp_limb_t *A,*B,*C,*D,*E, *W; + mp_size_t l,l2,l3,l4,l5,ls; + + /* Break n words into chunks of size l, l and ls. + * n = 3*k => l = k, ls = k + * n = 3*k+1 => l = k+1, ls = k-1 + * n = 3*k+2 => l = k+1, ls = k + */ + { + mp_limb_t m; + + ASSERT (n >= TOOM3_MUL_THRESHOLD); + l = ls = n / 3; + m = n - l * 3; + if (m != 0) + ++l; + if (m == 1) + --ls; + + l2 = l * 2; + l3 = l * 3; + l4 = l * 4; + l5 = l * 5; + A = p; + B = ws; + C = p + l2; + D = ws + l2; + E = p + l4; + W = ws + l4; + } + + /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/ + evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls); + + /** Second stage: pointwise multiplies. **/ + TOOM3_SQR_REC(D, C, l, W); + tD = cD*cD; + if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD); + ASSERT (tD < 49); + TOOM3_SQR_REC(C, B, l, W); + tC = cC*cC; + /* TO DO: choose one of the following alternatives. 
*/ +#if 0 + if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC); +#else + if (cC >= 1) + { + tC += add2Times (C + l, C + l, B, l); + if (cC == 2) + tC += add2Times (C + l, C + l, B, l); + } +#endif + ASSERT (tC < 9); + TOOM3_SQR_REC(B, A, l, W); + tB = cB*cB; + if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB); + ASSERT (tB < 49); + TOOM3_SQR_REC(A, a, l, W); + TOOM3_SQR_REC(E, a + l2, ls, W); + + /** Third stage: interpolation. **/ + interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1); + + /** Final stage: add up the coefficients. **/ + { + mp_limb_t i, x, y; + tB += mpn_add_n (p + l, p + l, B, l2); + tD += mpn_add_n (p + l3, p + l3, D, l2); + mpn_incr_u (p + l3, tB); + mpn_incr_u (p + l4, tC); + mpn_incr_u (p + l5, tD); + } +} + +void +#if __STDC__ +mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n) +#else +mpn_mul_n (p, a, b, n) + mp_ptr p; + mp_srcptr a; + mp_srcptr b; + mp_size_t n; +#endif +{ + if (n < KARATSUBA_MUL_THRESHOLD) + mpn_mul_basecase (p, a, n, b, n); + else if (n < TOOM3_MUL_THRESHOLD) + { + /* Allocate workspace of fixed size on stack: fast! */ +#if TUNE_PROGRAM_BUILD + mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD_LIMIT-1) + 2 * BITS_PER_MP_LIMB]; +#else + mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD-1) + 2 * BITS_PER_MP_LIMB]; +#endif + mpn_kara_mul_n (p, a, b, n, ws); + } +#if WANT_FFT || TUNE_PROGRAM_BUILD + else if (n < FFT_MUL_THRESHOLD) +#else + else +#endif + { + /* Use workspace of unknown size in heap, as stack space may + * be limited. Since n is at least TOOM3_MUL_THRESHOLD, the + * multiplication will take much longer than malloc()/free(). 
*/ + mp_limb_t wsLen, *ws; + wsLen = 2 * n + 3 * BITS_PER_MP_LIMB; + ws = (mp_ptr) (*_mp_allocate_func) ((size_t) wsLen * sizeof (mp_limb_t)); + mpn_toom3_mul_n (p, a, b, n, ws); + (*_mp_free_func) (ws, (size_t) wsLen * sizeof (mp_limb_t)); + } +#if WANT_FFT || TUNE_PROGRAM_BUILD + else + { + mpn_mul_fft_full (p, a, n, b, n); + } +#endif +} diff --git a/rts/gmp/mpn/generic/perfsqr.c b/rts/gmp/mpn/generic/perfsqr.c new file mode 100644 index 0000000000..42ee3405d7 --- /dev/null +++ b/rts/gmp/mpn/generic/perfsqr.c @@ -0,0 +1,123 @@ +/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, + zero otherwise. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue + modulo 0x100. 
*/ +static unsigned char const sq_res_0x100[0x100] = +{ + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +}; + +int +#if __STDC__ +mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) +#else +mpn_perfect_square_p (up, usize) + mp_srcptr up; + mp_size_t usize; +#endif +{ + mp_limb_t rem; + mp_ptr root_ptr; + int res; + TMP_DECL (marker); + + /* The first test excludes 55/64 (85.9%) of the perfect square candidates + in O(1) time. */ + if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0) + return 0; + +#if defined (PP) + /* The second test excludes 30652543/30808063 (99.5%) of the remaining + perfect square candidates in O(n) time. */ + + /* Firstly, compute REM = A mod PP. */ + if (UDIV_TIME > (2 * UMUL_TIME + 6)) + rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED); + else + rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); + + /* Now decide if REM is a quadratic residue modulo the factors in PP. */ + + /* If A is just a few limbs, computing the square root does not take long + time, so things might run faster if we limit this loop according to the + size of A. 
*/ + +#if BITS_PER_MP_LIMB == 64 + if (((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0) + return 0; + if (((CNST_LIMB(0x4351B2753DF) >> rem % 47) & 1) == 0) + return 0; + if (((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0) + return 0; + if (((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0) + return 0; + if (((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0) + return 0; + if (((CNST_LIMB(0x121D47B7) >> rem % 31) & 1) == 0) + return 0; +#endif + if (((0x13D122F3L >> rem % 29) & 1) == 0) + return 0; + if (((0x5335FL >> rem % 23) & 1) == 0) + return 0; + if (((0x30AF3L >> rem % 19) & 1) == 0) + return 0; + if (((0x1A317L >> rem % 17) & 1) == 0) + return 0; + if (((0x161BL >> rem % 13) & 1) == 0) + return 0; + if (((0x23BL >> rem % 11) & 1) == 0) + return 0; + if (((0x017L >> rem % 7) & 1) == 0) + return 0; + if (((0x13L >> rem % 5) & 1) == 0) + return 0; + if (((0x3L >> rem % 3) & 1) == 0) + return 0; +#endif + + TMP_MARK (marker); + + /* For the third and last test, we finally compute the square root, + to make sure we've really got a perfect square. */ + root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB); + + /* Iff mpn_sqrtrem returns zero, the square is perfect. */ + res = ! mpn_sqrtrem (root_ptr, NULL, up, usize); + TMP_FREE (marker); + return res; +} diff --git a/rts/gmp/mpn/generic/popcount.c b/rts/gmp/mpn/generic/popcount.c new file mode 100644 index 0000000000..387be9536d --- /dev/null +++ b/rts/gmp/mpn/generic/popcount.c @@ -0,0 +1,93 @@ +/* popcount.c + +Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined __GNUC__ +/* No processor claiming to be SPARC v9 compliant seem to + implement the POPC instruction. Disable pattern for now. */ +#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 +#define popc_limb(a) \ + ({ \ + DItype __res; \ + asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \ + __res; \ + }) +#endif +#endif + +#ifndef popc_limb + +/* Cool population count of a mp_limb_t. + You have to figure out how this works, I won't tell you! */ + +static inline unsigned int +#if __STDC__ +popc_limb (mp_limb_t x) +#else +popc_limb (x) + mp_limb_t x; +#endif +{ +#if BITS_PER_MP_LIMB == 64 + /* We have to go into some trouble to define these constants. + (For mp_limb_t being `long long'.) 
*/ + mp_limb_t cnst; + cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2); + x -= (x & cnst) >> 1; + cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2); + x = ((x & ~cnst) >> 2) + (x & cnst); + cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2); + x = ((x >> 4) + x) & cnst; + x = ((x >> 8) + x); + x = ((x >> 16) + x); + x = ((x >> 32) + x) & 0xff; +#endif +#if BITS_PER_MP_LIMB == 32 + x -= (x & 0xaaaaaaaa) >> 1; + x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L); + x = ((x >> 4) + x) & 0x0f0f0f0fL; + x = ((x >> 8) + x); + x = ((x >> 16) + x) & 0xff; +#endif + return x; +} +#endif + +unsigned long int +#if __STDC__ +mpn_popcount (register mp_srcptr p, register mp_size_t size) +#else +mpn_popcount (p, size) + register mp_srcptr p; + register mp_size_t size; +#endif +{ + unsigned long int popcnt; + mp_size_t i; + + popcnt = 0; + for (i = 0; i < size; i++) + popcnt += popc_limb (p[i]); + + return popcnt; +} diff --git a/rts/gmp/mpn/generic/pre_mod_1.c b/rts/gmp/mpn/generic/pre_mod_1.c new file mode 100644 index 0000000000..27179683b3 --- /dev/null +++ b/rts/gmp/mpn/generic/pre_mod_1.c @@ -0,0 +1,69 @@ +/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, + divisor_limb_inverted) -- + Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB. + DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB + + - 2^BITS_PER_MP_LIMB. + Return the single-limb remainder. + +Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* Return the remainder of {DIVIDEND_PTR, DIVIDEND_SIZE} divided by + DIVISOR_LIMB, passing the precomputed DIVISOR_LIMB_INVERTED to + udiv_qrnnd_preinv for every limb. An empty dividend yields 0. */ +mp_limb_t +#if __STDC__ +mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, + mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted) +#else +mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted) + mp_srcptr dividend_ptr; + mp_size_t dividend_size; + mp_limb_t divisor_limb; + mp_limb_t divisor_limb_inverted; +#endif +{ + mp_size_t i; + mp_limb_t n0, r; + /* Receives the quotient limb, which is discarded. It is limb-sized so + that the value stored into it is never narrowed. */ + mp_limb_t dummy; + + /* Nothing to divide; also avoids reading dividend_ptr[-1] below. */ + if (dividend_size <= 0) + return 0; + + i = dividend_size - 1; + r = dividend_ptr[i]; + + /* A top limb already smaller than the divisor is the initial remainder + and is skipped; otherwise start from a zero remainder and run the top + limb through the division loop as well. */ + if (r >= divisor_limb) + r = 0; + else + i--; + + /* Work down the limbs, dividing (r, n0) by the divisor each time and + keeping only the new remainder. */ + for (; i >= 0; i--) + { + n0 = dividend_ptr[i]; + udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted); + } + return r; +} diff --git a/rts/gmp/mpn/generic/random.c b/rts/gmp/mpn/generic/random.c new file mode 100644 index 0000000000..dea4e20e56 --- /dev/null +++ b/rts/gmp/mpn/generic/random.c @@ -0,0 +1,43 @@ +/* mpn_random -- Generate random numbers. + +Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "urandom.h" + +/* Fill the SIZE limbs at RES_PTR with values from urandom (), redrawing + the most significant limb until it is non-zero. Does nothing when SIZE + is not positive. */ +void +#if __STDC__ +mpn_random (mp_ptr res_ptr, mp_size_t size) +#else +mpn_random (res_ptr, size) + mp_ptr res_ptr; + mp_size_t size; +#endif +{ + mp_size_t i; + + /* With no limbs to fill there is no most significant limb either; the + fix-up loop below would otherwise access res_ptr[-1]. */ + if (size <= 0) + return; + + for (i = 0; i < size; i++) + res_ptr[i] = urandom (); + + /* Make sure the most significant limb is non-zero. */ + while (res_ptr[size - 1] == 0) + res_ptr[size - 1] = urandom (); +} diff --git a/rts/gmp/mpn/generic/random2.c b/rts/gmp/mpn/generic/random2.c new file mode 100644 index 0000000000..86682f81fa --- /dev/null +++ b/rts/gmp/mpn/generic/random2.c @@ -0,0 +1,105 @@ +/* mpn_random2 -- Generate random numbers with relatively long strings + of ones and zeroes. Suitable for border testing. + +Copyright (C) 1992, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#if defined (__hpux) || defined (__alpha) || defined (__svr4__) || defined (__SVR4) +/* HPUX lacks random(). DEC OSF/1 1.2 random() returns a double. */ +long mrand48 (); +static inline long +random () +{ + return mrand48 (); +} +#elif defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__)) +/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff */ +static inline unsigned long +random () +{ + return rand () ^ (rand () << 16) ^ (rand() << 32); +} + +#else +long random (); +#endif + +/* It's a bit tricky to get this right, so please test the code well + if you hack with it. Some early versions of the function produced + random numbers with the leading limb == 0, and some versions never + made the most significant bit set. */ + +void +#if __STDC__ +mpn_random2 (mp_ptr res_ptr, mp_size_t size) +#else +mpn_random2 (res_ptr, size) + mp_ptr res_ptr; + mp_size_t size; +#endif +{ + int n_bits; + int bit_pos; + mp_size_t limb_pos; + unsigned int ran; + mp_limb_t limb; + + limb = 0; + + /* Start off in a random bit position in the most significant limb. */ + bit_pos = random () & (BITS_PER_MP_LIMB - 1); + + /* Least significant bit of RAN chooses string of ones/string of zeroes. + Make most significant limb be non-zero by setting bit 0 of RAN. */ + ran = random () | 1; + + for (limb_pos = size - 1; limb_pos >= 0; ) + { + n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1; + if ((ran & 1) != 0) + { + /* Generate a string of ones. 
*/ + if (n_bits >= bit_pos) + { + res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1); + bit_pos += BITS_PER_MP_LIMB; + limb = (~(mp_limb_t) 0) << (bit_pos - n_bits); + } + else + { + limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1); + } + } + else + { + /* Generate a string of zeroes. */ + if (n_bits >= bit_pos) + { + res_ptr[limb_pos--] = limb; + limb = 0; + bit_pos += BITS_PER_MP_LIMB; + } + } + bit_pos -= n_bits; + ran = random (); + } +} diff --git a/rts/gmp/mpn/generic/rshift.c b/rts/gmp/mpn/generic/rshift.c new file mode 100644 index 0000000000..59caf73529 --- /dev/null +++ b/rts/gmp/mpn/generic/rshift.c @@ -0,0 +1,88 @@ +/* mpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + and store the USIZE least significant limbs of the result at WP. + The bits shifted out to the right are returned. + + Argument constraints: + 1. 0 < CNT < BITS_PER_MP_LIMB + 2. If the result is to be written over the input, WP must be <= UP. 
+*/ + +mp_limb_t +#if __STDC__ +mpn_rshift (register mp_ptr wp, + register mp_srcptr up, mp_size_t usize, + register unsigned int cnt) +#else +mpn_rshift (wp, up, usize, cnt) + register mp_ptr wp; + register mp_srcptr up; + mp_size_t usize; + register unsigned int cnt; +#endif +{ + register mp_limb_t high_limb, low_limb; /* the two source limbs feeding the current result limb */ + register unsigned sh_1, sh_2; /* sh_1 is the right shift, sh_2 the complementary left shift */ + register mp_size_t i; + mp_limb_t retval; + +#ifdef DEBUG + if (usize == 0 || cnt == 0) + abort (); +#endif + + sh_1 = cnt; + +#if 0 + if (sh_1 == 0) + { + if (wp != up) + { + /* Copy from low end to high end, to allow specified input/output + overlapping. */ + for (i = 0; i < usize; i++) + wp[i] = up[i]; + } + return usize; + } +#endif + + wp -= 1; /* bias WP so that wp[i] below addresses result limb i - 1 */ + sh_2 = BITS_PER_MP_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; /* the CNT bits shifted out of the lowest limb, left-aligned */ + low_limb = high_limb; + + for (i = 1; i < usize; i++) /* low to high, which is what permits WP <= UP overlap */ + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); /* upper bits of limb i - 1 joined with the low CNT bits of limb i */ + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; /* top result limb, zero-filled from above */ + + return retval; +} diff --git a/rts/gmp/mpn/generic/sb_divrem_mn.c b/rts/gmp/mpn/generic/sb_divrem_mn.c new file mode 100644 index 0000000000..a269e34f5f --- /dev/null +++ b/rts/gmp/mpn/generic/sb_divrem_mn.c @@ -0,0 +1,201 @@ +/* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and + quotient. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE + INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. + IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A + FUTURE GNU MP RELEASE. + + +Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + the NSIZE-DSIZE least significant quotient limbs at QP + and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction bits and append them after the + other quotient limbs. + Return the most significant limb of the quotient, this is always 0 or 1. + NOTE(review): the function below takes no QEXTRA_LIMBS argument, so the + references to it here look like leftovers from another interface. + + Preconditions: + 0. NSIZE >= DSIZE. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + DSIZE >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM.) + 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. + 4. DSIZE > 2 (the function starts with ASSERT_ALWAYS (dsize > 2)). */ + + +#define PREINVERT_VIABLE \ + (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! 
TARGET_REGISTER_STARVED */) + +mp_limb_t +#if __STDC__ +mpn_sb_divrem_mn (mp_ptr qp, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +#else +mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) + mp_ptr qp; + mp_ptr np; + mp_size_t nsize; + mp_srcptr dp; + mp_size_t dsize; +#endif +{ + mp_limb_t most_significant_q_limb = 0; + mp_size_t i; + mp_limb_t dx, d1, n0; /* dx, d1: the two most significant divisor limbs; n0: top numerator limb */ + mp_limb_t dxinv; /* inverse of dx, set only when have_preinv is set */ + int have_preinv; /* assigned and read only under PREINVERT_VIABLE */ + + ASSERT_ALWAYS (dsize > 2); + + np += nsize - dsize; /* NP now addresses the DSIZE most significant numerator limbs */ + dx = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dx) /* the top DSIZE numerator limbs may be >= the divisor */ + { + if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0) + { + mpn_sub_n (np, np, dp, dsize); /* subtract the divisor once ... */ + most_significant_q_limb = 1; /* ... and report it as the top quotient limb */ + } + } + + /* If multiplication is much faster than division, preinvert the + most significant divisor limb before entering the loop. */ + if (PREINVERT_VIABLE) + { + have_preinv = 0; + if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME) + { + invert_limb (dxinv, dx); + have_preinv = 1; + } + } + + for (i = nsize - dsize - 1; i >= 0; i--) /* one quotient limb per iteration, most significant first */ + { + mp_limb_t q; + mp_limb_t nx; + mp_limb_t cy_limb; + + nx = np[dsize - 1]; + np--; /* slide the DSIZE-limb window one limb down the numerator */ + + if (nx == dx) + { + /* This might over-estimate q, but it's probably not worth + the extra code here to find out. */ + q = ~(mp_limb_t) 0; + +#if 1 + cy_limb = mpn_submul_1 (np, dp, dsize, q); +#else + /* This should be faster on many machines. + NOTE(review): no variable named cy is declared in this function, + so this disabled branch presumably would not compile as written. */ + cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize); + cy = mpn_add_n (np, np, dp, dsize); + np[dsize] += cy; +#endif + + if (nx != cy_limb) /* the estimate was one too large: add the divisor back */ + { + mpn_add_n (np, np, dp, dsize); + q--; + } + + qp[i] = q; + } + else + { + mp_limb_t rx, r1, r0, p1, p0; + + /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register + usage when np[dsize-1] is used in an asm statement like + umul_ppmm in udiv_qrnnd_preinv. The symptom is seg faults due + to registers being clobbered. gcc 2.95 i386 doesn't have the + problem. 
*/ + { + mp_limb_t workaround = np[dsize - 1]; + if (PREINVERT_VIABLE && have_preinv) + udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv); + else + udiv_qrnnd (q, r1, nx, workaround, dx); + } + umul_ppmm (p1, p0, d1, q); /* (p1, p0) = d1 * q, the product with the second divisor limb */ + + r0 = np[dsize - 2]; + rx = 0; + if (r1 < p1 || (r1 == p1 && r0 < p0)) /* (r1, r0) < (p1, p0): lower the estimate by one */ + { + p1 -= p0 < d1; + p0 -= d1; + q--; + r1 += dx; + rx = r1 < dx; + } + + p1 += r0 < p0; /* cannot carry! */ + rx -= r1 < p1; /* may become 11..1 if q is still too large */ + r1 -= p1; + r0 -= p0; + + cy_limb = mpn_submul_1 (np, dp, dsize - 2, q); /* subtract q times the low DSIZE - 2 divisor limbs */ + + { + mp_limb_t cy1, cy2; + cy1 = r0 < cy_limb; + r0 -= cy_limb; + cy2 = r1 < cy1; + r1 -= cy1; + np[dsize - 1] = r1; + np[dsize - 2] = r0; + if (cy2 != rx) /* final borrow disagrees with rx: q was one too large, add the divisor back */ + { + mpn_add_n (np, np, dp, dsize); + q--; + } + } + qp[i] = q; + } + } + + /* ______ ______ ______ + |__rx__|__r1__|__r0__| partial remainder + ______ ______ + - |__p1__|__p0__| partial product to subtract + ______ ______ + - |______|cylimb| + + rx is -1, 0 or 1. If rx=1, then q is correct (it should match + carry out). If rx=-1 then q is too large. If rx=0, then q might + be too large, but it is most likely correct. + */ + + return most_significant_q_limb; +} diff --git a/rts/gmp/mpn/generic/scan0.c b/rts/gmp/mpn/generic/scan0.c new file mode 100644 index 0000000000..96f05ce854 --- /dev/null +++ b/rts/gmp/mpn/generic/scan0.c @@ -0,0 +1,62 @@ +/* mpn_scan0 -- Scan from a given bit position for the next clear bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner or later have a limb with a clear bit. + The scan below has no upper bound, so without such a limb it + keeps reading past the end of U. + */ + +unsigned long int +#if __STDC__ +mpn_scan0 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan0 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ /* Return the index of the first clear bit of U at or after STARTING_BIT. */ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = ~*p++; /* complemented, so the clear bits of U are the set bits searched for */ + + /* Mask off any bits before STARTING_BIT in the first limb. */ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); + + while (alimb == 0) + alimb = ~*p++; + + count_leading_zeros (cnt, alimb & -alimb); /* alimb & -alimb keeps only the lowest set bit */ + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; /* P is one limb past the limb holding the bit */ +} diff --git a/rts/gmp/mpn/generic/scan1.c b/rts/gmp/mpn/generic/scan1.c new file mode 100644 index 0000000000..98e2e0dcc0 --- /dev/null +++ b/rts/gmp/mpn/generic/scan1.c @@ -0,0 +1,62 @@ +/* mpn_scan1 -- Scan from a given bit position for the next set bit. + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Design issues: + 1. What if starting_bit is not within U? Caller's problem? + 2. Bit index should be 'unsigned'? + + Argument constraints: + 1. U must sooner or later have a limb != 0. + The scan below has no upper bound, so without such a limb it + keeps reading past the end of U. + */ + +unsigned long int +#if __STDC__ +mpn_scan1 (register mp_srcptr up, + register unsigned long int starting_bit) +#else +mpn_scan1 (up, starting_bit) + register mp_srcptr up; + register unsigned long int starting_bit; +#endif +{ /* Return the index of the first set bit of U at or after STARTING_BIT. */ + mp_size_t starting_word; + mp_limb_t alimb; + int cnt; + mp_srcptr p; + + /* Start at the word implied by STARTING_BIT. */ + starting_word = starting_bit / BITS_PER_MP_LIMB; + p = up + starting_word; + alimb = *p++; + + /* Mask off any bits before STARTING_BIT in the first limb. */ + alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB); + + while (alimb == 0) + alimb = *p++; + + count_leading_zeros (cnt, alimb & -alimb); /* alimb & -alimb keeps only the lowest set bit */ + return (p - up) * BITS_PER_MP_LIMB - 1 - cnt; /* P is one limb past the limb holding the bit */ +} diff --git a/rts/gmp/mpn/generic/set_str.c b/rts/gmp/mpn/generic/set_str.c new file mode 100644 index 0000000000..e6ccc92154 --- /dev/null +++ b/rts/gmp/mpn/generic/set_str.c @@ -0,0 +1,159 @@ +/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base) + -- Convert a STR_LEN long base BASE byte string pointed to by STR to a + limb vector pointed to by RES_PTR. Return the number of limbs in + RES_PTR. + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation, +Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_size_t +#if __STDC__ +mpn_set_str (mp_ptr xp, const unsigned char *str, size_t str_len, int base) +#else +mpn_set_str (xp, str, str_len, base) + mp_ptr xp; + const unsigned char *str; + size_t str_len; + int base; +#endif +{ /* Convert the STR_LEN digits at STR (most significant first, used as raw digit values with no character decoding) from base BASE into limbs at XP, least significant limb first, and return the number of limbs written. */ + mp_size_t size; + mp_limb_t big_base; + int indigits_per_limb; + mp_limb_t res_digit; + + big_base = __mp_bases[base].big_base; + indigits_per_limb = __mp_bases[base].chars_per_limb; + +/* size = str_len / indigits_per_limb + 1; */ + + size = 0; + + if ((base & (base - 1)) == 0) + { + /* The base is a power of 2. Read the input string from + least to most significant character/digit. 
*/ + + const unsigned char *s; + int next_bitpos; /* bit position in res_digit where the next digit goes */ + int bits_per_indigit = big_base; /* in this branch big_base is used as the number of bits per digit */ + + res_digit = 0; + next_bitpos = 0; + + for (s = str + str_len - 1; s >= str; s--) + { + int inp_digit = *s; + + res_digit |= (mp_limb_t) inp_digit << next_bitpos; + next_bitpos += bits_per_indigit; + if (next_bitpos >= BITS_PER_MP_LIMB) /* the limb is full: store it */ + { + xp[size++] = res_digit; + next_bitpos -= BITS_PER_MP_LIMB; + res_digit = inp_digit >> (bits_per_indigit - next_bitpos); /* bits of this digit that did not fit in the stored limb */ + } + } + + if (res_digit != 0) /* a zero top limb is not stored */ + xp[size++] = res_digit; + } + else + { + /* General case. The base is not a power of 2. */ + + size_t i; + int j; + mp_limb_t cy_limb; + + for (i = indigits_per_limb; i < str_len; i += indigits_per_limb) /* full groups of indigits_per_limb digits, while more digits remain after the group */ + { + res_digit = *str++; + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * 10 + *str++; + } + else + { + for (j = 1; j < indigits_per_limb; j++) + res_digit = res_digit * base + *str++; + } + + if (size == 0) + { + if (res_digit != 0) /* leading zero groups leave the result empty */ + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); /* result = result * big_base + group value */ + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + big_base = base; /* from here big_base is rebuilt as BASE raised to the number of remaining digits */ + res_digit = *str++; /* NOTE(review): read unconditionally, so STR_LEN is presumably required to be at least 1 */ + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. 
*/ + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * 10 + *str++; + big_base *= 10; + } + } + else + { + for (j = 1; j < str_len - (i - indigits_per_limb); j++) + { + res_digit = res_digit * base + *str++; + big_base *= base; + } + } + + if (size == 0) + { + if (res_digit != 0) + { + xp[0] = res_digit; + size = 1; + } + } + else + { + cy_limb = mpn_mul_1 (xp, xp, size, big_base); /* fold in the final, possibly shorter, group */ + cy_limb += mpn_add_1 (xp, xp, size, res_digit); + if (cy_limb != 0) + xp[size++] = cy_limb; + } + } + + return size; +} diff --git a/rts/gmp/mpn/generic/sqr_basecase.c b/rts/gmp/mpn/generic/sqr_basecase.c new file mode 100644 index 0000000000..760258a3e0 --- /dev/null +++ b/rts/gmp/mpn/generic/sqr_basecase.c @@ -0,0 +1,83 @@ +/* mpn_sqr_basecase -- Internal routine to square two natural numbers + of length m and n. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. + + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n) +#else +mpn_sqr_basecase (prodp, up, n) + mp_ptr prodp; + mp_srcptr up; + mp_size_t n; +#endif +{ /* Square the single N-limb number at UP and store the 2*N-limb result at PRODP. NOTE(review): up[0] is read unconditionally, so N is presumably required to be at least 1. */ + mp_size_t i; + + { + /* N.B.! We need the superfluous indirection through argh to work around + a reloader bug in GCC 2.7.*. */ + mp_limb_t x; + mp_limb_t argh; + x = up[0]; + umul_ppmm (argh, prodp[0], x, x); /* lowest diagonal term up[0]^2 into prodp[1], prodp[0] */ + prodp[1] = argh; + } + if (n > 1) + { + mp_limb_t tarr[2 * KARATSUBA_SQR_THRESHOLD]; + mp_ptr tp = tarr; /* collects the cross products up[i] * up[j], i < j; tp[k] lines up with prodp[k + 1] */ + mp_limb_t cy; + + /* must fit 2*n limbs in tarr */ + ASSERT (n <= KARATSUBA_SQR_THRESHOLD); + + cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]); /* first row: up[0] times up[1 .. n-1] */ + tp[n - 1] = cy; + for (i = 2; i < n; i++) /* remaining rows: up[i-1] times up[i .. n-1] */ + { + mp_limb_t cy; + cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); + tp[n + i - 2] = cy; + } + for (i = 1; i < n; i++) /* remaining diagonal terms up[i]^2 go straight into PRODP */ + { + mp_limb_t x; + x = up[i]; + umul_ppmm (prodp[2 * i + 1], prodp[2 * i], x, x); + } + { + mp_limb_t cy; + cy = mpn_lshift (tp, tp, 2 * n - 2, 1); /* double the cross products ... */ + cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2); /* ... and add them to the diagonal terms, one limb up */ + prodp[2 * n - 1] += cy; + } + } +} diff --git a/rts/gmp/mpn/generic/sqrtrem.c b/rts/gmp/mpn/generic/sqrtrem.c new file mode 100644 index 0000000000..ee3b5144dd --- /dev/null +++ b/rts/gmp/mpn/generic/sqrtrem.c @@ -0,0 +1,509 @@ +/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + + Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR. + Write the remainder at REM_PTR, if REM_PTR != NULL. + Return the size of the remainder. + (The size of the root is always half of the size of the operand.) + + OP_PTR and ROOT_PTR may not point to the same object. + OP_PTR and REM_PTR may point to the same object. + + If REM_PTR is NULL, only the root is computed and the return value of + the function is 0 if OP is a perfect square, and *any* non-zero number + otherwise. + +Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* This code is just correct if "unsigned char" has at least 8 bits. It + doesn't help to use CHAR_BIT from limits.h, as the real problem is + the static arrays. */ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Square root algorithm: + + 1. Shift OP (the input) to the left an even number of bits s.t. there + are an even number of words and either (or both) of the most + significant bits are set. This way, sqrt(OP) has exactly half as + many words as OP, and has its most significant bit set. + + 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables. + This approximation is used for the first single-precision + iterations of Newton's method, yielding a full-word approximation + to sqrt(OP). + + 3. Perform multiple-precision Newton iteration until we have the + exact result. Only about half of the input operand is used in + this calculation, as the square root is perfectly determinable + from just the higher half of a number. */ + +/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */ +#if defined __GNUC__ && ! 
defined __SOFT_FLOAT + +#if defined (__sparc__) && BITS_PER_MP_LIMB == 32 +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined (__HAVE_68881__) +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined (__hppa) && BITS_PER_MP_LIMB == 32 +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \ + __sqrt_res; \ + }) +#endif + +#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32 +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \ + __sqrt_res; \ + }) +#endif + +#if 0 +#if defined (__i386__) || defined (__i486__) +#define SQRT(a) \ + ({ \ + double __sqrt_res; \ + asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a)); \ + __sqrt_res; \ + }) +#endif +#endif + +#endif + +#ifndef SQRT + +/* Tables for initial approximation of the square root. These are + indexed with bits 1-8 of the operand for which the square root is + calculated, where bit 0 is the most significant non-zero bit. I.e. + the most significant one-bit is not used, since that per definition + is one. Likewise, the tables don't return the highest bit of the + result. That bit must be inserted by or:ing the returned value with + 0x100. This way, we get a 9-bit approximation from 8-bit tables! */ + +/* Table to be used for operands with an even total number of bits. + (Exactly as in the decimal system there are similarities between the + square root of numbers with the same initial digits and an even + difference in the total number of digits. Consider the square root + of 1, 10, 100, 1000, ...) 
*/ +static const unsigned char even_approx_tab[256] = +{ + 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e, + 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, + 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79, + 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f, + 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84, + 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89, + 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f, + 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94, + 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99, + 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e, + 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3, + 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7, + 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac, + 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1, + 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6, + 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba, + 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf, + 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3, + 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8, + 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc, + 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1, + 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5, + 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda, + 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde, + 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2, + 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6, + 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb, + 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef, + 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3, + 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7, + 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb, + 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff, +}; + +/* Table to be used for operands with an odd total number of bits. + (Further comments before previous table.) 
*/ +static const unsigned char odd_approx_tab[256] = +{ + 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, + 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, + 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, + 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12, + 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16, + 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a, + 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d, + 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, + 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24, + 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28, + 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b, + 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f, + 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32, + 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35, + 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39, + 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, + 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42, + 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45, + 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49, + 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c, + 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f, + 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52, + 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, + 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58, + 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, + 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e, + 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61, + 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63, + 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66, + 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69, +}; +#endif + + +mp_size_t +#if __STDC__ +mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size) +#else +mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) + mp_ptr root_ptr; + mp_ptr rem_ptr; + mp_srcptr op_ptr; + mp_size_t op_size; +#endif +{ + /* R (root result) */ + mp_ptr rp; /* Pointer to least significant 
word */ + mp_size_t rsize; /* The size in words */ + + /* T (OP shifted to the left a.k.a. normalized) */ + mp_ptr tp; /* Pointer to least significant word */ + mp_size_t tsize; /* The size in words */ + mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */ + mp_limb_t t_high0, t_high1; /* The two most significant words */ + + /* TT (temporary for numerator/remainder) */ + mp_ptr ttp; /* Pointer to least significant word */ + + /* X (temporary for quotient in main loop) */ + mp_ptr xp; /* Pointer to least significant word */ + mp_size_t xsize; /* The size in words */ + + unsigned cnt; + mp_limb_t initial_approx; /* Initially made approximation */ + mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */ + mp_size_t tmp; + mp_size_t i; + + mp_limb_t cy_limb; + TMP_DECL (marker); + + /* If OP is zero, both results are zero. */ + if (op_size == 0) + return 0; + + count_leading_zeros (cnt, op_ptr[op_size - 1]); + tsize = op_size; + if ((tsize & 1) != 0) + { + cnt += BITS_PER_MP_LIMB; + tsize++; + } + + rsize = tsize / 2; + rp = root_ptr; + + TMP_MARK (marker); + + /* Shift OP an even number of bits into T, such that either the most or + the second most significant bit is set, and such that the number of + words in T becomes even. This way, the number of words in R=sqrt(OP) + is exactly half as many as in OP, and the most significant bit of R + is set. + + Also, the initial approximation is simplified by this up-shifted OP. + + Finally, the Newtonian iteration which is the main part of this + program performs division by R. The fast division routine expects + the divisor to be "normalized" in exactly the sense of having the + most significant bit set. 
*/ + + tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + + if ((cnt & ~1) % BITS_PER_MP_LIMB != 0) + t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size, + (cnt & ~1) % BITS_PER_MP_LIMB); + else + MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size); + + if (cnt >= BITS_PER_MP_LIMB) + tp[0] = 0; + + t_high0 = tp[tsize - 1]; + t_high1 = tp[tsize - 2]; /* Never out of bounds: TSIZE is even and non-zero here, so >= 2. */ + +/* Is there a fast sqrt instruction defined for this machine? */ +#ifdef SQRT + { + initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1); + /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have + become incorrect due to overflow in the conversion from double to + mp_limb_t above. It will typically be zero in that case, but might be + a small number on some machines. The most significant bit of + INITIAL_APPROX should be set, so that bit is a good overflow + indication. */ + if ((mp_limb_signed_t) initial_approx >= 0) + initial_approx = ~(mp_limb_t)0; + } +#else + /* Get a 9 bit approximation from the tables. The tables expect to + be indexed with the 8 high bits right below the highest bit. + Also, the highest result bit is not returned by the tables, and + must be OR-ed into the result. The scheme gives 9 bits of start + approximation with just 256-entry 8 bit tables. */ + + if ((cnt & 1) == 0) + { + /* The most significant bit of t_high0 is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1); + initial_approx &= 0xff; + initial_approx = even_approx_tab[initial_approx]; + } + else + { + /* The most significant bit of t_high0 is unset, + the second most significant is set. */ + initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2); + initial_approx &= 0xff; + initial_approx = odd_approx_tab[initial_approx]; + } + initial_approx |= 0x100; + initial_approx <<= BITS_PER_MP_LIMB - 8 - 1; + + /* Perform small precision Newtonian iterations to get a full word + approximation. 
For small operands, these iterations will do the + entire job. */ + if (t_high0 == ~(mp_limb_t)0) + initial_approx = t_high0; + else + { + mp_limb_t quot; + + if (t_high0 >= initial_approx) + initial_approx = t_high0 + 1; + + /* First get about 18 bits with pure C arithmetic. */ + quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2; + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + + /* Now get a full word by one (or, for > 36 bit machines, several) + iterations. */ + for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1) + { + mp_limb_t ignored_remainder; + + udiv_qrnnd (quot, ignored_remainder, + t_high0, t_high1, initial_approx); + initial_approx = (initial_approx + quot) / 2; + initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } +#endif + + rp[0] = initial_approx; + rsize = 1; + +#ifdef SQRT_DEBUG + printf ("\n\nT = "); + mpn_dump (tp, tsize); +#endif + + if (tsize > 2) + { + /* Determine the successive precisions to use in the iteration. We + minimize the precisions, beginning with the highest (i.e. last + iteration) to the lowest (i.e. first iteration). */ + + xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + + t_end_ptr = tp + tsize; + + tmp = tsize / 2; + for (i = 0;; i++) + { + tsize = (tmp + 1) / 2; + if (tmp == tsize) + break; + tsizes[i] = tsize + tmp; + tmp = tsize; + } + + /* Main Newton iteration loop. For big arguments, most of the + time is spent here. */ + + /* It is possible to do a great optimization here. The successive + divisors in the mpn_divmod call below have more and more leading + words equal to its predecessor. Therefore the beginning of + each division will repeat the same work as did the last + division. If we could guarantee that the leading words of two + consecutive divisors are the same (i.e. 
in this case, a later + divisor has just more digits at the end) it would be a simple + matter of just using the old remainder of the last division in + a subsequent division, to take care of this optimization. This + idea would surely make a difference even for small arguments. */ + + /* Loop invariants: + + R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1. + X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X. + R <= shiftdown_to_same_size(X). */ + + while (--i >= 0) + { + mp_limb_t cy; +#ifdef SQRT_DEBUG + mp_limb_t old_least_sign_r = rp[0]; + mp_size_t old_rsize = rsize; + + printf ("R = "); + mpn_dump (rp, rsize); +#endif + tsize = tsizes[i]; + + /* Need to copy the numerator into temporary space, as + mpn_divmod overwrites its numerator argument with the + remainder (which we currently ignore). */ + MPN_COPY (ttp, t_end_ptr - tsize, tsize); + cy = mpn_divmod (xp, ttp, tsize, rp, rsize); + xsize = tsize - rsize; + +#ifdef SQRT_DEBUG + printf ("X =%d ", cy); + mpn_dump (xp, xsize); +#endif + + /* Add X and R with the most significant limbs aligned, + temporarily ignoring at least one limb at the low end of X. */ + tmp = xsize - rsize; + cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize); + + /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get + intermediate roots that'd need an extra bit. We don't want to + handle that since it would make the subsequent divisor + non-normalized, so round such roots down to be only ones in the + current precision. NOTE(review): the loop starts at j = xsize, so it also stores to xp[xsize], one limb above the xsize limbs read by the shift below -- confirm that is intended. */ + if (cy == 2) + { + mp_size_t j; + for (j = xsize; j >= 0; j--) + xp[j] = ~(mp_limb_t)0; + } + + /* Divide X by 2 and put the result in R. This is the new + approximation. Shift in the carry from the addition. 
*/ + mpn_rshift (rp, xp, xsize, 1); + rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)); + rsize = xsize; +#ifdef SQRT_DEBUG + if (old_least_sign_r != rp[rsize - old_rsize]) + printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n", + i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r, + 2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]); +#endif + } + } + +#ifdef SQRT_DEBUG + printf ("(final) R = "); + mpn_dump (rp, rsize); +#endif + + /* We computed the square root of OP * 2**(2*floor(cnt/2)). + This has resulted in R being 2**floor(cnt/2) too large. + Shift it down here to fix that. */ + if (cnt / 2 != 0) + { + mpn_rshift (rp, rp, rsize, cnt/2); + rsize -= rp[rsize - 1] == 0; + } + + /* Calculate the remainder. */ + mpn_mul_n (tp, rp, rp, rsize); + tsize = rsize + rsize; + tsize -= tp[tsize - 1] == 0; + if (op_size < tsize + || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0)) + { + /* R is too large. Decrement it. */ + + /* These operations can't overflow. */ + cy_limb = mpn_sub_n (tp, tp, rp, rsize); + cy_limb += mpn_sub_n (tp, tp, rp, rsize); + mpn_decr_u (tp + rsize, cy_limb); + mpn_incr_u (tp, (mp_limb_t) 1); + + mpn_decr_u (rp, (mp_limb_t) 1); + +#ifdef SQRT_DEBUG + printf ("(adjusted) R = "); + mpn_dump (rp, rsize); +#endif + } + + if (rem_ptr != NULL) + { + cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize); + MPN_NORMALIZE (rem_ptr, op_size); + TMP_FREE (marker); + return op_size; + } + else + { + int res; + res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size); + TMP_FREE (marker); + return res; + } +} diff --git a/rts/gmp/mpn/generic/sub_n.c b/rts/gmp/mpn/generic/sub_n.c new file mode 100644 index 0000000000..4f2f06099c --- /dev/null +++ b/rts/gmp/mpn/generic/sub_n.c @@ -0,0 +1,62 @@ +/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +mp_limb_t +#if __STDC__ +mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size) +#else +mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + register mp_srcptr s2_ptr; + mp_size_t size; +#endif +{ + register mp_limb_t x, y, cy; + register mp_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. 
*/ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = (y < cy); /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy = (y > x) + cy; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while (++j != 0); /* the body runs before this test, so SIZE must be non-zero */ + + return cy; /* borrow out of the most significant limb */ +} diff --git a/rts/gmp/mpn/generic/submul_1.c b/rts/gmp/mpn/generic/submul_1.c new file mode 100644 index 0000000000..c7c08ee4af --- /dev/null +++ b/rts/gmp/mpn/generic/submul_1.c @@ -0,0 +1,65 @@ +/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR + by S2_LIMB, subtract the S1_SIZE least significant limbs of the product + from the limb vector pointed to by RES_PTR. Return the most significant + limb of the product, adjusted for carry-out from the subtraction. + +Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb) + register mp_ptr res_ptr; + register mp_srcptr s1_ptr; + mp_size_t s1_size; + register mp_limb_t s2_limb; +{ + register mp_limb_t cy_limb; + register mp_size_t j; + register mp_limb_t prod_high, prod_low; + register mp_limb_t x; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += (prod_low > x); + res_ptr[j] = prod_low; + } + while (++j != 0); /* the body runs before this test, so S1_SIZE must be non-zero */ + + return cy_limb; +} diff --git a/rts/gmp/mpn/generic/tdiv_qr.c b/rts/gmp/mpn/generic/tdiv_qr.c new file mode 100644 index 0000000000..b748b5d810 --- /dev/null +++ b/rts/gmp/mpn/generic/tdiv_qr.c @@ -0,0 +1,401 @@ +/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and + write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If + qxn is non-zero, generate that many fraction limbs and append them after the + other quotient limbs, and update the remainder accordingly. The input + operands are unaffected. + + Preconditions: + 1. The most significant limb of the divisor must be non-zero. + 2. No argument overlap is permitted. (??? relax this ???) + 3. nn >= dn, even if qxn is non-zero. (??? relax this ???) + + The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time + complexity of multiplication. + +Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) +#endif + +/* Extract the middle limb from ((h,,l) << cnt) */ +#define SHL(h,l,cnt) \ + ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1)))) + +void +#if __STDC__ +mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, + mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) +#else +mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) + mp_ptr qp; + mp_ptr rp; + mp_size_t qxn; + mp_srcptr np; + mp_size_t nn; + mp_srcptr dp; + mp_size_t dn; +#endif +{ + /* FIXME: + 1. qxn + 2. pass allocated storage in additional parameter? 
+ */ + if (qxn != 0) + abort (); + + switch (dn) + { + case 0: + DIVIDE_BY_ZERO; /* NOTE(review): no break or return follows; if this macro returns, control falls into case 1 -- confirm it cannot. */ + + case 1: + { + rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]); + return; + } + + case 2: + { + int cnt; + mp_ptr n2p, d2p; + mp_limb_t qhl, cy; + TMP_DECL (marker); + TMP_MARK (marker); + count_leading_zeros (cnt, dp[dn - 1]); + if (cnt != 0) + { + d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp, dn, cnt); + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); + if (cy == 0) + qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ + } + else + { + d2p = (mp_ptr) dp; + n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ + } + + if (cnt != 0) + mpn_rshift (rp, n2p, dn, cnt); + else + MPN_COPY (rp, n2p, dn); + TMP_FREE (marker); + return; + } + + default: + { + int adjust; + TMP_DECL (marker); + TMP_MARK (marker); + adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ + if (nn + adjust >= 2 * dn) + { + mp_ptr n2p, d2p; + mp_limb_t cy; + int cnt; + count_leading_zeros (cnt, dp[dn - 1]); + + qp[nn - dn] = 0; /* zero high quotient limb */ + if (cnt != 0) /* normalize divisor if needed */ + { + d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp, dn, cnt); + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + nn += adjust; + } + else + { + d2p = (mp_ptr) dp; + n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + n2p[nn] = 0; + nn += adjust; + } + + if (dn == 2) + mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + else if (dn < BZ_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); + else + 
{ + /* Perform 2*dn / dn limb divisions as long as the limbs + in np last. 
*/ + mp_ptr q2p = qp + nn - 2 * dn; + n2p += nn - 2 * dn; + mpn_bz_divrem_n (q2p, n2p, d2p, dn); + nn -= dn; + while (nn >= 2 * dn) + { + mp_limb_t c; + q2p -= dn; n2p -= dn; + c = mpn_bz_divrem_n (q2p, n2p, d2p, dn); + ASSERT_ALWAYS (c == 0); + nn -= dn; + } + + if (nn != dn) + { + n2p -= nn - dn; + /* In theory, we could fall out to the cute code below + since we now have exactly the situation that code + is designed to handle. We botch this badly and call + the basic mpn_sb_divrem_mn! */ + if (dn == 2) + mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + else + mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); + } + } + + + if (cnt != 0) + mpn_rshift (rp, n2p, dn, cnt); + else + MPN_COPY (rp, n2p, dn); + TMP_FREE (marker); + return; + } + + /* When we come here, the numerator/partial remainder is less + than twice the size of the denominator. */ + + { + /* Problem: + + Divide a numerator N with nn limbs by a denominator D with dn + limbs forming a quotient of nn-dn+1 limbs. When qn is small + compared to dn, conventional division algorithms perform poorly. + We want an algorithm that has an expected running time that is + dependent only on qn. It is assumed that the most significant + limb of the numerator is smaller than the most significant limb + of the denominator. + + Algorithm (very informally stated): + + 1) Divide the 2 x qn most significant limbs from the numerator + by the qn most significant limbs from the denominator. Call + the result qest. This is either the correct quotient, or + is 1 or 2 too large. Compute the remainder from the + division. (This step is implemented by a mpn_divrem call.) + + 2) Is the most significant limb from the remainder < p, where p + is the product of the most significant limb from the quotient + and the next(d). (Next(d) denotes the next ignored limb from + the denominator.) If it is, decrement qest, and adjust the + remainder accordingly. + + 3) Is the remainder >= qest? If it is, qest is the desired + quotient. 
The algorithm terminates. + + 4) Subtract qest x next(d) from the remainder. If there is + borrow out, decrement qest, and adjust the remainder + accordingly. + + 5) Skip one word from the denominator (i.e., let next(d) denote + the next less significant limb). */ + + mp_size_t qn; + mp_ptr n2p, d2p; + mp_ptr tp; + mp_limb_t cy; + mp_size_t in, rn; + mp_limb_t quotient_too_large; + int cnt; + + qn = nn - dn; + qp[qn] = 0; /* zero high quotient limb */ + qn += adjust; /* qn cannot become bigger */ + + if (qn == 0) + { + MPN_COPY (rp, np, dn); + TMP_FREE (marker); + return; + } + + in = dn - qn; /* (at least partially) ignored # of limbs in ops */ + /* Normalize denominator by shifting it to the left such that its + most significant bit is set. Then shift the numerator the same + amount, to mathematically preserve quotient. */ + count_leading_zeros (cnt, dp[dn - 1]); + if (cnt != 0) + { + d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB); + + mpn_lshift (d2p, dp + in, qn, cnt); + d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt); + + n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); + if (adjust) + { + n2p[2 * qn] = cy; + n2p++; + } + else + { + n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt); + } + } + else + { + d2p = (mp_ptr) dp + in; + + n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); + if (adjust) + { + n2p[2 * qn] = 0; + n2p++; + } + } + + /* Get an approximate quotient using the extracted operands. */ + if (qn == 1) + { + mp_limb_t q0, r0; + mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0; + /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some + temps here. This doesn't hurt code quality on any machines + so we do it unconditionally. 
*/ + gcc272bug_n1 = n2p[1]; + gcc272bug_n0 = n2p[0]; + gcc272bug_d0 = d2p[0]; + udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0); + n2p[0] = r0; + qp[0] = q0; + } + else if (qn == 2) + mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); + else if (qn < BZ_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn); + else + mpn_bz_divrem_n (qp, n2p, d2p, qn); + + rn = qn; + /* Multiply the first ignored divisor limb by the most significant + quotient limb. If that product is > the partial remainder's + most significant limb, we know the quotient is too large. This + test quickly catches most cases where the quotient is too large; + it catches all cases where the quotient is 2 too large. */ + { + mp_limb_t dl, x; + mp_limb_t h, l; + + if (in - 2 < 0) + dl = 0; + else + dl = dp[in - 2]; + + x = SHL (dp[in - 1], dl, cnt); + umul_ppmm (h, l, x, qp[qn - 1]); + + if (n2p[qn - 1] < h) + { + mp_limb_t cy; + + mpn_decr_u (qp, (mp_limb_t) 1); + cy = mpn_add_n (n2p, n2p, d2p, qn); + if (cy) + { + /* The partial remainder is safely large. */ + n2p[qn] = cy; + ++rn; + } + } + } + + quotient_too_large = 0; + if (cnt != 0) + { + mp_limb_t cy1, cy2; + + /* Append partially used numerator limb to partial remainder. */ + cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt); + n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt); + + /* Update partial remainder with partially used divisor limb. */ + cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt)); + if (qn != rn) + { + if (n2p[qn] < cy2) + abort (); + n2p[qn] -= cy2; + } + else + { + n2p[qn] = cy1 - cy2; + + quotient_too_large = (cy1 < cy2); + ++rn; + } + --in; + } + /* True: partial remainder now is neutral, i.e., it is not shifted up. 
*/ + + tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); + + if (in < qn) + { + if (in == 0) + { + MPN_COPY (rp, n2p, rn); + if (rn != dn) + abort (); + goto foo; + } + mpn_mul (tp, qp, qn, dp, in); + } + else + mpn_mul (tp, dp, in, qp, qn); + + cy = mpn_sub (n2p, n2p, rn, tp + in, qn); + MPN_COPY (rp + in, n2p, dn - in); + quotient_too_large |= cy; + cy = mpn_sub_n (rp, np, tp, in); + cy = mpn_sub_1 (rp + in, rp + in, rn, cy); + quotient_too_large |= cy; + foo: /* final quotient correction, reached by the goto above and by falling through */ + if (quotient_too_large) + { + mpn_decr_u (qp, (mp_limb_t) 1); + mpn_add_n (rp, rp, dp, dn); + } + } + TMP_FREE (marker); + return; + } + } +} diff --git a/rts/gmp/mpn/generic/udiv_w_sdiv.c b/rts/gmp/mpn/generic/udiv_w_sdiv.c new file mode 100644 index 0000000000..061cce86e1 --- /dev/null +++ b/rts/gmp/mpn/generic/udiv_w_sdiv.c @@ -0,0 +1,131 @@ +/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed + division. + + Contributed by Peter L. Montgomery. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY SAFE + TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS + ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE + GNU MP RELEASE. + + +Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +mp_limb_t +mpn_udiv_w_sdiv (rp, a1, a0, d) + mp_limb_t *rp, a1, a0, d; +{ + mp_limb_t q, r; + mp_limb_t c0, c1, b1; + + if ((mp_limb_signed_t) d >= 0) + { + if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1))) + { + /* dividend, divisor, and quotient are nonnegative */ + sdiv_qrnnd (q, r, a1, a0, d); + } + else + { + /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d (the powers of two in these comments are written for BITS_PER_MP_LIMB == 32) */ + sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1)); + /* Divide (c1*2^32 + c0) by d */ + sdiv_qrnnd (q, r, c1, c0, d); + /* Add 2^31 to quotient */ + q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } + else + { + b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */ + c1 = a1 >> 1; /* A/2 */ + c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1); + + if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */ + { + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */ + if ((d & 1) != 0) + { + if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */ + { + c1 = (b1 - 1) - c1; + c0 = ~c0; /* logical NOT */ + + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + q = ~q; /* (A/2)/b1 */ + r = (b1 - 1) - r; + + r = 2*r + (a0 & 1); /* A/(2*b1) */ + + if ((d & 1) != 0) + { + if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else /* Implies c1 = b1 */ + { /* Hence a1 = d - 1 = 2*b1 - 1 */ + if (a0 >= -d) + { + q = -1; + r = a0 + d; + } + else + { + q = -2; + r = a0 + 2*d; + } + } + } + + *rp = r; + return q; +} diff --git a/rts/gmp/mpn/hppa/README b/rts/gmp/mpn/hppa/README new file mode 100644 index 
0000000000..97e7abe011 --- /dev/null +++ b/rts/gmp/mpn/hppa/README @@ -0,0 
+1,91 @@ +This directory contains mpn functions for various HP PA-RISC chips. Code +that runs faster on the PA7100 and later implementations is in the pa7100 +directory. + +RELEVANT OPTIMIZATION ISSUES + + Load and Store timing + +On the PA7000 no memory instructions can issue in the two cycles after a store. +For the PA7100, this is reduced to one cycle. + +The PA7100 has a lockup-free cache, so it helps to schedule loads and the +dependent instruction really far from each other. + +STATUS + +1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the + instructions below (but some sw pipelining is needed to avoid the + xmpyu-fstds delay): + + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + + addc + stws res_ptr + addc + stws res_ptr + + addib Loop + +2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb + (asymptotically) on the PA7100, using the instructions below. With proper + sw pipelining and the unrolling level below, the speed becomes 8 + cycles/limb. + + fldds s1_ptr + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + addc + addc + addc + addc + addc %r0,%r0,cy-limb + + ldws res_ptr + ldws res_ptr + ldws res_ptr + ldws res_ptr + add + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + + addib + +3. For the PA8000 we have to stick to using 32-bit limbs before compiler + support emerges. But we want to use 64-bit operations whenever possible, + in particular for loads and stores. It is possible to handle mpn_add_n + efficiently by rotating (when s1/s2 are aligned), masking+bit field + inserting (when they are not). The speed should double compared to the + code used today. 
diff --git a/rts/gmp/mpn/hppa/add_n.s b/rts/gmp/mpn/hppa/add_n.s new file mode 100644 index 0000000000..c53b2f71b3 --- /dev/null +++ b/rts/gmp/mpn/hppa/add_n.s @@ -0,0 +1,58 @@ +; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; One might want to unroll this as for other processors, but it turns +; out that the data cache contention after a store makes such +; unrolling useless. We can't get below 5 cycles/limb anyway. 
+ + .code + .export __gmpn_add_n +__gmpn_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + add %r20,%r19,%r28 ; add first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + addc %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 ; presumably the carry-out, returned in %r28 -- confirm against the HP-PA calling convention + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/gmp-mparam.h b/rts/gmp/mpn/hppa/gmp-mparam.h new file mode 100644 index 0000000000..98b6d9ce3c --- /dev/null +++ b/rts/gmp/mpn/hppa/gmp-mparam.h @@ -0,0 +1,63 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* These values are for the PA7100 using GCC. */ +/* Generated by tuneup.c, 2000-07-25. 
*/ + +/* Every threshold below is wrapped in #ifndef, so a value that is already +   defined when this header is read is kept instead of the default. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 30 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 172 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 59 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 185 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 96 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 122 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 18 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 46 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 33 +#endif diff --git a/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s new file mode 100644 index 0000000000..c7d218f922 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s @@ -0,0 +1,102 @@ +; HP-PA-1.1 __gmpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 11 cycles/limb on a PA7000. 
With the used instructions, it +; can not become faster due to data cache contention after a store. On the +; PA7100 it runs at 10 cycles/limb, and that can not be improved either, +; since only the xmpyu does not need the integer pipeline, so the only +; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb +; on the PA7100. + +; There are some ideas described in mul_1.s that apply to this code too. + + .code + .export __gmpn_addmul_1 +__gmpn_addmul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 ; reserve a 64-byte frame; -16(sp)/-12(sp) serve as scratch + fldws,ma 4(%r25),%fr5 ; first s1 limb, s1_ptr += 4 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 ; most significant limb in product + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +L$loop ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +L$end ldw 0(%r26),%r29 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 ; r28 = final high limb plus carry + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + add %r29,%r1,%r19 ; res limb + low product limb + stw %r19,0(%r26) + addc %r0,%r28,%r28 ; r28 = high product limb plus carry + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/mul_1.s b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s new file mode 100644 index 0000000000..4512fddec9 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s @@ -0,0 +1,98 @@ +; HP-PA-1.1 __gmpn_mul_1 -- Multiply a limb vector with a limb and store +; the result 
in a second limb vector. + +; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can +; not become faster due to data cache contention after a store. On the +; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since +; only the xmpyu does not need the integer pipeline, so the only dual-issue +; we will get are addc+xmpyu. Unrolling would not help either CPU. + +; We could use fldds to read two limbs at a time from the S1 array, and that +; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and +; PA7100, respectively. We don't do that since it does not seem worth the +; (alignment) troubles... + +; At least the PA7100 is rumored to be able to deal with cache-misses +; without stalling instruction issue. If this is true, and the cache is +; actually also lockup-free, we should use a deeper software pipeline, and +; load from S1 very early! (The loads and stores to -12(sp) will surely be +; in the cache.) 
+ + .code + .export __gmpn_mul_1 +__gmpn_mul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 ; reserve a 64-byte frame; -16(sp)/-12(sp) serve as scratch + fldws,ma 4(%r25),%fr5 ; first s1 limb, s1_ptr += 4 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 ; most significant limb in product + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +L$loop fldws,ma 4(%r25),%fr5 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +L$end stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 ; r28 = final high limb plus carry + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + fstds %fr6,-16(%r30) + ldw -16(%r30),%r28 ; r28 = most significant limb of the product + ldo -64(%r30),%r30 + bv 0(%r2) + fstws %fr6R,0(%r26) ; store the right half of fr6 at res_ptr (delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s new file mode 100644 index 0000000000..4f4be08b37 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s @@ -0,0 +1,75 @@ +; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. +; This is optimized for the PA7100, where it runs at 4.25 cycles/limb + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + + .code + .export __gmpn_add_n +__gmpn_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 ; first s1 limb, s1_ptr += 4 + ldws,ma 4(0,%r24),%r19 ; first s2 limb, s2_ptr += 4 + + addib,<= -5,%r23,L$rest ; size -= 5; skip the unrolled loop when size <= 5 + add %r20,%r19,%r28 ; add first limbs ignoring cy + +; Unrolled loop: four limbs per pass. Each group loads the next limb pair, +; stores the previous sum limb and adds the new pair with carry. +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addc %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L$loop + addc %r20,%r19,%r28 + +L$rest addib,= 4,%r23,L$end ; size += 4; zero means only the pending limb is left + nop +; One limb per pass for the remaining limbs. +L$eloop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L$eloop + addc %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) ; store the last sum limb + bv 0(%r2) + addc %r0,%r0,%r28 ; r28 = 0 + 0 + carry, i.e. the carry out (delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S new file mode 100644 index 0000000000..04db06822e --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S @@ -0,0 +1,189 @@ +; HP-PA 7100/7200 __gmpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + + .code + .export __gmpn_addmul_1 +__gmpn_addmul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 ; reserve a 128-byte frame + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs ; size -= 4; fewer than 4 limbs: use the simple loop + fldws -16(%r30),%fr31R ; s2_limb into fr31R (delay slot) + + ldo -112(%r30),%r31 ; r31 addresses the scratch slots for the four products + stw %r3,-96(%r30) ; save r3-r7; they are used below as s2, s3, lo1-lo3 + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + +; Skip the single-limb step when bit 29 of s1_ptr is clear (presumably this +; means s1_ptr is already 8-byte aligned for the fldds loads below). + bb,>=,n s1_ptr,29,L$0 + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + add s0,lo0,s0 + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + 
fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +; Four limbs per pass: the products for the next four limbs are formed while +; the previously combined lo0-lo3 are added to the res limbs and stored. +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + add s0,lo0,s0 + fstds %fr6,-8(%r31) + addc s1,lo1,s1 + fstds %fr9,0(%r31) + addc s2,lo2,s2 + fstds %fr10,8(%r31) + addc s3,lo3,s3 + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addc s1,lo1,s1 + stws,ma s1,4(res_ptr) + addc s2,lo2,s2 + stws,ma s2,4(res_ptr) + addc s3,lo3,s3 + stws,ma s3,4(res_ptr) + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +; One limb per pass for whatever is left (size is biased by -4 on entry). +L$few_limbs + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws 
-12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + add s0,lo0,s0 + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb ; fold the pending carry bit into cylimb + bv 0(%r2) + ldo -128(%r30),%r30 ; release the frame (branch delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s new file mode 100644 index 0000000000..31669b1a55 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s @@ -0,0 +1,83 @@ +; HP-PA __gmpn_lshift -- +; This is optimized for the PA7100, where it runs at 3.25 cycles/limb + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + + .code + .export __gmpn_lshift +__gmpn_lshift + .proc + .callinfo frame=64,no_calls + .entry + + sh2add %r24,%r25,%r25 ; s_ptr += 4*size: work from the high end downwards + sh2add %r24,%r26,%r26 ; res_ptr += 4*size + ldws,mb -4(0,%r25),%r22 ; pre-decrement s_ptr, load the highest-addressed limb + subi 32,%r23,%r1 + mtsar %r1 ; shift amount register = 32 - cnt + addib,= -1,%r24,L$0004 + vshd %r0,%r22,%r28 ; compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,<= -5,%r24,L$rest + vshd %r22,%r29,%r20 + +; Unrolled loop: four limbs per pass, alternating r22/r29 as the newer limb. +L$loop ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + vshd %r22,%r29,%r20 + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -4,%r24,L$loop + vshd %r22,%r29,%r20 + +L$rest addib,= 4,%r24,L$end1 + nop +L$eloop ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + addib,<= -1,%r24,L$end2 + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,> -1,%r24,L$eloop + vshd %r22,%r29,%r20 + +; L$end1 is taken when the last limb loaded is in r29, L$end2/L$0004 when it +; is in r22; both shift that limb against zero to form the lowest result limb. +L$end1 stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) +L$end2 stws,mb %r20,-4(0,%r26) +L$0004 vshd %r22,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s new file mode 100644 index 0000000000..d32b10b4b1 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s @@ -0,0 +1,80 @@ +; HP-PA __gmpn_rshift -- +; This is optimized for the PA7100, where it runs at 3.25 cycles/limb + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + + .code + .export __gmpn_rshift +__gmpn_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 ; lowest-addressed source limb, s_ptr += 4 + mtsar %r23 ; shift amount register = cnt + addib,= -1,%r24,L$0004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,<= -5,%r24,L$rest + vshd %r29,%r22,%r20 + +; Unrolled loop: four limbs per pass, alternating r22/r29 as the newer limb. +L$loop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + vshd %r29,%r22,%r20 + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -4,%r24,L$loop + vshd %r29,%r22,%r20 + +L$rest addib,= 4,%r24,L$end1 + nop +L$eloop ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,<= -1,%r24,L$end2 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -1,%r24,L$eloop + vshd %r29,%r22,%r20 + +; L$end1 is taken when the last limb loaded is in r29, L$end2/L$0004 when it +; is in r22; both shift zero against that limb to form the final result limb. +L$end1 stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) +L$end2 stws,ma %r20,4(0,%r26) +L$0004 vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s new file mode 100644 index 0000000000..0eec41c4b3 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s @@ -0,0 +1,76 @@ +; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. 
+; This is optimized for the PA7100, where it runs at 4.25 cycles/limb + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + + .code + .export __gmpn_sub_n +__gmpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 ; first s1 limb, s1_ptr += 4 + ldws,ma 4(0,%r24),%r19 ; first s2 limb, s2_ptr += 4 + + addib,<= -5,%r23,L$rest ; size -= 5; skip the unrolled loop when size <= 5 + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + +; Unrolled loop: four limbs per pass. Each group loads the next limb pair, +; stores the previous difference limb and subtracts the new pair with borrow. +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + subb %r20,%r19,%r28 + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -4,%r23,L$loop + subb %r20,%r19,%r28 + +L$rest addib,= 4,%r23,L$end ; size += 4; zero means only the pending limb is left + nop +; One limb per pass for the remaining limbs. +L$eloop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,> -1,%r23,L$eloop + subb %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) ; store the last difference limb + addc %r0,%r0,%r28 ; r28 = carry bit left by the last sub/subb + bv 0(%r2) + subi 1,%r28,%r28 ; r28 = 1 - carry, i.e. the borrow out (delay slot) + + .exit + .procend diff --git 
a/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S new file mode 100644 index 0000000000..0fba21dcef --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S @@ -0,0 +1,195 @@ +; HP-PA 7100/7200 __gmpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector. + +; Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +; INPUT PARAMETERS +#define res_ptr %r26 +#define s1_ptr %r25 +#define size %r24 +#define s2_limb %r23 + +#define cylimb %r28 +#define s0 %r19 +#define s1 %r20 +#define s2 %r3 +#define s3 %r4 +#define lo0 %r21 +#define lo1 %r5 +#define lo2 %r6 +#define lo3 %r7 +#define hi0 %r22 +#define hi1 %r23 /* safe to reuse */ +#define hi2 %r29 +#define hi3 %r1 + + .code + .export __gmpn_submul_1 +__gmpn_submul_1 + .proc + .callinfo frame=128,no_calls + .entry + + ldo 128(%r30),%r30 ; reserve a 128-byte frame + stws s2_limb,-16(%r30) + add %r0,%r0,cylimb ; clear cy and cylimb + addib,< -4,size,L$few_limbs ; size -= 4; fewer than 4 limbs: use the simple loop + fldws -16(%r30),%fr31R ; s2_limb into fr31R (delay slot) + + ldo -112(%r30),%r31 ; r31 addresses the scratch slots for the four products + stw %r3,-96(%r30) ; save r3-r7; they are used below as s2, s3, lo1-lo3 + stw %r4,-92(%r30) + stw %r5,-88(%r30) + stw %r6,-84(%r30) + stw %r7,-80(%r30) + +; Skip the single-limb step when bit 29 of s1_ptr is clear (presumably this +; means s1_ptr is already 8-byte aligned for the fldds loads below). + bb,>=,n s1_ptr,29,L$0 + + fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r31) + ldws -16(%r31),cylimb + ldws -12(%r31),lo0 + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + addib,< -1,size,L$few_limbs + stws,ma s0,4(res_ptr) + +; start software pipeline ---------------------------------------------------- +L$0 fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + xmpyu %fr4L,%fr31R,%fr5 + xmpyu %fr4R,%fr31R,%fr6 + xmpyu %fr8L,%fr31R,%fr9 + xmpyu %fr8R,%fr31R,%fr10 + + fstds %fr5,-16(%r31) + fstds %fr6,-8(%r31) + fstds %fr9,0(%r31) + fstds %fr10,8(%r31) + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + addc lo1,hi0,lo1 + addc lo2,hi1,lo2 + addc lo3,hi2,lo3 + + addib,< -4,size,L$end + addc %r0,hi3,cylimb ; propagate carry into cylimb +; main loop ------------------------------------------------------------------ +; Four limbs per pass: the products for the next four limbs are formed while +; the previously combined lo0-lo3 are subtracted from the res limbs and stored. +L$loop fldds,ma 8(s1_ptr),%fr4 + fldds,ma 8(s1_ptr),%fr8 + + ldws 0(res_ptr),s0 + xmpyu %fr4L,%fr31R,%fr5 + ldws 4(res_ptr),s1 + xmpyu %fr4R,%fr31R,%fr6 + ldws 8(res_ptr),s2 + xmpyu %fr8L,%fr31R,%fr9 + ldws 12(res_ptr),s3 + xmpyu %fr8R,%fr31R,%fr10 + + fstds 
%fr5,-16(%r31) + sub s0,lo0,s0 + fstds %fr6,-8(%r31) + subb s1,lo1,s1 + fstds %fr9,0(%r31) + subb s2,lo2,s2 + fstds %fr10,8(%r31) + subb s3,lo3,s3 + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... just invert cy + + ldws -16(%r31),hi0 + ldws -12(%r31),lo0 + ldws -8(%r31),hi1 + ldws -4(%r31),lo1 + ldws 0(%r31),hi2 + ldws 4(%r31),lo2 + ldws 8(%r31),hi3 + ldws 12(%r31),lo3 + + addc lo0,cylimb,lo0 + stws,ma s0,4(res_ptr) + addc lo1,hi0,lo1 + stws,ma s1,4(res_ptr) + addc lo2,hi1,lo2 + stws,ma s2,4(res_ptr) + addc lo3,hi2,lo3 + stws,ma s3,4(res_ptr) + + addib,>= -4,size,L$loop + addc %r0,hi3,cylimb ; propagate carry into cylimb +; finish software pipeline --------------------------------------------------- +L$end ldws 0(res_ptr),s0 + ldws 4(res_ptr),s1 + ldws 8(res_ptr),s2 + ldws 12(res_ptr),s3 + + sub s0,lo0,s0 + stws,ma s0,4(res_ptr) + subb s1,lo1,s1 + stws,ma s1,4(res_ptr) + subb s2,lo2,s2 + stws,ma s2,4(res_ptr) + subb s3,lo3,s3 + stws,ma s3,4(res_ptr) + subb %r0,%r0,lo0 ; these two insns ... + add lo0,lo0,%r0 ; ... invert cy + +; restore callee-saves registers --------------------------------------------- + ldw -96(%r30),%r3 + ldw -92(%r30),%r4 + ldw -88(%r30),%r5 + ldw -84(%r30),%r6 + ldw -80(%r30),%r7 + +; One limb per pass for whatever is left (size is biased by -4 on entry). +L$few_limbs + addib,=,n 4,size,L$ret +L$loop2 fldws,ma 4(s1_ptr),%fr4 + ldws 0(res_ptr),s0 + xmpyu %fr4,%fr31R,%fr5 + fstds %fr5,-16(%r30) + ldws -16(%r30),hi0 + ldws -12(%r30),lo0 + addc lo0,cylimb,lo0 + addc %r0,hi0,cylimb + sub s0,lo0,s0 + add s0,lo0,%r0 ; invert cy + stws,ma s0,4(res_ptr) + addib,<> -1,size,L$loop2 + nop + +L$ret addc %r0,cylimb,cylimb ; fold the pending carry bit into cylimb + bv 0(%r2) + ldo -128(%r30),%r30 ; release the frame (branch delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/submul_1.s b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s new file mode 100644 index 0000000000..20a5b5ce0a --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s @@ -0,0 +1,111 @@ +; HP-PA-1.1 __gmpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector. 
+ +; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r26 +; s1_ptr r25 +; size r24 +; s2_limb r23 + +; This runs at 12 cycles/limb on a PA7000. With the used instructions, it +; can not become faster due to data cache contention after a store. On the +; PA7100 it runs at 11 cycles/limb, and that can not be improved either, +; since only the xmpyu does not need the integer pipeline, so the only +; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb +; on the PA7100. + +; There are some ideas described in mul_1.s that apply to this code too. + +; It seems possible to make this run as fast as __gmpn_addmul_1, if we use +; sub,>>= %r29,%r19,%r22 +; addi 1,%r28,%r28 +; but that requires reworking the hairy software pipeline... + + .code + .export __gmpn_submul_1 +__gmpn_submul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 ; reserve a 64-byte frame; -16(sp)/-12(sp) serve as scratch + fldws,ma 4(%r25),%fr5 ; first s1 limb, s1_ptr += 4 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 ; most significant limb in product + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop +L$loop ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 ; carry is set exactly when the sub above borrowed + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + +L$end ldw 0(%r26),%r29 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 ; carry is set exactly when the sub above borrowed + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 ; carry is set exactly when the sub above borrowed + stws,ma %r22,4(%r26) + addc %r0,%r28,%r28 ; r28 = final high limb plus borrow + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + +L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + sub %r29,%r1,%r22 ; res limb - low product limb + add %r22,%r1,%r0 ; carry is set exactly when the sub above borrowed + stw %r22,0(%r26) + addc %r0,%r28,%r28 ; r28 = high product limb plus borrow + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S new file mode 100644 index 0000000000..b83d6f4dd2 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S @@ -0,0 +1,80 @@ +; HP-PA __udiv_qrnnd division support, used from longlong.h. +; This version runs fast on PA 7000 and later. + +; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; rem_ptr gr26 +; n1 gr25 +; n0 gr24 +; d gr23 + +; Method: convert n1:n0 and d to double precision, divide in floating point, +; convert the quotient back to an integer, then form n - q*d in integer +; arithmetic and step the quotient down by one if that difference has a +; nonzero high word. The quotient is left in r28 and the remainder is +; stored at rem_ptr. + + .code +L$0000 .word 0x43f00000 ; 2^64 + .word 0x0 + .export __gmpn_udiv_qrnnd +__gmpn_udiv_qrnnd + .proc + .callinfo frame=64,no_calls + .entry + ldo 64(%r30),%r30 + + stws %r25,-16(0,%r30) ; n_hi + stws %r24,-12(0,%r30) ; n_lo +#ifdef PIC + addil LT%L$0000,%r19 + ldo RT%L$0000(%r1),%r19 +#else + ldil L%L$0000,%r19 + ldo R%L$0000(%r19),%r19 +#endif + fldds -16(0,%r30),%fr5 ; fr5 = n1:n0 as a 64-bit integer + stws %r23,-12(0,%r30) ; d + comib,<= 0,%r25,L$1 ; skip the fixup when n1 is non-negative as a signed word + fcnvxf,dbl,dbl %fr5,%fr5 ; integer to double (delay slot, always executed) + fldds 0(0,%r19),%fr4 + fadd,dbl %fr4,%fr5,%fr5 ; add the 2^64 constant to undo the signed conversion +L$1 + fcpy,sgl %fr0,%fr6L ; fr6 = 0:d + fldws -12(0,%r30),%fr6R + fcnvxf,dbl,dbl %fr6,%fr4 + + fdiv,dbl %fr5,%fr4,%fr5 + + fcnvfx,dbl,dbl %fr5,%fr4 ; quotient estimate back to integer + fstws %fr4R,-16(%r30) + xmpyu %fr4R,%fr6R,%fr6 ; fr6 = q * d + ldws -16(%r30),%r28 ; r28 = quotient estimate + fstds %fr6,-16(0,%r30) + ldws -12(0,%r30),%r21 + ldws -16(0,%r30),%r20 + sub %r24,%r21,%r22 ; r19:r22 = n1:n0 - q*d + subb %r25,%r20,%r19 + comib,= 0,%r19,L$2 ; high word zero: keep q and r22 as they are + ldo -64(%r30),%r30 ; release the frame (delay slot) + + add %r22,%r23,%r22 ; otherwise remainder += d ... + ldo -1(%r28),%r28 ; ... and quotient -= 1 +L$2 bv 0(%r2) + stws %r22,0(0,%r26) ; store the remainder at rem_ptr (delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa1_1/umul.s b/rts/gmp/mpn/hppa/hppa1_1/umul.s new file mode 100644 index 0000000000..1f1300ac9b --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa1_1/umul.s @@ -0,0 +1,42 @@ +; Copyright (C) 1999 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; __umul_ppmm -- multiply the words in r25 and r24 with xmpyu, store the +; second (low) word of the 64-bit product at 0(r26) and leave the first +; (high) word in r28. + + .code + .export __umul_ppmm + .align 4 +__umul_ppmm + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 ; reserve a 64-byte frame; -16(sp) is the transfer slot + stw %r25,-16(0,%r30) ; move r25 ... + fldws -16(0,%r30),%fr22R ; ... into fr22R + stw %r24,-16(0,%r30) ; move r24 ... + fldws -16(0,%r30),%fr22L ; ... into fr22L + xmpyu %fr22R,%fr22L,%fr22 + fstds %fr22,-16(0,%r30) + ldw -16(0,%r30),%r28 ; first word of the product + ldw -12(0,%r30),%r29 ; second word of the product + stw %r29,0(0,%r26) + bv 0(%r2) + ldo -64(%r30),%r30 ; release the frame (branch delay slot) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/hppa2_0/add_n.s b/rts/gmp/mpn/hppa/hppa2_0/add_n.s new file mode 100644 index 0000000000..6e97278a39 --- /dev/null +++ b/rts/gmp/mpn/hppa/hppa2_0/add_n.s @@ -0,0 +1,88 @@ +; HP-PA 2.0 32-bit __gmpn_add_n -- Add two limb vectors of the same length > 0 +; and store sum in a third limb vector. + +; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. + + .code + .export __gmpn_add_n +__gmpn_add_n + .proc + .callinfo frame=0,no_calls + .entry + + sub %r0,%r23,%r22 ; r22 = -n + zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + sub %r26,%r22,%r26 ; offset res_ptr + blr %r28,%r0 ; branch into loop + add %r0,%r0,%r0 ; reset carry + +L$loop ldw 0(%r25),%r20 ; each unrolled step is four instructions: load, load, addc, store + ldw 0(%r24),%r31 + addc %r20,%r31,%r20 + stw %r20,0(%r26) +L$7 ldw 4(%r25),%r21 + ldw 4(%r24),%r19 + addc %r21,%r19,%r21 + stw %r21,4(%r26) +L$6 ldw 8(%r25),%r20 + ldw 8(%r24),%r31 + addc %r20,%r31,%r20 + stw %r20,8(%r26) +L$5 ldw 12(%r25),%r21 + ldw 12(%r24),%r19 + addc %r21,%r19,%r21 + stw %r21,12(%r26) +L$4 ldw 16(%r25),%r20 + ldw 16(%r24),%r31 + addc %r20,%r31,%r20 + stw %r20,16(%r26) +L$3 ldw 20(%r25),%r21 + ldw 20(%r24),%r19 + addc %r21,%r19,%r21 + stw %r21,20(%r26) +L$2 ldw 24(%r25),%r20 + ldw 24(%r24),%r31 + addc %r20,%r31,%r20 + stw %r20,24(%r26) +L$1 ldw 28(%r25),%r21 + ldo 32(%r25),%r25 ; advance s1_ptr by 8 limbs + ldw 28(%r24),%r19 + addc %r21,%r19,%r21 + stw %r21,28(%r26) + ldo 32(%r24),%r24 ; advance s2_ptr by 8 limbs + addib,> -8,%r23,L$loop ; size -= 8, loop while still positive + ldo 32(%r26),%r26 ; delay slot: advance res_ptr by 8 limbs + + bv (%r2) + .exit + addc %r0,%r0,%r28 ; delay slot: return the final carry in r28 + .procend diff --git a/rts/gmp/mpn/hppa/hppa2_0/sub_n.s b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s new file mode 100644 index 0000000000..7d9b50fc27 --- /dev/null +++
b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s @@ -0,0 +1,88 @@ +; HP-PA 2.0 32-bit __gmpn_sub_n -- Subtract two limb vectors of the same +; length > 0 and store difference in a third limb vector. + +; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. 
+ + .code + .export __gmpn_sub_n +__gmpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + sub %r0,%r23,%r22 ; r22 = -n + zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + blr %r28,%r0 ; branch into loop + sub %r26,%r22,%r26 ; offset res_ptr and set carry + +L$loop ldw 0(%r25),%r20 ; each unrolled step is four instructions: load, load, subb, store + ldw 0(%r24),%r31 + subb %r20,%r31,%r20 + stw %r20,0(%r26) +L$7 ldw 4(%r25),%r21 + ldw 4(%r24),%r19 + subb %r21,%r19,%r21 + stw %r21,4(%r26) +L$6 ldw 8(%r25),%r20 + ldw 8(%r24),%r31 + subb %r20,%r31,%r20 + stw %r20,8(%r26) +L$5 ldw 12(%r25),%r21 + ldw 12(%r24),%r19 + subb %r21,%r19,%r21 + stw %r21,12(%r26) +L$4 ldw 16(%r25),%r20 + ldw 16(%r24),%r31 + subb %r20,%r31,%r20 + stw %r20,16(%r26) +L$3 ldw 20(%r25),%r21 + ldw 20(%r24),%r19 + subb %r21,%r19,%r21 + stw %r21,20(%r26) +L$2 ldw 24(%r25),%r20 + ldw 24(%r24),%r31 + subb %r20,%r31,%r20 + stw %r20,24(%r26) +L$1 ldw 28(%r25),%r21 + ldo 32(%r25),%r25 ; advance s1_ptr by 8 limbs + ldw 28(%r24),%r19 + subb %r21,%r19,%r21 + stw %r21,28(%r26) + ldo 32(%r24),%r24 ; advance s2_ptr by 8 limbs + addib,> -8,%r23,L$loop ; size -= 8, loop while still positive + ldo 32(%r26),%r26 ; delay slot: advance res_ptr by 8 limbs + + addc %r0,%r0,%r28 ; r28 = carry bit left by the last subb + bv (%r2) + .exit + subi 1,%r28,%r28 ; delay slot: return 1 - carry, i.e. the borrow + .procend diff --git a/rts/gmp/mpn/hppa/lshift.s b/rts/gmp/mpn/hppa/lshift.s new file mode 100644 index 0000000000..f5a2daad60 --- /dev/null +++ b/rts/gmp/mpn/hppa/lshift.s @@ -0,0 +1,66 @@ +; HP-PA __gmpn_lshift -- Shift a limb vector left by cnt bits. + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + + .code + .export __gmpn_lshift +__gmpn_lshift + .proc + .callinfo frame=64,no_calls ; NOTE(review): frame=64 is declared but this routine never adjusts r30 -- confirm + .entry + + sh2add %r24,%r25,%r25 ; s_ptr += 4 * size: start at the highest-addressed limb + sh2add %r24,%r26,%r26 ; res_ptr += 4 * size + ldws,mb -4(0,%r25),%r22 ; pre-decrement, then load the top limb + subi 32,%r23,%r1 ; r1 = 32 - cnt + mtsar %r1 ; shift amount register = 32 - cnt + addib,= -1,%r24,L$0004 ; size was 1: only the single limb remains + vshd %r0,%r22,%r28 ; compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,= -1,%r24,L$0002 + vshd %r22,%r29,%r20 + +L$loop ldws,mb -4(0,%r25),%r22 ; two limbs per iteration, alternating r22 and r29 + stws,mb %r20,-4(0,%r26) + addib,= -1,%r24,L$0003 + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,<> -1,%r24,L$loop + vshd %r22,%r29,%r20 + +L$0002 stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 ; lowest limb: zeros are shifted in from below + bv 0(%r2) + stw %r20,-4(0,%r26) +L$0003 stws,mb %r20,-4(0,%r26) +L$0004 vshd %r22,%r0,%r20 ; lowest limb: zeros are shifted in from below + bv 0(%r2) + stw %r20,-4(0,%r26) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/rshift.s b/rts/gmp/mpn/hppa/rshift.s new file mode 100644 index 0000000000..e05e2f10b5 --- /dev/null +++ b/rts/gmp/mpn/hppa/rshift.s @@ -0,0 +1,63 @@ +; HP-PA __gmpn_rshift -- Shift a limb vector right by cnt bits. + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s_ptr gr25 +; size gr24 +; cnt gr23 + + .code + .export __gmpn_rshift +__gmpn_rshift + .proc + .callinfo frame=64,no_calls ; NOTE(review): frame=64 is declared but this routine never adjusts r30 -- confirm + .entry + + ldws,ma 4(0,%r25),%r22 ; load the lowest-addressed limb, post-increment s_ptr + mtsar %r23 ; shift amount register = cnt + addib,= -1,%r24,L$0004 ; size was 1: only the single limb remains + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,= -1,%r24,L$0002 + vshd %r29,%r22,%r20 + +L$loop ldws,ma 4(0,%r25),%r22 ; two limbs per iteration, alternating r22 and r29 + stws,ma %r20,4(0,%r26) + addib,= -1,%r24,L$0003 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,<> -1,%r24,L$loop + vshd %r29,%r22,%r20 + +L$0002 stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 ; highest limb: zeros are shifted in from above + bv 0(%r2) + stw %r20,0(0,%r26) +L$0003 stws,ma %r20,4(0,%r26) +L$0004 vshd %r0,%r22,%r20 ; highest limb: zeros are shifted in from above + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/sub_n.s b/rts/gmp/mpn/hppa/sub_n.s new file mode 100644 index 0000000000..8f770ad1ad --- /dev/null +++ b/rts/gmp/mpn/hppa/sub_n.s @@ -0,0 +1,59 @@ +; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; One might want to unroll this as for other processors, but it turns +; out that the data cache contention after a store makes such +; unrolling useless. We can't come under 5 cycles/limb anyway. + + .code + .export __gmpn_sub_n +__gmpn_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 ; first s1 limb, post-increment s1_ptr + ldws,ma 4(0,%r24),%r19 ; first s2 limb, post-increment s2_ptr + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) ; store the previous difference limb + addib,<> -1,%r23,L$loop + subb %r20,%r19,%r28 ; delay slot: subtract with borrow + +L$end stws %r28,0(0,%r26) ; store the last difference limb + addc %r0,%r0,%r28 ; r28 = carry bit left by the last subtract + bv 0(%r2) + subi 1,%r28,%r28 ; delay slot: return 1 - carry, i.e. the borrow + + .exit + .procend diff --git a/rts/gmp/mpn/hppa/udiv_qrnnd.s b/rts/gmp/mpn/hppa/udiv_qrnnd.s new file mode 100644 index 0000000000..9aa3b8a830 --- /dev/null +++ b/rts/gmp/mpn/hppa/udiv_qrnnd.s @@ -0,0 +1,286 @@ +; HP-PA __udiv_qrnnd division support, used from longlong.h. +; This version runs fast on pre-PA7000 CPUs. + +; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; rem_ptr gr26 +; n1 gr25 +; n0 gr24 +; d gr23 + +; The code size is a bit excessive. We could merge the last two ds;addc +; sequences by simply moving the "bb,< Odd" instruction down. The only +; trouble is the FFFFFFFF code that would need some hacking. + + .code + .export __gmpn_udiv_qrnnd +__gmpn_udiv_qrnnd + .proc + .callinfo frame=0,no_calls + .entry + + comb,< %r23,0,L$largedivisor + sub %r0,%r23,%r1 ; clear cy as side-effect + ds %r0,%r1,%r0 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds 
%r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r28 + ds %r25,%r23,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r23,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r28,%r28,%r28 + +L$largedivisor + extru %r24,31,1,%r19 ; r19 = n0 & 1 + bb,< %r23,31,L$odd + extru %r23,30,31,%r22 ; r22 = d >> 1 + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r24,%r24,%r28 + +L$odd addib,sv,n 1,%r22,L$FF.. 
; r22 = (d / 2 + 1) + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r28 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 +; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25 + add,nuv %r28,%r25,%r25 + addl %r25,%r1,%r25 + addc %r0,%r28,%r28 + sub,<< %r25,%r23,%r0 + addl %r25,%r1,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r0,%r28,%r28 + +; This is just a special case of the code above. +; We come here when d == 0xFFFFFFFF +L$FF.. 
+ add,uv %r25,%r24,%r24 ; r24 = n1 plus n0 (d is 2^32 - 1 here, so 2^32 == 1 mod d); skip the compare on overflow + sub,<< %r24,%r23,%r0 ; sum already below d: skip the increment + ldo 1(%r24),%r24 ; otherwise fold the wrap-around back in + stws %r24,0(0,%r26) ; *rem_ptr = remainder + bv 0(%r2) + addc %r0,%r25,%r28 ; delay slot: quotient = n1 plus the carry bit + + .exit + .procend diff --git a/rts/gmp/mpn/i960/README b/rts/gmp/mpn/i960/README new file mode 100644 index 0000000000..d68a0a83eb --- /dev/null +++ b/rts/gmp/mpn/i960/README @@ -0,0 +1,9 @@ +This directory contains mpn functions for Intel i960 processors. + +RELEVANT OPTIMIZATION ISSUES + +The code in this directory is not well optimized. + +STATUS + +The code in this directory has not been tested. diff --git a/rts/gmp/mpn/i960/add_n.s b/rts/gmp/mpn/i960/add_n.s new file mode 100644 index 0000000000..387317a397 --- /dev/null +++ b/rts/gmp/mpn/i960/add_n.s @@ -0,0 +1,43 @@ +# I960 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+ +.text + .align 4 + .globl ___gmpn_add_n +___gmpn_add_n: # in: g0 = res_ptr, g1 = s1_ptr, g2 = s2_ptr, g3 = size + mov 0,g6 # clear carry-save register + cmpo 1,0 # clear cy + +Loop: subo 1,g3,g3 # update loop counter + ld (g1),g5 # load from s1_ptr + addo 4,g1,g1 # s1_ptr++ + ld (g2),g4 # load from s2_ptr + addo 4,g2,g2 # s2_ptr++ + cmpo g6,1 # restore cy from g6, relies on cy being 0 + addc g4,g5,g4 # main add + subc 0,0,g6 # save cy in g6; NOTE(review): confirm this leaves g6 == 1 when cy is set, as the cmpo g6,1 restore expects (README: untested) + st g4,(g0) # store result to res_ptr + addo 4,g0,g0 # res_ptr++ + cmpobne 0,g3,Loop # when branch is taken, clears C bit + + mov g6,g0 # return the saved-carry word in g0 + ret diff --git a/rts/gmp/mpn/i960/addmul_1.s b/rts/gmp/mpn/i960/addmul_1.s new file mode 100644 index 0000000000..7df1418356 --- /dev/null +++ b/rts/gmp/mpn/i960/addmul_1.s @@ -0,0 +1,48 @@ +# I960 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+ +.text + .align 4 + .globl ___gmpn_addmul_1 +___gmpn_addmul_1: # in: g0 = result vector, g1 = source vector, g2 = limb count, g3 = multiplier limb + subo g2,0,g2 # g2 = -size, counts up to zero + shlo 2,g2,g4 # g4 = -4 * size + subo g4,g1,g1 # g1 = end of the source vector + subo g4,g0,g13 # g13 = end of the result vector + mov 0,g0 # g0 = running carry limb + + cmpo 1,0 # clear C bit on AC.cc + +Loop: ld (g1)[g2*4],g5 # next source limb + emul g3,g5,g6 # 64-bit product in g6 (low) and g7 (high) + ld (g13)[g2*4],g5 # current result limb + + addc g0,g6,g6 # relies on that C bit is clear + addc 0,g7,g7 # propagate into the high word + addc g5,g6,g6 # relies on that C bit is clear + st g6,(g13)[g2*4] + addc 0,g7,g0 # new carry limb + + addo g2,1,g2 + cmpobne 0,g2,Loop # when branch is taken, clears C bit + + ret # g0 holds the final carry limb diff --git a/rts/gmp/mpn/i960/mul_1.s b/rts/gmp/mpn/i960/mul_1.s new file mode 100644 index 0000000000..5c0c985aa5 --- /dev/null +++ b/rts/gmp/mpn/i960/mul_1.s @@ -0,0 +1,45 @@ +# I960 __gmpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+ +.text + .align 4 + .globl ___gmpn_mul_1 +___gmpn_mul_1: # in: g0 = result vector, g1 = source vector, g2 = limb count, g3 = multiplier limb + subo g2,0,g2 # g2 = -size, counts up to zero + shlo 2,g2,g4 # g4 = -4 * size + subo g4,g1,g1 # g1 = end of the source vector + subo g4,g0,g13 # g13 = end of the result vector + mov 0,g0 # g0 = running carry limb + + cmpo 1,0 # clear C bit on AC.cc + +Loop: ld (g1)[g2*4],g5 # next source limb + emul g3,g5,g6 # 64-bit product in g6 (low) and g7 (high) + + addc g0,g6,g6 # relies on that C bit is clear + st g6,(g13)[g2*4] + addc 0,g7,g0 # new carry limb + + addo g2,1,g2 + cmpobne 0,g2,Loop # when branch is taken, clears C bit + + ret # g0 holds the final carry limb diff --git a/rts/gmp/mpn/i960/sub_n.s b/rts/gmp/mpn/i960/sub_n.s new file mode 100644 index 0000000000..2db2d46aad --- /dev/null +++ b/rts/gmp/mpn/i960/sub_n.s @@ -0,0 +1,43 @@ +# I960 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA.
+ +.text + .align 4 + .globl ___gmpn_sub_n +___gmpn_sub_n: # in: g0 = res_ptr, g1 = s1_ptr, g2 = s2_ptr, g3 = size + mov 1,g6 # set carry-save register + cmpo 1,0 # clear cy + +Loop: subo 1,g3,g3 # update loop counter + ld (g1),g5 # load from s1_ptr + addo 4,g1,g1 # s1_ptr++ + ld (g2),g4 # load from s2_ptr + addo 4,g2,g2 # s2_ptr++ + cmpo g6,1 # restore cy from g6, relies on cy being 0 + subc g4,g5,g4 # main subtract + subc 0,0,g6 # save cy in g6; NOTE(review): confirm this leaves g6 == 1 when cy is set, as the cmpo g6,1 restore expects (README: untested) + st g4,(g0) # store result to res_ptr + addo 4,g0,g0 # res_ptr++ + cmpobne 0,g3,Loop # when branch is taken, cy will be 0 + + mov g6,g0 # return the saved-carry word in g0 + ret diff --git a/rts/gmp/mpn/lisp/gmpasm-mode.el b/rts/gmp/mpn/lisp/gmpasm-mode.el new file mode 100644 index 0000000000..5d9da7fa1f --- /dev/null +++ b/rts/gmp/mpn/lisp/gmpasm-mode.el @@ -0,0 +1,351 @@ +;;; gmpasm-mode.el -- GNU MP asm and m4 editing mode. + + +;; Copyright (C) 1999, 2000 Free Software Foundation, Inc. +;; +;; This file is part of the GNU MP Library. +;; +;; The GNU MP Library is free software; you can redistribute it and/or modify +;; it under the terms of the GNU Lesser General Public License as published by +;; the Free Software Foundation; either version 2.1 of the License, or (at your +;; option) any later version. +;; +;; The GNU MP Library is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +;; License for more details. +;; +;; You should have received a copy of the GNU Lesser General Public License +;; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +;; MA 02111-1307, USA. + + +;;; Commentary: +;; +;; gmpasm-mode is an editing mode for m4 processed assembler code and m4 +;; macro files in GMP. It's similar to m4-mode, but has a number of +;; settings better suited to GMP.
+;; +;; +;; Install +;; ------- +;; +;; To make M-x gmpasm-mode available, put gmpasm-mode.el somewhere in the +;; load-path and the following in .emacs +;; +;; (autoload 'gmpasm-mode "gmpasm-mode" nil t) +;; +;; To use gmpasm-mode automatically on all .asm and .m4 files, put the +;; following in .emacs +;; +;; (add-to-list 'auto-mode-alist '("\\.asm\\'" . gmpasm-mode)) +;; (add-to-list 'auto-mode-alist '("\\.m4\\'" . gmpasm-mode)) +;; +;; To have gmpasm-mode only on gmp files, try instead something like the +;; following, which uses it only in a directory starting with "gmp", or a +;; sub-directory of such. +;; +;; (add-to-list 'auto-mode-alist +;; '("/gmp.*/.*\\.\\(asm\\|m4\\)\\'" . gmpasm-mode)) +;; +;; Byte compiling will slightly speed up loading. If you want a docstring +;; in the autoload you can use M-x update-file-autoloads if you set it up +;; right. +;; +;; +;; Emacsen +;; ------- +;; +;; FSF Emacs 20.x - gmpasm-mode is designed for this. +;; XEmacs 20.x - seems to work. +;; +;; FSF Emacs 19.x - should work if replacements for some 20.x-isms are +;; available. comment-region with "C" won't really do the right thing +;; though. + + +;;; Code: + +(defgroup gmpasm nil + "GNU MP m4 and asm editing." + :prefix "gmpasm-" + :group 'languages) + +(defcustom gmpasm-mode-hook nil + "*Hook called by `gmpasm-mode'." + :type 'hook + :group 'gmpasm) + +(defcustom gmpasm-comment-start-regexp "[#;!@C]" ; a single character class: one comment character + "*Regexp matching possible comment styles. +See `gmpasm-mode' docstring for how this is used." + :type 'regexp + :group 'gmpasm) + + +(defun gmpasm-add-to-list-second (list-var element) + "(gmpasm-add-to-list-second LIST-VAR ELEMENT) + +Add ELEMENT to LIST-VAR as the second element in the list, if it isn't +already in the list. If LIST-VAR is nil, then ELEMENT is just added as the +sole element in the list. + +This is like `add-to-list', but it puts the new value second in the list.
+ +The first cons cell is copied rather than changed in-place, so references to +the list elsewhere won't be affected." + + (if (member element (symbol-value list-var)) ; already present: + (symbol-value list-var) ; return the list as it is + (set list-var + (if (symbol-value list-var) + (cons (car (symbol-value list-var)) ; fresh first cell, the old one is not modified + (cons element + (cdr (symbol-value list-var)))) ; the old tail is shared + (list element))))) ; the list was empty + + +(defun gmpasm-delete-from-list (list-var element) + "(gmpasm-delete-from-list LIST-VAR ELEMENT) + +Delete ELEMENT from LIST-VAR, using `delete'. +This is like `add-to-list', but the element is deleted from the list. +The list is copied rather than changed in-place, so references to it elsewhere +won't be affected." + + (set list-var (delete element (copy-sequence (symbol-value list-var))))) ; delete from a copy, then store it back + + +(defvar gmpasm-mode-map + (let ((map (make-sparse-keymap))) + + ;; assembler and dnl commenting + (define-key map "\C-c\C-c" 'comment-region) + (define-key map "\C-c\C-d" 'gmpasm-comment-region-dnl) + + ;; kill an M-x compile, since it's not hard to put m4 into an infinite + ;; loop + (define-key map "\C-c\C-k" 'kill-compilation) + + map) + "Keymap for `gmpasm-mode'.") + + +(defvar gmpasm-mode-syntax-table + (let ((table (make-syntax-table))) + ;; underscore left as a symbol char, like C mode + + ;; m4 quotes + (modify-syntax-entry ?` "('" table) ; backquote is the open delimiter + (modify-syntax-entry ?' ")`" table) ; apostrophe is the matching close delimiter + + table) + "Syntax table used in `gmpasm-mode'. + +m4 ignores quote marks in # comments at the top level, but inside quotes # +isn't special and all quotes are active. There seems no easy way to express +this in the syntax table, so nothing is done for comments. Usually this is
Usually this is +best, since it picks up invalid apostrophes in comments inside quotes.") + + +(defvar gmpasm-font-lock-keywords + (eval-when-compile + (list + (cons + (concat + "\\b" + (regexp-opt + '("deflit" "defreg" "defframe" "defframe_pushl" + "define_not_for_expansion" + "ASM_START" "ASM_END" "PROLOGUE" "EPILOGUE" + "forloop" + "TEXT" "DATA" "ALIGN" "W32" + "builtin" "changecom" "changequote" "changeword" "debugfile" + "debugmode" "decr" "define" "defn" "divert" "divnum" "dumpdef" + "errprint" "esyscmd" "eval" "__file__" "format" "gnu" "ifdef" + "ifelse" "include" "incr" "index" "indir" "len" "__line__" + "m4exit" "m4wrap" "maketemp" "patsubst" "popdef" "pushdef" + "regexp" "shift" "sinclude" "substr" "syscmd" "sysval" + "traceoff" "traceon" "translit" "undefine" "undivert" "unix") + t) + "\\b") 'font-lock-keyword-face))) + + "`font-lock-keywords' for `gmpasm-mode'. + +The keywords are m4 builtins and some of the GMP macros used in asm files. +L and LF don't look good fontified, so they're omitted. + +The right assembler comment regexp is added dynamically buffer-local (with +dnl too).") + + +;; Initialized if gmpasm-mode finds filladapt loaded. +(defvar gmpasm-filladapt-token-table nil + "Filladapt token table used in `gmpasm-mode'.") +(defvar gmpasm-filladapt-token-match-table nil + "Filladapt token match table used in `gmpasm-mode'.") +(defvar gmpasm-filladapt-token-conversion-table nil + "Filladapt token conversion table used in `gmpasm-mode'.") + + +;;;###autoload +(defun gmpasm-mode () + "A major mode for editing GNU MP asm and m4 files. + +\\{gmpasm-mode-map} +`comment-start' and `comment-end' are set buffer-local to assembler +commenting appropriate for the CPU by looking for something matching +`gmpasm-comment-start-regexp' at the start of a line, or \"#\" is used if +there's no match (if \"#\" isn't what you want, type in a desired comment +and do \\[gmpasm-mode] to reinitialize). 
+ +`adaptive-fill-regexp' is set buffer-local to the standard regexp with +`comment-start' and dnl added. If filladapt.el has been loaded it similarly +gets `comment-start' and dnl added as buffer-local fill prefixes. + +Font locking has the m4 builtins, some of the GMP macros, m4 dnl commenting, +and assembler commenting (based on the `comment-start' determined). + +Note that `gmpasm-comment-start-regexp' is only matched as a whole word, so +the `C' in it is only matched as a whole word, not on something that happens +to start with `C'. Also it's only the particular `comment-start' determined +that's added for filling etc, not the whole `gmpasm-comment-start-regexp'. + +`gmpasm-mode-hook' is run after initializations are complete. +" + + (interactive) + (kill-all-local-variables) + (setq major-mode 'gmpasm-mode + mode-name "gmpasm") + (use-local-map gmpasm-mode-map) + (set-syntax-table gmpasm-mode-syntax-table) + (setq fill-column 76) + + ;; Short instructions might fit with 32, but anything with labels or + ;; expressions soon needs the comments pushed out to column 40. + (setq comment-column 40) + + ;; Don't want to find out the hard way which dumb assemblers don't like a + ;; missing final newline. + (set (make-local-variable 'require-final-newline) t) + + ;; The first match of gmpasm-comment-start-regexp at the start of a line + ;; determines comment-start, or "#" if no match. + (set (make-local-variable 'comment-start) + (save-excursion + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" gmpasm-comment-start-regexp "\\)\\(\\s-\\|$\\)") + nil t) + (match-string 1) + "#"))) + (set (make-local-variable 'comment-end) "") + + ;; If comment-start ends in an alphanumeric then \b is used to match it + ;; only as a separate word. The test is for an alphanumeric rather than + ;; \w since we might try # or ! as \w characters but without wanting \b. 
+ (let ((comment-regexp + (concat (regexp-quote comment-start) + (if (string-match "[a-zA-Z0-9]\\'" comment-start) "\\b")))) + + ;; Whitespace is required before a comment-start so m4 $# doesn't match + ;; when comment-start is "#". + ;; Only spaces or tabs match after, so newline isn't included in the + ;; font lock below. + (set (make-local-variable 'comment-start-skip) + (concat "\\(^\\|\\s-\\)" comment-regexp "[ \t]*")) + + ;; Comment fontification based on comment-start, matching through to the + ;; end of the line. + (add-to-list (make-local-variable 'gmpasm-font-lock-keywords) + (cons (concat + "\\(\\bdnl\\b\\|" comment-start-skip "\\).*$") + 'font-lock-comment-face)) + + (set (make-local-variable 'font-lock-defaults) + '(gmpasm-font-lock-keywords + t ; no syntactic fontification (of strings etc) + nil ; no case-fold + ((?_ . "w")) ; _ part of a word while fontifying + )) + + ;; Paragraphs are separated by blank lines, or lines with only dnl or + ;; comment-start. + (set (make-local-variable 'paragraph-separate) + (concat "[ \t\f]*\\(\\(" comment-regexp "\\|dnl\\)[ \t]*\\)*$")) + (set (make-local-variable 'paragraph-start) + (concat "\f\\|" paragraph-separate)) + + ;; Adaptive fill gets dnl and comment-start as comment style prefixes on + ;; top of the standard regexp (which has # and ; already actually). 
+ (set (make-local-variable 'adaptive-fill-regexp) + (concat "[ \t]*\\(\\(" + comment-regexp + "\\|dnl\\|[-|#;>*]+\\|(?[0-9]+[.)]\\)[ \t]*\\)*")) + (set (make-local-variable 'adaptive-fill-first-line-regexp) + "\\`\\([ \t]*dnl\\)?[ \t]*\\'") + + (when (fboundp 'filladapt-mode) + (when (not gmpasm-filladapt-token-table) + (setq gmpasm-filladapt-token-table + filladapt-token-table) + (setq gmpasm-filladapt-token-match-table + filladapt-token-match-table) + (setq gmpasm-filladapt-token-conversion-table + filladapt-token-conversion-table) + + ;; Numbered bullet points like "2.1" get matched at the start of a + ;; line when it's really something like "2.1 cycles/limb", so delete + ;; this from the list. The regexp for "1.", "2." etc is left + ;; though. + (gmpasm-delete-from-list 'gmpasm-filladapt-token-table + '("[0-9]+\\(\\.[0-9]+\\)+[ \t]" + bullet)) + + ;; "%" as a comment prefix interferes with x86 register names + ;; like %eax, so delete this. + (gmpasm-delete-from-list 'gmpasm-filladapt-token-table + '("%+" postscript-comment)) + + (add-to-list 'gmpasm-filladapt-token-match-table + '(gmpasm-comment gmpasm-comment)) + (add-to-list 'gmpasm-filladapt-token-conversion-table + '(gmpasm-comment . exact)) + ) + + (set (make-local-variable 'filladapt-token-table) + gmpasm-filladapt-token-table) + (set (make-local-variable 'filladapt-token-match-table) + gmpasm-filladapt-token-match-table) + (set (make-local-variable 'filladapt-token-conversion-table) + gmpasm-filladapt-token-conversion-table) + + ;; Add dnl and comment-start as fill prefixes. + ;; Comments in filladapt.el say filladapt-token-table must begin + ;; with ("^" beginning-of-line), so put our addition second. 
+ (gmpasm-add-to-list-second 'filladapt-token-table + (list (concat "dnl[ \t]\\|" comment-regexp) + 'gmpasm-comment)) + )) + + (run-hooks 'gmpasm-mode-hook)) + + +(defun gmpasm-comment-region-dnl (beg end &optional arg) + "(gmpasm-comment-region-dnl BEG END &optional ARG) + +Comment or uncomment each line in the region using `dnl'. +With \\[universal-argument] prefix arg, uncomment each line in region. +This is `comment-region', but using \"dnl\"." + + (interactive "r\nP") + (let ((comment-start "dnl") + (comment-end "")) + (comment-region beg end arg))) + + +(provide 'gmpasm-mode) + +;;; gmpasm-mode.el ends here diff --git a/rts/gmp/mpn/m68k/add_n.S b/rts/gmp/mpn/m68k/add_n.S new file mode 100644 index 0000000000..9e1d89d64f --- /dev/null +++ b/rts/gmp/mpn/m68k/add_n.S @@ -0,0 +1,79 @@ +/* mc68020 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store + sum in a third limb vector. + +Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_add_n) + +C_SYMBOL_NAME(__gmpn_add_n:) +PROLOG(__gmpn_add_n) +/* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers (offsets are 8 higher than on entry because two registers were just pushed). Better use movem? */ + movel MEM_DISP(sp,12),R(a2) /* res_ptr */ + movel MEM_DISP(sp,16),R(a0) /* s1_ptr */ + movel MEM_DISP(sp,20),R(a1) /* s2_ptr */ + movel MEM_DISP(sp,24),R(d2) /* size */ + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of d2 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) /* turn the saved 0 or -1 into a carry-out of 0 or 1 in d0 */ + +/* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(__gmpn_add_n) diff --git a/rts/gmp/mpn/m68k/lshift.S b/rts/gmp/mpn/m68k/lshift.S new file mode 100644 index 0000000000..a539d5d42e --- /dev/null +++ b/rts/gmp/mpn/m68k/lshift.S @@ -0,0 +1,150 @@ +/* mc68020 __gmpn_lshift -- Shift left a low-level natural-number integer. + +Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 12) + cnt (sp + 16) +*/ + +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_lshift) + +C_SYMBOL_NAME(__gmpn_lshift:) +PROLOG(__gmpn_lshift) + +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers (offsets are 24 higher than on entry because six registers were just pushed). */ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) /* only cnt == 1 is eligible for the special loop */ + cmpl R(s_ptr),R(res_ptr) + bls L(Lspecial) /* jump if s_ptr >= res_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(s_ptr,d0,l),R(a2) +#endif + cmpl R(res_ptr),R(a2) + bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) /* d5 = 32 - cnt */ + +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) /* d0 = s_size * 4, the byte length of the vector */ + addl R(d0),R(s_ptr) /* advance by bytes, matching the scaled lea above */ + addl R(d0),R(res_ptr) +#endif + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d0) + lsrl R(d5),R(d0) /* compute carry limb */ + + lsll R(cnt),R(d2) + movel R(d2),R(d1) + subql
#1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d2) +L(L1:) + movel MEM_PREDEC(s_ptr),R(d1) + movel R(d1),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. */ + +L(Lspecial:) + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) +L(LL1:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + + dbf R(s_size),L(LLoop) + addxl R(d0),R(d0) /* save cy in lsb */ + subl #0x10000,R(s_size) + bcs L(LLend) + lsrl #1,R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__gmpn_lshift) diff --git a/rts/gmp/mpn/m68k/mc68020/addmul_1.S b/rts/gmp/mpn/m68k/mc68020/addmul_1.S new file mode 100644 index 0000000000..6638115d71 --- /dev/null +++ b/rts/gmp/mpn/m68k/mc68020/addmul_1.S @@ -0,0 +1,83 @@ +/* mc68020 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + +Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_addmul_1) + +C_SYMBOL_NAME(__gmpn_addmul_1:) +PROLOG(__gmpn_addmul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + addl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + addl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. 
*/ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(__gmpn_addmul_1) diff --git a/rts/gmp/mpn/m68k/mc68020/mul_1.S b/rts/gmp/mpn/m68k/mc68020/mul_1.S new file mode 100644 index 0000000000..fdd4c39d70 --- /dev/null +++ b/rts/gmp/mpn/m68k/mc68020/mul_1.S @@ -0,0 +1,90 @@ +/* mc68020 __gmpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + +Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_mul_1) + +C_SYMBOL_NAME(__gmpn_mul_1:) +PROLOG(__gmpn_mul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d4),MEM_PREDEC(sp) +#if 0 + movel R(d2),MEM_PREDEC(sp) + movel R(d3),MEM_PREDEC(sp) + movel R(d4),MEM_PREDEC(sp) +#endif + +/* Copy the arguments to registers. Better use movem? 
*/ + movel MEM_DISP(sp,16),R(res_ptr) + movel MEM_DISP(sp,20),R(s1_ptr) + movel MEM_DISP(sp,24),R(s1_size) + movel MEM_DISP(sp,28),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + clrl R(d3) + addxl R(d3),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d4) +#if 0 + movel MEM_POSTINC(sp),R(d4) + movel MEM_POSTINC(sp),R(d3) + movel MEM_POSTINC(sp),R(d2) +#endif + rts +EPILOG(__gmpn_mul_1) diff --git a/rts/gmp/mpn/m68k/mc68020/submul_1.S b/rts/gmp/mpn/m68k/mc68020/submul_1.S new file mode 100644 index 0000000000..3c36b70166 --- /dev/null +++ b/rts/gmp/mpn/m68k/mc68020/submul_1.S @@ -0,0 +1,83 @@ +/* mc68020 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + +Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s1_size (sp + 12) + s2_limb (sp + 16) +*/ + +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_submul_1) + +C_SYMBOL_NAME(__gmpn_submul_1:) +PROLOG(__gmpn_submul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + subl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + subl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(__gmpn_submul_1) diff --git a/rts/gmp/mpn/m68k/mc68020/udiv.S b/rts/gmp/mpn/m68k/mc68020/udiv.S new file mode 100644 index 0000000000..d00cf13558 --- /dev/null +++ b/rts/gmp/mpn/m68k/mc68020/udiv.S @@ -0,0 +1,31 @@ +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +.text + .even +.globl ___udiv_qrnnd +___udiv_qrnnd: + movel sp@(4),a0 + movel sp@(8),d1 + movel sp@(12),d0 + divul sp@(16),d1:d0 + movel d1,a0@ + rts diff --git a/rts/gmp/mpn/m68k/mc68020/umul.S b/rts/gmp/mpn/m68k/mc68020/umul.S new file mode 100644 index 0000000000..a34ae6c543 --- /dev/null +++ b/rts/gmp/mpn/m68k/mc68020/umul.S @@ -0,0 +1,31 @@ +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
+*/ + +.text + .even +.globl ___umul_ppmm +___umul_ppmm: + movel sp@(4),a0 + movel sp@(8),d1 + movel sp@(12),d0 + mulul d0,d0:d1 + movel d1,a0@ + rts diff --git a/rts/gmp/mpn/m68k/rshift.S b/rts/gmp/mpn/m68k/rshift.S new file mode 100644 index 0000000000..b47a48e52a --- /dev/null +++ b/rts/gmp/mpn/m68k/rshift.S @@ -0,0 +1,149 @@ +/* mc68020 __gmpn_rshift -- Shift right a low-level natural-number integer. + +Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s_ptr (sp + 8) + s_size (sp + 12) + cnt (sp + 16) +*/ + +#include "asm-syntax.h" + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_rshift) + +C_SYMBOL_NAME(__gmpn_rshift:) +PROLOG(__gmpn_rshift) +/* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers.
*/ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) /* only cnt == 1 is eligible for the special loop */ + cmpl R(res_ptr),R(s_ptr) + bls L(Lspecial) /* jump if res_ptr >= s_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(res_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(res_ptr,d0,l),R(a2) +#endif + cmpl R(s_ptr),R(a2) + bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) /* d5 = 32 - cnt */ + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d0) + lsll R(d5),R(d0) /* compute carry limb */ + + lsrl R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d2) +L(L1:) + movel MEM_POSTINC(s_ptr),R(d1) + movel R(d1),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM(res_ptr) /* store most significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from most significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination.
*/ + +L(Lspecial:) +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) /* d0 = s_size * 4, the byte length of the vector */ + addl R(d0),R(s_ptr) /* advance by bytes, matching the scaled lea above */ + addl R(d0),R(res_ptr) +#endif + + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) +L(LL1:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + + dbf R(s_size),L(LLoop) + roxrl #1,R(d0) /* save cy in msb */ + subl #0x10000,R(s_size) + bcs L(LLend) + addl R(d0),R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(__gmpn_rshift) diff --git a/rts/gmp/mpn/m68k/sub_n.S b/rts/gmp/mpn/m68k/sub_n.S new file mode 100644 index 0000000000..ce45b24db5 --- /dev/null +++ b/rts/gmp/mpn/m68k/sub_n.S @@ -0,0 +1,79 @@ +/* mc68020 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and + store difference in a third limb vector. + +Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + INPUT PARAMETERS + res_ptr (sp + 4) + s1_ptr (sp + 8) + s2_ptr (sp + 12) + size (sp + 16) +*/ + +#include "asm-syntax.h" + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(__gmpn_sub_n) + +C_SYMBOL_NAME(__gmpn_sub_n:) +PROLOG(__gmpn_sub_n) +/* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers (offsets are 8 higher than on entry because two registers were just pushed). Better use movem? */ + movel MEM_DISP(sp,12),R(a2) /* res_ptr */ + movel MEM_DISP(sp,16),R(a0) /* s1_ptr */ + movel MEM_DISP(sp,20),R(a1) /* s2_ptr */ + movel MEM_DISP(sp,24),R(d2) /* size */ + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of d2 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) /* turn the saved 0 or -1 into a borrow-out of 0 or 1 in d0 */ + +/* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(__gmpn_sub_n) diff --git a/rts/gmp/mpn/m68k/syntax.h b/rts/gmp/mpn/m68k/syntax.h new file mode 100644 index 0000000000..9eec279c06 --- /dev/null +++ b/rts/gmp/mpn/m68k/syntax.h @@ -0,0 +1,177 @@ +/* syntax.h -- Definitions for 68k syntax variations. + +Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#undef ALIGN + +#ifdef MIT_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)base@ +#define MEM_DISP(base,displacement)base@(displacement) +#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale) +#define MEM_PREDEC(memory_base)memory_base@- +#define MEM_POSTINC(memory_base)memory_base@+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#define moveql moveq +/* Use variable sized opcodes. 
*/ +#define bcc jcc +#define bcs jcs +#define bls jls +#define beq jeq +#define bne jne +#define bra jra +#endif + +#ifdef SONY_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#endif + +#ifdef MOTOROLA_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT +#define ALIGN +#define GLOBL XDEF +#define lea LEA +#define movel MOVE.L +#define moveml MOVEM.L +#define moveql MOVEQ.L +#define cmpl CMP.L +#define orl OR.L +#define clrl CLR.L +#define eorw EOR.W +#define lsrl LSR.L +#define lsll LSL.L +#define roxrl ROXR.L +#define roxll ROXL.L +#define addl ADD.L +#define addxl ADDX.L +#define addql ADDQ.L +#define subl SUB.L +#define subxl SUBX.L +#define subql SUBQ.L +#define negl NEG.L +#define mulul MULU.L +#define bcc BCC +#define bcs BCS +#define bls BLS +#define beq BEQ +#define bne BNE +#define bra BRA +#define dbf DBF +#define rts RTS +#define d0 D0 +#define d1 D1 +#define d2 D2 +#define d3 D3 +#define d4 D4 +#define d5 D5 +#define d6 D6 +#define d7 D7 +#define a0 A0 +#define a1 A1 +#define a2 A2 +#define a3 A3 +#define a4 A4 +#define a5 A5 +#define a6 A6 +#define a7 A7 +#define sp SP +#endif + +#ifdef ELF_SYNTAX +#define PROLOG(name) .type name,@function +#define EPILOG(name) .size name,.-name 
+#define MEM(base)(R(base)) +#define MEM_DISP(base,displacement)(displacement,R(base)) +#define MEM_PREDEC(memory_base)-(R(memory_base)) +#define MEM_POSTINC(memory_base)(R(memory_base))+ +#ifdef __STDC__ +#define R_(r)%##r +#define R(r)R_(r) +#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix)) +#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix) +#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale)) +#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale) +#define L(label) .##label +#else +#define R(r)%/**/r +#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale) +#define L(label) ./**/label +#endif +#define TEXT .text +#define ALIGN .align 2 +#define GLOBL .globl +#define bcc jbcc +#define bcs jbcs +#define bls jbls +#define beq jbeq +#define bne jbne +#define bra jbra +#endif + +#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX) +#define movel move.l +#define moveml movem.l +#define moveql moveq.l +#define cmpl cmp.l +#define orl or.l +#define clrl clr.l +#define eorw eor.w +#define lsrl lsr.l +#define lsll lsl.l +#define roxrl roxr.l +#define roxll roxl.l +#define addl add.l +#define addxl addx.l +#define addql addq.l +#define subl sub.l +#define subxl subx.l +#define subql subq.l +#define negl neg.l +#define mulul mulu.l +#endif diff --git a/rts/gmp/mpn/m88k/add_n.s b/rts/gmp/mpn/m88k/add_n.s new file mode 100644 index 0000000000..0b776c618a --- /dev/null +++ b/rts/gmp/mpn/m88k/add_n.s @@ -0,0 +1,104 @@ +; mc88100 __gmpn_add -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. 
+ + text + align 16 + global ___gmpn_add_n +___gmpn_add_n: + ld r6,r3,0 ; read first limb from s1_ptr + extu r10,r5,3 ; r10 = size >> 3 (8-limb loop counter) + ld r7,r4,0 ; read first limb from s2_ptr + + subu.co r5,r0,r5 ; (clear carry as side effect) + mak r5,r5,3<4> + bcnd eq0,r5,Lzero + + or r12,r0,lo16(Lbase) + or.u r12,r12,hi16(Lbase) + addu r12,r12,r5 ; r12 is address for entering in loop + + extu r5,r5,2 ; divide by 4 + subu r2,r2,r5 ; adjust res_ptr + subu r3,r3,r5 ; adjust s1_ptr + subu r4,r4,r5 ; adjust s2_ptr + + or r8,r6,r0 + + jmp.n r12 + or r9,r7,r0 + +Loop: addu r3,r3,32 + st r8,r2,28 + addu r4,r4,32 + ld r6,r3,0 + addu r2,r2,32 + ld r7,r4,0 +Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt) +Lbase: ld r8,r3,4 + addu.cio r6,r6,r7 + ld r9,r4,4 + st r6,r2,0 + ld r6,r3,8 ; add 7 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,8 + st r8,r2,4 + ld r8,r3,12 ; add 6 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,12 + st r6,r2,8 + ld r6,r3,16 ; add 5 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,16 + st r8,r2,12 + ld r8,r3,20 ; add 4 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,20 + st r6,r2,16 + ld r6,r3,24 ; add 3 + 8r limbs + addu.cio r8,r8,r9 + ld r7,r4,24 + st r8,r2,20 + ld r8,r3,28 ; add 2 + 8r limbs + addu.cio r6,r6,r7 + ld r9,r4,28 + st r6,r2,24 + bcnd.n ne0,r10,Loop ; add 1 + 8r limbs + addu.cio r8,r8,r9 + + st r8,r2,28 ; store most significant limb + + jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb diff --git a/rts/gmp/mpn/m88k/mc88110/add_n.S b/rts/gmp/mpn/m88k/mc88110/add_n.S new file mode 100644 index 0000000000..843a50dded --- /dev/null +++ b/rts/gmp/mpn/m88k/mc88110/add_n.S @@ -0,0 +1,200 @@ +; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +; sum in a third limb vector. + +; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__gmpn_add_n) +C_SYMBOL_NAME(__gmpn_add_n): + addu.co r0,r0,r0 ; clear cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned (bit 2 clear) 
+/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Add blocks of 8 limbs until fewer than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + addu.cio r6,r10,r8 + ld r10,s1_ptr,16 + addu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + addu.cio r6,r10,r8 + ld r10,s1_ptr,24 + addu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + addu.cio r6,r10,r8 + ld r10,s1_ptr,32 + addu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Add blocks of 2 limbs until fewer than 2 limbs remain */ +Loope1: addu.cio r6,r10,r8 + ld r10,s1_ptr,8 + addu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: addu.cio r6,r10,r8 + addu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Add last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + addu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + or r12,r0,s2_ptr + or s2_ptr,r0,s1_ptr + or s1_ptr,r0,r12 + br L0 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. 
Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Add least significant limb separately to align s1_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + addu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Add blocks of 8 limbs until fewer than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + addu.cio r8,r8,r6 + st r8,res_ptr,8 + addu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + addu.cio r8,r8,r6 + st r8,res_ptr,16 + addu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + addu.cio r8,r8,r6 + st r8,res_ptr,24 + addu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + addu.cio r8,r8,r6 + st r8,res_ptr,0 + addu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Add last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + addu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: jmp.n r1 + addu.ci r2,r0,r0 ; return carry-out from most sign. 
limb diff --git a/rts/gmp/mpn/m88k/mc88110/addmul_1.s b/rts/gmp/mpn/m88k/mc88110/addmul_1.s new file mode 100644 index 0000000000..7d97c87c79 --- /dev/null +++ b/rts/gmp/mpn/m88k/mc88110/addmul_1.s @@ -0,0 +1,61 @@ +; mc88110 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and +; add the product to a second limb vector. 
+ +; Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___gmpn_addmul_1 +___gmpn_addmul_1: + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 ; negate size + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + subu r8,r8,4 + bcnd.n eq0,r4,Lend + mulu.d r10,r6,r5 + +Loop: ld r7,r8[r4] + ld r6,r3[r4] + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 ; add in the limb loaded from res + st r9,r8[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd ne0,r4,Loop + +Lend: ld r7,r8,0 + addu.cio r9,r11,r2 + addu.ci r2,r10,r0 + addu.co r9,r9,r7 ; add in the limb loaded from res + st r9,r8,0 + jmp.n r1 + addu.ci r2,r2,r0 diff --git a/rts/gmp/mpn/m88k/mc88110/mul_1.s b/rts/gmp/mpn/m88k/mc88110/mul_1.s new file mode 100644 index 0000000000..b8483afa91 --- /dev/null +++ b/rts/gmp/mpn/m88k/mc88110/mul_1.s @@ -0,0 +1,59 @@ +; mc88110 __gmpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. 
+ +; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + + text + align 16 + global ___gmpn_mul_1 +___gmpn_mul_1: + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + + ld r6,r3[r4] + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n eq0,r4,Lend + subu r8,r8,8 + +Loop: ld r6,r3[r4] + addu.cio r9,r11,r2 + or r2,r10,r0 ; could be avoided if unrolled + addu r4,r4,1 + mulu.d r10,r6,r5 + bcnd.n ne0,r4,Loop + st r9,r8[r4] + +Lend: addu.cio r9,r11,r2 + st r9,r8,4 + jmp.n r1 + addu.ci r2,r10,r0 ; retval r2 = r10 + carry diff --git a/rts/gmp/mpn/m88k/mc88110/sub_n.S b/rts/gmp/mpn/m88k/mc88110/sub_n.S new file mode 100644 index 0000000000..715a3faf25 --- /dev/null +++ b/rts/gmp/mpn/m88k/mc88110/sub_n.S @@ -0,0 +1,276 @@ +; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +#define res_ptr r2 +#define s1_ptr r3 +#define s2_ptr r4 +#define size r5 + +#include "sysdep.h" + + text + align 16 + global C_SYMBOL_NAME(__gmpn_sub_n) +C_SYMBOL_NAME(__gmpn_sub_n): + subu.co r0,r0,r0 ; set cy flag + xor r12,s2_ptr,res_ptr + bb1 2,r12,L1 +; ** V1a ** +L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned +/* Subtract least significant limb separately to align res_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s1_ptr,0 + ld r12,s1_ptr,4 + ld.d r8,s2_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1 +/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ + align 8 +Loop1: subu size,size,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r10,r8 + ld r10,s1_ptr,16 + subu.cio r7,r12,r9 + ld r12,s1_ptr,20 + ld.d r8,s2_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r10,r8 + ld r10,s1_ptr,24 + subu.cio r7,r12,r9 + ld r12,s1_ptr,28 + ld.d r8,s2_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r10,r8 + ld 
r10,s1_ptr,32 + subu.cio r7,r12,r9 + ld r12,s1_ptr,36 + addu s1_ptr,s1_ptr,32 + ld.d r8,s2_ptr,32 + addu s2_ptr,s2_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1 + +Lfin1: addu size,size,8-2 + bcnd lt0,size,Lend1 +/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ +Loope1: subu.cio r6,r10,r8 + ld r10,s1_ptr,8 + subu.cio r7,r12,r9 + ld r12,s1_ptr,12 + ld.d r8,s2_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1 +Lend1: subu.cio r6,r10,r8 + subu.cio r7,r12,r9 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1 +/* Subtract last limb */ + ld r10,s1_ptr,8 + ld r8,s2_ptr,8 + subu.cio r6,r10,r8 + st r6,res_ptr,8 + +Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +L1: xor r12,s1_ptr,res_ptr + bb1 2,r12,L2 +; ** V1b ** + bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned +/* Subtract least significant limb separately to align res_ptr and s1_ptr */ + ld r10,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + ld r8,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + subu size,size,1 + subu.co r6,r8,r10 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 +L_v1b: cmp r12,size,2 + bb1 lt,r12,Lend2 + + ld r10,s2_ptr,0 + ld r12,s2_ptr,4 + ld.d r8,s1_ptr,0 + subu size,size,10 + bcnd lt0,size,Lfin1b +/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ + align 8 +Loop1b: subu size,size,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu.cio r6,r8,r10 + ld r10,s2_ptr,16 + subu.cio r7,r9,r12 + ld r12,s2_ptr,20 + ld.d r8,s1_ptr,16 + st.d r6,res_ptr,8 + subu.cio r6,r8,r10 + ld r10,s2_ptr,24 + subu.cio r7,r9,r12 + ld r12,s2_ptr,28 + ld.d r8,s1_ptr,24 + st.d r6,res_ptr,16 + subu.cio r6,r8,r10 + ld r10,s2_ptr,32 + subu.cio r7,r9,r12 + ld r12,s2_ptr,36 + addu s2_ptr,s2_ptr,32 + ld.d r8,s1_ptr,32 + addu s1_ptr,s1_ptr,32 + st.d r6,res_ptr,24 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop1b + +Lfin1b: addu 
size,size,8-2 + bcnd lt0,size,Lend1b +/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */ +Loope1b:subu.cio r6,r8,r10 + ld r10,s2_ptr,8 + subu.cio r7,r9,r12 + ld r12,s2_ptr,12 + ld.d r8,s1_ptr,8 + st.d r6,res_ptr,0 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope1b +Lend1b: subu.cio r6,r8,r10 + subu.cio r7,r9,r12 + st.d r6,res_ptr,0 + + bb0 0,size,Lret1b +/* Subtract last limb */ + ld r10,s2_ptr,8 + ld r8,s1_ptr,8 + subu.cio r6,r8,r10 + st r6,res_ptr,8 + +Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 + +; ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. */ + +L2: cmp r12,size,1 + bb1 eq,r12,Ljone + bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned +/* Subtract least significant limb separately to align s1_ptr and s2_ptr */ + ld r10,s1_ptr,0 + addu s1_ptr,s1_ptr,4 + ld r8,s2_ptr,0 + addu s2_ptr,s2_ptr,4 + subu size,size,1 + subu.co r6,r10,r8 + st r6,res_ptr,0 + addu res_ptr,res_ptr,4 + +L_v2: subu size,size,8 + bcnd lt0,size,Lfin2 +/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */ + align 8 +Loop2: subu size,size,8 + ld.d r8,s1_ptr,0 + ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + ld.d r8,s1_ptr,8 + ld.d r6,s2_ptr,8 + subu.cio r8,r8,r6 + st r8,res_ptr,8 + subu.cio r9,r9,r7 + st r9,res_ptr,12 + ld.d r8,s1_ptr,16 + ld.d r6,s2_ptr,16 + subu.cio r8,r8,r6 + st r8,res_ptr,16 + subu.cio r9,r9,r7 + st r9,res_ptr,20 + ld.d r8,s1_ptr,24 + ld.d r6,s2_ptr,24 + subu.cio r8,r8,r6 + st r8,res_ptr,24 + subu.cio r9,r9,r7 + st r9,res_ptr,28 + addu s1_ptr,s1_ptr,32 + addu s2_ptr,s2_ptr,32 + addu res_ptr,res_ptr,32 + bcnd ge0,size,Loop2 + +Lfin2: addu size,size,8-2 + bcnd lt0,size,Lend2 +Loope2: ld.d r8,s1_ptr,0 + 
ld.d r6,s2_ptr,0 + subu.cio r8,r8,r6 + st r8,res_ptr,0 + subu.cio r9,r9,r7 + st r9,res_ptr,4 + subu size,size,2 + addu s1_ptr,s1_ptr,8 + addu s2_ptr,s2_ptr,8 + addu res_ptr,res_ptr,8 + bcnd ge0,size,Loope2 +Lend2: bb0 0,size,Lret2 +/* Subtract last limb */ +Ljone: ld r10,s1_ptr,0 + ld r8,s2_ptr,0 + subu.cio r6,r10,r8 + st r6,res_ptr,0 + +Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 diff --git a/rts/gmp/mpn/m88k/mul_1.s b/rts/gmp/mpn/m88k/mul_1.s new file mode 100644 index 0000000000..06370837ef --- /dev/null +++ b/rts/gmp/mpn/m88k/mul_1.s @@ -0,0 +1,127 @@ +; mc88100 __gmpn_mul_1 -- Multiply a limb vector with a single limb and +; store the product in a second limb vector. + +; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; size r4 +; s2_limb r5 + +; Common overhead is about 11 cycles/invocation. + +; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The +; pipeline stalls 2 cycles due to WB contention.) + +; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. 
(The +; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.) + +; To enhance speed: +; 1. Unroll main loop 4-8 times. +; 2. Schedule code to avoid WB contention. It might be tempting to move the +; ld instruction in the loops down to save 2 cycles (less WB contention), +; but that loses because the ultimate value will be read from outside +; the allocated space. But if we handle the ultimate multiplication in +; the tail, we can do this. +; 3. Make the multiplication with fewer instructions. I think the code for +; (S2_LIMB >= 0x10000) is not minimal. +; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or +; fewer cycles/limb; the (S2_LIMB < 0x10000) case would run in 11 +; cycles/limb. (Assuming infinite unrolling.) + + text + align 16 + global ___gmpn_mul_1 +___gmpn_mul_1: + + ; Make S1_PTR and RES_PTR point at the end of their blocks + ; and negate SIZE. + lda r3,r3[r4] + lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval + subu r4,r0,r4 + + addu.co r2,r0,r0 ; r2 = cy = 0 + ld r9,r3[r4] + mask r7,r5,0xffff ; r7 = lo(S2_LIMB) + extu r8,r5,16 ; r8 = hi(S2_LIMB) + bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0) + subu r6,r6,4 + +; General code for any value of S2_LIMB. 
+ + ; Make a stack frame and save r25 and r26 + subu r31,r31,16 + st.d r25,r31,8 + + ; Enter the loop in the middle + br.n L1 + addu r4,r4,1 + +Loop: ld r9,r3[r4] + st r26,r6[r4] +; bcnd ne0,r0,0 ; bubble + addu r4,r4,1 +L1: mul r26,r9,r5 ; low word of product mul_1 WB ld + mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1 + mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1 + mul r10,r12,r8 ; r10 = prod_1a mul_3 + extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1 + mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1 + mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2 + extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3 + addu r10,r10,r11 ; addu_1 WB extu_2 +; bcnd ne0,r0,0 ; bubble WB addu_1 + addu.co r10,r10,r12 ; WB mul_4 + mask.u r10,r10,0xffff ; move the 16 most significant bits... + addu.ci r10,r10,r0 ; ...to the low half of the word... + rot r10,r10,16 ; ...and put carry in pos 16. + addu.co r26,r26,r2 ; add old carry limb + bcnd.n ne0,r4,Loop + addu.ci r2,r25,r10 ; compute new carry limb + + st r26,r6[r4] + ld.d r25,r31,8 + jmp.n r1 + addu r31,r31,16 + +; Fast code for S2_LIMB < 0x10000 +Lsmall: + ; Enter the loop in the middle + br.n SL1 + addu r4,r4,1 + +SLoop: ld r9,r3[r4] ; + st r8,r6[r4] ; + addu r4,r4,1 ; +SL1: mul r8,r9,r5 ; low word of product + mask r12,r9,0xffff ; r12 = lo(s1_limb) + extu r13,r9,16 ; r13 = hi(s1_limb) + mul r11,r12,r7 ; r11 = prod_0 + mul r12,r13,r7 ; r12 = prod_1b + addu.cio r8,r8,r2 ; add old carry limb + extu r10,r11,16 ; r10 = hi(prod_0) + addu r10,r10,r12 ; + bcnd.n ne0,r4,SLoop + extu r2,r10,16 ; r2 = new carry limb + + jmp.n r1 + st r8,r6[r4] diff --git a/rts/gmp/mpn/m88k/sub_n.s b/rts/gmp/mpn/m88k/sub_n.s new file mode 100644 index 0000000000..2fd345a135 --- /dev/null +++ b/rts/gmp/mpn/m88k/sub_n.s @@ -0,0 +1,106 @@ +; mc88100 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +; store difference in a third limb vector. + +; Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. 
+ +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr r2 +; s1_ptr r3 +; s2_ptr r4 +; size r5 + +; This code has been optimized to run one instruction per clock, avoiding +; load stalls and writeback contention. As a result, the instruction +; order is not always natural. + +; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100, +; but on the 88110, it seems to run much slower, 6.6 clocks/limb. 
+ + text + align 16 + global ___gmpn_sub_n +___gmpn_sub_n: + ld r6,r3,0 ; read first limb from s1_ptr + extu r10,r5,3 + ld r7,r4,0 ; read first limb from s2_ptr + + subu r5,r0,r5 + mak r5,r5,3<4> + bcnd.n eq0,r5,Lzero + subu.co r0,r0,r0 ; initialize carry + + or r12,r0,lo16(Lbase) + or.u r12,r12,hi16(Lbase) + addu r12,r12,r5 ; r12 is address for entering in loop + + extu r5,r5,2 ; divide by 4 + subu r2,r2,r5 ; adjust res_ptr + subu r3,r3,r5 ; adjust s1_ptr + subu r4,r4,r5 ; adjust s2_ptr + + or r8,r6,r0 + + jmp.n r12 + or r9,r7,r0 + +Loop: addu r3,r3,32 + st r8,r2,28 + addu r4,r4,32 + ld r6,r3,0 + addu r2,r2,32 + ld r7,r4,0 +Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt) +Lbase: ld r8,r3,4 + subu.cio r6,r6,r7 + ld r9,r4,4 + st r6,r2,0 + ld r6,r3,8 ; subtract 7 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,8 + st r8,r2,4 + ld r8,r3,12 ; subtract 6 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,12 + st r6,r2,8 + ld r6,r3,16 ; subtract 5 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,16 + st r8,r2,12 + ld r8,r3,20 ; subtract 4 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,20 + st r6,r2,16 + ld r6,r3,24 ; subtract 3 + 8r limbs + subu.cio r8,r8,r9 + ld r7,r4,24 + st r8,r2,20 + ld r8,r3,28 ; subtract 2 + 8r limbs + subu.cio r6,r6,r7 + ld r9,r4,28 + st r6,r2,24 + bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs + subu.cio r8,r8,r9 + + st r8,r2,28 ; store most significant limb + + addu.ci r2,r0,r0 ; return carry-out from most sign. limb + jmp.n r1 + xor r2,r2,1 ; invert the fetched carry bit before returning diff --git a/rts/gmp/mpn/mips2/add_n.s b/rts/gmp/mpn/mips2/add_n.s new file mode 100644 index 0000000000..5c3c7fc8a1 --- /dev/null +++ b/rts/gmp/mpn/mips2/add_n.s @@ -0,0 +1,120 @@ + # MIPS2 __gmpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __gmpn_add_n + .ent __gmpn_add_n +__gmpn_add_n: + .set noreorder + .set nomacro + + lw $10,0($5) + lw $11,0($6) + + addiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 # clear carry (executes in the branch delay slot) + + subu $7,$7,$9 + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 + +.L0: beq $7,$0,.Lend + nop + +.Loop: addiu $7,$7,-4 + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + addu $13,$12,$13 + sltu $2,$13,$12 + sw 
$13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 + +.Lend: addu $11,$11,$2 + sltu $8,$11,$2 + addu $11,$10,$11 + sltu $2,$11,$10 + sw $11,0($4) + j $31 + or $2,$2,$8 # merge the carries of the two additions into $2 + + .end __gmpn_add_n diff --git a/rts/gmp/mpn/mips2/addmul_1.s b/rts/gmp/mpn/mips2/addmul_1.s new file mode 100644 index 0000000000..1e5037751b --- /dev/null +++ b/rts/gmp/mpn/mips2/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS __gmpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_addmul_1 + .ent __gmpn_addmul_1 +__gmpn_addmul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) + mflo $3 + mfhi $9 + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + addu $3,$10,$3 + sltu $10,$3,$10 # carry from adding the res limb -> $10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + addu $3,$10,$3 + sltu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_addmul_1 diff --git a/rts/gmp/mpn/mips2/lshift.s b/rts/gmp/mpn/mips2/lshift.s new file mode 100644 index 0000000000..2ca3a3c800 --- /dev/null +++ b/rts/gmp/mpn/mips2/lshift.s @@ -0,0 +1,95 @@ + # MIPS2 __gmpn_lshift -- Shift a limb vector left by cnt bits. + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. 
+ + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __gmpn_lshift + .ent __gmpn_lshift +__gmpn_lshift: + .set noreorder + .set nomacro + + sll $2,$6,2 + addu $5,$5,$2 # make $5 point at end of src + lw $10,-4($5) # load first limb + subu $13,$0,$7 # $13 = -cnt, the shift count used by the srl's below + addu $4,$4,$2 # make $4 point at end of res + addiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + srl $2,$10,$13 # compute function result + + subu $6,$6,$9 + +.Loop0: lw $3,-8($5) + addiu $4,$4,-4 + addiu $5,$5,-4 + addiu $9,$9,-1 + sll $11,$10,$7 + srl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sw $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: lw $3,-8($5) + addiu $4,$4,-16 + addiu $6,$6,-4 + sll $11,$10,$7 + srl $12,$3,$13 + + lw $10,-12($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,12($4) + srl $9,$10,$13 + + lw $3,-16($5) + sll $11,$10,$7 + or $8,$14,$9 + sw $8,8($4) + srl $12,$3,$13 + + lw $10,-20($5) + sll $14,$3,$7 + or $8,$11,$12 + sw $8,4($4) + srl $9,$10,$13 + + addiu $5,$5,-16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,0($4) + +.Lend: sll $8,$10,$7 + j $31 + sw $8,-4($4) + .end __gmpn_lshift diff --git a/rts/gmp/mpn/mips2/mul_1.s b/rts/gmp/mpn/mips2/mul_1.s new file mode 100644 index 0000000000..ea8aa26809 --- /dev/null +++ b/rts/gmp/mpn/mips2/mul_1.s @@ -0,0 +1,85 @@ + # MIPS __gmpn_mul_1 -- Multiply a limb vector with a single limb and + # store the 
product in a second limb vector. + + # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_mul_1 + .ent __gmpn_mul_1 +__gmpn_mul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + addiu $5,$5,4 + addu $10,$10,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sw $10,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 + mfhi $9 + addu $10,$10,$2 + sltu $2,$10,$2 + multu $8,$7 + sw $10,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 + mfhi 
$9 + addu $10,$10,$2 + sltu $2,$10,$2 + sw $10,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_mul_1 diff --git a/rts/gmp/mpn/mips2/rshift.s b/rts/gmp/mpn/mips2/rshift.s new file mode 100644 index 0000000000..37c8f39cb4 --- /dev/null +++ b/rts/gmp/mpn/mips2/rshift.s @@ -0,0 +1,92 @@ + # MIPS2 __gmpn_rshift -- + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __gmpn_rshift + .ent __gmpn_rshift +__gmpn_rshift: + .set noreorder + .set nomacro + + lw $10,0($5) # load first limb + subu $13,$0,$7 + addiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + sll $2,$10,$13 # compute function result + + subu $6,$6,$9 + +.Loop0: lw $3,4($5) + addiu $4,$4,4 + addiu $5,$5,4 + addiu $9,$9,-1 + srl $11,$10,$7 + sll $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sw $8,-4($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: lw $3,4($5) + addiu $4,$4,16 + addiu $6,$6,-4 + srl $11,$10,$7 + sll $12,$3,$13 + + lw $10,8($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-16($4) + sll $9,$10,$13 + + lw $3,12($5) + srl $11,$10,$7 + or $8,$14,$9 + sw $8,-12($4) + sll $12,$3,$13 + + lw $10,16($5) + srl $14,$3,$7 + or $8,$11,$12 + sw $8,-8($4) + sll $9,$10,$13 + + addiu $5,$5,16 + or $8,$14,$9 + bgtz $6,.Loop + sw $8,-4($4) + +.Lend: srl $8,$10,$7 + j $31 + sw $8,0($4) + .end __gmpn_rshift diff --git a/rts/gmp/mpn/mips2/sub_n.s b/rts/gmp/mpn/mips2/sub_n.s new file mode 100644 index 0000000000..51d34f3ac3 --- /dev/null +++ b/rts/gmp/mpn/mips2/sub_n.s @@ -0,0 +1,120 @@ + # MIPS2 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __gmpn_sub_n + .ent __gmpn_sub_n +__gmpn_sub_n: + .set noreorder + .set nomacro + + lw $10,0($5) + lw $11,0($6) + + addiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + subu $7,$7,$9 + +.Loop0: addiu $9,$9,-1 + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + or $2,$2,$8 + + addiu $5,$5,4 + addiu $6,$6,4 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + addiu $4,$4,4 + +.L0: beq $7,$0,.Lend + nop + +.Loop: addiu $7,$7,-4 + + lw $12,4($5) + addu $11,$11,$2 + lw $13,4($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + or $2,$2,$8 + + lw $10,8($5) + addu $13,$13,$2 + lw $11,8($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,4($4) + or $2,$2,$8 + + lw $12,12($5) + addu $11,$11,$2 + lw $13,12($6) + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,8($4) + or $2,$2,$8 + + lw $10,16($5) + addu $13,$13,$2 + lw $11,16($6) + sltu $8,$13,$2 + subu $13,$12,$13 + sltu $2,$12,$13 + sw $13,12($4) + or $2,$2,$8 + + addiu $5,$5,16 + addiu $6,$6,16 + + bne $7,$0,.Loop + addiu $4,$4,16 + +.Lend: addu $11,$11,$2 + sltu $8,$11,$2 + subu $11,$10,$11 + sltu $2,$10,$11 + sw $11,0($4) + j $31 + or $2,$2,$8 + + .end __gmpn_sub_n diff --git a/rts/gmp/mpn/mips2/submul_1.s b/rts/gmp/mpn/mips2/submul_1.s new file mode 100644 index 0000000000..495dea3ba2 --- /dev/null +++ b/rts/gmp/mpn/mips2/submul_1.s @@ -0,0 +1,97 @@ + # MIPS __gmpn_submul_1 -- Multiply a limb vector 
with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_submul_1 + .ent __gmpn_submul_1 +__gmpn_submul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + lw $8,0($5) + + # warm up phase 1 + addiu $5,$5,4 + multu $8,$7 + + addiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + addiu $6,$6,-1 + beq $6,$0,$LC1 + lw $8,0($5) # load new s1 limb as early as possible + +Loop: lw $10,0($4) + mflo $3 + mfhi $9 + addiu $5,$5,4 + addu $3,$3,$2 # add old carry limb to low product limb + multu $8,$7 + lw $8,0($5) # load new s1 limb as early as possible + addiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + bne $6,$0,Loop + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + multu $8,$7 + subu $3,$10,$3 + 
sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + addiu $4,$4,4 + addu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: lw $10,0($4) + mflo $3 + mfhi $9 + addu $3,$3,$2 + sltu $2,$3,$2 + subu $3,$10,$3 + sgtu $10,$3,$10 + addu $2,$2,$10 + sw $3,0($4) + j $31 + addu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_submul_1 diff --git a/rts/gmp/mpn/mips2/umul.s b/rts/gmp/mpn/mips2/umul.s new file mode 100644 index 0000000000..40e847614c --- /dev/null +++ b/rts/gmp/mpn/mips2/umul.s @@ -0,0 +1,30 @@ + # Copyright (C) 1999 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + .text + .align 2 + .globl __umul_ppmm + .ent __umul_ppmm +__umul_ppmm: + multu $5,$6 + mflo $3 + mfhi $2 + sw $3,0($4) + j $31 + .end __umul_ppmm diff --git a/rts/gmp/mpn/mips3/README b/rts/gmp/mpn/mips3/README new file mode 100644 index 0000000000..e94b2c7460 --- /dev/null +++ b/rts/gmp/mpn/mips3/README @@ -0,0 +1,23 @@ +This directory contains mpn functions optimized for MIPS3. Example of +processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000. + +RELEVANT OPTIMIZATION ISSUES + +1. 
On the R4000 and R4400, branches, both the plain and the "likely" ones, + take 3 cycles to execute. (The fastest possible loop will take 4 cycles, + because of the delay insn.) + + On the R4600, branches take a single cycle. + + On the R8000, branches often take no noticeable cycles, as they are + executed in a separate function unit. + +2. The R4000 and R4400 have a load latency of 4 cycles. + +3. On the R4000 and R4400, multiplies take a data-dependent number of + cycles, contrary to the SGI documentation. There seem to be 3 or 4 + possible latencies. + +STATUS + +Good... diff --git a/rts/gmp/mpn/mips3/add_n.s b/rts/gmp/mpn/mips3/add_n.s new file mode 100644 index 0000000000..adad0beaef --- /dev/null +++ b/rts/gmp/mpn/mips3/add_n.s @@ -0,0 +1,120 @@ + # MIPS3 __gmpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA.
+ + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __gmpn_add_n + .ent __gmpn_add_n +__gmpn_add_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end __gmpn_add_n diff --git a/rts/gmp/mpn/mips3/addmul_1.s b/rts/gmp/mpn/mips3/addmul_1.s new file mode 100644 index 0000000000..d390e2298e --- /dev/null +++ b/rts/gmp/mpn/mips3/addmul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and + # add the product to a second limb vector. + + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_addmul_1 + .ent __gmpn_addmul_1 +__gmpn_addmul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) + mflo 
$3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_addmul_1 diff --git a/rts/gmp/mpn/mips3/gmp-mparam.h b/rts/gmp/mpn/mips3/gmp-mparam.h new file mode 100644 index 0000000000..656e90c7b0 --- /dev/null +++ b/rts/gmp/mpn/mips3/gmp-mparam.h @@ -0,0 +1,58 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* These values are for the R10000 using the system cc. */ +/* Generated by tuneup.c, 2000-07-25.
*/ +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 16 +#endif +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 32 +#endif + +/* Suppressed the TOOM3 values as they looked absolutely crazy + (698 and 21 respectively) */ + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 58 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 54 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 82 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 159 +#endif diff --git a/rts/gmp/mpn/mips3/lshift.s b/rts/gmp/mpn/mips3/lshift.s new file mode 100644 index 0000000000..372606fddf --- /dev/null +++ b/rts/gmp/mpn/mips3/lshift.s @@ -0,0 +1,95 @@ + # MIPS3 __gmpn_lshift -- + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA.
+ + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __gmpn_lshift + .ent __gmpn_lshift +__gmpn_lshift: + .set noreorder + .set nomacro + + dsll $2,$6,3 + daddu $5,$5,$2 # make r5 point at end of src + ld $10,-8($5) # load first limb + dsubu $13,$0,$7 + daddu $4,$4,$2 # make r4 point at end of res + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsrl $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,-16($5) + daddiu $4,$4,-8 + daddiu $5,$5,-8 + daddiu $9,$9,-1 + dsll $11,$10,$7 + dsrl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,-16($5) + daddiu $4,$4,-32 + daddiu $6,$6,-4 + dsll $11,$10,$7 + dsrl $12,$3,$13 + + ld $10,-24($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,24($4) + dsrl $9,$10,$13 + + ld $3,-32($5) + dsll $11,$10,$7 + or $8,$14,$9 + sd $8,16($4) + dsrl $12,$3,$13 + + ld $10,-40($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,8($4) + dsrl $9,$10,$13 + + daddiu $5,$5,-32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,0($4) + +.Lend: dsll $8,$10,$7 + j $31 + sd $8,-8($4) + .end __gmpn_lshift diff --git a/rts/gmp/mpn/mips3/mul_1.s b/rts/gmp/mpn/mips3/mul_1.s new file mode 100644 index 0000000000..6659e2b4eb --- /dev/null +++ b/rts/gmp/mpn/mips3/mul_1.s @@ -0,0 +1,85 @@ + # MIPS3 __gmpn_mul_1 -- Multiply a limb vector with a single limb and + # store the product in a second limb vector. + + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. 
+ + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_mul_1 + .ent __gmpn_mul_1 +__gmpn_mul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + daddiu $5,$5,8 + daddu $10,$10,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sd $10,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + dmultu $8,$7 + sd $10,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + sd $10,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_mul_1 diff --git a/rts/gmp/mpn/mips3/rshift.s b/rts/gmp/mpn/mips3/rshift.s new file mode 100644 index 0000000000..59c7fd3492 --- /dev/null +++ b/rts/gmp/mpn/mips3/rshift.s @@ -0,0 +1,92 @@ + # MIPS3 __gmpn_rshift -- + + # Copyright (C) 1995, 2000 Free Software 
Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # src_ptr $5 + # size $6 + # cnt $7 + + .text + .align 2 + .globl __gmpn_rshift + .ent __gmpn_rshift +__gmpn_rshift: + .set noreorder + .set nomacro + + ld $10,0($5) # load first limb + dsubu $13,$0,$7 + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsll $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,8($5) + daddiu $4,$4,8 + daddiu $5,$5,8 + daddiu $9,$9,-1 + dsrl $11,$10,$7 + dsll $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,-8($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,8($5) + daddiu $4,$4,32 + daddiu $6,$6,-4 + dsrl $11,$10,$7 + dsll $12,$3,$13 + + ld $10,16($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-32($4) + dsll $9,$10,$13 + + ld $3,24($5) + dsrl $11,$10,$7 + or $8,$14,$9 + sd $8,-24($4) + dsll $12,$3,$13 + + ld $10,32($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-16($4) + dsll $9,$10,$13 + + daddiu $5,$5,32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,-8($4) + +.Lend: dsrl $8,$10,$7 + j $31 + sd $8,0($4) + .end __gmpn_rshift diff --git 
a/rts/gmp/mpn/mips3/sub_n.s b/rts/gmp/mpn/mips3/sub_n.s new file mode 100644 index 0000000000..c57c824b04 --- /dev/null +++ b/rts/gmp/mpn/mips3/sub_n.s @@ -0,0 +1,120 @@ + # MIPS3 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and + # store difference in a third limb vector. + + # Copyright (C) 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # s2_ptr $6 + # size $7 + + .text + .align 2 + .globl __gmpn_sub_n + .ent __gmpn_sub_n +__gmpn_sub_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end __gmpn_sub_n diff --git a/rts/gmp/mpn/mips3/submul_1.s b/rts/gmp/mpn/mips3/submul_1.s new file mode 100644 index 0000000000..531f9705a6 --- /dev/null +++ b/rts/gmp/mpn/mips3/submul_1.s @@ -0,0 +1,97 @@ + # MIPS3 __gmpn_submul_1 -- Multiply a limb vector with a single limb and + # subtract the product from a second limb vector. + + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published by + # the Free Software Foundation; either version 2.1 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + # License for more details. + + # You should have received a copy of the GNU Lesser General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + + # INPUT PARAMETERS + # res_ptr $4 + # s1_ptr $5 + # size $6 + # s2_limb $7 + + .text + .align 4 + .globl __gmpn_submul_1 + .ent __gmpn_submul_1 +__gmpn_submul_1: + .set noreorder + .set nomacro + + # warm up phase 0 + ld $8,0($5) + + # warm up phase 1 + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 1 +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + + # cool down phase 0 +$LC0: ld $10,0($4) + mflo 
$3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end __gmpn_submul_1 diff --git a/rts/gmp/mpn/mp_bases.c b/rts/gmp/mpn/mp_bases.c new file mode 100644 index 0000000000..011c328c80 --- /dev/null +++ b/rts/gmp/mpn/mp_bases.c @@ -0,0 +1,550 @@ +/* __mp_bases -- Structure for conversion between internal binary + format and strings in base 2..255. The fields are explained in + gmp-impl.h. + + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+/* Table variant compiled when a limb is 32 bits wide; one row per conversion base, in the order given by the leading index comments. */
+/* The field meanings are declared with struct bases in gmp-impl.h; the row notes below only describe what the values themselves show. */
+#if BITS_PER_MP_LIMB == 32
+const struct bases __mp_bases[256] =
+{
+  /* 0 */ {0, 0.0, 0, 0}, /* placeholder row: the table serves bases 2..255 */
+  /* 1 */ {0, 1e38, 0, 0}, /* placeholder row, as for index 0 */
+  /* 2 */ {32, 1.0000000000000000, 0x1, 0x0}, /* power-of-two base: 32 one-bit digits per limb; 3rd value is log2(base), 4th is 0 */
+  /* 3 */ {20, 0.6309297535714575, 0xcfd41b91, 0x3b563c24}, /* 20 digits fit in a limb; 0.6309... = log(2)/log(3); 0xcfd41b91 = 3^20 */
+  /* 4 */ {16, 0.5000000000000000, 0x2, 0x0},
+  /* 5 */ {13, 0.4306765580733931, 0x48c27395, 0xc25c2684},
+  /* 6 */ {12, 0.3868528072345416, 0x81bf1000, 0xf91bd1b6},
+  /* 7 */ {11, 0.3562071871080222, 0x75db9c97, 0x1607a2cb},
+  /* 8 */ {10, 0.3333333333333334, 0x3, 0x0},
+  /* 9 */ {10, 0.3154648767857287, 0xcfd41b91, 0x3b563c24},
+  /* 10 */ {9, 0.3010299956639811, 0x3b9aca00, 0x12e0be82}, /* 0x3b9aca00 = 10^9; 4th value is presumably a fixed-point inverse of the 3rd -- confirm in gmp-impl.h */
+  /* 11 */ {9, 0.2890648263178878, 0x8c8b6d2b, 0xd24cde04},
+  /* 12 */ {8, 0.2789429456511298, 0x19a10000, 0x3fa39ab5},
+  /* 13 */ {8, 0.2702381544273197, 0x309f1021, 0x50f8ac5f},
+  /* 14 */ {8, 0.2626495350371936, 0x57f6c100, 0x74843b1e},
+  /* 15 */ {8, 0.2559580248098155, 0x98c29b81, 0xad0326c2},
+  /* 16 */ {8, 0.2500000000000000, 0x4, 0x0},
+  /* 17 */ {7, 0.2446505421182260, 0x18754571, 0x4ef0b6bd},
+  /* 18 */ {7, 0.2398124665681315, 0x247dbc80, 0xc0fc48a1},
+  /* 19 */ {7, 0.2354089133666382, 0x3547667b, 0x33838942},
+  /* 20 */ {7, 0.2313782131597592, 0x4c4b4000, 0xad7f29ab},
+  /* 21 */ {7, 0.2276702486969530, 0x6b5a6e1d, 0x313c3d15},
+  /* 22 */ {7, 0.2242438242175754, 0x94ace180, 0xb8cca9e0},
+  /* 23 */ {7, 0.2210647294575037, 0xcaf18367, 0x42ed6de9},
+  /* 24 */ {6, 0.2181042919855316, 0xb640000, 0x67980e0b},
+  /* 25 */ {6, 0.2153382790366965, 0xe8d4a51, 0x19799812},
+  /* 26 */ {6, 0.2127460535533632, 0x1269ae40, 0xbce85396},
+  /* 27 */ {6, 0.2103099178571525, 0x17179149, 0x62c103a9},
+  /* 28 */ {6, 0.2080145976765095, 0x1cb91000, 0x1d353d43},
+  /* 29 */ {6, 0.2058468324604344, 0x23744899, 0xce1decea},
+  /* 30 */ {6, 0.2037950470905062, 0x2b73a840, 0x790fc511},
+  /* 31 */ {6, 0.2018490865820999, 0x34e63b41, 0x35b865a0},
+  /* 32 */ {6, 0.2000000000000000, 0x5, 0x0},
+  /* 33 */ {6, 0.1982398631705605, 0x4cfa3cc1, 0xa9aed1b3},
+  /* 34 */ {6, 0.1965616322328226, 0x5c13d840, 0x63dfc229},
+  /* 35 */ {6, 0.1949590218937863, 0x6d91b519, 0x2b0fee30},
+  /* 36 */ {6, 0.1934264036172708, 0x81bf1000, 0xf91bd1b6},
+  /* 37 */ {6, 0.1919587200065601, 0x98ede0c9, 0xac89c3a9},
+  /* 38 */ {6, 0.1905514124267734, 0xb3773e40, 0x6d2c32fe},
+  /* 39 */ {6, 0.1892003595168700, 0xd1bbc4d1, 0x387907c9},
+  /* 40 */ {6, 0.1879018247091076, 0xf4240000, 0xc6f7a0b},
+  /* 41 */ {5, 0.1866524112389434, 0x6e7d349, 0x28928154},
+  /* 42 */ {5, 0.1854490234153689, 0x7ca30a0, 0x6e8629d},
+  /* 43 */ {5, 0.1842888331487062, 0x8c32bbb, 0xd373dca0},
+  /* 44 */ {5, 0.1831692509136336, 0x9d46c00, 0xa0b17895},
+  /* 45 */ {5, 0.1820879004699383, 0xaffacfd, 0x746811a5},
+  /* 46 */ {5, 0.1810425967800402, 0xc46bee0, 0x4da6500f},
+  /* 47 */ {5, 0.1800313266566926, 0xdab86ef, 0x2ba23582},
+  /* 48 */ {5, 0.1790522317510414, 0xf300000, 0xdb20a88},
+  /* 49 */ {5, 0.1781035935540111, 0x10d63af1, 0xe68d5ce4},
+  /* 50 */ {5, 0.1771838201355579, 0x12a05f20, 0xb7cdfd9d},
+  /* 51 */ {5, 0.1762914343888821, 0x1490aae3, 0x8e583933},
+  /* 52 */ {5, 0.1754250635819545, 0x16a97400, 0x697cc3ea},
+  /* 53 */ {5, 0.1745834300480449, 0x18ed2825, 0x48a5ca6c},
+  /* 54 */ {5, 0.1737653428714400, 0x1b5e4d60, 0x2b52db16},
+  /* 55 */ {5, 0.1729696904450771, 0x1dff8297, 0x111586a6},
+  /* 56 */ {5, 0.1721954337940981, 0x20d38000, 0xf31d2b36},
+  /* 57 */ {5, 0.1714416005739134, 0x23dd1799, 0xc8d76d19},
+  /* 58 */ {5, 0.1707072796637201, 0x271f35a0, 0xa2cb1eb4},
+  /* 59 */ {5, 0.1699916162869140, 0x2a9ce10b, 0x807c3ec3},
+  /* 60 */ {5, 0.1692938075987814, 0x2e593c00, 0x617ec8bf},
+  /* 61 */ {5, 0.1686130986895011, 0x3257844d, 0x45746cbe},
+  /* 62 */ {5, 0.1679487789570419, 0x369b13e0, 0x2c0aa273},
+  /* 63 */ {5, 0.1673001788101741, 0x3b27613f, 0x14f90805},
+  /* 64 */ {5, 0.1666666666666667, 0x6, 0x0},
+  /* 65 */ {5, 0.1660476462159378, 0x4528a141, 0xd9cf0829},
+  /* 66 */ {5, 0.1654425539190583, 0x4aa51420, 0xb6fc4841},
+  /* 67 */ {5, 0.1648508567221604, 0x50794633, 0x973054cb},
+  /* 68 */ {5, 0.1642720499620502, 0x56a94400, 0x7a1dbe4b},
+  /* 69 */ {5, 0.1637056554452156, 0x5d393975, 0x5f7fcd7f},
+  /* 70 */ {5, 0.1631512196835108, 0x642d7260, 0x47196c84},
+  /* 71 */ {5, 0.1626083122716341, 0x6b8a5ae7, 0x30b43635},
+  /* 72 */ {5, 0.1620765243931223, 0x73548000, 0x1c1fa5f6},
+  /* 73 */ {5, 0.1615554674429964, 0x7b908fe9, 0x930634a},
+  /* 74 */ {5, 0.1610447717564445, 0x84435aa0, 0xef7f4a3c},
+  /* 75 */ {5, 0.1605440854340214, 0x8d71d25b, 0xcf5552d2},
+  /* 76 */ {5, 0.1600530732548213, 0x97210c00, 0xb1a47c8e},
+  /* 77 */ {5, 0.1595714156699382, 0xa1563f9d, 0x9634b43e},
+  /* 78 */ {5, 0.1590988078692941, 0xac16c8e0, 0x7cd3817d},
+  /* 79 */ {5, 0.1586349589155960, 0xb768278f, 0x65536761},
+  /* 80 */ {5, 0.1581795909397823, 0xc3500000, 0x4f8b588e},
+  /* 81 */ {5, 0.1577324383928644, 0xcfd41b91, 0x3b563c24},
+  /* 82 */ {5, 0.1572932473495469, 0xdcfa6920, 0x28928154},
+  /* 83 */ {5, 0.1568617748594410, 0xeac8fd83, 0x1721bfb0},
+  /* 84 */ {5, 0.1564377883420716, 0xf9461400, 0x6e8629d},
+  /* 85 */ {4, 0.1560210650222250, 0x31c84b1, 0x491cc17c},
+  /* 86 */ {4, 0.1556113914024940, 0x342ab10, 0x3a11d83b},
+  /* 87 */ {4, 0.1552085627701551, 0x36a2c21, 0x2be074cd},
+  /* 88 */ {4, 0.1548123827357682, 0x3931000, 0x1e7a02e7},
+  /* 89 */ {4, 0.1544226628011101, 0x3bd5ee1, 0x11d10edd},
+  /* 90 */ {4, 0.1540392219542636, 0x3e92110, 0x5d92c68},
+  /* 91 */ {4, 0.1536618862898642, 0x4165ef1, 0xf50dbfb2},
+  /* 92 */ {4, 0.1532904886526781, 0x4452100, 0xdf9f1316},
+  /* 93 */ {4, 0.1529248683028321, 0x4756fd1, 0xcb52a684},
+  /* 94 */ {4, 0.1525648706011593, 0x4a75410, 0xb8163e97},
+  /* 95 */ {4, 0.1522103467132434, 0x4dad681, 0xa5d8f269},
+  /* 96 */ {4, 0.1518611533308632, 0x5100000, 0x948b0fcd},
+  /* 97 */ {4, 0.1515171524096389, 0x546d981, 0x841e0215},
+  /* 98 */ {4, 0.1511782109217764, 0x57f6c10, 0x74843b1e},
+  /* 99 */ {4, 0.1508442006228941, 0x5b9c0d1, 0x65b11e6e},
+  /* 100 */ {4, 0.1505149978319906, 0x5f5e100, 0x5798ee23},
+  /* 101 */ {4, 0.1501904832236879, 0x633d5f1, 0x4a30b99b},
+  /* 102 */ {4, 0.1498705416319474, 0x673a910, 0x3d6e4d94},
+  /* 103 */ {4, 0.1495550618645152, 0x6b563e1, 0x314825b0},
+  /* 104 */ {4, 0.1492439365274121, 0x6f91000, 0x25b55f2e},
+  /* 105 */ {4, 0.1489370618588283, 0x73eb721, 0x1aadaccb},
+  /* 106 */ {4, 0.1486343375718350, 0x7866310, 0x10294ba2},
+  /* 107 */ {4, 0.1483356667053617, 0x7d01db1, 0x620f8f6},
+  /* 108 */ {4, 0.1480409554829326, 0x81bf100, 0xf91bd1b6},
+  /* 109 */ {4, 0.1477501131786861, 0x869e711, 0xe6d37b2a},
+  /* 110 */ {4, 0.1474630519902391, 0x8ba0a10, 0xd55cff6e},
+  /* 111 */ {4, 0.1471796869179852, 0x90c6441, 0xc4ad2db2},
+  /* 112 */ {4, 0.1468999356504447, 0x9610000, 0xb4b985cf},
+  /* 113 */ {4, 0.1466237184553111, 0x9b7e7c1, 0xa5782bef},
+  /* 114 */ {4, 0.1463509580758620, 0xa112610, 0x96dfdd2a},
+  /* 115 */ {4, 0.1460815796324244, 0xa6cc591, 0x88e7e509},
+  /* 116 */ {4, 0.1458155105286054, 0xacad100, 0x7b8813d3},
+  /* 117 */ {4, 0.1455526803620167, 0xb2b5331, 0x6eb8b595},
+  /* 118 */ {4, 0.1452930208392428, 0xb8e5710, 0x627289db},
+  /* 119 */ {4, 0.1450364656948130, 0xbf3e7a1, 0x56aebc07},
+  /* 120 */ {4, 0.1447829506139581, 0xc5c1000, 0x4b66dc33},
+  /* 121 */ {4, 0.1445324131589439, 0xcc6db61, 0x4094d8a3},
+  /* 122 */ {4, 0.1442847926987864, 0xd345510, 0x3632f7a5},
+  /* 123 */ {4, 0.1440400303421672, 0xda48871, 0x2c3bd1f0},
+  /* 124 */ {4, 0.1437980688733775, 0xe178100, 0x22aa4d5f},
+  /* 125 */ {4, 0.1435588526911310, 0xe8d4a51, 0x19799812},
+  /* 126 */ {4, 0.1433223277500932, 0xf05f010, 0x10a523e5},
+  /* 127 */ {4, 0.1430884415049874, 0xf817e01, 0x828a237},
+  /* 128 */ {4, 0.1428571428571428, 0x7, 0x0},
+  /* 129 */ {4, 0.1426283821033600, 0x10818201, 0xf04ec452},
+  /* 130 */ {4, 0.1424021108869747, 0x11061010, 0xe136444a},
+  /* 131 */ {4, 0.1421782821510107, 0x118db651, 0xd2af9589},
+  /* 132 */ {4, 0.1419568500933153, 0x12188100, 0xc4b42a83},
+  /* 133 */ {4, 0.1417377701235801, 0x12a67c71, 0xb73dccf5},
+  /* 134 */ {4, 0.1415209988221527, 0x1337b510, 0xaa4698c5},
+  /* 135 */ {4, 0.1413064939005528, 0x13cc3761, 0x9dc8f729},
+  /* 136 */ {4, 0.1410942141636095, 0x14641000, 0x91bf9a30},
+  /* 137 */ {4, 0.1408841194731412, 0x14ff4ba1, 0x86257887},
+  /* 138 */ {4, 0.1406761707131039, 0x159df710, 0x7af5c98c},
+  /* 139 */ {4, 0.1404703297561400, 0x16401f31, 0x702c01a0},
+  /* 140 */ {4, 0.1402665594314587, 0x16e5d100, 0x65c3ceb1},
+  /* 141 */ {4, 0.1400648234939879, 0x178f1991, 0x5bb91502},
+  /* 142 */ {4, 0.1398650865947379, 0x183c0610, 0x5207ec23},
+  /* 143 */ {4, 0.1396673142523192, 0x18eca3c1, 0x48ac9c19},
+  /* 144 */ {4, 0.1394714728255649, 0x19a10000, 0x3fa39ab5},
+  /* 145 */ {4, 0.1392775294872041, 0x1a592841, 0x36e98912},
+  /* 146 */ {4, 0.1390854521985406, 0x1b152a10, 0x2e7b3140},
+  /* 147 */ {4, 0.1388952096850913, 0x1bd51311, 0x2655840b},
+  /* 148 */ {4, 0.1387067714131417, 0x1c98f100, 0x1e7596ea},
+  /* 149 */ {4, 0.1385201075671774, 0x1d60d1b1, 0x16d8a20d},
+  /* 150 */ {4, 0.1383351890281539, 0x1e2cc310, 0xf7bfe87},
+  /* 151 */ {4, 0.1381519873525671, 0x1efcd321, 0x85d2492},
+  /* 152 */ {4, 0.1379704747522905, 0x1fd11000, 0x179a9f4},
+  /* 153 */ {4, 0.1377906240751463, 0x20a987e1, 0xf59e80eb},
+  /* 154 */ {4, 0.1376124087861776, 0x21864910, 0xe8b768db},
+  /* 155 */ {4, 0.1374358029495937, 0x226761f1, 0xdc39d6d5},
+  /* 156 */ {4, 0.1372607812113589, 0x234ce100, 0xd021c5d1},
+  /* 157 */ {4, 0.1370873187823978, 0x2436d4d1, 0xc46b5e37},
+  /* 158 */ {4, 0.1369153914223921, 0x25254c10, 0xb912f39c},
+  /* 159 */ {4, 0.1367449754241439, 0x26185581, 0xae150294},
+  /* 160 */ {4, 0.1365760475984821, 0x27100000, 0xa36e2eb1},
+  /* 161 */ {4, 0.1364085852596902, 0x280c5a81, 0x991b4094},
+  /* 162 */ {4, 0.1362425662114337, 0x290d7410, 0x8f19241e},
+  /* 163 */ {4, 0.1360779687331669, 0x2a135bd1, 0x8564e6b7},
+  /* 164 */ {4, 0.1359147715670014, 0x2b1e2100, 0x7bfbb5b4},
+  /* 165 */ {4, 0.1357529539050150, 0x2c2dd2f1, 0x72dadcc8},
+  /* 166 */ {4, 0.1355924953769863, 0x2d428110, 0x69ffc498},
+  /* 167 */ {4, 0.1354333760385373, 0x2e5c3ae1, 0x6167f154},
+  /* 168 */ {4, 0.1352755763596663, 0x2f7b1000, 0x5911016e},
+  /* 169 */ {4, 0.1351190772136599, 0x309f1021, 0x50f8ac5f},
+  /* 170 */ {4, 0.1349638598663645, 0x31c84b10, 0x491cc17c},
+  /* 171 */ {4, 0.1348099059658079, 0x32f6d0b1, 0x417b26d8},
+  /* 172 */ {4, 0.1346571975321549, 0x342ab100, 0x3a11d83b},
+  /* 173 */ {4, 0.1345057169479844, 0x3563fc11, 0x32dee622},
+  /* 174 */ {4, 0.1343554469488779, 0x36a2c210, 0x2be074cd},
+  /* 175 */ {4, 0.1342063706143054, 0x37e71341, 0x2514bb58},
+  /* 176 */ {4, 0.1340584713587980, 0x39310000, 0x1e7a02e7},
+  /* 177 */ {4, 0.1339117329233981, 0x3a8098c1, 0x180ea5d0},
+  /* 178 */ {4, 0.1337661393673756, 0x3bd5ee10, 0x11d10edd},
+  /* 179 */ {4, 0.1336216750601996, 0x3d311091, 0xbbfb88e},
+  /* 180 */ {4, 0.1334783246737591, 0x3e921100, 0x5d92c68},
+  /* 181 */ {4, 0.1333360731748201, 0x3ff90031, 0x1c024c},
+  /* 182 */ {4, 0.1331949058177136, 0x4165ef10, 0xf50dbfb2},
+  /* 183 */ {4, 0.1330548081372441, 0x42d8eea1, 0xea30efa3},
+  /* 184 */ {4, 0.1329157659418126, 0x44521000, 0xdf9f1316},
+  /* 185 */ {4, 0.1327777653067443, 0x45d16461, 0xd555c0c9},
+  /* 186 */ {4, 0.1326407925678156, 0x4756fd10, 0xcb52a684},
+  /* 187 */ {4, 0.1325048343149731, 0x48e2eb71, 0xc193881f},
+  /* 188 */ {4, 0.1323698773862368, 0x4a754100, 0xb8163e97},
+  /* 189 */ {4, 0.1322359088617821, 0x4c0e0f51, 0xaed8b724},
+  /* 190 */ {4, 0.1321029160581950, 0x4dad6810, 0xa5d8f269},
+  /* 191 */ {4, 0.1319708865228925, 0x4f535d01, 0x9d15039d},
+  /* 192 */ {4, 0.1318398080287045, 0x51000000, 0x948b0fcd},
+  /* 193 */ {4, 0.1317096685686114, 0x52b36301, 0x8c394d1d},
+  /* 194 */ {4, 0.1315804563506306, 0x546d9810, 0x841e0215},
+  /* 195 */ {4, 0.1314521597928493, 0x562eb151, 0x7c3784f8},
+  /* 196 */ {4, 0.1313247675185968, 0x57f6c100, 0x74843b1e},
+  /* 197 */ {4, 0.1311982683517524, 0x59c5d971, 0x6d02985d},
+  /* 198 */ {4, 0.1310726513121843, 0x5b9c0d10, 0x65b11e6e},
+  /* 199 */ {4, 0.1309479056113158, 0x5d796e61, 0x5e8e5c64},
+  /* 200 */ {4, 0.1308240206478128, 0x5f5e1000, 0x5798ee23},
+  /* 201 */ {4, 0.1307009860033912, 0x614a04a1, 0x50cf7bde},
+  /* 202 */ {4, 0.1305787914387386, 0x633d5f10, 0x4a30b99b},
+  /* 203 */ {4, 0.1304574268895465, 0x65383231, 0x43bb66bd},
+  /* 204 */ {4, 0.1303368824626505, 0x673a9100, 0x3d6e4d94},
+  /* 205 */ {4, 0.1302171484322746, 0x69448e91, 0x374842ee},
+  /* 206 */ {4, 0.1300982152363760, 0x6b563e10, 0x314825b0},
+  /* 207 */ {4, 0.1299800734730872, 0x6d6fb2c1, 0x2b6cde75},
+  /* 208 */ {4, 0.1298627138972530, 0x6f910000, 0x25b55f2e},
+  /* 209 */ {4, 0.1297461274170591, 0x71ba3941, 0x2020a2c5},
+  /* 210 */ {4, 0.1296303050907487, 0x73eb7210, 0x1aadaccb},
+  /* 211 */ {4, 0.1295152381234257, 0x7624be11, 0x155b891f},
+  /* 212 */ {4, 0.1294009178639407, 0x78663100, 0x10294ba2},
+  /* 213 */ {4, 0.1292873358018581, 0x7aafdeb1, 0xb160fe9},
+  /* 214 */ {4, 0.1291744835645007, 0x7d01db10, 0x620f8f6},
+  /* 215 */ {4, 0.1290623529140715, 0x7f5c3a21, 0x14930ef},
+  /* 216 */ {4, 0.1289509357448472, 0x81bf1000, 0xf91bd1b6},
+  /* 217 */ {4, 0.1288402240804449, 0x842a70e1, 0xefdcb0c7},
+  /* 218 */ {4, 0.1287302100711567, 0x869e7110, 0xe6d37b2a},
+  /* 219 */ {4, 0.1286208859913518, 0x891b24f1, 0xddfeb94a},
+  /* 220 */ {4, 0.1285122442369443, 0x8ba0a100, 0xd55cff6e},
+  /* 221 */ {4, 0.1284042773229231, 0x8e2ef9d1, 0xcceced50},
+  /* 222 */ {4, 0.1282969778809442, 0x90c64410, 0xc4ad2db2},
+  /* 223 */ {4, 0.1281903386569819, 0x93669481, 0xbc9c75f9},
+  /* 224 */ {4, 0.1280843525090381, 0x96100000, 0xb4b985cf},
+  /* 225 */ {4, 0.1279790124049077, 0x98c29b81, 0xad0326c2},
+  /* 226 */ {4, 0.1278743114199984, 0x9b7e7c10, 0xa5782bef},
+  /* 227 */ {4, 0.1277702427352035, 0x9e43b6d1, 0x9e1771a9},
+  /* 228 */ {4, 0.1276667996348261, 0xa1126100, 0x96dfdd2a},
+  /* 229 */ {4, 0.1275639755045533, 0xa3ea8ff1, 0x8fd05c41},
+  /* 230 */ {4, 0.1274617638294791, 0xa6cc5910, 0x88e7e509},
+  /* 231 */ {4, 0.1273601581921741, 0xa9b7d1e1, 0x8225759d},
+  /* 232 */ {4, 0.1272591522708010, 0xacad1000, 0x7b8813d3},
+  /* 233 */ {4, 0.1271587398372755, 0xafac2921, 0x750eccf9},
+  /* 234 */ {4, 0.1270589147554692, 0xb2b53310, 0x6eb8b595},
+  /* 235 */ {4, 0.1269596709794558, 0xb5c843b1, 0x6884e923},
+  /* 236 */ {4, 0.1268610025517973, 0xb8e57100, 0x627289db},
+  /* 237 */ {4, 0.1267629036018709, 0xbc0cd111, 0x5c80c07b},
+  /* 238 */ {4, 0.1266653683442337, 0xbf3e7a10, 0x56aebc07},
+  /* 239 */ {4, 0.1265683910770258, 0xc27a8241, 0x50fbb19b},
+  /* 240 */ {4, 0.1264719661804097, 0xc5c10000, 0x4b66dc33},
+  /* 241 */ {4, 0.1263760881150453, 0xc91209c1, 0x45ef7c7c},
+  /* 242 */ {4, 0.1262807514205999, 0xcc6db610, 0x4094d8a3},
+  /* 243 */ {4, 0.1261859507142915, 0xcfd41b91, 0x3b563c24},
+  /* 244 */ {4, 0.1260916806894653, 0xd3455100, 0x3632f7a5},
+  /* 245 */ {4, 0.1259979361142023, 0xd6c16d31, 0x312a60c3},
+  /* 246 */ {4, 0.1259047118299582, 0xda488710, 0x2c3bd1f0},
+  /* 247 */ {4, 0.1258120027502338, 0xdddab5a1, 0x2766aa45},
+  /* 248 */ {4, 0.1257198038592741, 0xe1781000, 0x22aa4d5f},
+  /* 249 */ {4, 0.1256281102107963, 0xe520ad61, 0x1e06233c},
+  /* 250 */ {4, 0.1255369169267456, 0xe8d4a510, 0x19799812},
+  /* 251 */ {4, 0.1254462191960791, 0xec940e71, 0x15041c33},
+  /* 252 */ {4, 0.1253560122735751, 0xf05f0100, 0x10a523e5},
+  /* 253 */ {4, 0.1252662914786691, 0xf4359451, 0xc5c2749},
+  /* 254 */ {4, 0.1251770521943144, 0xf817e010, 0x828a237},
+  /* 255 */ {4, 0.1250882898658681, 0xfc05fc01, 0x40a1423},
+};
+#endif
+#if BITS_PER_MP_LIMB == 64
+const struct bases __mp_bases[256] =
+{
+  /* 0 */ {0, 0.0, 0, 0},
+  /* 1 */ {0, 1e38, 0, 0},
+  /* 2 */ {64, 1.0000000000000000, CNST_LIMB(0x1), CNST_LIMB(0x0)},
+  /* 3 */ {40, 0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+  /* 4 */ {32, 0.5000000000000000, CNST_LIMB(0x2), CNST_LIMB(0x0)},
+  /* 5 */ {27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
+  /* 6 */ {24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+  /* 7 */
{22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)}, + /* 8 */ {21, 0.3333333333333334, CNST_LIMB(0x3), CNST_LIMB(0x0)}, + /* 9 */ {20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)}, + /* 10 */ {19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a)}, + /* 11 */ {18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)}, + /* 12 */ {17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f)}, + /* 13 */ {17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3)}, + /* 14 */ {16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)}, + /* 15 */ {16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)}, + /* 16 */ {16, 0.2500000000000000, CNST_LIMB(0x4), CNST_LIMB(0x0)}, + /* 17 */ {15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323)}, + /* 18 */ {15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c)}, + /* 19 */ {15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec)}, + /* 20 */ {14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e)}, + /* 21 */ {14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239)}, + /* 22 */ {14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08)}, + /* 23 */ {14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38)}, + /* 24 */ {13, 0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1)}, + /* 25 */ {13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5)}, + /* 26 */ {13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9)}, + /* 27 */ {13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), 
CNST_LIMB(0x2351ffcaa9c7c4ae)}, + /* 28 */ {13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636)}, + /* 29 */ {13, 0.2058468324604344, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d)}, + /* 30 */ {13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6)}, + /* 31 */ {12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5)}, + /* 32 */ {12, 0.2000000000000000, CNST_LIMB(0x5), CNST_LIMB(0x0)}, + /* 33 */ {12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f)}, + /* 34 */ {12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec)}, + /* 35 */ {12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509)}, + /* 36 */ {12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)}, + /* 37 */ {12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f)}, + /* 38 */ {12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544)}, + /* 39 */ {12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c)}, + /* 40 */ {12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197)}, + /* 41 */ {11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae)}, + /* 42 */ {11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7)}, + /* 43 */ {11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94)}, + /* 44 */ {11, 0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a)}, + /* 45 */ {11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46)}, + /* 46 */ {11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083)}, + /* 47 */ {11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988)}, + /* 48 */ {11, 
0.1790522317510413, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167)}, + /* 49 */ {11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)}, + /* 50 */ {11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962)}, + /* 51 */ {11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e)}, + /* 52 */ {11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d)}, + /* 53 */ {11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25)}, + /* 54 */ {11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a)}, + /* 55 */ {11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d)}, + /* 56 */ {11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1)}, + /* 57 */ {10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1)}, + /* 58 */ {10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c)}, + /* 59 */ {10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492)}, + /* 60 */ {10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5)}, + /* 61 */ {10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416)}, + /* 62 */ {10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5)}, + /* 63 */ {10, 0.1673001788101741, CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641)}, + /* 64 */ {10, 0.1666666666666667, CNST_LIMB(0x6), CNST_LIMB(0x0)}, + /* 65 */ {10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1)}, + /* 66 */ {10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33)}, + /* 67 */ {10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0)}, + /* 68 */ {10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), 
CNST_LIMB(0x173decb64d1d4409)}, + /* 69 */ {10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f)}, + /* 70 */ {10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62)}, + /* 71 */ {10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2)}, + /* 72 */ {10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2)}, + /* 73 */ {10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b)}, + /* 74 */ {10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5)}, + /* 75 */ {10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e)}, + /* 76 */ {10, 0.1600530732548213, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd)}, + /* 77 */ {10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec)}, + /* 78 */ {10, 0.1590988078692941, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505)}, + /* 79 */ {10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac)}, + /* 80 */ {10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d)}, + /* 81 */ {10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)}, + /* 82 */ {10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f)}, + /* 83 */ {10, 0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47)}, + /* 84 */ {10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a)}, + /* 85 */ {9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4)}, + /* 86 */ {9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e)}, + /* 87 */ {9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89)}, + /* 88 */ {9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), 
CNST_LIMB(0xd24cde0463108cfa)}, + /* 89 */ {9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383)}, + /* 90 */ {9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10)}, + /* 91 */ {9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43)}, + /* 92 */ {9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447)}, + /* 93 */ {9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0)}, + /* 94 */ {9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4)}, + /* 95 */ {9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f)}, + /* 96 */ {9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794)}, + /* 97 */ {9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc)}, + /* 98 */ {9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3)}, + /* 99 */ {9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518)}, + /* 100 */ {9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e)}, + /* 101 */ {9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f)}, + /* 102 */ {9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1)}, + /* 103 */ {9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328)}, + /* 104 */ {9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c)}, + /* 105 */ {9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179)}, + /* 106 */ {9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf)}, + /* 107 */ {9, 0.1483356667053617, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32)}, + /* 108 */ {9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188)}, + /* 109 */ 
{9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c)}, + /* 110 */ {9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f)}, + /* 111 */ {9, 0.1471796869179852, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0)}, + /* 112 */ {9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8)}, + /* 113 */ {9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82)}, + /* 114 */ {9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a)}, + /* 115 */ {9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5)}, + /* 116 */ {9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610)}, + /* 117 */ {9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c)}, + /* 118 */ {9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977)}, + /* 119 */ {9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e)}, + /* 120 */ {9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5)}, + /* 121 */ {9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)}, + /* 122 */ {9, 0.1442847926987864, CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5)}, + /* 123 */ {9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b)}, + /* 124 */ {9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10)}, + /* 125 */ {9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)}, + /* 126 */ {9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8)}, + /* 127 */ {9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1)}, + /* 128 */ {9, 0.1428571428571428, CNST_LIMB(0x7), CNST_LIMB(0x0)}, + /* 129 */ {9, 0.1426283821033600, 
CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d)}, + /* 130 */ {9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0)}, + /* 131 */ {9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085)}, + /* 132 */ {9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d)}, + /* 133 */ {9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25)}, + /* 134 */ {9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d)}, + /* 135 */ {9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914)}, + /* 136 */ {9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849)}, + /* 137 */ {9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9)}, + /* 138 */ {9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0)}, + /* 139 */ {8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf)}, + /* 140 */ {8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05)}, + /* 141 */ {8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19)}, + /* 142 */ {8, 0.1398650865947379, CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe)}, + /* 143 */ {8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7)}, + /* 144 */ {8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757)}, + /* 145 */ {8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae)}, + /* 146 */ {8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40)}, + /* 147 */ {8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f)}, + /* 148 */ {8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d)}, + /* 149 */ {8, 0.1385201075671774, CNST_LIMB(0x35f14200a827c61), 
CNST_LIMB(0x2fbb374806ec05f1)}, + /* 150 */ {8, 0.1383351890281539, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe)}, + /* 151 */ {8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e)}, + /* 152 */ {8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc)}, + /* 153 */ {8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910)}, + /* 154 */ {8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614)}, + /* 155 */ {8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2)}, + /* 156 */ {8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d)}, + /* 157 */ {8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782)}, + /* 158 */ {8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b)}, + /* 159 */ {8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd)}, + /* 160 */ {8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9)}, + /* 161 */ {8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753)}, + /* 162 */ {8, 0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7)}, + /* 163 */ {8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb)}, + /* 164 */ {8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac)}, + /* 165 */ {8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e)}, + /* 166 */ {8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004)}, + /* 167 */ {8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8)}, + /* 168 */ {8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5)}, + /* 169 */ {8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe)}, + /* 170 
*/ {8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532)}, + /* 171 */ {8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02)}, + /* 172 */ {8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317)}, + /* 173 */ {8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c)}, + /* 174 */ {8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e)}, + /* 175 */ {8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286)}, + /* 176 */ {8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb)}, + /* 177 */ {8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b)}, + /* 178 */ {8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1)}, + /* 179 */ {8, 0.1336216750601996, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef)}, + /* 180 */ {8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3)}, + /* 181 */ {8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb)}, + /* 182 */ {8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8)}, + /* 183 */ {8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005)}, + /* 184 */ {8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926)}, + /* 185 */ {8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f)}, + /* 186 */ {8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6)}, + /* 187 */ {8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497)}, + /* 188 */ {8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67)}, + /* 189 */ {8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b)}, + /* 190 */ {8, 0.1321029160581950, 
CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56)}, + /* 191 */ {8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0)}, + /* 192 */ {8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf)}, + /* 193 */ {8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a)}, + /* 194 */ {8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962)}, + /* 195 */ {8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e)}, + /* 196 */ {8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)}, + /* 197 */ {8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca)}, + /* 198 */ {8, 0.1310726513121843, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2)}, + /* 199 */ {8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9)}, + /* 200 */ {8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6)}, + /* 201 */ {8, 0.1307009860033912, CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22)}, + /* 202 */ {8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20)}, + /* 203 */ {8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf)}, + /* 204 */ {8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9)}, + /* 205 */ {8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff)}, + /* 206 */ {8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a)}, + /* 207 */ {8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd)}, + /* 208 */ {8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985)}, + /* 209 */ {8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7)}, + /* 210 */ {8, 0.1296303050907487, 
CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175)}, + /* 211 */ {8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e)}, + /* 212 */ {8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef)}, + /* 213 */ {8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9)}, + /* 214 */ {8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc)}, + /* 215 */ {8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b)}, + /* 216 */ {8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)}, + /* 217 */ {8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2)}, + /* 218 */ {8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049)}, + /* 219 */ {8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada)}, + /* 220 */ {8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933)}, + /* 221 */ {8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d)}, + /* 222 */ {8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b)}, + /* 223 */ {8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4)}, + /* 224 */ {8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053)}, + /* 225 */ {8, 0.1279790124049077, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)}, + /* 226 */ {8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9)}, + /* 227 */ {8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e)}, + /* 228 */ {8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b)}, + /* 229 */ {8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1)}, + /* 230 */ {8, 0.1274617638294791, 
CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762)}, + /* 231 */ {8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555)}, + /* 232 */ {8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe)}, + /* 233 */ {8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f)}, + /* 234 */ {8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3)}, + /* 235 */ {8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c)}, + /* 236 */ {8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773)}, + /* 237 */ {8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5)}, + /* 238 */ {8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f)}, + /* 239 */ {8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c)}, + /* 240 */ {8, 0.1264719661804097, CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529)}, + /* 241 */ {8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1)}, + /* 242 */ {8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2)}, + /* 243 */ {8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)}, + /* 244 */ {8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6)}, + /* 245 */ {8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2)}, + /* 246 */ {8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47)}, + /* 247 */ {8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016)}, + /* 248 */ {8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18)}, + /* 249 */ {8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d)}, + /* 250 */ {8, 0.1255369169267456, 
CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5)}, + /* 251 */ {8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3)}, + /* 252 */ {8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d)}, + /* 253 */ {8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac)}, + /* 254 */ {8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93)}, + /* 255 */ {8, 0.1250882898658681, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f)}, +}; +#endif diff --git a/rts/gmp/mpn/ns32k/add_n.s b/rts/gmp/mpn/ns32k/add_n.s new file mode 100644 index 0000000000..bd063d07d9 --- /dev/null +++ b/rts/gmp/mpn/ns32k/add_n.s @@ -0,0 +1,46 @@ +# ns32000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___gmpn_add_n +___gmpn_add_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 + addcd r4[r3:d],r0 + movd r0,r2[r3:d] + acbd 1,r3,Loop + + scsd r0 # r0 = cy. + restore [r5,r4,r3] + ret 0 diff --git a/rts/gmp/mpn/ns32k/addmul_1.s b/rts/gmp/mpn/ns32k/addmul_1.s new file mode 100644 index 0000000000..df0dcdd4af --- /dev/null +++ b/rts/gmp/mpn/ns32k/addmul_1.s @@ -0,0 +1,48 @@ +# ns32000 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___gmpn_addmul_1 +___gmpn_addmul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 + addd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/rts/gmp/mpn/ns32k/mul_1.s b/rts/gmp/mpn/ns32k/mul_1.s new file mode 100644 index 0000000000..0a77efba29 --- /dev/null +++ b/rts/gmp/mpn/ns32k/mul_1.s @@ -0,0 +1,47 @@ +# ns32000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___gmpn_mul_1 +___gmpn_mul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + movd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/rts/gmp/mpn/ns32k/sub_n.s b/rts/gmp/mpn/ns32k/sub_n.s new file mode 100644 index 0000000000..cd89f4fd3f --- /dev/null +++ b/rts/gmp/mpn/ns32k/sub_n.s @@ -0,0 +1,46 @@ +# ns32000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___gmpn_sub_n +___gmpn_sub_n: + save [r3,r4,r5] + negd 28(sp),r3 + movd r3,r0 + lshd 2,r0 + movd 24(sp),r4 + subd r0,r4 # r4 -> to end of S2 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r2 + subd r0,r2 # r2 -> to end of RES + subd r0,r0 # cy = 0 + +Loop: movd r5[r3:d],r0 + subcd r4[r3:d],r0 + movd r0,r2[r3:d] + acbd 1,r3,Loop + + scsd r0 # r0 = cy. + restore [r5,r4,r3] + ret 0 diff --git a/rts/gmp/mpn/ns32k/submul_1.s b/rts/gmp/mpn/ns32k/submul_1.s new file mode 100644 index 0000000000..f811aedcf1 --- /dev/null +++ b/rts/gmp/mpn/ns32k/submul_1.s @@ -0,0 +1,48 @@ +# ns32000 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + + .align 1 +.globl ___gmpn_submul_1 +___gmpn_submul_1: + save [r3,r4,r5,r6,r7] + negd 24(sp),r4 + movd r4,r0 + lshd 2,r0 + movd 20(sp),r5 + subd r0,r5 # r5 -> to end of S1 + movd 16(sp),r6 + subd r0,r6 # r6 -> to end of RES + subd r0,r0 # r0 = 0, cy = 0 + movd 28(sp),r7 # r7 = s2_limb + +Loop: movd r5[r4:d],r2 + meid r7,r2 # r2 = low_prod, r3 = high_prod + addcd r0,r2 # r2 = low_prod + cy_limb + movd r3,r0 # r0 = new cy_limb + addcd 0,r0 + subd r2,r6[r4:d] + acbd 1,r4,Loop + + addcd 0,r0 + restore [r7,r6,r5,r4,r3] + ret 0 diff --git a/rts/gmp/mpn/pa64/README b/rts/gmp/mpn/pa64/README new file mode 100644 index 0000000000..8d2976dabc --- /dev/null +++ b/rts/gmp/mpn/pa64/README @@ -0,0 +1,38 @@ +This directory contains mpn functions for 64-bit PA-RISC 2.0. + +RELEVANT OPTIMIZATION ISSUES + +The PA8000 has a multi-issue pipeline with large buffers for instructions +awaiting pending results. Therefore, no latency scheduling is necessary +(and might actually be harmful). + +Two 64-bit loads can be completed per cycle. One 64-bit store can be +completed per cycle. A store cannot complete in the same cycle as a load. + +STATUS + +* mpn_lshift, mpn_rshift, mpn_add_n, mpn_sub_n are all well-tuned and run at + the peak cache bandwidth; 1.5 cycles/limb for shifting and 2.0 cycles/limb + for add/subtract. + +* The multiplication functions run at 11 cycles/limb. The cache bandwidth + allows 7.5 cycles/limb. Perhaps it would be possible, using unrolling or + better scheduling, to get closer to the cache bandwidth limit. + +* xaddmul_1.S contains a quicker method for forming the 128 bit product. It + uses somewhat fewer operations, and keeps the carry flag live across the loop + boundary. But it seems hard to make it run more than 1/4 cycle faster + than the old code. Perhaps we really ought to unroll this loop by 2x? + 2x should suffice since register latency scheduling is never needed, + but the unrolling would hide the store-load latency. Here is a sketch: + + 1.
A multiply and store 64-bit products + 2. B sum 64-bit products 128-bit product + 3. B load 64-bit products to integer registers + 4. B multiply and store 64-bit products + 5. A sum 64-bit products 128-bit product + 6. A load 64-bit products to integer registers + 7. goto 1 + + In practice, adjacent groups (1 and 2, 2 and 3, etc) will be interleaved + for better instruction mix. diff --git a/rts/gmp/mpn/pa64/add_n.s b/rts/gmp/mpn/pa64/add_n.s new file mode 100644 index 0000000000..22ff19c184 --- /dev/null +++ b/rts/gmp/mpn/pa64/add_n.s @@ -0,0 +1,90 @@ +; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and +; store sum in a third limb vector. + +; Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. 
+ + .level 2.0n + .code + .export __gmpn_add_n,entry +__gmpn_add_n + .proc + .callinfo frame=0,args_saved + .entry + + sub %r0,%r23,%r22 + depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + sub %r26,%r22,%r26 ; offset res_ptr + blr %r28,%r0 ; branch into loop + add %r0,%r0,%r0 ; reset carry + +L$loop ldd 0(%r25),%r20 + ldd 0(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,0(%r26) +L$7 ldd 8(%r25),%r21 + ldd 8(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,8(%r26) +L$6 ldd 16(%r25),%r20 + ldd 16(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,16(%r26) +L$5 ldd 24(%r25),%r21 + ldd 24(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,24(%r26) +L$4 ldd 32(%r25),%r20 + ldd 32(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,32(%r26) +L$3 ldd 40(%r25),%r21 + ldd 40(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,40(%r26) +L$2 ldd 48(%r25),%r20 + ldd 48(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,48(%r26) +L$1 ldd 56(%r25),%r21 + ldo 64(%r25),%r25 + ldd 56(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,56(%r26) + ldo 64(%r24),%r24 + addib,> -8,%r23,L$loop + ldo 64(%r26),%r26 + + add,dc %r0,%r0,%r29 + bve (%r2) + .exit + ldi 0,%r28 + .procend diff --git a/rts/gmp/mpn/pa64/addmul_1.S b/rts/gmp/mpn/pa64/addmul_1.S new file mode 100644 index 0000000000..b1885b432c --- /dev/null +++ b/rts/gmp/mpn/pa64/addmul_1.S @@ -0,0 +1,167 @@ +; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and +; add the result to a second limb vector. + +; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. 
+ +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define rptr %r26 +#define sptr %r25 +#define size %r24 +#define s2limb -56(%r30) + +; This runs at 11 cycles/limb on a PA8000. It might be possible to make +; it faster, but the PA8000 pipeline is not publically documented and it +; is very complex to reverse engineer + +#define t1 %r19 +#define rlimb %r20 +#define hi %r21 +#define lo %r22 +#define m0 %r28 +#define m1 %r3 +#define cylimb %r29 +#define t3 %r4 +#define t2 %r6 +#define t5 %r23 +#define t4 %r31 + .level 2.0n + .code + .export __gmpn_addmul_1,entry +__gmpn_addmul_1 + .proc + .callinfo frame=128,no_calls + .entry + fldd -56(%r30),%fr5 ; s2limb passed on stack + ldo 128(%r30),%r30 + add %r0,%r0,cylimb ; clear cy and cylimb + + std %r3,-96(%r30) + std %r4,-88(%r30) + std %r5,-80(%r30) + std %r6,-72(%r30) + depdi,z 1,31,1,%r5 + + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd -128(%r30),lo ; lo = low 64 bit of product + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),hi ; hi = high 64 bit of product + addib,= -1,%r24,L$end1 + nop + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + addib,= -1,%r24,L$end2 + nop +L$loop + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + 
fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,rlimb,rlimb + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + add t4,rlimb,t3 + ldd -104(%r30),hi ; hi = high 64 bit of product + add,dc %r0,cylimb,cylimb + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + std t3,0(rptr) + addib,<> -1,%r24,L$loop + ldo 8(rptr),rptr +L$end2 + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,rlimb,rlimb + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + add t4,rlimb,t3 + ldd -104(%r30),hi ; hi = high 64 bit of product + add,dc %r0,cylimb,cylimb + std t3,0(rptr) + ldo 8(rptr),rptr +L$end1 + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + add cylimb,rlimb,rlimb + add,dc t2,hi,cylimb + add t4,rlimb,t3 + add,dc %r0,cylimb,cylimb + std
t3,0(rptr) + ldo 8(rptr),rptr + + ldd -96(%r30),%r3 + ldd -88(%r30),%r4 + ldd -80(%r30),%r5 + ldd -72(%r30),%r6 + + extrd,u cylimb,31,32,%r28 + bve (%r2) + .exit + ldo -128(%r30),%r30 + .procend diff --git a/rts/gmp/mpn/pa64/gmp-mparam.h b/rts/gmp/mpn/pa64/gmp-mparam.h new file mode 100644 index 0000000000..847735b987 --- /dev/null +++ b/rts/gmp/mpn/pa64/gmp-mparam.h @@ -0,0 +1,65 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* These values were measured in a PA8000 using the system compiler version + A.10.32.30. Presumably the PA8200 and PA8500 have the same timing + characteristic, but GCC might give somewhat different results. */ +/* Generated by tuneup.c, 2000-07-25. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 16 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 105 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 40 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 116 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 72 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 94 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 50 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 46 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 1 +#endif diff --git a/rts/gmp/mpn/pa64/lshift.s b/rts/gmp/mpn/pa64/lshift.s new file mode 100644 index 0000000000..994bc1c4d6 --- /dev/null +++ b/rts/gmp/mpn/pa64/lshift.s @@ -0,0 +1,103 @@ +; HP-PA 2.0 __gmpn_lshift -- + +; Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; size gr24 +; cnt gr23 + +; This runs at 1.5 cycles/limb on PA8000. 
+ + .level 2.0n + .code + .export __gmpn_lshift,entry +__gmpn_lshift + .proc + .callinfo frame=0,args_saved + .entry + + shladd %r24,3,%r25,%r25 + shladd %r24,3,%r26,%r26 + subi 64,%r23,%r23 + mtsar %r23 + ldd -8(%r25),%r21 + addib,= -1,%r24,L$end + shrpd %r0,%r21,%sar,%r29 ; compute carry out limb + depw,z %r24,31,3,%r28 ; r28 = (size & 7) + sub %r0,%r24,%r22 + depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7) + add %r25,%r22,%r25 ; offset s1_ptr + blr %r28,%r0 ; branch into jump table + add %r26,%r22,%r26 ; offset res_ptr + b L$0 + nop + b L$1 + copy %r21,%r20 + b L$2 + nop + b L$3 + copy %r21,%r20 + b L$4 + nop + b L$5 + copy %r21,%r20 + b L$6 + nop + b L$7 + copy %r21,%r20 + +L$loop +L$0 ldd -16(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-8(%r26) +L$7 ldd -24(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-16(%r26) +L$6 ldd -32(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-24(%r26) +L$5 ldd -40(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-32(%r26) +L$4 ldd -48(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-40(%r26) +L$3 ldd -56(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-48(%r26) +L$2 ldd -64(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-56(%r26) +L$1 ldd -72(%r25),%r21 + ldo -64(%r25),%r25 + shrpd %r20,%r21,%sar,%r20 + std %r20,-64(%r26) + addib,> -8,%r24,L$loop + ldo -64(%r26),%r26 + +L$end shrpd %r21,%r0,%sar,%r21 + std %r21,-8(%r26) + bve (%r2) + .exit + extrd,u %r29,31,32,%r28 + .procend diff --git a/rts/gmp/mpn/pa64/mul_1.S b/rts/gmp/mpn/pa64/mul_1.S new file mode 100644 index 0000000000..ab310c1264 --- /dev/null +++ b/rts/gmp/mpn/pa64/mul_1.S @@ -0,0 +1,158 @@ +; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and +; store the result in a second limb vector. + +; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define rptr %r26 +#define sptr %r25 +#define size %r24 +#define s2limb -56(%r30) + +; This runs at 11 cycles/limb on a PA8000. It might be possible to make +; it faster, but the PA8000 pipeline is not publically documented and it +; is very complex to reverse engineer + +#define t1 %r19 +#define rlimb %r20 +#define hi %r21 +#define lo %r22 +#define m0 %r28 +#define m1 %r3 +#define cylimb %r29 +#define t3 %r4 +#define t2 %r6 +#define t5 %r23 +#define t4 %r31 + .level 2.0n + .code + .export __gmpn_mul_1,entry +__gmpn_mul_1 + .proc + .callinfo frame=128,no_calls + .entry + fldd -56(%r30),%fr5 ; s2limb passed on stack + ldo 128(%r30),%r30 + add %r0,%r0,cylimb ; clear cy and cylimb + + std %r3,-96(%r30) + std %r4,-88(%r30) + std %r5,-80(%r30) + std %r6,-72(%r30) + depdi,z 1,31,1,%r5 + + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd -128(%r30),lo ; lo = low 64 bit of product + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),hi 
; hi = high 64 bit of product + addib,= -1,%r24,L$end1 + nop + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + addib,= -1,%r24,L$end2 + nop +L$loop + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,t4,t3 + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),hi ; hi = high 64 bit of product + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + std t3,0(rptr) + addib,<> -1,%r24,L$loop + ldo 8(rptr),rptr +L$end2 + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,t4,t3 + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),hi ; hi = high 64 bit of product + std t3,0(rptr) + ldo 8(rptr),rptr +L$end1 + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 +=
lo32(t1) + add cylimb,t4,t3 + add,dc t2,hi,cylimb + std t3,0(rptr) + ldo 8(rptr),rptr + + ldd -96(%r30),%r3 + ldd -88(%r30),%r4 + ldd -80(%r30),%r5 + ldd -72(%r30),%r6 + + extrd,u cylimb,31,32,%r28 + bve (%r2) + .exit + ldo -128(%r30),%r30 + .procend diff --git a/rts/gmp/mpn/pa64/rshift.s b/rts/gmp/mpn/pa64/rshift.s new file mode 100644 index 0000000000..f0730e2a91 --- /dev/null +++ b/rts/gmp/mpn/pa64/rshift.s @@ -0,0 +1,100 @@ +; HP-PA 2.0 __gmpn_rshift -- + +; Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; size gr24 +; cnt gr23 + +; This runs at 1.5 cycles/limb on PA8000. 
+
+	.level	2.0n
+	.code
+	.export	__gmpn_rshift,entry
+__gmpn_rshift
+	.proc
+	.callinfo frame=0,args_saved
+	.entry
+
+	mtsar	%r23			; shift amount register = cnt
+	ldd	0(%r25),%r21		; r21 = first source limb
+	addib,=	-1,%r24,L$end		; size was 1: only the final limb remains
+	shrpd	%r21,%r0,%sar,%r29	; compute carry out limb
+	depw,z	%r24,31,3,%r28		; r28 = (size & 7), size already decremented
+	sub	%r0,%r24,%r22
+	depw,z	%r22,28,3,%r22		; r22 = 8 * (-size & 7)
+	sub	%r25,%r22,%r25		; offset s1_ptr
+	blr	%r28,%r0		; branch into jump table
+	sub	%r26,%r22,%r26		; offset res_ptr
+	b	L$0			; table entry 0 (two instructions per entry)
+	nop
+	b	L$1
+	copy	%r21,%r20		; odd entries expect the previous limb in r20
+	b	L$2
+	nop
+	b	L$3
+	copy	%r21,%r20
+	b	L$4
+	nop
+	b	L$5
+	copy	%r21,%r20
+	b	L$6
+	nop
+	b	L$7
+	copy	%r21,%r20
+
+; Unrolled 8 times; r20 and r21 alternate as previous/current limb.
+L$loop
+L$0	ldd	8(%r25),%r20
+	shrpd	%r20,%r21,%sar,%r21
+	std	%r21,0(%r26)
+L$7	ldd	16(%r25),%r21
+	shrpd	%r21,%r20,%sar,%r20
+	std	%r20,8(%r26)
+L$6	ldd	24(%r25),%r20
+	shrpd	%r20,%r21,%sar,%r21
+	std	%r21,16(%r26)
+L$5	ldd	32(%r25),%r21
+	shrpd	%r21,%r20,%sar,%r20
+	std	%r20,24(%r26)
+L$4	ldd	40(%r25),%r20
+	shrpd	%r20,%r21,%sar,%r21
+	std	%r21,32(%r26)
+L$3	ldd	48(%r25),%r21
+	shrpd	%r21,%r20,%sar,%r20
+	std	%r20,40(%r26)
+L$2	ldd	56(%r25),%r20
+	shrpd	%r20,%r21,%sar,%r21
+	std	%r21,48(%r26)
+L$1	ldd	64(%r25),%r21
+	ldo	64(%r25),%r25
+	shrpd	%r21,%r20,%sar,%r20
+	std	%r20,56(%r26)
+	addib,>	-8,%r24,L$loop
+	ldo	64(%r26),%r26		; (delay slot) advance res_ptr
+
+L$end	shrpd	%r0,%r21,%sar,%r21	; most significant limb: zeros shifted in
+	std	%r21,0(%r26)
+	bve	(%r2)
+	.exit
+	extrd,u	%r29,31,32,%r28		; (delay slot) r28 = hi32 of the carry out limb
+	.procend
diff --git a/rts/gmp/mpn/pa64/sub_n.s b/rts/gmp/mpn/pa64/sub_n.s
new file mode 100644
index 0000000000..dda1f54b34
--- /dev/null
+++ b/rts/gmp/mpn/pa64/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+	.level	2.0n
+	.code
+	.export	__gmpn_sub_n,entry
+__gmpn_sub_n
+	.proc
+	.callinfo frame=0,args_saved
+	.entry
+
+	sub	%r0,%r23,%r22		; r22 = -n
+	depw,z	%r22,30,3,%r28		; r28 = 2 * (-n & 7)
+	depw,z	%r22,28,3,%r22		; r22 = 8 * (-n & 7)
+	sub	%r25,%r22,%r25		; offset s1_ptr
+	sub	%r24,%r22,%r24		; offset s2_ptr
+	blr	%r28,%r0		; branch into loop
+	sub	%r26,%r22,%r26		; offset res_ptr and set carry
+
+; Unrolled 8 times.  The first seven limb steps are four instructions each,
+; which is what the blr index above relies on.
+L$loop	ldd	0(%r25),%r20
+	ldd	0(%r24),%r31
+	sub,db	%r20,%r31,%r20		; subtract with borrow
+	std	%r20,0(%r26)
+L$7	ldd	8(%r25),%r21
+	ldd	8(%r24),%r19
+	sub,db	%r21,%r19,%r21
+	std	%r21,8(%r26)
+L$6	ldd	16(%r25),%r20
+	ldd	16(%r24),%r31
+	sub,db	%r20,%r31,%r20
+	std	%r20,16(%r26)
+L$5	ldd	24(%r25),%r21
+	ldd	24(%r24),%r19
+	sub,db	%r21,%r19,%r21
+	std	%r21,24(%r26)
+L$4	ldd	32(%r25),%r20
+	ldd	32(%r24),%r31
+	sub,db	%r20,%r31,%r20
+	std	%r20,32(%r26)
+L$3	ldd	40(%r25),%r21
+	ldd	40(%r24),%r19
+	sub,db	%r21,%r19,%r21
+	std	%r21,40(%r26)
+L$2	ldd	48(%r25),%r20
+	ldd	48(%r24),%r31
+	sub,db	%r20,%r31,%r20
+	std	%r20,48(%r26)
+L$1	ldd	56(%r25),%r21
+	ldo	64(%r25),%r25
+	ldd	56(%r24),%r19
+	sub,db	%r21,%r19,%r21
+	std	%r21,56(%r26)
+	ldo	64(%r24),%r24
+	addib,>	-8,%r23,L$loop
+	ldo	64(%r26),%r26		; (delay slot) advance res_ptr
+
+	add,dc	%r0,%r0,%r29		; r29 = carry bit (1 means no borrow)
+	subi	1,%r29,%r29		; r29 = 1 - carry = borrow out
+	bve	(%r2)
+	.exit
+	ldi	0,%r28			; (delay slot) r28 = 0
+	.procend
diff --git a/rts/gmp/mpn/pa64/submul_1.S b/rts/gmp/mpn/pa64/submul_1.S
new file mode 100644
index 0000000000..27666b99df
--- /dev/null
+++ b/rts/gmp/mpn/pa64/submul_1.S
@@ -0,0 +1,170 @@
+; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000.
It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+; Method: every 64x64-bit product is formed from four 32x32-bit xmpyu partial
+; products (lo, m0, m1, hi) that are spilled to the frame and recombined in
+; the integer registers while the partial products of the next limb are being
+; computed.  The low product limb plus cylimb is subtracted from the limb at
+; rptr; cylimb accumulates the high product limb plus the borrow and is the
+; return value.  %r5 holds 2^32, the weight of a carry out of m0+m1.
+	.level	2.0n
+	.code
+	.export	__gmpn_submul_1,entry
+__gmpn_submul_1
+	.proc
+	.callinfo frame=128,no_calls
+	.entry
+	fldd	-56(%r30),%fr5		; s2limb passed on stack
+	ldo	128(%r30),%r30		; allocate the 128 byte frame
+	add	%r0,%r0,cylimb		; clear cy and cylimb
+
+	std	%r3,-96(%r30)		; save %r3-%r6, reloaded before return
+	std	%r4,-88(%r30)
+	std	%r5,-80(%r30)
+	std	%r6,-72(%r30)
+	depdi,z	1,31,1,%r5		; %r5 = 2^32
+
+	fldd	0(sptr),%fr4		; first source limb
+	ldo	8(sptr),sptr
+
+	xmpyu	%fr5R,%fr4R,%fr6	; low32(s2limb) * low32(limb)
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7	; low32(s2limb) * high32(limb)
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8	; high32(s2limb) * low32(limb)
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9	; high32(s2limb) * high32(limb)
+	fstd	%fr9,-104(%r30)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	addib,=	-1,%r24,L$end1		; size was 1: only recombine and store
+	nop				; (delay slot)
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	addib,=	-1,%r24,L$end2		; size was 2: skip the steady-state loop
+	nop				; (delay slot)
+L$loop
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the NEXT limb ...
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	ldd	0(rptr),rlimb		; limb to subtract from
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)   ... recombine the CURRENT one
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1; next insn is nullified if no overflow
+	add,l	%r5,hi,hi		; propagate carry (hi += 2^32)
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32 (low limb of product)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,t4,t4		; t4 += incoming carry limb, sets cy
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb		; cylimb = hi + t2 + cy
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	sub	rlimb,t4,t3		; t3 = rlimb - t4
+	add	t4,t3,%r0		; result discarded; cy set iff the sub borrowed
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	add,dc	%r0,cylimb,cylimb	; cylimb += borrow
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	std	t3,0(rptr)		; store result limb
+	addib,<> -1,%r24,L$loop
+	ldo	8(rptr),rptr		; (delay slot) advance rptr
+L$end2
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the last limb
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	ldd	0(rptr),rlimb
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,t4,t4
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	sub	rlimb,t4,t3
+	add	t4,t3,%r0
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	add,dc	%r0,cylimb,cylimb
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+L$end1
+	ldd	0(rptr),rlimb
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	add	cylimb,t4,t4
+	add,dc	t2,hi,cylimb
+	sub	rlimb,t4,t3
+	add	t4,t3,%r0
+	add,dc	%r0,cylimb,cylimb	; final carry limb
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+
+	ldd	-96(%r30),%r3		; reload the registers saved at entry
+	ldd	-88(%r30),%r4
+	ldd	-80(%r30),%r5
+	ldd	-72(%r30),%r6
+
+	extrd,u	cylimb,31,32,%r28	; %r28 = hi32(cylimb); cylimb itself is %r29
+	bve	(%r2)
+	.exit
+	ldo	-128(%r30),%r30		; (delay slot) release the frame
+	.procend
diff --git a/rts/gmp/mpn/pa64/udiv_qrnnd.c b/rts/gmp/mpn/pa64/udiv_qrnnd.c
new file mode 100644
index 0000000000..1c9fe084db
--- /dev/null
+++ b/rts/gmp/mpn/pa64/udiv_qrnnd.c
@@ -0,0 +1,111 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* 2^64 as a double; scales the high limb when forming a two-limb value.  */
+#define TWO64 18446744073709551616.0
+
+/* Divide the two-limb value n1,,n0 by d using double precision floating
+   point estimates.  Store the remainder through R and return the quotient.
+
+   A first quotient estimate q1 is computed in floating point, multiplied
+   back exactly with umul_ppmm, and the partial remainder is divided again
+   to obtain a correction q2.  At most one further unit adjustment of the
+   quotient is then applied.
+
+   NOTE(review): presumably the caller must guarantee n1 < d (so that the
+   quotient fits in one limb) and d != 0 -- neither is checked here; confirm
+   against the users of udiv_qrnnd in longlong.h.  */
+mp_limb_t
+#if __STDC__
+__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r)
+#else
+__MPN(udiv_qrnnd) (n1, n0, d, r)
+     mp_limb_t n1;
+     mp_limb_t n0;
+     mp_limb_t d;
+     mp_limb_t *r;
+#endif
+{
+  mp_limb_t q1, q2, q;
+  mp_limb_t p1, p0;
+  double di, dq;
+
+  /* Approximate reciprocal of the divisor, reused for both estimates.  */
+  di = 1.0 / d;
+
+  /* Generate upper 53 bits of quotient. Be careful here; the `double'
+     quotient may be rounded to 2^64 which we cannot safely convert back
+     to a 64-bit integer. */
+  dq = (TWO64 * (double) n1 + (double) n0) * di;
+  if (dq >= TWO64)
+    q1 = 0xfffffffffffff800LL;
+  else
+    q1 = (mp_limb_t) dq;
+
+  /* Multiply back in order to compare the product to the dividend. */
+  umul_ppmm (p1, p0, q1, d);
+
+  /* Was the 53-bit quotient greater than our sought quotient? Test the
+     sign of the partial remainder to find out. */
+  if (n1 < p1 || (n1 == p1 && n0 < p0))
+    {
+      /* 53-bit quotient too large. Partial remainder is negative.
+	 Compute the absolute value of the remainder in n1,,n0. */
+      n1 = p1 - (n1 + (p0 < n0));
+      n0 = p0 - n0;
+
+      /* Now use the partial remainder as new dividend to compute more bits of
+	 quotient. This is an adjustment for the one we got previously. */
+      q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+      umul_ppmm (p1, p0, q2, d);
+
+      q = q1 - q2;
+      /* Here n1,,n0 is the overshoot q1*d - dividend, so the remainder is
+	 q2*d minus the overshoot.  */
+      if (n1 < p1 || (n1 == p1 && n0 <= p0))
+	{
+	  n0 = p0 - n0;
+	}
+      else
+	{
+	  /* q2*d fell short of the overshoot: take one more d.  */
+	  n0 = p0 - n0;
+	  n0 += d;
+	  q--;
+	}
+    }
+  else
+    {
+      /* 53-bit quotient not too large.  Partial remainder is non-negative;
+	 compute it in n1,,n0. */
+      n1 = n1 - (p1 + (n0 < p0));
+      n0 = n0 - p0;
+
+      q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+      umul_ppmm (p1, p0, q2, d);
+
+      q = q1 + q2;
+      if (n1 < p1 || (n1 == p1 && n0 < p0))
+	{
+	  /* Correction q2 was one too large. */
+	  n0 = n0 - p0;
+	  n0 += d;
+	  q--;
+	}
+      else
+	{
+	  n0 = n0 - p0;
+	  /* Correction q2 may have been one too small. */
+	  if (n0 >= d)
+	    {
+	      n0 -= d;
+	      q++;
+	    }
+	}
+    }
+
+  *r = n0;
+  return q;
+}
diff --git a/rts/gmp/mpn/pa64/umul_ppmm.S b/rts/gmp/mpn/pa64/umul_ppmm.S
new file mode 100644
index 0000000000..ceff2d752f
--- /dev/null
+++ b/rts/gmp/mpn/pa64/umul_ppmm.S
@@ -0,0 +1,74 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+#define p0 %r28
+#define p1 %r29
+#define t32 %r19
+#define t0 %r20
+#define t1 %r21
+#define x %r22
+#define m0 %r23
+#define m1 %r24
+; __gmpn_umul_ppmm -- 64x64 -> 128 bit unsigned multiply.
+; The two 64-bit operands arrive as 32-bit register pairs (%r25,%r26) and
+; (%r23,%r24).  The low 64 bits of the product are stored through the pointer
+; loaded into %r31 and the high 64 bits are returned.
+; The product is built from four 32x32-bit xmpyu partial products that are
+; spilled to the frame and recombined in the integer registers.
+	.level	2.0n
+	.code
+	.export	__gmpn_umul_ppmm,entry
+__gmpn_umul_ppmm
+	.proc
+	.callinfo frame=128,no_calls
+	.entry
+	ldo	128(%r30),%r30		; allocate the 128 byte frame
+	depd	%r25,31,32,%r26		; r26 = lo32(r25) << 32 | lo32(r26)
+	std	%r26,-64(%r30)		; first operand to memory
+	depd	%r23,31,32,%r24		; r24 = lo32(r23) << 32 | lo32(r24)
+	std	%r24,-56(%r30)		; second operand to memory
+
+	ldw	-180(%r30),%r31		; pointer argument, used for the store below
+
+	fldd	-64(%r30),%fr4		; reload both operands into FP registers
+	fldd	-56(%r30),%fr5
+
+	xmpyu	%fr5R,%fr4R,%fr6	; low32 * low32
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7	; low32 * high32
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8	; high32 * low32
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9	; high32 * high32
+	fstd	%fr9,-104(%r30)
+
+	depdi,z	1,31,1,t32		; t32 = 2^32
+
+	ldd	-128(%r30),p0		; lo = low 64 bit of product
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),p1		; hi = high 64 bit of product
+
+	add,l,*nuv m0,m1,x		; x = m1+m0; next insn is nullified if no overflow
+	add,l	t32,p1,p1		; propagate carry to mid of p1
+	depd,z	x,31,32,t0		; t0 = lo32(m1+m0) << 32
+	add	t0,p0,p0		; low limb of product, sets cy
+	extrd,u	x,31,32,t1		; hi32(m1+m0)
+	add,dc	t1,p1,p1		; high limb of product (includes cy)
+
+	std	p0,0(%r31)		; store low half of product
+	extrd,u	p1,31,32,%r28		; return high half of product
+	bve	(%r2)
+	.exit
+	ldo	-128(%r30),%r30		; (delay slot) release the frame
+	.procend
diff --git a/rts/gmp/mpn/pa64w/README b/rts/gmp/mpn/pa64w/README
new file mode 100644
index 0000000000..cf590a7b98
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/README
@@ -0,0 +1,2 @@
+This directory contains mpn functions for 64-bit PA-RISC 2.0
+using 64-bit pointers (2.0W).
diff --git a/rts/gmp/mpn/pa64w/add_n.s b/rts/gmp/mpn/pa64w/add_n.s
new file mode 100644
index 0000000000..1bb9e8fbc7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+	.level	2.0w
+	.code
+	.export	__gmpn_add_n,entry
+__gmpn_add_n
+	.proc
+	.callinfo frame=0,args_saved
+	.entry
+
+	sub	%r0,%r23,%r22		; r22 = -n
+	depw,z	%r22,30,3,%r28		; r28 = 2 * (-n & 7)
+	depw,z	%r22,28,3,%r22		; r22 = 8 * (-n & 7)
+	sub	%r25,%r22,%r25		; offset s1_ptr
+	sub	%r24,%r22,%r24		; offset s2_ptr
+	sub	%r26,%r22,%r26		; offset res_ptr
+	blr	%r28,%r0		; branch into loop
+	add	%r0,%r0,%r0		; reset carry
+
+; Unrolled 8 times.  The first seven limb steps are four instructions each,
+; which is what the blr index above relies on.
+L$loop	ldd	0(%r25),%r20
+	ldd	0(%r24),%r31
+	add,dc	%r20,%r31,%r20		; add with carry
+	std	%r20,0(%r26)
+L$7	ldd	8(%r25),%r21
+	ldd	8(%r24),%r19
+	add,dc	%r21,%r19,%r21
+	std	%r21,8(%r26)
+L$6	ldd	16(%r25),%r20
+	ldd	16(%r24),%r31
+	add,dc	%r20,%r31,%r20
+	std	%r20,16(%r26)
+L$5	ldd	24(%r25),%r21
+	ldd	24(%r24),%r19
+	add,dc	%r21,%r19,%r21
+	std	%r21,24(%r26)
+L$4	ldd	32(%r25),%r20
+	ldd	32(%r24),%r31
+	add,dc	%r20,%r31,%r20
+	std	%r20,32(%r26)
+L$3	ldd	40(%r25),%r21
+	ldd	40(%r24),%r19
+	add,dc	%r21,%r19,%r21
+	std	%r21,40(%r26)
+L$2	ldd	48(%r25),%r20
+	ldd	48(%r24),%r31
+	add,dc	%r20,%r31,%r20
+	std	%r20,48(%r26)
+L$1	ldd	56(%r25),%r21
+	ldo	64(%r25),%r25
+	ldd	56(%r24),%r19
+	add,dc	%r21,%r19,%r21
+	std	%r21,56(%r26)
+	ldo	64(%r24),%r24
+	addib,>	-8,%r23,L$loop
+	ldo	64(%r26),%r26		; (delay slot) advance res_ptr
+
+	add,dc	%r0,%r0,%r29		; r29 = carry out (0 or 1)
+	bve	(%r2)
+	.exit
+	copy	%r29,%r28		; (delay slot) return the carry in r28
+	.procend
diff --git a/rts/gmp/mpn/pa64w/addmul_1.S b/rts/gmp/mpn/pa64w/addmul_1.S
new file mode 100644
index 0000000000..4799f90fc5
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/addmul_1.S
@@ -0,0 +1,168 @@
+; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000.
It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+; Method: every 64x64-bit product is formed from four 32x32-bit xmpyu partial
+; products (lo, m0, m1, hi) that are spilled to the frame and recombined in
+; the integer registers while the partial products of the next limb are being
+; computed.  The low product limb and cylimb are added to the limb at rptr;
+; cylimb accumulates the high product limb plus both carries and is the
+; return value.  %r5 holds 2^32, the weight of a carry out of m0+m1.
+; Note that s2limb and t5 both name %r23: s2limb is copied to an FP register
+; at entry, before t5 is first written.
+	.level	2.0w
+	.code
+	.export	__gmpn_addmul_1,entry
+__gmpn_addmul_1
+	.proc
+	.callinfo frame=128,no_calls
+	.entry
+	std	s2limb,-56(%r30)	; pass s2limb through memory ...
+	fldd	-56(%r30),%fr5		; ... into an FP register for xmpyu
+	ldo	128(%r30),%r30		; allocate the 128 byte frame
+	add	%r0,%r0,cylimb		; clear cy and cylimb
+
+	std	%r3,-96(%r30)		; save %r3-%r6, reloaded before return
+	std	%r4,-88(%r30)
+	std	%r5,-80(%r30)
+	std	%r6,-72(%r30)
+	depdi,z	1,31,1,%r5		; %r5 = 2^32
+
+	fldd	0(sptr),%fr4		; first source limb
+	ldo	8(sptr),sptr
+
+	xmpyu	%fr5R,%fr4R,%fr6	; low32(s2limb) * low32(limb)
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7	; low32(s2limb) * high32(limb)
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8	; high32(s2limb) * low32(limb)
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9	; high32(s2limb) * high32(limb)
+	fstd	%fr9,-104(%r30)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	addib,=	-1,%r24,L$end1		; size was 1: only recombine and store
+	nop				; (delay slot)
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	addib,=	-1,%r24,L$end2		; size was 2: skip the steady-state loop
+	nop				; (delay slot)
+L$loop
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the NEXT limb ...
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	ldd	0(rptr),rlimb		; limb to add to
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)   ... recombine the CURRENT one
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1; next insn is nullified if no overflow
+	add,l	%r5,hi,hi		; propagate carry (hi += 2^32)
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32 (low limb of product)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,rlimb,rlimb	; rlimb += incoming carry limb, sets cy
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb		; cylimb = hi + t2 + cy
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	add	t4,rlimb,t3		; t3 = t4 + rlimb, sets cy
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	add,dc	%r0,cylimb,cylimb	; cylimb += cy
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	std	t3,0(rptr)		; store result limb
+	addib,<> -1,%r24,L$loop
+	ldo	8(rptr),rptr		; (delay slot) advance rptr
+L$end2
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the last limb
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	ldd	0(rptr),rlimb
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,rlimb,rlimb
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	add	t4,rlimb,t3
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	add,dc	%r0,cylimb,cylimb
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+L$end1
+	ldd	0(rptr),rlimb
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	add	cylimb,rlimb,rlimb
+	add,dc	t2,hi,cylimb
+	add	t4,rlimb,t3
+	add,dc	%r0,cylimb,cylimb	; final carry limb
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+
+	ldd	-96(%r30),%r3		; reload the registers saved at entry
+	ldd	-88(%r30),%r4
+	ldd	-80(%r30),%r5
+	ldd	-72(%r30),%r6
+
+	copy	cylimb,%r28		; return the carry limb in %r28
+	bve	(%r2)
+	.exit
+	ldo	-128(%r30),%r30		; (delay slot) release the frame
+	.procend
diff --git a/rts/gmp/mpn/pa64w/gmp-mparam.h b/rts/gmp/mpn/pa64w/gmp-mparam.h
new file mode 100644
index 0000000000..ee5a0a3ab7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Widths of the limb type and of the basic C integer types on this target
+   (bits, except BYTES_PER_MP_LIMB which is in bytes). */
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values were measured on a PA8500 using the system compiler version
+   A.11.01.02. Presumably the PA8000 and PA8200 have the same timing
+   characteristic, but GCC might give somewhat different results. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+/* Every threshold below is wrapped in #ifndef, so a value defined before this
+   header is included takes precedence over the tuned default.
+   NOTE(review): presumably each threshold is the operand size in limbs at
+   which the named algorithm takes over -- confirm against the code that
+   uses it. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 18
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 105
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 46
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 83
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 58
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 134
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 56
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 26
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 1
+#endif
diff --git a/rts/gmp/mpn/pa64w/lshift.s b/rts/gmp/mpn/pa64w/lshift.s
new file mode 100644
index 0000000000..84f925a105
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __gmpn_lshift -- Shift a limb vector left by cnt bits and store
+; the result in a second limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+	.level	2.0w
+	.code
+	.export	__gmpn_lshift,entry
+__gmpn_lshift
+	.proc
+	.callinfo frame=0,args_saved
+	.entry
+
+	shladd	%r24,3,%r25,%r25	; s1_ptr += 8 * size (just past the last limb)
+	shladd	%r24,3,%r26,%r26	; res_ptr += 8 * size
+	subi	64,%r23,%r23		; r23 = 64 - cnt
+	mtsar	%r23			; shift amount register = 64 - cnt
+	ldd	-8(%r25),%r21		; r21 = most significant source limb
+	addib,=	-1,%r24,L$end		; size was 1: only the final limb remains
+	shrpd	%r0,%r21,%sar,%r29	; compute carry out limb
+	depw,z	%r24,31,3,%r28		; r28 = (size & 7), size already decremented
+	sub	%r0,%r24,%r22
+	depw,z	%r22,28,3,%r22		; r22 = 8 * (-size & 7)
+	add	%r25,%r22,%r25		; offset s1_ptr
+	blr	%r28,%r0		; branch into jump table
+	add	%r26,%r22,%r26		; offset res_ptr
+	b	L$0			; table entry 0 (two instructions per entry)
+	nop
+	b	L$1
+	copy	%r21,%r20		; odd entries expect the previous limb in r20
+	b	L$2
+	nop
+	b	L$3
+	copy	%r21,%r20
+	b	L$4
+	nop
+	b	L$5
+	copy	%r21,%r20
+	b	L$6
+	nop
+	b	L$7
+	copy	%r21,%r20
+
+; Unrolled 8 times, walking from the most significant limb downwards;
+; r20 and r21 alternate as previous/current limb.
+L$loop
+L$0	ldd	-16(%r25),%r20
+	shrpd	%r21,%r20,%sar,%r21
+	std	%r21,-8(%r26)
+L$7	ldd	-24(%r25),%r21
+	shrpd	%r20,%r21,%sar,%r20
+	std	%r20,-16(%r26)
+L$6	ldd	-32(%r25),%r20
+	shrpd	%r21,%r20,%sar,%r21
+	std	%r21,-24(%r26)
+L$5	ldd	-40(%r25),%r21
+	shrpd	%r20,%r21,%sar,%r20
+	std	%r20,-32(%r26)
+L$4	ldd	-48(%r25),%r20
+	shrpd	%r21,%r20,%sar,%r21
+	std	%r21,-40(%r26)
+L$3	ldd	-56(%r25),%r21
+	shrpd	%r20,%r21,%sar,%r20
+	std	%r20,-48(%r26)
+L$2	ldd	-64(%r25),%r20
+	shrpd	%r21,%r20,%sar,%r21
+	std	%r21,-56(%r26)
+L$1	ldd	-72(%r25),%r21
+	ldo	-64(%r25),%r25
+	shrpd	%r20,%r21,%sar,%r20
+	std	%r20,-64(%r26)
+	addib,>	-8,%r24,L$loop
+	ldo	-64(%r26),%r26		; (delay slot) step res_ptr down
+
+L$end	shrpd	%r21,%r0,%sar,%r21	; least significant limb: zeros shifted in
+	std	%r21,-8(%r26)
+	bve	(%r2)
+	.exit
+	copy	%r29,%r28		; (delay slot) return the carry out limb
+	.procend
diff --git a/rts/gmp/mpn/pa64w/mul_1.S b/rts/gmp/mpn/pa64w/mul_1.S
new file mode 100644
index 0000000000..48f13fbd1b
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/mul_1.S
@@ -0,0 +1,159 @@
+; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and
+; store the result in a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+; Method: every 64x64-bit product is formed from four 32x32-bit xmpyu partial
+; products (lo, m0, m1, hi) that are spilled to the frame and recombined in
+; the integer registers.  The product of limb i is recombined while the
+; partial products of limb i+1 are being computed.  cylimb carries the high
+; limb of each product into the next iteration and is the return value.
+; %r5 holds 2^32, the weight of a carry out of the m0+m1 sum.
+; Note that s2limb and t5 both name %r23: s2limb is copied to an FP register
+; at entry, before t5 is first written.
+	.level	2.0w
+	.code
+	.export	__gmpn_mul_1,entry
+__gmpn_mul_1
+	.proc
+	.callinfo frame=128,no_calls
+	.entry
+	std	s2limb,-56(%r30)	; pass s2limb through memory ...
+	fldd	-56(%r30),%fr5		; ... into an FP register for xmpyu
+	ldo	128(%r30),%r30		; allocate the 128 byte frame
+	add	%r0,%r0,cylimb		; clear cy and cylimb
+
+	std	%r3,-96(%r30)		; save %r3-%r6, reloaded before return
+	std	%r4,-88(%r30)
+	std	%r5,-80(%r30)
+	std	%r6,-72(%r30)
+	depdi,z	1,31,1,%r5		; %r5 = 2^32
+
+	fldd	0(sptr),%fr4		; first source limb
+	ldo	8(sptr),sptr
+
+	xmpyu	%fr5R,%fr4R,%fr6	; low32(s2limb) * low32(limb)
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7	; low32(s2limb) * high32(limb)
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8	; high32(s2limb) * low32(limb)
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9	; high32(s2limb) * high32(limb)
+	fstd	%fr9,-104(%r30)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	addib,=	-1,%r24,L$end1		; size was 1: only recombine and store
+	nop				; (delay slot)
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	addib,=	-1,%r24,L$end2		; size was 2: skip the steady-state loop
+	nop				; (delay slot)
+L$loop
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the NEXT limb ...
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)   ... recombine the CURRENT one
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1; next insn is nullified if no overflow
+	add,l	%r5,hi,hi		; propagate carry (hi += 2^32)
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32 (low limb of product)
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,t4,t3		; t3 = low limb + incoming carry limb, sets cy
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb		; cylimb = hi + t2 + cy
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	fldd	0(sptr),%fr4		; preload the next source limb
+	ldo	8(sptr),sptr
+	std	t3,0(rptr)		; store result limb
+	addib,<> -1,%r24,L$loop
+	ldo	8(rptr),rptr		; (delay slot) advance rptr
+L$end2
+	xmpyu	%fr5R,%fr4R,%fr6	; partial products of the last limb
+	fstd	%fr6,-128(%r30)
+	xmpyu	%fr5R,%fr4L,%fr7
+	fstd	%fr7,-120(%r30)
+	xmpyu	%fr5L,%fr4R,%fr8
+	fstd	%fr8,-112(%r30)
+	xmpyu	%fr5L,%fr4L,%fr9
+	fstd	%fr9,-104(%r30)
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	ldd	-128(%r30),lo		; lo = low 64 bit of product
+	add	cylimb,t4,t3
+	ldd	-120(%r30),m0		; m0 = mid0 64 bit of product
+	add,dc	t2,hi,cylimb
+	ldd	-112(%r30),m1		; m1 = mid1 64 bit of product
+	ldd	-104(%r30),hi		; hi = high 64 bit of product
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+L$end1
+	extrd,u	lo,31,32,t1		; t1 = hi32(lo)
+	extrd,u	lo,63,32,t4		; t4 = lo32(lo)
+	add,l	m0,t1,t1		; t1 += m0
+	add,l,*nuv m1,t1,t1		; t1 += m1
+	add,l	%r5,hi,hi		; propagate carry
+	extrd,u	t1,31,32,t2		; t2 = hi32(t1)
+	depd,z	t1,31,32,t5		; t5 = lo32(t1) << 32
+	add,l	t5,t4,t4		; t4 += lo32(t1) << 32
+	add	cylimb,t4,t3
+	add,dc	t2,hi,cylimb		; final carry limb
+	std	t3,0(rptr)
+	ldo	8(rptr),rptr
+
+	ldd	-96(%r30),%r3		; reload the registers saved at entry
+	ldd	-88(%r30),%r4
+	ldd	-80(%r30),%r5
+	ldd	-72(%r30),%r6
+
+	copy	cylimb,%r28		; return the carry limb in %r28
+	bve	(%r2)
+	.exit
+	ldo	-128(%r30),%r30		; (delay slot) release the frame
+	.procend
diff --git a/rts/gmp/mpn/pa64w/rshift.s b/rts/gmp/mpn/pa64w/rshift.s
new file mode 100644
index 0000000000..2517cb1f87
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __gmpn_rshift -- Shift a limb vector right by cnt bits and store
+; the result in a second limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+ + .level 2.0w + .code + .export __gmpn_rshift,entry +__gmpn_rshift + .proc + .callinfo frame=0,args_saved + .entry + + mtsar %r23 + ldd 0(%r25),%r21 + addib,= -1,%r24,L$end + shrpd %r21,%r0,%sar,%r29 ; compute carry out limb + depw,z %r24,31,3,%r28 ; r28 = (size & 7) + sub %r0,%r24,%r22 + depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + blr %r28,%r0 ; branch into jump table + sub %r26,%r22,%r26 ; offset res_ptr + b L$0 + nop + b L$1 + copy %r21,%r20 + b L$2 + nop + b L$3 + copy %r21,%r20 + b L$4 + nop + b L$5 + copy %r21,%r20 + b L$6 + nop + b L$7 + copy %r21,%r20 + +L$loop +L$0 ldd 8(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,0(%r26) +L$7 ldd 16(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,8(%r26) +L$6 ldd 24(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,16(%r26) +L$5 ldd 32(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,24(%r26) +L$4 ldd 40(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,32(%r26) +L$3 ldd 48(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,40(%r26) +L$2 ldd 56(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,48(%r26) +L$1 ldd 64(%r25),%r21 + ldo 64(%r25),%r25 + shrpd %r21,%r20,%sar,%r20 + std %r20,56(%r26) + addib,> -8,%r24,L$loop + ldo 64(%r26),%r26 + +L$end shrpd %r0,%r21,%sar,%r21 + std %r21,0(%r26) + bve (%r2) + .exit + copy %r29,%r28 + .procend diff --git a/rts/gmp/mpn/pa64w/sub_n.s b/rts/gmp/mpn/pa64w/sub_n.s new file mode 100644 index 0000000000..ad01e24aa7 --- /dev/null +++ b/rts/gmp/mpn/pa64w/sub_n.s @@ -0,0 +1,90 @@ +; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 +; and store difference in a third limb vector. + +; Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. 
+ +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. + + .level 2.0w + .code + .export __gmpn_sub_n,entry +__gmpn_sub_n + .proc + .callinfo frame=0,args_saved + .entry + + sub %r0,%r23,%r22 + depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + blr %r28,%r0 ; branch into loop + sub %r26,%r22,%r26 ; offset res_ptr and set carry + +L$loop ldd 0(%r25),%r20 + ldd 0(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,0(%r26) +L$7 ldd 8(%r25),%r21 + ldd 8(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,8(%r26) +L$6 ldd 16(%r25),%r20 + ldd 16(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,16(%r26) +L$5 ldd 24(%r25),%r21 + ldd 24(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,24(%r26) +L$4 ldd 32(%r25),%r20 + ldd 32(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,32(%r26) +L$3 ldd 40(%r25),%r21 + ldd 40(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,40(%r26) +L$2 ldd 48(%r25),%r20 + ldd 48(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,48(%r26) +L$1 ldd 56(%r25),%r21 + ldo 64(%r25),%r25 + ldd 56(%r24),%r19 + sub,db %r21,%r19,%r21 + std 
%r21,56(%r26) + ldo 64(%r24),%r24 + addib,> -8,%r23,L$loop + ldo 64(%r26),%r26 + + add,dc %r0,%r0,%r29 + subi 1,%r29,%r29 + bve (%r2) + .exit + copy %r29,%r28 + .procend diff --git a/rts/gmp/mpn/pa64w/submul_1.S b/rts/gmp/mpn/pa64w/submul_1.S new file mode 100644 index 0000000000..294f6239b2 --- /dev/null +++ b/rts/gmp/mpn/pa64w/submul_1.S @@ -0,0 +1,171 @@ +; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and +; subtract the result from a second limb vector. + +; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + +; INPUT PARAMETERS +#define rptr %r26 +#define sptr %r25 +#define size %r24 +#define s2limb %r23 + +; This runs at 11 cycles/limb on a PA8000. 
It might be possible to make +; it faster, but the PA8000 pipeline is not publically documented and it +; is very complex to reverse engineer + +#define t1 %r19 +#define rlimb %r20 +#define hi %r21 +#define lo %r22 +#define m0 %r28 +#define m1 %r3 +#define cylimb %r29 +#define t3 %r4 +#define t2 %r6 +#define t5 %r23 +#define t4 %r31 + .level 2.0w + .code + .export __gmpn_submul_1,entry +__gmpn_submul_1 + .proc + .callinfo frame=128,no_calls + .entry + std s2limb,-56(%r30) + fldd -56(%r30),%fr5 + ldo 128(%r30),%r30 + add %r0,%r0,cylimb ; clear cy and cylimb + + std %r3,-96(%r30) + std %r4,-88(%r30) + std %r5,-80(%r30) + std %r6,-72(%r30) + depdi,z 1,31,1,%r5 + + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd -128(%r30),lo ; lo = low 64 bit of product + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),hi ; hi = high 64 bit of product + addib,= -1,%r24,L$end1 + nop + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + addib,= -1,%r24,L$end2 + nop +L$loop + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,t4,t4 + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + sub rlimb,t4,t3 + add t4,t3,%r0 + ldd -104(%r30),hi ; hi = high 64 bit of product + add,dc 
%r0,cylimb,cylimb + fldd 0(sptr),%fr4 + ldo 8(sptr),sptr + std t3,0(rptr) + addib,<> -1,%r24,L$loop + ldo 8(rptr),rptr +L$end2 + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + ldd -128(%r30),lo ; lo = low 64 bit of product + add cylimb,t4,t4 + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + add,dc t2,hi,cylimb + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + sub rlimb,t4,t3 + add t4,t3,%r0 + ldd -104(%r30),hi ; hi = high 64 bit of product + add,dc %r0,cylimb,cylimb + std t3,0(rptr) + ldo 8(rptr),rptr +L$end1 + ldd 0(rptr),rlimb + extrd,u lo,31,32,t1 ; t1 = hi32(lo) + extrd,u lo,63,32,t4 ; t4 = lo32(lo) + add,l m0,t1,t1 ; t1 += m0 + add,l,*nuv m1,t1,t1 ; t1 += m1 + add,l %r5,hi,hi ; propagate carry + extrd,u t1,31,32,t2 ; t2 = hi32(t1) + depd,z t1,31,32,t5 ; t5 = lo32(t1) + add,l t5,t4,t4 ; t4 += lo32(t1) + add cylimb,t4,t4 + add,dc t2,hi,cylimb + sub rlimb,t4,t3 + add t4,t3,%r0 + add,dc %r0,cylimb,cylimb + std t3,0(rptr) + ldo 8(rptr),rptr + + ldd -96(%r30),%r3 + ldd -88(%r30),%r4 + ldd -80(%r30),%r5 + ldd -72(%r30),%r6 + + copy cylimb,%r28 + bve (%r2) + .exit + ldo -128(%r30),%r30 + .procend diff --git a/rts/gmp/mpn/pa64w/udiv_qrnnd.c b/rts/gmp/mpn/pa64w/udiv_qrnnd.c new file mode 100644 index 0000000000..1852913000 --- /dev/null +++ b/rts/gmp/mpn/pa64w/udiv_qrnnd.c @@ -0,0 +1,117 @@ +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#define TWO64 18446744073709551616.0 +#define TWO63 9223372036854775808.0 + +mp_limb_t +#if __STDC__ +__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r) +#else +__MPN(udiv_qrnnd) (n1, n0, d, r) + mp_limb_t n1; + mp_limb_t n0; + mp_limb_t d; + mp_limb_t *r; +#endif +{ + mp_limb_t q1, q2, q; + mp_limb_t p1, p0; + double di, dq; + + di = 1.0 / d; + + /* Generate upper 53 bits of quotient. Be careful here; the `double' + quotient may be rounded to 2^64 which we cannot safely convert back + to a 64-bit integer. */ + dq = (TWO64 * (double) n1 + (double) n0) * di; + if (dq >= TWO64) + q1 = 0xfffffffffffff800L; +#ifndef __GNUC__ + /* Work around HP compiler bug. */ + else if (dq > TWO63) + q1 = (mp_limb_t) (dq - TWO63) + 0x8000000000000000L; +#endif + else + q1 = (mp_limb_t) dq; + + /* Multiply back in order to compare the product to the dividend. */ + umul_ppmm (p1, p0, q1, d); + + /* Was the 53-bit quotient greater than our sought quotient? Test the + sign of the partial remainder to find out. */ + if (n1 < p1 || (n1 == p1 && n0 < p0)) + { + /* 53-bit quotient too large. Partial remainder is negative.
+ Compute the absolute value of the remainder in n1,,n0. */ + n1 = p1 - (n1 + (p0 < n0)); + n0 = p0 - n0; + + /* Now use the partial remainder as new dividend to compute more bits of + quotient. This is an adjustment for the one we got previously. */ + q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di); + umul_ppmm (p1, p0, q2, d); + + q = q1 - q2; + if (n1 < p1 || (n1 == p1 && n0 <= p0)) + { + n0 = p0 - n0; + } + else + { + n0 = p0 - n0; + n0 += d; + q--; + } + } + else + { + n1 = n1 - (p1 + (n0 < p0)); + n0 = n0 - p0; + + q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di); + umul_ppmm (p1, p0, q2, d); + + q = q1 + q2; + if (n1 < p1 || (n1 == p1 && n0 < p0)) + { + n0 = n0 - p0; + n0 += d; + q--; + } + else + { + n0 = n0 - p0; + if (n0 >= d) + { + n0 -= d; + q++; + } + } + } + + *r = n0; + return q; +} diff --git a/rts/gmp/mpn/pa64w/umul_ppmm.S b/rts/gmp/mpn/pa64w/umul_ppmm.S new file mode 100644 index 0000000000..d9fb92be8c --- /dev/null +++ b/rts/gmp/mpn/pa64w/umul_ppmm.S @@ -0,0 +1,72 @@ +; Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Lesser General Public License as published by +; the Free Software Foundation; either version 2.1 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +; License for more details. + +; You should have received a copy of the GNU Lesser General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. 
+ +#define p0 %r28 +#define p1 %r29 +#define t32 %r19 +#define t0 %r20 +#define t1 %r21 +#define x %r22 +#define m0 %r23 +#define m1 %r24 + .level 2.0w + .code + .export __gmpn_umul_ppmm,entry +__gmpn_umul_ppmm + .proc + .callinfo frame=128,no_calls + .entry + ldo 128(%r30),%r30 + std %r26,-64(%r30) + std %r25,-56(%r30) + + copy %r24,%r31 + + fldd -64(%r30),%fr4 + fldd -56(%r30),%fr5 + + xmpyu %fr5R,%fr4R,%fr6 + fstd %fr6,-128(%r30) + xmpyu %fr5R,%fr4L,%fr7 + fstd %fr7,-120(%r30) + xmpyu %fr5L,%fr4R,%fr8 + fstd %fr8,-112(%r30) + xmpyu %fr5L,%fr4L,%fr9 + fstd %fr9,-104(%r30) + + depdi,z 1,31,1,t32 ; t32 = 2^32 + + ldd -128(%r30),p0 ; lo = low 64 bit of product + ldd -120(%r30),m0 ; m0 = mid0 64 bit of product + ldd -112(%r30),m1 ; m1 = mid1 64 bit of product + ldd -104(%r30),p1 ; hi = high 64 bit of product + + add,l,*nuv m0,m1,x ; x = m1+m0 + add,l t32,p1,p1 ; propagate carry to mid of p1 + depd,z x,31,32,t0 ; lo32(m1+m0) + add t0,p0,p0 + extrd,u x,31,32,t1 ; hi32(m1+m0) + add,dc t1,p1,p1 + + std p0,0(%r31) ; store low half of product + copy p1,%r28 ; return high half of product + bve (%r2) + .exit + ldo -128(%r30),%r30 + .procend diff --git a/rts/gmp/mpn/power/add_n.s b/rts/gmp/mpn/power/add_n.s new file mode 100644 index 0000000000..0f9f48f1cc --- /dev/null +++ b/rts/gmp/mpn/power/add_n.s @@ -0,0 +1,79 @@ +# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation, +# Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .globl __gmpn_add_n + .globl .__gmpn_add_n + .csect __gmpn_add_n[DS] +__gmpn_add_n: + .long .__gmpn_add_n, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_add_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + a 7,0,8 # add least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bc 4,6,L1 # bne cr1,L1 (misassembled by gas) + st 7,4(3) + aze 3,10 # use the fact that r10 is zero... + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. 
+L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + ae 7,0,8 # add limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + ae 11,9,10 # add previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + ae 7,0,8 # add previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: ae 11,9,10 # add limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + lil 3,0 # load cy into ... + aze 3,3 # ... return value register + br diff --git a/rts/gmp/mpn/power/addmul_1.s b/rts/gmp/mpn/power/addmul_1.s new file mode 100644 index 0000000000..8ecc651579 --- /dev/null +++ b/rts/gmp/mpn/power/addmul_1.s @@ -0,0 +1,122 @@ +# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The POWER architecture has no unsigned 32x32->64 bit multiplication +# instruction. To obtain that operation, we have to use the 32x32->64 signed +# multiplication instruction, and add the appropriate compensation to the high +# limb of the result. We add the multiplicand if the multiplier has its most +# significant bit set, and we add the multiplier if the multiplicand has its +# most significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit we +# can branch in zero cycles, so that's how we perform the additions. + + .toc + .globl __gmpn_addmul_1 + .globl .__gmpn_addmul_1 + .csect __gmpn_addmul_1[DS] +__gmpn_addmul_1: + .long .__gmpn_addmul_1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_addmul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 # add res_limb + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + l 7,4(3) + aze 9,9 + a 8,8,7 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb 
+ a 8,8,7 # add res_limb + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br diff --git a/rts/gmp/mpn/power/lshift.s b/rts/gmp/mpn/power/lshift.s new file mode 100644 index 0000000000..ab71fb7727 --- /dev/null +++ b/rts/gmp/mpn/power/lshift.s @@ -0,0 +1,56 @@ +# IBM POWER __gmpn_lshift -- + +# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .globl __gmpn_lshift + .globl .__gmpn_lshift + .csect __gmpn_lshift[DS] +__gmpn_lshift: + .long .__gmpn_lshift, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_lshift: + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + lu 0,-4(4) # read most significant limb + sre 3,0,8 # compute carry out limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,-4(4) # read 2:nd most significant limb + sreq 7,0,8 # compute most significant limb of result + bdz Lend # if just two limbs, skip loop +Loop: lu 0,-4(4) # load next lower limb + stu 7,-4(9) # store previous result during read latency + sreq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,-4(9) # store 2:nd least significant limb +Lend2: sle 7,0,6 # compute least significant limb + st 7,-4(9) # store it + br diff --git a/rts/gmp/mpn/power/mul_1.s b/rts/gmp/mpn/power/mul_1.s new file mode 100644 index 0000000000..4e08ade583 --- /dev/null +++ b/rts/gmp/mpn/power/mul_1.s @@ -0,0 +1,109 @@ +# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details.
+ +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The POWER architecture has no unsigned 32x32->64 bit multiplication +# instruction. To obtain that operation, we have to use the 32x32->64 signed +# multiplication instruction, and add the appropriate compensation to the high +# limb of the result. We add the multiplicand if the multiplier has its most +# significant bit set, and we add the multiplier if the multiplicand has its +# most significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit we +# can branch in zero cycles, so that's how we perform the additions. 
+ + .toc + .globl __gmpn_mul_1 + .globl .__gmpn_mul_1 + .csect __gmpn_mul_1[DS] +__gmpn_mul_1: + .long .__gmpn_mul_1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_mul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 # reset carry + cax 9,9,7 + blt Lneg +Lpos: bdz Lend +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br diff --git a/rts/gmp/mpn/power/rshift.s b/rts/gmp/mpn/power/rshift.s new file mode 100644 index 0000000000..65b3945f8a --- /dev/null +++ b/rts/gmp/mpn/power/rshift.s @@ -0,0 +1,54 @@ +# IBM POWER __gmpn_rshift -- + +# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + + .toc + .globl __gmpn_rshift + .globl .__gmpn_rshift + .csect __gmpn_rshift[DS] +__gmpn_rshift: + .long .__gmpn_rshift, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_rshift: + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + l 0,0(4) # read least significant limb + ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s + sle 3,0,8 # compute carry limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,4(4) # read 2:nd least significant limb + sleq 7,0,8 # compute least significant limb of result + bdz Lend # if just two limbs, skip loop +Loop: lu 0,4(4) # load next higher limb + stu 7,4(9) # store previous result during read latency + sleq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,4(9) # store 2:nd most significant limb +Lend2: sre 7,0,6 # compute most significant limb + st 7,4(9) # store it + br diff --git a/rts/gmp/mpn/power/sdiv.s b/rts/gmp/mpn/power/sdiv.s new file mode 100644 index 0000000000..81da622fbc --- /dev/null +++ b/rts/gmp/mpn/power/sdiv.s @@ -0,0 +1,34 @@ +# Copyright (C) 1999 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version.
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + .toc + .globl __sdiv_qrnnd + .globl .__sdiv_qrnnd + .csect __sdiv_qrnnd[DS] +__sdiv_qrnnd: + .long .__sdiv_qrnnd, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__sdiv_qrnnd: + mtmq 5 + div 0,4,6 + mfmq 9 + st 9,0(3) + mr 3,0 + br diff --git a/rts/gmp/mpn/power/sub_n.s b/rts/gmp/mpn/power/sub_n.s new file mode 100644 index 0000000000..aa09cf5bc1 --- /dev/null +++ b/rts/gmp/mpn/power/sub_n.s @@ -0,0 +1,80 @@ +# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length. + +# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation, +# Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + + .toc + .globl __gmpn_sub_n + .globl .__gmpn_sub_n + .csect __gmpn_sub_n[DS] +__gmpn_sub_n: + .long .__gmpn_sub_n, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_sub_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + sf 7,0,8 # subtract least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Subtract the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bc 4,6,L1 # bne cr1,L1 (misassembled by gas) + st 7,4(3) + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br # return + +# We subtracted least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + sfe 7,0,8 # subtract limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + sfe 11,10,9 # subtract previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + sfe 7,0,8 # subtract previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: sfe 11,10,9 # subtract limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ...
return value register + br diff --git a/rts/gmp/mpn/power/submul_1.s b/rts/gmp/mpn/power/submul_1.s new file mode 100644 index 0000000000..bc01b7c95d --- /dev/null +++ b/rts/gmp/mpn/power/submul_1.s @@ -0,0 +1,127 @@ +# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The POWER architecture has no unsigned 32x32->64 bit multiplication +# instruction. To obtain that operation, we have to use the 32x32->64 signed +# multiplication instruction, and add the appropriate compensation to the high +# limb of the result. We add the multiplicand if the multiplier has its most +# significant bit set, and we add the multiplier if the multiplicand has its +# most significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). 
Since the POWER architecture has a branch unit we +# can branch in zero cycles, so that's how we perform the additions. + + .toc + .globl __gmpn_submul_1 + .globl .__gmpn_submul_1 + .csect __gmpn_submul_1[DS] +__gmpn_submul_1: + .long .__gmpn_submul_1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__gmpn_submul_1: + + cal 3,-4(3) # offset res_ptr, it's updated before it's used + l 0,0(4) # load least significant s1 limb + cmpi 0,6,0 # test sign of s2_limb for the blt below + mtctr 5 # copy size into CTR + mul 9,0,6 # signed multiply: high limb to r9, low limb to MQ + srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0 + and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0 + mfmq 11 # fetch low limb of product + cax 9,9,7 # adjust high limb for negative limb from s1 + l 7,4(3) # load res_limb + sf 8,11,7 # subtract low limb from res_limb + a 11,8,11 # invert cy (r11 is junk) + blt Lneg # s2_limb is negative +Lpos: bdz Lend + +Lploop: lu 0,4(4) # load s1 limb and update s1_ptr + stu 8,4(3) # store result limb and update res_ptr + cmpi 0,0,0 # test sign of s1 limb for the bge below + mul 10,0,6 + mfmq 0 + ae 11,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) # load res_limb + aze 10,10 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 9,9 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 # adjust high limb for negative s2_limb + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 # low limb goes to r7 so r0 keeps the s1 limb + ae 11,7,9 # low limb + old_cy_limb + old cy + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb, adding s1 limb for negative s2_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb, adding s1 limb for negative s2_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) # move cy_limb from r10 to r9 +Lend: st 8,4(3) # store ultimate result limb + aze 3,9 # return cy_limb + cy + br diff --git a/rts/gmp/mpn/power/umul.s b/rts/gmp/mpn/power/umul.s new file mode 100644 index 0000000000..8c77496380 --- /dev/null +++ b/rts/gmp/mpn/power/umul.s @@ -0,0 +1,38 @@ +# Copyright (C) 1999 Free Software Foundation, Inc. 
+ +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + .toc + .globl __umul_ppmm + .globl .__umul_ppmm + .csect __umul_ppmm[DS] +__umul_ppmm: + .long .__umul_ppmm, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.__umul_ppmm: + mul 9,4,5 # signed 32x32->64 multiply: high word to r9, low word to MQ + srai 0,4,31 # r0 = all ones if r4 has its msb set, else 0 + and 0,0,5 # r0 = r5 if r4 has its msb set, else 0 + srai 5,5,31 # r5 = all ones if r5 has its msb set, else 0 + and 5,5,4 # r5 = r4 if r5 had its msb set, else 0 + cax 0,0,5 # r0 = total compensation for the unsigned high word + mfmq 11 # fetch low word of product + st 11,0(3) # store low word at 0(r3) + cax 3,9,0 # return high word plus compensation + br diff --git a/rts/gmp/mpn/powerpc32/add_n.asm b/rts/gmp/mpn/powerpc32/add_n.asm new file mode 100644 index 0000000000..81ed04b162 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/add_n.asm @@ -0,0 +1,61 @@ +dnl PowerPC-32 mpn_add_n -- Add two limb vectors of the same length > 0 and +dnl store sum in a third limb vector. + +dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl s2_ptr r5 +dnl size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_add_n) + mtctr r6 C copy size into CTR + addic r0,r0,0 C clear cy + lwz r8,0(r4) C load least significant s1 limb + lwz r0,0(r5) C load least significant s2 limb + addi r3,r3,-4 C offset res_ptr, it's updated before it's used + bdz .Lend C If done, skip loop +.Loop: lwz r9,4(r4) C load s1 limb + lwz r10,4(r5) C load s2 limb + adde r7,r0,r8 C add limbs with cy, set cy + stw r7,4(r3) C store result limb + bdz .Lexit C decrement CTR and exit if done + lwzu r8,8(r4) C load s1 limb and update s1_ptr + lwzu r0,8(r5) C load s2 limb and update s2_ptr + adde r7,r10,r9 C add limbs with cy, set cy + stwu r7,8(r3) C store result limb and update res_ptr + bdnz .Loop C decrement CTR and loop back + +.Lend: adde r7,r0,r8 C add ultimate limbs with cy, set cy + stw r7,4(r3) C store ultimate result limb + li r3,0 C load cy into ... + addze r3,r3 C ... return value register + blr +.Lexit: adde r7,r10,r9 C add ultimate limbs with cy, set cy + stw r7,8(r3) C store ultimate result limb + li r3,0 C load cy into ... + addze r3,r3 C ... return value register + blr +EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/powerpc32/addmul_1.asm b/rts/gmp/mpn/powerpc32/addmul_1.asm new file mode 100644 index 0000000000..3ef75b1532 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/addmul_1.asm @@ -0,0 +1,124 @@ +dnl PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add +dnl the result to a second limb vector. 
+ +dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603 +dnl or PPC750 since I don't have access to any such machines. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_addmul_1) + cmpi cr0,r5,9 C more than 9 limbs? 
+ bgt cr0,.Lbig C branch if more than 9 limbs + + mtctr r5 C copy size into CTR + lwz r0,0(r4) C load least significant s1 limb + mullw r7,r0,r6 C low limb of product + mulhwu r10,r0,r6 C high limb of product, the cy_limb + lwz r9,0(r3) C load res limb + addc r8,r7,r9 C add res limb, set cy + addi r3,r3,-4 C offset res_ptr, it is updated before it is used + bdz .Lend +.Lloop: + lwzu r0,4(r4) C load s1 limb and update s1_ptr + stwu r8,4(r3) C store result limb and update res_ptr + mullw r8,r0,r6 + adde r7,r8,r10 C low limb + old cy_limb + old cy + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 C new cy_limb + addc r8,r7,r9 C add res limb, set cy + bdnz .Lloop +.Lend: stw r8,4(r3) C store ultimate result limb + addze r3,r10 C return cy_limb + cy + blr + +.Lbig: stmw r30,-32(r1) C save r30 and r31 below the stack pointer + addi r5,r5,-1 C the first limb is handled before the unrolled loop + srwi r0,r5,2 C count for 4-way unrolled loop + mtctr r0 + + lwz r7,0(r4) + mullw r8,r7,r6 + mulhwu r0,r7,r6 + lwz r7,0(r3) + addc r8,r8,r7 + stw r8,0(r3) + +.LloopU: + lwz r7,4(r4) + lwz r12,8(r4) + lwz r30,12(r4) + lwzu r31,16(r4) + mullw r8,r7,r6 + mullw r9,r12,r6 + mullw r10,r30,r6 + mullw r11,r31,r6 + adde r8,r8,r0 C add cy_limb + mulhwu r0,r7,r6 + lwz r7,4(r3) + adde r9,r9,r0 + mulhwu r0,r12,r6 + lwz r12,8(r3) + adde r10,r10,r0 + mulhwu r0,r30,r6 + lwz r30,12(r3) + adde r11,r11,r0 + mulhwu r0,r31,r6 + lwz r31,16(r3) + addze r0,r0 C new cy_limb + addc r8,r8,r7 C add res limbs, cy is left for the next adde + stw r8,4(r3) + adde r9,r9,r12 + stw r9,8(r3) + adde r10,r10,r30 + stw r10,12(r3) + adde r11,r11,r31 + stwu r11,16(r3) + bdnz .LloopU + + andi. r31,r5,3 C count of limbs left over after the unrolled loop + mtctr r31 + beq cr0,.Lendx + +.LloopE: + lwzu r7,4(r4) + mullw r8,r7,r6 + adde r8,r8,r0 C add cy_limb + mulhwu r0,r7,r6 + lwz r7,4(r3) + addze r0,r0 C new cy_limb + addc r8,r8,r7 + stwu r8,4(r3) + bdnz .LloopE +.Lendx: + addze r3,r0 C return cy_limb + cy + lmw r30,-32(r1) C restore r30 and r31 + blr +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/powerpc32/aix.m4 b/rts/gmp/mpn/powerpc32/aix.m4 new file mode 100644 index 0000000000..2bd8425817 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/aix.m4 @@ -0,0 +1,39 @@ +divert(-1) +dnl m4 macros for AIX 32-bit assembly. + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. +dnl ASM_START: expands to the .toc directive. +define(`ASM_START', + `.toc') +dnl PROLOGUE(name): declares name and .name global, emits the name[DS] descriptor (address of .name, TOC[tc0], 0), then switches to .text[PR] and places the .name label. +define(`PROLOGUE', + ` + .globl $1 + .globl .$1 + .csect $1[DS],2 +$1: + .long .$1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.$1:') +dnl EPILOGUE(name): expands to nothing. +define(`EPILOGUE', `') + +divert diff --git a/rts/gmp/mpn/powerpc32/gmp-mparam.h b/rts/gmp/mpn/powerpc32/gmp-mparam.h new file mode 100644 index 0000000000..b283185789 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/gmp-mparam.h @@ -0,0 +1,66 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* These values are for the 604. Presumably, these should be considerably + different for the 603 and 750 that have much slower multiply + instructions. */ + +/* Generated by tuneup.c, 2000-05-26. Each threshold below is guarded by #ifndef, so a definition made before this header is included takes precedence. Where a trailing comment says "tuneup says N", the value defined here differs from the tuneup result N. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 26 /* tuneup says 20 */ +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 228 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 46 /* tuneup says 44 */ +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 262 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 52 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 86 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 23 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 7 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 53 +#endif diff --git a/rts/gmp/mpn/powerpc32/lshift.asm b/rts/gmp/mpn/powerpc32/lshift.asm new file mode 100644 index 0000000000..73a85430ab --- /dev/null +++ b/rts/gmp/mpn/powerpc32/lshift.asm @@ -0,0 +1,145 @@ +dnl PowerPC-32 mpn_lshift -- Shift a number left. + +dnl Copyright (C) 1995, 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_lshift) + cmpi cr0,r5,12 C more than 12 limbs? + slwi r0,r5,2 C size in bytes + add r4,r4,r0 C make r4 point at end of s1 + add r7,r3,r0 C make r7 point at end of res + bgt .LBIG C branch if more than 12 limbs + + mtctr r5 C copy size into CTR + subfic r8,r6,32 C r8 = 32 - cnt + lwzu r11,-4(r4) C load first s1 limb + srw r3,r11,r8 C compute function return value + bdz .Lend1 + +.Loop: lwzu r10,-4(r4) C load next lower s1 limb + slw r9,r11,r6 C high part from the previous limb + srw r12,r10,r8 C low part from the limb just loaded + or r9,r9,r12 C combine into one result limb + stwu r9,-4(r7) C store result limb and update res_ptr + bdz .Lend2 + lwzu r11,-4(r4) + slw r9,r10,r6 + srw r12,r11,r8 + or r9,r9,r12 + stwu r9,-4(r7) + bdnz .Loop + +.Lend1: slw r0,r11,r6 + stw r0,-4(r7) C store least significant result limb + blr +.Lend2: slw r0,r10,r6 + stw r0,-4(r7) C store least significant result limb + blr + +.LBIG: + stmw r24,-32(r1) C save registers we are supposed to preserve + lwzu r9,-4(r4) + subfic r8,r6,32 C r8 = 32 - cnt + srw r3,r9,r8 C compute function return value + slw r0,r9,r6 C r0 carries the high part of the next result limb + addi r5,r5,-1 C the first limb is already loaded + + andi. r10,r5,3 C count for spill loop + beq .Le + mtctr r10 + lwzu r28,-4(r4) + bdz .Lxe0 + +.Loop0: slw r12,r28,r6 + srw r24,r28,r8 + lwzu r28,-4(r4) + or r24,r0,r24 + stwu r24,-4(r7) + mr r0,r12 + bdnz .Loop0 C taken at most once! 
+ +.Lxe0: slw r12,r28,r6 + srw r24,r28,r8 + or r24,r0,r24 + stwu r24,-4(r7) + mr r0,r12 + +.Le: srwi r5,r5,2 C count for unrolled loop + addi r5,r5,-1 C the last group of four is done after the loop + mtctr r5 + lwz r28,-4(r4) + lwz r29,-8(r4) + lwz r30,-12(r4) + lwzu r31,-16(r4) + +.LoopU: slw r9,r28,r6 + srw r24,r28,r8 + lwz r28,-4(r4) + slw r10,r29,r6 + srw r25,r29,r8 + lwz r29,-8(r4) + slw r11,r30,r6 + srw r26,r30,r8 + lwz r30,-12(r4) + slw r12,r31,r6 + srw r27,r31,r8 + lwzu r31,-16(r4) + or r24,r0,r24 + stw r24,-4(r7) + or r25,r9,r25 + stw r25,-8(r7) + or r26,r10,r26 + stw r26,-12(r7) + or r27,r11,r27 + stwu r27,-16(r7) + mr r0,r12 + bdnz .LoopU + + slw r9,r28,r6 + srw r24,r28,r8 + slw r10,r29,r6 + srw r25,r29,r8 + slw r11,r30,r6 + srw r26,r30,r8 + slw r12,r31,r6 + srw r27,r31,r8 + or r24,r0,r24 + stw r24,-4(r7) + or r25,r9,r25 + stw r25,-8(r7) + or r26,r10,r26 + stw r26,-12(r7) + or r27,r11,r27 + stwu r27,-16(r7) + mr r0,r12 + + stw r0,-4(r7) C store least significant result limb + lmw r24,-32(r1) C restore registers + blr +EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/powerpc32/mul_1.asm b/rts/gmp/mpn/powerpc32/mul_1.asm new file mode 100644 index 0000000000..ec878b54d5 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/mul_1.asm @@ -0,0 +1,86 @@ +dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store +dnl the result in a second limb vector. + +dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. 
+ +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl This is optimized for the PPC604 but it runs decently even on PPC601. It +dnl has not been tested on a PPC603 since I don't have access to any such +dnl machines. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_mul_1) + mtctr r5 C copy size into CTR + addi r3,r3,-4 C offset res_ptr, it's updated before it's used + li r12,0 C clear upper product reg + addic r0,r0,0 C clear cy +C Start software pipeline + lwz r8,0(r4) C load least significant s1 limb + bdz .Lend3 C size is 1, no pipeline needed + stmw r30,-8(r1) C save registers we are supposed to preserve + lwzu r9,4(r4) + mullw r11,r8,r6 + mulhwu r0,r8,r6 + bdz .Lend1 +C Software pipelined main loop +.Loop: lwz r8,4(r4) + mullw r10,r9,r6 + adde r30,r11,r12 C low limb + previous high limb + cy + mulhwu r12,r9,r6 + stw r30,4(r3) + bdz .Lend2 + lwzu r9,8(r4) + mullw r11,r8,r6 + adde r31,r10,r0 C low limb + previous high limb + cy + mulhwu r0,r8,r6 + stwu r31,8(r3) + bdnz .Loop +C Finish software pipeline +.Lend1: mullw r10,r9,r6 + adde r30,r11,r12 + mulhwu r12,r9,r6 + stw r30,4(r3) + adde r31,r10,r0 + stwu r31,8(r3) + addze r3,r12 C return last high limb + cy + lmw r30,-8(r1) C restore registers from stack + blr +.Lend2: mullw r11,r8,r6 + adde r31,r10,r0 + mulhwu r0,r8,r6 + stwu r31,8(r3) + adde r30,r11,r12 + stw r30,4(r3) + addze r3,r0 C return last high limb + cy + lmw r30,-8(r1) C restore registers from stack + blr +.Lend3: mullw r11,r8,r6 C single limb: low limb of the product + stw r11,4(r3) C store the only result limb + mulhwu r3,r8,r6 C return the high limb + blr +EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/powerpc32/regmap.m4 b/rts/gmp/mpn/powerpc32/regmap.m4 new file mode 100644 index 0000000000..978f18902a --- /dev/null +++ b/rts/gmp/mpn/powerpc32/regmap.m4 @@ -0,0 +1,34 @@ +divert(-1) + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl Map register names r0 through r31 to just `0' through `31'. +dnl This is needed on all systems but NeXT, Rhapsody, and MacOS-X. NOTE(review): forloop is not defined in this file; presumably the including m4 setup provides it -- confirm. +forloop(i,0,31, +`define(`r'i,i)' +) + +dnl Likewise for the condition register fields cr0 through cr7. +forloop(i,0,7, +`define(`cr'i,i)' +) + +divert diff --git a/rts/gmp/mpn/powerpc32/rshift.asm b/rts/gmp/mpn/powerpc32/rshift.asm new file mode 100644 index 0000000000..a09ba04938 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/rshift.asm @@ -0,0 +1,60 @@ +dnl PowerPC-32 mpn_rshift -- Shift a number right. + +dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. 
+ +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_rshift) + mtctr r5 C copy size into CTR + addi r7,r3,-4 C move adjusted res_ptr to free return reg + subfic r8,r6,32 C r8 = 32 - cnt + lwz r11,0(r4) C load first s1 limb + slw r3,r11,r8 C compute function return value + bdz .Lend1 + +.Loop: lwzu r10,4(r4) C load next higher s1 limb and update s1_ptr + srw r9,r11,r6 C low part from the previous limb + slw r12,r10,r8 C high part from the limb just loaded + or r9,r9,r12 C combine into one result limb + stwu r9,4(r7) C store result limb and update res_ptr + bdz .Lend2 + lwzu r11,4(r4) + srw r9,r10,r6 + slw r12,r11,r8 + or r9,r9,r12 + stwu r9,4(r7) + bdnz .Loop + +.Lend1: srw r0,r11,r6 + stw r0,4(r7) C store most significant result limb + blr + +.Lend2: srw r0,r10,r6 + stw r0,4(r7) C store most significant result limb + blr +EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/powerpc32/sub_n.asm b/rts/gmp/mpn/powerpc32/sub_n.asm new file mode 100644 index 0000000000..b04b4192ef --- /dev/null +++ b/rts/gmp/mpn/powerpc32/sub_n.asm @@ -0,0 +1,61 @@ +dnl PowerPC-32 mpn_sub_n -- Subtract two limb vectors of the same length > 0 +dnl and store difference in a third limb vector. + +dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. 
+ +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl s2_ptr r5 +dnl size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_sub_n) + mtctr r6 C copy size into CTR + addic r0,r6,-1 C set cy (size > 0, so adding -1 carries out) + lwz r8,0(r4) C load least significant s1 limb + lwz r0,0(r5) C load least significant s2 limb + addi r3,r3,-4 C offset res_ptr, it's updated before it's used + bdz .Lend C If done, skip loop +.Loop: lwz r9,4(r4) C load s1 limb + lwz r10,4(r5) C load s2 limb + subfe r7,r0,r8 C subtract limbs with cy, set cy + stw r7,4(r3) C store result limb + bdz .Lexit C decrement CTR and exit if done + lwzu r8,8(r4) C load s1 limb and update s1_ptr + lwzu r0,8(r5) C load s2 limb and update s2_ptr + subfe r7,r10,r9 C subtract limbs with cy, set cy + stwu r7,8(r3) C store result limb and update res_ptr + bdnz .Loop C decrement CTR and loop back + +.Lend: subfe r7,r0,r8 C subtract ultimate limbs with cy, set cy + stw r7,4(r3) C store ultimate result limb + subfe r3,r0,r0 C load !cy into ... + subfic r3,r3,0 C ... return value register + blr +.Lexit: subfe r7,r10,r9 C subtract ultimate limbs with cy, set cy + stw r7,8(r3) C store ultimate result limb + subfe r3,r0,r0 C load !cy into ... + subfic r3,r3,0 C ... return value register + blr +EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/powerpc32/submul_1.asm b/rts/gmp/mpn/powerpc32/submul_1.asm new file mode 100644 index 0000000000..a129e9f9ea --- /dev/null +++ b/rts/gmp/mpn/powerpc32/submul_1.asm @@ -0,0 +1,130 @@ +dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract +dnl the result from a second limb vector. + +dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603 +dnl or PPC750 since I don't have access to any such machines. + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_submul_1) + cmpi cr0,r5,9 C more than 9 limbs? 
+ bgt cr0,.Lbig C branch if more than 9 limbs + + mtctr r5 C copy size into CTR + lwz r0,0(r4) C load least significant s1 limb + mullw r7,r0,r6 C low limb of product + mulhwu r10,r0,r6 C high limb of product, the cy_limb + lwz r9,0(r3) C load res limb + subfc r8,r7,r9 C subtract low limb from res limb + addc r7,r7,r8 C invert cy (r7 is junk) + addi r3,r3,-4 C offset res_ptr, it is updated before it is used + bdz .Lend +.Lloop: + lwzu r0,4(r4) C load s1 limb and update s1_ptr + stwu r8,4(r3) C store result limb and update res_ptr + mullw r8,r0,r6 + adde r7,r8,r10 C low limb + old cy_limb + old cy + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 C new cy_limb + subfc r8,r7,r9 C subtract from res limb + addc r7,r7,r8 C invert cy (r7 is junk) + bdnz .Lloop +.Lend: stw r8,4(r3) C store ultimate result limb + addze r3,r10 C return cy_limb + cy + blr + +.Lbig: stmw r30,-32(r1) C save r30 and r31 below the stack pointer + addi r5,r5,-1 C the first limb is handled before the unrolled loop + srwi r0,r5,2 C count for 4-way unrolled loop + mtctr r0 + + lwz r7,0(r4) + mullw r8,r7,r6 + mulhwu r0,r7,r6 + lwz r7,0(r3) + subfc r7,r8,r7 C subtract low limb from res limb + addc r8,r8,r7 C invert cy (r8 is junk) + stw r7,0(r3) + +.LloopU: + lwz r7,4(r4) + lwz r12,8(r4) + lwz r30,12(r4) + lwzu r31,16(r4) + mullw r8,r7,r6 + mullw r9,r12,r6 + mullw r10,r30,r6 + mullw r11,r31,r6 + adde r8,r8,r0 C add cy_limb + mulhwu r0,r7,r6 + lwz r7,4(r3) + adde r9,r9,r0 + mulhwu r0,r12,r6 + lwz r12,8(r3) + adde r10,r10,r0 + mulhwu r0,r30,r6 + lwz r30,12(r3) + adde r11,r11,r0 + mulhwu r0,r31,r6 + lwz r31,16(r3) + addze r0,r0 C new cy_limb + subfc r7,r8,r7 C subtract the four product limbs from the res limbs + stw r7,4(r3) + subfe r12,r9,r12 + stw r12,8(r3) + subfe r30,r10,r30 + stw r30,12(r3) + subfe r31,r11,r31 + stwu r31,16(r3) + subfe r11,r11,r11 C invert ... + addic r11,r11,1 C ... carry + bdnz .LloopU + + andi. r31,r5,3 C count of limbs left over after the unrolled loop + mtctr r31 + beq cr0,.Lendx + +.LloopE: + lwzu r7,4(r4) + mullw r8,r7,r6 + adde r8,r8,r0 C add cy_limb + mulhwu r0,r7,r6 + lwz r7,4(r3) + addze r0,r0 C new cy_limb + subfc r7,r8,r7 C subtract from res limb + addc r8,r8,r7 C invert cy (r8 is junk) + stwu r7,4(r3) + bdnz .LloopE +.Lendx: + addze r3,r0 C return cy_limb + cy + lmw r30,-32(r1) C restore r30 and r31 + blr +EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/powerpc32/umul.asm b/rts/gmp/mpn/powerpc32/umul.asm new file mode 100644 index 0000000000..eeaa0a4dc8 --- /dev/null +++ b/rts/gmp/mpn/powerpc32/umul.asm @@ -0,0 +1,32 @@ +dnl PowerPC-32 umul_ppmm -- support for longlong.h + +dnl Copyright (C) 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published by +dnl the Free Software Foundation; either version 2.1 of the License, or (at your +dnl option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_umul_ppmm) + mullw 0,4,5 C low word of r4 times r5 + mulhwu 9,4,5 C high word of the unsigned product + stw 0,0(3) C store low word at 0(r3) + mr 3,9 C return high word + blr +EPILOGUE(mpn_umul_ppmm) diff --git a/rts/gmp/mpn/powerpc64/README b/rts/gmp/mpn/powerpc64/README new file mode 100644 index 0000000000..c779276917 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/README @@ -0,0 +1,36 @@ +PPC630 (aka Power3) pipeline information: + +Decoding is 4-way and issue is 8-way with some out-of-order capability. +LS1 - ld/st unit 1 +LS2 - ld/st unit 2 +FXU1 - integer unit 1, handles any simple integer instructions +FXU2 - integer unit 2, handles any simple integer instructions +FXU3 - integer unit 3, handles integer multiply and divide +FPU1 - floating-point unit 1 +FPU2 - floating-point unit 2 + +Memory: Any two memory operations can issue, but memory subsystem + can sustain just one store per cycle. +Simple integer: 2 operations (such as add, rl*) +Integer multiply: 1 operation every 9th cycle worst case; exact timing depends + on 2nd operand most significant bit position (10 bits per + cycle). Multiply unit is not pipelined, only one multiply + operation in progress is allowed. +Integer divide: ? 
+Floating-point: Any 2 plain arithmetic instructions (such as fmul, fadd, fmadd) + Latency = 4. +Floating-point divide: + ? +Floating-point square root: + ? + +Best possible times for the main loops: +shift: 1.5 cycles, limited by integer unit contention. + With 63 special loops, one for each shift count, we could + reduce the needed integer instructions to 2, which would + reduce the best possible time to 1 cycle. +add/sub: 1.5 cycles, limited by ld/st unit contention. +mul: 18 cycles (average) unless floating-point operations are used, + but that would only help for multiplies of perhaps 10 or more + limbs. +addmul/submul: Same situation as for mul. diff --git a/rts/gmp/mpn/powerpc64/add_n.asm b/rts/gmp/mpn/powerpc64/add_n.asm new file mode 100644 index 0000000000..c3325376dc --- /dev/null +++ b/rts/gmp/mpn/powerpc64/add_n.asm @@ -0,0 +1,61 @@ +# PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and +# store sum in a third limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_add_n) + mtctr r6 # copy size into CTR + addic r0,r0,0 # clear cy + ld r8,0(r4) # load least significant s1 limb + ld r0,0(r5) # load least significant s2 limb + addi r3,r3,-8 # offset res_ptr, it's updated before it's used + bdz .Lend # If done, skip loop +.Loop: ld r9,8(r4) # load s1 limb + ld r10,8(r5) # load s2 limb + adde r7,r0,r8 # add limbs with cy, set cy + std r7,8(r3) # store result limb + bdz .Lexit # decrement CTR and exit if done + ldu r8,16(r4) # load s1 limb and update s1_ptr + ldu r0,16(r5) # load s2 limb and update s2_ptr + adde r7,r10,r9 # add limbs with cy, set cy + stdu r7,16(r3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + +.Lend: adde r7,r0,r8 + std r7,8(r3) # store ultimate result limb + li r3,0 # load cy into ... + addze r3,r3 # ... return value register + blr +.Lexit: adde r7,r10,r9 + std r7,16(r3) + li r3,0 # load cy into ... + addze r3,r3 # ... return value register + blr +EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/powerpc64/addmul_1.asm b/rts/gmp/mpn/powerpc64/addmul_1.asm new file mode 100644 index 0000000000..81774482fe --- /dev/null +++ b/rts/gmp/mpn/powerpc64/addmul_1.asm @@ -0,0 +1,52 @@ +# PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_addmul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it is updated before it is used + cal 4,-8(4) # offset s1_ptr, it is updated before it is used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + ld 10,8(3) # load res limb + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and cy from the previous limb, set cy + mulhdu 9,0,6 # high half of product is the new cy_limb + addze 9,9 # fold cy into cy_limb + addc 7,7,10 # add res limb, set cy + stdu 7,8(3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + cy + blr +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm new file mode 100644 index 0000000000..4ed40d71ae --- /dev/null +++ b/rts/gmp/mpn/powerpc64/addsub_n.asm @@ -0,0 +1,107 @@ +# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# r1_ptr r3 (sums are stored here) +# r2_ptr r4 (differences are stored here) +# s1_ptr r5, s2_ptr r6 +# size r7 (the loop handles 4 limbs per pass) + +include(`asm-syntax.m4') + +define(SAVE_BORROW_RESTORE_CARRY, + `sldi $1,$1,63 + adde $1,$1,$1') +define(SAVE_CARRY_RESTORE_BORROW, + `sldi $1,$1,63 + adde $1,$1,$1') + +# 19991117 + +# This is just crafted for testing some ideas, and verifying that we can make +# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good. +# We should play a little with the schedule. No time has been spent on that. + +# To finish this, the loop warm up and cool down code needs to be written, +# and the result needs to be tested. Also, the proper calling sequence should +# be used. + +# r1p r2p s1p s2p n +# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12 + +ASM_START() +PROLOGUE(mpn_addsub_n) + std r14,-64(1) + std r15,-56(1) + std r16,-48(1) + std r17,-40(1) + std r18,-32(1) + std r19,-24(1) + + srdi r7,r7,2 # size / 4, the loop is unrolled 4 ways + mtctr r7 # copy size into CTR + addic r0,r0,0 # clear cy + addi r3,r3,-8 # offset r1_ptr, it's updated before it's used + addi r4,r4,-8 # offset r2_ptr, it's updated before it's used + +.Loop: + adde r12,r8,r9 + std r12,8(r3) + adde r12,r10,r11 + std r12,16(r3) + + SAVE_CARRY_RESTORE_BORROW(r0) + + subfe r12,r8,r9 + std r12,8(r4) + ld r8,8(r5) # s1 L 1 + ld r9,8(r6) # s2 L 1 + subfe r12,r10,r11 + std r12,16(r4) + ld r10,16(r5) # s1 L 2 + ld r11,16(r6) # s2 L 2 +# pair ------------------------- + subfe r12,r14,r15 + std r12,24(r4) + subfe r12,r16,r17 + stdu r12,32(r4) + + SAVE_BORROW_RESTORE_CARRY(r0) + + adde r12,r14,r15 + std r12,24(r3) + ld r14,24(r5) # s1 L 3 + ld r15,24(r6) # s2 L 3 + adde r12,r16,r17 + stdu r12,32(r3) + ldu r16,32(r5) # s1 L 4 + ldu r17,32(r6) # s2 L 4 + bdnz .Loop + + ld r14,-64(1) + ld r15,-56(1) + ld r16,-48(1) + ld r17,-40(1) + ld r18,-32(1) + ld r19,-24(1) + blr +EPILOGUE(mpn_addsub_n) diff --git a/rts/gmp/mpn/powerpc64/aix.m4 b/rts/gmp/mpn/powerpc64/aix.m4 new file mode 100644 index 0000000000..aee9f1f97a --- /dev/null +++ 
b/rts/gmp/mpn/powerpc64/aix.m4 @@ -0,0 +1,40 @@ +divert(-1) +dnl m4 macros for AIX 64-bit assembly. + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + +define(`ASM_START', + `.machine "ppc64" + .toc') + +define(`PROLOGUE', + ` + .globl $1 + .globl .$1 + .csect $1[DS],3 +$1: + .llong .$1, TOC[tc0], 0 + .csect .text[PR] + .align 2 +.$1:') + +define(`EPILOGUE', `') + +divert diff --git a/rts/gmp/mpn/powerpc64/copyd.asm b/rts/gmp/mpn/powerpc64/copyd.asm new file mode 100644 index 0000000000..d06e8c25fd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/copyd.asm @@ -0,0 +1,45 @@ +# PowerPC-64 mpn_copyd -- Copy a limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# rptr r3 +# sptr r4 +# n r5 + +include(`../config.m4') + +# Unrolling this analogous to sparc64/copyi.s doesn't help for any +# operand sizes. + +ASM_START() +PROLOGUE(mpn_copyd) + cmpdi cr0,r5,0 # n == 0? + mtctr r5 # copy n into CTR + sldi r5,r5,3 # n * 8 = length in bytes + add r4,r4,r5 # make sptr point just past the source + add r3,r3,r5 # make rptr point just past the destination + beq cr0,.Lend # nothing to copy +.Loop: ldu r0,-8(r4) # load limb, moving sptr down first + stdu r0,-8(r3) # store limb, moving rptr down first + bdnz .Loop # decrement CTR and loop back +.Lend: blr +EPILOGUE(mpn_copyd) diff --git a/rts/gmp/mpn/powerpc64/copyi.asm b/rts/gmp/mpn/powerpc64/copyi.asm new file mode 100644 index 0000000000..a1bedc4c5b --- /dev/null +++ b/rts/gmp/mpn/powerpc64/copyi.asm @@ -0,0 +1,44 @@ +# PowerPC-64 mpn_copyi -- Copy a limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# rptr r3 +# sptr r4 +# n r5 + +include(`../config.m4') + +# Unrolling this analogous to sparc64/copyi.s doesn't help for any +# operand sizes. + +ASM_START() +PROLOGUE(mpn_copyi) + cmpdi cr0,r5,0 # n == 0? + mtctr r5 # copy n into CTR + addi r4,r4,-8 # offset sptr, it's updated before it's used + addi r3,r3,-8 # offset rptr, it's updated before it's used + beq cr0,.Lend # nothing to copy +.Loop: ldu r0,8(r4) # load limb, moving sptr up first + stdu r0,8(r3) # store limb, moving rptr up first + bdnz .Loop # decrement CTR and loop back +.Lend: blr +EPILOGUE(mpn_copyi) diff --git a/rts/gmp/mpn/powerpc64/gmp-mparam.h b/rts/gmp/mpn/powerpc64/gmp-mparam.h new file mode 100644 index 0000000000..6fefb960cd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/gmp-mparam.h @@ -0,0 +1,62 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1995, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* Generated by tuneup.c, 2000-07-16. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 10 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 57 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 16 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 89 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 28 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 216 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 14 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 6 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 163 +#endif diff --git a/rts/gmp/mpn/powerpc64/lshift.asm b/rts/gmp/mpn/powerpc64/lshift.asm new file mode 100644 index 0000000000..cef3a81fdd --- /dev/null +++ b/rts/gmp/mpn/powerpc64/lshift.asm @@ -0,0 +1,159 @@ +# PowerPC-64 mpn_lshift -- Shift a number left. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_lshift) + cmpdi cr0,r5,20 # more than 20 limbs? 
+ sldi r0,r5,3 # size * 8 = length in bytes + add r4,r4,r0 # make r4 point at end of s1 + add r7,r3,r0 # make r7 point at end of res + bgt .LBIG # branch if more than 20 limbs + + mtctr r5 # copy size into CTR + subfic r8,r6,64 # r8 = 64 - cnt + ldu r11,-8(r4) # load first s1 limb + srd r3,r11,r8 # compute function return value + bdz .Lend1 + +.Loop: ldu r10,-8(r4) + sld r9,r11,r6 + srd r12,r10,r8 + or r9,r9,r12 + stdu r9,-8(r7) + bdz .Lend2 + ldu r11,-8(r4) + sld r9,r10,r6 + srd r12,r11,r8 + or r9,r9,r12 + stdu r9,-8(r7) + bdnz .Loop + +.Lend1: sld r0,r11,r6 + std r0,-8(r7) + blr +.Lend2: sld r0,r10,r6 + std r0,-8(r7) + blr + +.LBIG: + std r24,-64(1) # save r24-r31 below the stack pointer + std r25,-56(1) + std r26,-48(1) + std r27,-40(1) + std r28,-32(1) + std r29,-24(1) + std r30,-16(1) + std r31,-8(1) + ldu r9,-8(r4) + subfic r8,r6,64 # r8 = 64 - cnt + srd r3,r9,r8 # compute function return value + sld r0,r9,r6 + addi r5,r5,-1 + + andi. r10,r5,3 # count for spill loop + beq .Le + mtctr r10 + ldu r28,-8(r4) + bdz .Lxe0 + +.Loop0: sld r12,r28,r6 + srd r24,r28,r8 + ldu r28,-8(r4) + or r24,r0,r24 + stdu r24,-8(r7) + mr r0,r12 + bdnz .Loop0 # taken at most once! 
+ +.Lxe0: sld r12,r28,r6 + srd r24,r28,r8 + or r24,r0,r24 + stdu r24,-8(r7) + mr r0,r12 + +.Le: srdi r5,r5,2 # count for unrolled loop + addi r5,r5,-1 # the last group of 4 limbs is finished after the loop + mtctr r5 + ld r28,-8(r4) + ld r29,-16(r4) + ld r30,-24(r4) + ldu r31,-32(r4) + +.LoopU: sld r9,r28,r6 + srd r24,r28,r8 + ld r28,-8(r4) + sld r10,r29,r6 + srd r25,r29,r8 + ld r29,-16(r4) + sld r11,r30,r6 + srd r26,r30,r8 + ld r30,-24(r4) + sld r12,r31,r6 + srd r27,r31,r8 + ldu r31,-32(r4) + or r24,r0,r24 + std r24,-8(r7) + or r25,r9,r25 + std r25,-16(r7) + or r26,r10,r26 + std r26,-24(r7) + or r27,r11,r27 + stdu r27,-32(r7) + mr r0,r12 + bdnz .LoopU + + sld r9,r28,r6 + srd r24,r28,r8 + sld r10,r29,r6 + srd r25,r29,r8 + sld r11,r30,r6 + srd r26,r30,r8 + sld r12,r31,r6 + srd r27,r31,r8 + or r24,r0,r24 + std r24,-8(r7) + or r25,r9,r25 + std r25,-16(r7) + or r26,r10,r26 + std r26,-24(r7) + or r27,r11,r27 + stdu r27,-32(r7) + mr r0,r12 + + std r0,-8(r7) # store least significant result limb + ld r24,-64(1) # restore r24-r31 + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr +EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/powerpc64/mul_1.asm b/rts/gmp/mpn/powerpc64/mul_1.asm new file mode 100644 index 0000000000..47597283ff --- /dev/null +++ b/rts/gmp/mpn/powerpc64/mul_1.asm @@ -0,0 +1,49 @@ +# PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_mul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it is updated before it is used + cal 4,-8(4) # offset s1_ptr, it is updated before it is used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and cy from the previous limb, set cy + mulhdu 9,0,6 # high half of product is the new cy_limb + stdu 7,8(3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + cy + blr +EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/powerpc64/rshift.asm b/rts/gmp/mpn/powerpc64/rshift.asm new file mode 100644 index 0000000000..88272c7fa9 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/rshift.asm @@ -0,0 +1,60 @@ +# PowerPC-64 mpn_rshift -- Shift a number right. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# cnt r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_rshift) + mtctr r5 # copy size into CTR + addi r7,r3,-8 # move adjusted res_ptr to free return reg + subfic r8,r6,64 # r8 = 64 - cnt + ld r11,0(r4) # load first s1 limb + sld r3,r11,r8 # compute function return value + bdz .Lend1 # single limb, skip loop + +.Loop: ldu r10,8(r4) # load next s1 limb and update s1_ptr + srd r9,r11,r6 # low part comes from the current limb + sld r12,r10,r8 # high part comes from the next limb + or r9,r9,r12 + stdu r9,8(r7) # store result limb and update res_ptr + bdz .Lend2 # decrement CTR and exit if done + ldu r11,8(r4) # same again with r10 and r11 swapped + srd r9,r10,r6 + sld r12,r11,r8 + or r9,r9,r12 + stdu r9,8(r7) + bdnz .Loop # decrement CTR and loop back + +.Lend1: srd r0,r11,r6 + std r0,8(r7) # store most significant result limb + blr + +.Lend2: srd r0,r10,r6 + std r0,8(r7) # store most significant result limb + blr +EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/powerpc64/sub_n.asm b/rts/gmp/mpn/powerpc64/sub_n.asm new file mode 100644 index 0000000000..4de3de69c7 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/sub_n.asm @@ -0,0 +1,61 @@ +# PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 +# and store difference in a third limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_sub_n) + mtctr r6 # copy size into CTR + addic r0,r6,-1 # set cy + ld r8,0(r4) # load least significant s1 limb + ld r0,0(r5) # load least significant s2 limb + addi r3,r3,-8 # offset res_ptr, it's updated before it's used + bdz .Lend # If done, skip loop +.Loop: ld r9,8(r4) # load s1 limb + ld r10,8(r5) # load s2 limb + subfe r7,r0,r8 # subtract limbs with cy, set cy + std r7,8(r3) # store result limb + bdz .Lexit # decrement CTR and exit if done + ldu r8,16(r4) # load s1 limb and update s1_ptr + ldu r0,16(r5) # load s2 limb and update s2_ptr + subfe r7,r10,r9 # subtract limbs with cy, set cy + stdu r7,16(r3) # store result limb and update res_ptr + bdnz .Loop # decrement CTR and loop back + +.Lend: subfe r7,r0,r8 + std r7,8(r3) # store ultimate result limb + subfe r3,r0,r0 # load !cy into ... + subfic r3,r3,0 # ... return value register + blr +.Lexit: subfe r7,r10,r9 + std r7,16(r3) + subfe r3,r0,r0 # load !cy into ... + subfic r3,r3,0 # ... return value register + blr +EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/powerpc64/submul_1.asm b/rts/gmp/mpn/powerpc64/submul_1.asm new file mode 100644 index 0000000000..17f6369a38 --- /dev/null +++ b/rts/gmp/mpn/powerpc64/submul_1.asm @@ -0,0 +1,54 @@ +# PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. 
+ +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_submul_1) + mtctr 5 # copy size into CTR + li 9,0 # cy_limb = 0 + addic 0,0,0 # clear cy + cal 3,-8(3) # offset res_ptr, it is updated before it is used + cal 4,-8(4) # offset s1_ptr, it is updated before it is used +.Loop: + ldu 0,8(4) # load s1 limb and update s1_ptr + ld 10,8(3) # load res limb + mulld 7,0,6 # low half of s1 limb * s2_limb + adde 7,7,9 # add cy_limb and borrow from the previous limb, set cy + mulhdu 9,0,6 # high half of product is the new cy_limb + addze 9,9 # fold cy into cy_limb + subfc 7,7,10 # res limb - low product, cy = no borrow + stdu 7,8(3) # store result limb and update res_ptr + subfe 11,11,11 # invert ... + addic 11,11,1 # ... carry, so that cy = borrow for the next adde + bdnz .Loop # decrement CTR and loop back + + addze 3,9 # return cy_limb + final borrow + blr +EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/pyr/add_n.s b/rts/gmp/mpn/pyr/add_n.s new file mode 100644 index 0000000000..e1fc535846 --- /dev/null +++ b/rts/gmp/mpn/pyr/add_n.s @@ -0,0 +1,76 @@ +# Pyramid __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 2 +.globl ___gmpn_add_n +___gmpn_add_n: + movw $-1,tr0 # representation for carry clear + + movw pr3,tr2 + andw $3,tr2 + beq Lend0 + subw tr2,pr3 + +Loop0: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + addwc (pr2),tr1 + movw tr1,(pr0) + + subwb tr0,tr0 + addw $4,pr0 + addw $4,pr1 + addw $4,pr2 + addw $-1,tr2 + bne Loop0 + + mtstw pr3,pr3 + beq Lend +Lend0: +Loop: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + addwc (pr2),tr1 + movw tr1,(pr0) + + movw 4(pr1),tr1 + addwc 4(pr2),tr1 + movw tr1,4(pr0) + + movw 8(pr1),tr1 + addwc 8(pr2),tr1 + movw tr1,8(pr0) + + movw 12(pr1),tr1 + addwc 12(pr2),tr1 + movw tr1,12(pr0) + + subwb tr0,tr0 + addw $16,pr0 + addw $16,pr1 + addw $16,pr2 + addw $-4,pr3 + bne Loop +Lend: + mnegw tr0,pr0 + ret diff --git a/rts/gmp/mpn/pyr/addmul_1.s b/rts/gmp/mpn/pyr/addmul_1.s new file mode 100644 index 0000000000..65c3f8f008 --- /dev/null +++ b/rts/gmp/mpn/pyr/addmul_1.s @@ -0,0 +1,45 @@ +# Pyramid __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + +.text + .align 2 +.globl ___gmpn_addmul_1 +___gmpn_addmul_1: + mova (pr0)[pr2*4],pr0 + mova (pr1)[pr2*4],pr1 + mnegw pr2,pr2 + movw $0,tr3 + +Loop: movw (pr1)[pr2*4],tr1 + uemul pr3,tr0 + addw tr3,tr1 + movw $0,tr3 + addwc tr0,tr3 + movw (pr0)[pr2*0x4],tr0 + addw tr0,tr1 + addwc $0,tr3 + movw tr1,(pr0)[pr2*4] + addw $1,pr2 + bne Loop + + movw tr3,pr0 + ret diff --git a/rts/gmp/mpn/pyr/mul_1.s b/rts/gmp/mpn/pyr/mul_1.s new file mode 100644 index 0000000000..1272297c42 --- /dev/null +++ b/rts/gmp/mpn/pyr/mul_1.s @@ -0,0 +1,42 @@ +# Pyramid __gmpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ +.text + .align 2 +.globl ___gmpn_mul_1 +___gmpn_mul_1: + mova (pr0)[pr2*4],pr0 + mova (pr1)[pr2*4],pr1 + mnegw pr2,pr2 + movw $0,tr3 + +Loop: movw (pr1)[pr2*4],tr1 + uemul pr3,tr0 + addw tr3,tr1 + movw $0,tr3 + addwc tr0,tr3 + movw tr1,(pr0)[pr2*4] + addw $1,pr2 + bne Loop + + movw tr3,pr0 + ret diff --git a/rts/gmp/mpn/pyr/sub_n.s b/rts/gmp/mpn/pyr/sub_n.s new file mode 100644 index 0000000000..1fd2eb0f17 --- /dev/null +++ b/rts/gmp/mpn/pyr/sub_n.s @@ -0,0 +1,76 @@ +# Pyramid __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +# store difference in a third limb vector. + +# Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ +.text + .align 2 +.globl ___gmpn_sub_n +___gmpn_sub_n: + movw $-1,tr0 # representation for carry clear + + movw pr3,tr2 + andw $3,tr2 + beq Lend0 + subw tr2,pr3 + +Loop0: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + subwb (pr2),tr1 + movw tr1,(pr0) + + subwb tr0,tr0 + addw $4,pr0 + addw $4,pr1 + addw $4,pr2 + addw $-1,tr2 + bne Loop0 + + mtstw pr3,pr3 + beq Lend +Lend0: +Loop: rsubw $0,tr0 # restore carry bit from carry-save register + + movw (pr1),tr1 + subwb (pr2),tr1 + movw tr1,(pr0) + + movw 4(pr1),tr1 + subwb 4(pr2),tr1 + movw tr1,4(pr0) + + movw 8(pr1),tr1 + subwb 8(pr2),tr1 + movw tr1,8(pr0) + + movw 12(pr1),tr1 + subwb 12(pr2),tr1 + movw tr1,12(pr0) + + subwb tr0,tr0 + addw $16,pr0 + addw $16,pr1 + addw $16,pr2 + addw $-4,pr3 + bne Loop +Lend: + mnegw tr0,pr0 + ret diff --git a/rts/gmp/mpn/sh/add_n.s b/rts/gmp/mpn/sh/add_n.s new file mode 100644 index 0000000000..df388b31a3 --- /dev/null +++ b/rts/gmp/mpn/sh/add_n.s @@ -0,0 +1,47 @@ +! SH __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. 
+ + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! s2_ptr r6 +! size r7 + + .text + .align 2 + .global ___gmpn_add_n +___gmpn_add_n: + mov #0,r3 ! clear cy save reg + +Loop: mov.l @r5+,r1 ! r1 = next s1 limb, s1_ptr advances + mov.l @r6+,r2 ! r2 = next s2 limb, s2_ptr advances + shlr r3 ! restore cy + addc r2,r1 ! r1 = s1 limb plus s2 limb plus cy + movt r3 ! save cy + mov.l r1,@r4 ! store sum limb + dt r7 ! size -= 1, T = 1 once it reaches zero + bf.s Loop ! delayed branch: repeat while limbs remain + add #4,r4 ! delay slot: advance res_ptr + + rts + mov r3,r0 ! return carry-out from most sign. limb diff --git a/rts/gmp/mpn/sh/sh2/addmul_1.s b/rts/gmp/mpn/sh/sh2/addmul_1.s new file mode 100644 index 0000000000..f34a7f0503 --- /dev/null +++ b/rts/gmp/mpn/sh/sh2/addmul_1.s @@ -0,0 +1,53 @@ +! SH2 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +! the result to a second limb vector. + +! Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___gmpn_addmul_1 +___gmpn_addmul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 ! r3 = next s1 limb, s1_ptr advances + dmulu.l r3,r7 ! mach:macl = unsigned 64-bit product of s1 limb and s2_limb + sts macl,r1 ! r1 = lo_prod + addc r2,r1 ! lo_prod += old cy_limb + sts mach,r2 ! new cy_limb = hi_prod + mov.l @r4,r3 ! r3 = current res limb + addc r0,r2 ! cy_limb += T, T = 0 + addc r3,r1 ! lo_prod += res limb + addc r0,r2 !
cy_limb += T, T = 0 + dt r6 ! size -= 1, T = 1 once it reaches zero + mov.l r1,@r4 ! store updated res limb + bf.s Loop ! delayed branch: repeat while limbs remain + add #4,r4 ! delay slot: advance res_ptr + + rts + mov r2,r0 ! delay slot: return cy_limb diff --git a/rts/gmp/mpn/sh/sh2/mul_1.s b/rts/gmp/mpn/sh/sh2/mul_1.s new file mode 100644 index 0000000000..2a117a3175 --- /dev/null +++ b/rts/gmp/mpn/sh/sh2/mul_1.s @@ -0,0 +1,50 @@ +! SH2 __gmpn_mul_1 -- Multiply a limb vector with a limb and store +! the result in a second limb vector. + +! Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___gmpn_mul_1 +___gmpn_mul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 ! r3 = next s1 limb, s1_ptr advances + dmulu.l r3,r7 ! mach:macl = unsigned 64-bit product of s1 limb and s2_limb + sts macl,r1 ! r1 = lo_prod + addc r2,r1 ! lo_prod += old cy_limb + sts mach,r2 ! new cy_limb = hi_prod + addc r0,r2 ! propagate carry to cy_limb (dt clobbers T) + dt r6 ! size -= 1, T = 1 once it reaches zero + mov.l r1,@r4 ! store result limb + bf.s Loop ! delayed branch: repeat while limbs remain + add #4,r4 ! delay slot: advance res_ptr + + rts + mov r2,r0 ! delay slot: return cy_limb diff --git a/rts/gmp/mpn/sh/sh2/submul_1.s b/rts/gmp/mpn/sh/sh2/submul_1.s new file mode 100644 index 0000000000..eb9a27dde3 --- /dev/null +++ b/rts/gmp/mpn/sh/sh2/submul_1.s @@ -0,0 +1,53 @@ +! SH2 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract +! the result from a second limb vector. + +!
Copyright (C) 1995, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! size r6 +! s2_limb r7 + + .text + .align 1 + .global ___gmpn_submul_1 +___gmpn_submul_1: + mov #0,r2 ! cy_limb = 0 + mov #0,r0 ! Keep r0 = 0 for entire loop + clrt + +Loop: mov.l @r5+,r3 ! r3 = next s1 limb, s1_ptr advances + dmulu.l r3,r7 ! mach:macl = unsigned 64-bit product of s1 limb and s2_limb + sts macl,r1 ! r1 = lo_prod + addc r2,r1 ! lo_prod += old cy_limb + sts mach,r2 ! new cy_limb = hi_prod + mov.l @r4,r3 ! r3 = current res limb + addc r0,r2 ! cy_limb += T, T = 0 + subc r1,r3 ! res limb -= lo_prod, T = borrow (subc Rm,Rn is Rn - Rm - T) + addc r0,r2 ! cy_limb += T, T = 0 + dt r6 ! size -= 1, T = 1 once it reaches zero + mov.l r3,@r4 ! store the difference, not the product + bf.s Loop ! delayed branch: repeat while limbs remain + add #4,r4 ! delay slot: advance res_ptr + + rts + mov r2,r0 ! delay slot: return cy_limb (the borrow-out limb) diff --git a/rts/gmp/mpn/sh/sub_n.s b/rts/gmp/mpn/sh/sub_n.s new file mode 100644 index 0000000000..5f818c95a8 --- /dev/null +++ b/rts/gmp/mpn/sh/sub_n.s @@ -0,0 +1,47 @@ +! SH __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store +! difference in a third limb vector. + +! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +!
the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r4 +! s1_ptr r5 +! s2_ptr r6 +! size r7 + + .text + .align 2 + .global ___gmpn_sub_n +___gmpn_sub_n: + mov #0,r3 ! clear cy save reg + +Loop: mov.l @r5+,r1 + mov.l @r6+,r2 + shlr r3 ! restore cy + subc r2,r1 + movt r3 ! save cy + mov.l r1,@r4 + dt r7 + bf.s Loop + add #4,r4 + + rts + mov r3,r0 ! return carry-out from most sign. limb diff --git a/rts/gmp/mpn/sparc32/README b/rts/gmp/mpn/sparc32/README new file mode 100644 index 0000000000..7c19df7bc4 --- /dev/null +++ b/rts/gmp/mpn/sparc32/README @@ -0,0 +1,36 @@ +This directory contains mpn functions for various SPARC chips. Code that +runs only on version 8 SPARC implementations is in the v8 subdirectory. + +RELEVANT OPTIMIZATION ISSUES + + Load and Store timing + +On most early SPARC implementations, the ST instruction takes multiple +cycles, while a STD takes just a single cycle more than an ST. For the CPUs +in SPARCstation I and II, the times are 3 and 4 cycles, respectively. +Therefore, combining two ST instructions into a STD when possible is a +significant optimization. + +Later SPARC implementations have single cycle ST. + +For SuperSPARC, we can perform just one memory instruction per cycle, even +if up to two integer instructions can be executed in its pipeline.
For +programs that perform so many memory operations that there are not enough +non-memory operations to issue in parallel with all memory operations, using +LDD and STD when possible helps. + +STATUS + +1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5 + cycles/limb asymptotically. We could optimize speed for special counts + by using ADDXCC. + +2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2 + cycles/limb asymptotically. + +3. mpn_mul_1 runs at what is believed to be optimal speed. + +4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a + cycle by avoiding one of the add instructions. See a29k/addmul_1. + +The speed of the code for other SPARC implementations is uncertain. diff --git a/rts/gmp/mpn/sparc32/add_n.asm b/rts/gmp/mpn/sparc32/add_n.asm new file mode 100644 index 0000000000..5f1d00c0e0 --- /dev/null +++ b/rts/gmp/mpn/sparc32/add_n.asm @@ -0,0 +1,236 @@ +dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store +dnl sum in a third limb vector. + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +define(res_ptr,%o0) +define(s1_ptr,%o1) +define(s2_ptr,%o2) +define(n,%o3) + +ASM_START() +PROLOGUE(mpn_add_n) + xor s2_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L(1) C branch if alignment differs + nop +C ** V1a ** +L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0 + be L(v1) C if no, branch + nop +C Add least significant limb separately to align res_ptr and s2_ptr + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add n,-1,n + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr +L(v1): addx %g0,%g0,%o4 C save cy in register + cmp n,2 C if n < 2 ... + bl L(end2) C ... branch to tail code + subcc %g0,%o4,%g0 C restore cy + + ld [s1_ptr+0],%g4 C preload two s1 limbs with single-word loads + addcc n,-10,n + ld [s1_ptr+4],%g1 + ldd [s2_ptr+0],%g2 C preload two s2 limbs into the pair %g2,%g3 + blt L(fin1) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 8 limbs until less than 8 limbs remain +L(loop1): + addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] C store the pair %o4,%o5 as two result limbs + addxcc %g4,%g2,%o4 + ld [s1_ptr+16],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+20],%g1 + ldd [s2_ptr+16],%g2 + std %o4,[res_ptr+8] + addxcc %g4,%g2,%o4 + ld [s1_ptr+24],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+28],%g1 + ldd [s2_ptr+24],%g2 + std %o4,[res_ptr+16] + addxcc %g4,%g2,%o4 + ld [s1_ptr+32],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+36],%g1 + ldd [s2_ptr+32],%g2 + std %o4,[res_ptr+24] + addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge L(loop1) + subcc %g0,%o4,%g0 C restore cy + +L(fin1): + addcc n,8-2,n + blt L(end1) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 2 limbs until less than 2 limbs remain +L(loope1): + addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + addcc n,-2,n + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add
res_ptr,8,res_ptr + bge L(loope1) + subcc %g0,%o4,%g0 C restore cy +L(end1): + addxcc %g4,%g2,%o4 C add the two limb pairs still held in registers + addxcc %g1,%g3,%o5 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + + andcc n,1,%g0 + be L(ret1) + subcc %g0,%o4,%g0 C restore cy +C Add last limb + ld [s1_ptr+8],%g4 + ld [s2_ptr+8],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr+8] + +L(ret1): + retl + addx %g0,%g0,%o0 C return carry-out from most sign. limb + +L(1): xor s1_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L(2) + nop +C ** V1b ** + mov s2_ptr,%g1 C swap s1_ptr and s2_ptr so that the V1a code can be reused + mov s1_ptr,s2_ptr + b L(0) + mov %g1,s1_ptr C in delay slot: finish the swap + +C ** V2 ** +C If we come here, the alignment of s1_ptr and res_ptr as well as the +C alignment of s2_ptr and res_ptr differ. Since there are only two ways +C things can be aligned (that we care about) we now know that the alignment +C of s1_ptr and s2_ptr are the same. + +L(2): cmp n,1 C n equal to 1 leaves cy=0 for the addxcc at L(jone) + be L(jone) + nop + andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0 + be L(v2) C if no, branch + nop +C Add least significant limb separately to align s1_ptr and s2_ptr + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add n,-1,n + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr + +L(v2): addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + blt L(fin2) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 8 limbs until less than 8 limbs remain +L(loop2): + ldd [s1_ptr+0],%g2 C both sources are loaded pairwise, results stored one word at a time + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + ldd [s1_ptr+8],%g2 + ldd [s2_ptr+8],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+8] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+12] + ldd [s1_ptr+16],%g2 + ldd [s2_ptr+16],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+16] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+20] + ldd [s1_ptr+24],%g2 + ldd [s2_ptr+24],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+24] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+28] + addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add
res_ptr,32,res_ptr + bge L(loop2) + subcc %g0,%o4,%g0 C restore cy + +L(fin2): + addcc n,8-2,n + blt L(end2) + subcc %g0,%o4,%g0 C restore cy +L(loope2): C add blocks of 2 limbs until less than 2 limbs remain + ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + addx %g0,%g0,%o4 C save cy in register + addcc n,-2,n + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge L(loope2) + subcc %g0,%o4,%g0 C restore cy +L(end2): + andcc n,1,%g0 + be L(ret2) + subcc %g0,%o4,%g0 C restore cy +C Add last limb +L(jone): + ld [s1_ptr],%g4 + ld [s2_ptr],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr] + +L(ret2): + retl + addx %g0,%g0,%o0 C return carry-out from most sign. limb +EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/sparc32/addmul_1.asm b/rts/gmp/mpn/sparc32/addmul_1.asm new file mode 100644 index 0000000000..80c94e4251 --- /dev/null +++ b/rts/gmp/mpn/sparc32/addmul_1.asm @@ -0,0 +1,146 @@ +dnl SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the +dnl result to a second limb vector. + +dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_addmul_1) + C Make S1_PTR and RES_PTR point at the end of their blocks + C and put (- 4 x SIZE) in index/loop counter. + sll %o2,2,%o2 + add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu L(large) C S2_LIMB needs more than 12 bits: use the 32-step multiply + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L(0) + add %o4,-4,%o4 C in delay slot: bias RES_PTR, stores happen after the index has advanced +L(loop0): + addcc %o5,%g1,%g1 C add the res limb loaded in the delay slot to the low product limb + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g1,[%o4+%o2] +L(0): wr %g0,%o3,%y C y = S2_LIMB, the multiplier on this path + sra %o5,31,%g2 + and %o3,%g2,%g2 C g2 = S2_LIMB iff S1_LIMB < 0, else 0 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 C multiply step 1 of 12 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 C multiply step 12 of 12 + mulscc %g1,0,%g1 C final shift step + sra %g1,20,%g4 C g4 = high limb of the product + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 C g1 = low limb of the product + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb + addcc %o2,4,%o2 C loop counter + bne L(loop0) + ld [%o4+%o2],%o5 C in delay slot: load the res limb to be added + + addcc %o5,%g1,%g1 + addx %o0,%g0,%o0 + retl + st %g1,[%o4+%o2] + +L(large): + ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 + b L(1) + add %o4,-4,%o4 C in delay slot: bias RES_PTR, stores happen after the index has advanced +L(loop): + addcc %o5,%g3,%g3 C add the res limb loaded in the delay slot to the low product limb + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g3,[%o4+%o2] +L(1): wr %g0,%o5,%y C y = S1_LIMB, the multiplier on this path + and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 C multiply step 1 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 8 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 16 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 24 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 32 of 32 + mulscc %g1,%g0,%g1 C final shift step + rd %y,%g3 C g3 = low limb of the product + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb + addcc %o2,4,%o2 C loop counter + bne L(loop) + ld [%o4+%o2],%o5 C in delay slot: load the res limb to be added + + addcc %o5,%g3,%g3 + addx %o0,%g0,%o0 + retl + st %g3,[%o4+%o2] +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/lshift.asm b/rts/gmp/mpn/sparc32/lshift.asm new file mode 100644 index 0000000000..529733ac2d --- /dev/null +++ b/rts/gmp/mpn/sparc32/lshift.asm @@ -0,0 +1,97 @@ +dnl SPARC mpn_lshift -- Shift a number left. +dnl + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr %o0 +C src_ptr %o1 +C size %o2 +C cnt %o3 + +ASM_START() +PROLOGUE(mpn_lshift) + sll %o2,2,%g1 C g1 = size in bytes + add %o1,%g1,%o1 C make %o1 point at end of src + ld [%o1-4],%g2 C load first limb + sub %g0,%o3,%o5 C negate shift count + add %o0,%g1,%o0 C make %o0 point at end of res + add %o2,-1,%o2 C size-1 limbs go through the loops, the last is finished at L(end) + andcc %o2,4-1,%g4 C number of limbs in first loop + srl %g2,%o5,%g1 C compute function result + be L(0) C if multiple of 4 limbs, skip first loop + st %g1,[%sp+80] C in delay slot: park the function result in the stack frame + + sub %o2,%g4,%o2 C adjust count for main loop + +L(loop0): + ld [%o1-8],%g3 C next lower source limb + add %o0,-4,%o0 + add %o1,-4,%o1 + addcc %g4,-1,%g4 + sll %g2,%o3,%o4 C upper part comes from the current limb + srl %g3,%o5,%g1 C lower part comes from the next lower limb + mov %g3,%g2 + or %o4,%g1,%o4 + bne L(loop0) + st %o4,[%o0+0] C in delay slot: store the result limb + +L(0): tst %o2 + be L(end) + nop + +L(loop): + ld [%o1-8],%g3 C 4 limbs per pass, %g2 and %g3 alternate as current limb + add %o0,-16,%o0 + addcc %o2,-4,%o2 + sll %g2,%o3,%o4 + srl %g3,%o5,%g1 + + ld [%o1-12],%g2 + sll %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0+12] + srl %g2,%o5,%g1 + + ld [%o1-16],%g3 + sll %g2,%o3,%o4 + or %g4,%g1,%g4 + st %g4,[%o0+8] + srl %g3,%o5,%g1 + + ld [%o1-20],%g2 + sll %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0+4] + srl %g2,%o5,%g1 + + add %o1,-16,%o1 + or %g4,%g1,%g4 + bne L(loop) + st %g4,[%o0+0] C in delay slot: store the fourth result limb + +L(end): sll %g2,%o3,%g2 C lowest limb: zeros are shifted in + st %g2,[%o0-4] + retl + ld [%sp+80],%o0 C in delay slot: fetch the parked function result +EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/sparc32/mul_1.asm b/rts/gmp/mpn/sparc32/mul_1.asm new file mode 100644 index 0000000000..e5fedeabaa --- /dev/null +++ b/rts/gmp/mpn/sparc32/mul_1.asm @@ -0,0 +1,137 @@ +dnl SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store +dnl the result in a second limb vector. + +dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library.
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_mul_1) + C Make S1_PTR and RES_PTR point at the end of their blocks + C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2 + add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff + bgu L(large) C S2_LIMB needs more than 12 bits: use the 32-step multiply + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 + b L(0) + add %o4,-4,%o4 C in delay slot: bias RES_PTR, stores happen after the index has advanced +L(loop0): + st %g1,[%o4+%o2] C store the limb computed by the previous pass +L(0): wr %g0,%o3,%y C y = S2_LIMB, the multiplier on this path + sra %o5,31,%g2 + and %o3,%g2,%g2 C g2 = S2_LIMB iff S1_LIMB < 0, else 0 + andcc %g1,0,%g1 + mulscc %g1,%o5,%g1 C multiply step 1 of 12 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 C multiply step 12 of 12 + mulscc %g1,0,%g1 C final shift step + sra %g1,20,%g4 C g4 = high limb of the product + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 C g1 = low limb of the product + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb + addcc %o2,4,%o2 C loop counter + bne,a L(loop0) + ld [%o1+%o2],%o5 C annulled delay slot: next S1 limb is loaded only when looping + + retl + st %g1,[%o4+%o2] + + +L(large): + ld [%o1+%o2],%o5 + mov 0,%o0 + sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 + b L(1) + add %o4,-4,%o4 C in delay slot: bias RES_PTR, stores happen after the index has advanced +L(loop): + st %g3,[%o4+%o2] C store the limb computed by the previous pass +L(1): wr %g0,%o5,%y C y = S1_LIMB, the multiplier on this path + and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0 + andcc %g0,%g0,%g1 + mulscc %g1,%o3,%g1 C multiply step 1 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 8 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 16 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 24 of 32 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 C multiply step 32 of 32 + mulscc %g1,%g0,%g1 C final shift step + rd %y,%g3 C g3 = low limb of the product + addcc %g3,%o0,%g3 + addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb + addcc %o2,4,%o2 C loop counter + bne,a L(loop) + ld [%o1+%o2],%o5 C annulled delay slot: next S1 limb is loaded only when looping + + retl + st %g3,[%o4+%o2] +EPILOGUE(mpn_mul_1) diff
--git a/rts/gmp/mpn/sparc32/rshift.asm b/rts/gmp/mpn/sparc32/rshift.asm new file mode 100644 index 0000000000..9187dbaa6f --- /dev/null +++ b/rts/gmp/mpn/sparc32/rshift.asm @@ -0,0 +1,93 @@ +dnl SPARC mpn_rshift -- Shift a number right. + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr %o0 +C src_ptr %o1 +C size %o2 +C cnt %o3 + +ASM_START() +PROLOGUE(mpn_rshift) + ld [%o1],%g2 C load first limb + sub %g0,%o3,%o5 C negate shift count + add %o2,-1,%o2 C size-1 limbs go through the loops, the last is finished at L(end) + andcc %o2,4-1,%g4 C number of limbs in first loop + sll %g2,%o5,%g1 C compute function result + be L(0) C if multiple of 4 limbs, skip first loop + st %g1,[%sp+80] C in delay slot: park the function result in the stack frame + + sub %o2,%g4,%o2 C adjust count for main loop + +L(loop0): + ld [%o1+4],%g3 C next higher source limb + add %o0,4,%o0 + add %o1,4,%o1 + addcc %g4,-1,%g4 + srl %g2,%o3,%o4 C lower part comes from the current limb + sll %g3,%o5,%g1 C upper part comes from the next higher limb + mov %g3,%g2 + or %o4,%g1,%o4 + bne L(loop0) + st %o4,[%o0-4] C in delay slot: store the result limb + +L(0): tst %o2 + be L(end) + nop + +L(loop): + ld [%o1+4],%g3 C 4 limbs per pass, %g2 and %g3 alternate as current limb + add %o0,16,%o0 + addcc %o2,-4,%o2 + srl %g2,%o3,%o4 + sll %g3,%o5,%g1 + + ld [%o1+8],%g2 + srl %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0-16] + sll %g2,%o5,%g1 + + ld [%o1+12],%g3 + srl %g2,%o3,%o4 + or %g4,%g1,%g4 + st %g4,[%o0-12] + sll %g3,%o5,%g1 + + ld [%o1+16],%g2 + srl %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0-8] + sll %g2,%o5,%g1 + + add %o1,16,%o1 + or %g4,%g1,%g4 + bne L(loop) + st %g4,[%o0-4] C in delay slot: store the fourth result limb + +L(end): srl %g2,%o3,%g2 C highest limb: zeros are shifted in + st %g2,[%o0-0] + retl + ld [%sp+80],%o0 C in delay slot: fetch the parked function result +EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/sparc32/sub_n.asm b/rts/gmp/mpn/sparc32/sub_n.asm new file mode 100644 index 0000000000..071909a1b6 --- /dev/null +++ b/rts/gmp/mpn/sparc32/sub_n.asm @@ -0,0 +1,326 @@ +dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +dnl store difference in a third limb vector. + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version.
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +define(res_ptr,%o0) +define(s1_ptr,%o1) +define(s2_ptr,%o2) +define(n,%o3) + +ASM_START() +PROLOGUE(mpn_sub_n) + xor s2_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L(1) C branch if alignment differs + nop +C ** V1a ** + andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0 + be L(v1) C if no, branch + nop +C Add least significant limb separately to align res_ptr and s2_ptr + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add n,-1,n + subcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr +L(v1): addx %g0,%g0,%o4 C save cy in register + cmp n,2 C if n < 2 ... + bl L(end2) C ... 
branch to tail code + subcc %g0,%o4,%g0 C restore cy + + ld [s1_ptr+0],%g4 + addcc n,-10,n + ld [s1_ptr+4],%g1 + ldd [s2_ptr+0],%g2 + blt L(fin1) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 8 limbs until less than 8 limbs remain +L(loop1): + subxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + subxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + subxcc %g4,%g2,%o4 + ld [s1_ptr+16],%g4 + subxcc %g1,%g3,%o5 + ld [s1_ptr+20],%g1 + ldd [s2_ptr+16],%g2 + std %o4,[res_ptr+8] + subxcc %g4,%g2,%o4 + ld [s1_ptr+24],%g4 + subxcc %g1,%g3,%o5 + ld [s1_ptr+28],%g1 + ldd [s2_ptr+24],%g2 + std %o4,[res_ptr+16] + subxcc %g4,%g2,%o4 + ld [s1_ptr+32],%g4 + subxcc %g1,%g3,%o5 + ld [s1_ptr+36],%g1 + ldd [s2_ptr+32],%g2 + std %o4,[res_ptr+24] + addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge L(loop1) + subcc %g0,%o4,%g0 C restore cy + +L(fin1): + addcc n,8-2,n + blt L(end1) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 2 limbs until less than 2 limbs remain +L(loope1): + subxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + subxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + addcc n,-2,n + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge L(loope1) + subcc %g0,%o4,%g0 C restore cy +L(end1): + subxcc %g4,%g2,%o4 + subxcc %g1,%g3,%o5 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + + andcc n,1,%g0 + be L(ret1) + subcc %g0,%o4,%g0 C restore cy +C Add last limb + ld [s1_ptr+8],%g4 + ld [s2_ptr+8],%g2 + subxcc %g4,%g2,%o4 + st %o4,[res_ptr+8] + +L(ret1): + retl + addx %g0,%g0,%o0 C return carry-out from most sign. limb + +L(1): xor s1_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L(2) + nop +C ** V1b ** + andcc res_ptr,4,%g0 C res_ptr unaligned? 
Side effect: cy=0 + be L(v1b) C if no, branch + nop +C Add least significant limb separately to align res_ptr and s1_ptr + ld [s2_ptr],%g4 + add s2_ptr,4,s2_ptr + ld [s1_ptr],%g2 + add s1_ptr,4,s1_ptr + add n,-1,n + subcc %g2,%g4,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr +L(v1b): addx %g0,%g0,%o4 C save cy in register + cmp n,2 C if n < 2 ... + bl L(end2) C ... branch to tail code + subcc %g0,%o4,%g0 C restore cy + + ld [s2_ptr+0],%g4 + addcc n,-10,n + ld [s2_ptr+4],%g1 + ldd [s1_ptr+0],%g2 + blt L(fin1b) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 8 limbs until less than 8 limbs remain +L(loop1b): + subxcc %g2,%g4,%o4 + ld [s2_ptr+8],%g4 + subxcc %g3,%g1,%o5 + ld [s2_ptr+12],%g1 + ldd [s1_ptr+8],%g2 + std %o4,[res_ptr+0] + subxcc %g2,%g4,%o4 + ld [s2_ptr+16],%g4 + subxcc %g3,%g1,%o5 + ld [s2_ptr+20],%g1 + ldd [s1_ptr+16],%g2 + std %o4,[res_ptr+8] + subxcc %g2,%g4,%o4 + ld [s2_ptr+24],%g4 + subxcc %g3,%g1,%o5 + ld [s2_ptr+28],%g1 + ldd [s1_ptr+24],%g2 + std %o4,[res_ptr+16] + subxcc %g2,%g4,%o4 + ld [s2_ptr+32],%g4 + subxcc %g3,%g1,%o5 + ld [s2_ptr+36],%g1 + ldd [s1_ptr+32],%g2 + std %o4,[res_ptr+24] + addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge L(loop1b) + subcc %g0,%o4,%g0 C restore cy + +L(fin1b): + addcc n,8-2,n + blt L(end1b) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 2 limbs until less than 2 limbs remain +L(loope1b): + subxcc %g2,%g4,%o4 + ld [s2_ptr+8],%g4 + subxcc %g3,%g1,%o5 + ld [s2_ptr+12],%g1 + ldd [s1_ptr+8],%g2 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + addcc n,-2,n + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge L(loope1b) + subcc %g0,%o4,%g0 C restore cy +L(end1b): + subxcc %g2,%g4,%o4 + subxcc %g3,%g1,%o5 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 C save cy in register + + andcc n,1,%g0 + be L(ret1b) + subcc %g0,%o4,%g0 C restore cy +C Add last limb + ld [s2_ptr+8],%g4 + ld [s1_ptr+8],%g2 + subxcc 
%g2,%g4,%o4 + st %o4,[res_ptr+8] + +L(ret1b): + retl + addx %g0,%g0,%o0 C return carry-out from most sign. limb + +C ** V2 ** +C If we come here, the alignment of s1_ptr and res_ptr as well as the +C alignment of s2_ptr and res_ptr differ. Since there are only two ways +C things can be aligned (that we care about) we now know that the alignment +C of s1_ptr and s2_ptr are the same. + +L(2): cmp n,1 + be L(jone) + nop + andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0 + be L(v2) C if no, branch + nop +C Add least significant limb separately to align s1_ptr and s2_ptr + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add n,-1,n + subcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr + +L(v2): addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + blt L(fin2) + subcc %g0,%o4,%g0 C restore cy +C Add blocks of 8 limbs until less than 8 limbs remain +L(loop2): + ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + subxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + subxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + ldd [s1_ptr+8],%g2 + ldd [s2_ptr+8],%o4 + subxcc %g2,%o4,%g2 + st %g2,[res_ptr+8] + subxcc %g3,%o5,%g3 + st %g3,[res_ptr+12] + ldd [s1_ptr+16],%g2 + ldd [s2_ptr+16],%o4 + subxcc %g2,%o4,%g2 + st %g2,[res_ptr+16] + subxcc %g3,%o5,%g3 + st %g3,[res_ptr+20] + ldd [s1_ptr+24],%g2 + ldd [s2_ptr+24],%o4 + subxcc %g2,%o4,%g2 + st %g2,[res_ptr+24] + subxcc %g3,%o5,%g3 + st %g3,[res_ptr+28] + addx %g0,%g0,%o4 C save cy in register + addcc n,-8,n + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge L(loop2) + subcc %g0,%o4,%g0 C restore cy + +L(fin2): + addcc n,8-2,n + blt L(end2) + subcc %g0,%o4,%g0 C restore cy +L(loope2): + ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + subxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + subxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + addx %g0,%g0,%o4 C save cy in register + addcc n,-2,n + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge L(loope2) + subcc %g0,%o4,%g0 C restore cy +L(end2): 
+ andcc n,1,%g0 + be L(ret2) + subcc %g0,%o4,%g0 C restore cy +C Subtract last limb +L(jone): + ld [s1_ptr],%g4 + ld [s2_ptr],%g2 + subxcc %g4,%g2,%o4 + st %o4,[res_ptr] + +L(ret2): + retl + addx %g0,%g0,%o0 C return carry-out from most sign. limb +EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/sparc32/submul_1.asm b/rts/gmp/mpn/sparc32/submul_1.asm new file mode 100644 index 0000000000..12abd844ce --- /dev/null +++ b/rts/gmp/mpn/sparc32/submul_1.asm @@ -0,0 +1,146 @@ +dnl SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract +dnl the result from a second limb vector. + +dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_submul_1) + C Make S1_PTR and RES_PTR point at the end of their blocks + C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2 + add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval + add %o1,%o2,%o1 + sub %g0,%o2,%o2 + + cmp %o3,0xfff C is s2_limb above 12 bits? + bgu L(large) C yes: take the 32-step multiply path + nop + + ld [%o1+%o2],%o5 + mov 0,%o0 C cy_limb = 0 + b L(0) + add %o4,-4,%o4 C delay slot: bias RES_PTR by -4, it is indexed after o2 has been advanced +L(loop0): + subcc %o5,%g1,%g1 C res limb minus low product limb + ld [%o1+%o2],%o5 C next s1 limb + addx %o0,%g0,%o0 C count the borrow in cy_limb + st %g1,[%o4+%o2] +L(0): wr %g0,%o3,%y C Y = s2_limb + sra %o5,31,%g2 + and %o3,%g2,%g2 C g2 = s2_limb if top bit of s1 limb is set, else 0 + andcc %g1,0,%g1 C g1 = 0, N and V reset for the first mulscc + mulscc %g1,%o5,%g1 C 12 multiply steps, enough for s2_limb up to 0xfff + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,%o5,%g1 + mulscc %g1,0,%g1 C final shift step + sra %g1,20,%g4 C g4 = high limb of product + sll %g1,12,%g1 + rd %y,%g3 + srl %g3,20,%g3 + or %g1,%g3,%g1 C g1 = low limb of product + + addcc %g1,%o0,%g1 + addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb + addcc %o2,4,%o2 C loop counter + bne L(loop0) + ld [%o4+%o2],%o5 C delay slot: load res limb + + subcc %o5,%g1,%g1 + addx %o0,%g0,%o0 + retl + st %g1,[%o4+%o2] + +L(large): + ld [%o1+%o2],%o5 + mov 0,%o0 C cy_limb = 0 + sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0 + b L(1) + add %o4,-4,%o4 C delay slot: same RES_PTR bias as on the short path +L(loop): + subcc %o5,%g3,%g3 + ld [%o1+%o2],%o5 + addx %o0,%g0,%o0 + st %g3,[%o4+%o2] +L(1): wr %g0,%o5,%y C Y = s1 limb + and %o5,%g4,%g2 C g2 = s1 limb if S2_LIMB < 0, else 0 + andcc %g0,%g0,%g1 C g1 = 0, N and V reset for the first mulscc + mulscc %g1,%o3,%g1 C 32 multiply steps + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%o3,%g1 + mulscc %g1,%g0,%g1 C final shift step + rd %y,%g3 C g3 = low limb of product + addcc %g3,%o0,%g3 C add cy_limb + addx %g2,%g1,%o0 C cy_limb = high limb, sign-compensation and carry + addcc %o2,4,%o2 + bne L(loop) + ld [%o4+%o2],%o5 +
+ subcc %o5,%g3,%g3 + addx %o0,%g0,%o0 + retl + st %g3,[%o4+%o2] +EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/sparc32/udiv_fp.asm b/rts/gmp/mpn/sparc32/udiv_fp.asm new file mode 100644 index 0000000000..e340e147d2 --- /dev/null +++ b/rts/gmp/mpn/sparc32/udiv_fp.asm @@ -0,0 +1,158 @@ +dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h. +dnl This is for v7 CPUs with a floating-point unit. + +dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C rem_ptr i0 +C n1 i1 +C n0 i2 +C d i3 + +ASM_START() + +ifdef(`PIC', +` TEXT +L(getpc): + retl + nop') + + TEXT + ALIGN(8) +L(C0): .double 0r4294967296 C 2^32 +L(C1): .double 0r2147483648 C 2^31 + +PROLOGUE(mpn_udiv_qrnnd) + save %sp,-104,%sp + st %i1,[%fp-8] C move n1 to the FPU through the stack + ld [%fp-8],%f10 + +ifdef(`PIC', +`L(pc): call L(getpc) C put address of this insn in %o7 + ldd [%o7+L(C0)-L(pc)],%f8', +` sethi %hi(L(C0)),%o7 + ldd [%o7+%lo(L(C0))],%f8') + + fitod %f10,%f4 C signed conversion of n1 + cmp %i1,0 + bge L(248) + mov %i0,%i5 C delay slot: keep rem_ptr in i5, i0 later receives the quotient + faddd %f4,%f8,%f4 C top bit of n1 was set: add 2^32 to undo the signed conversion +L(248): + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 C f6 = n1 times 2^32 + cmp %i2,0 + bge L(249) + fitod %f10,%f2 C delay slot: signed conversion of n0 + faddd %f2,%f8,%f2 C same 2^32 correction for n0 +L(249): + st %i3,[%fp-8] + faddd %f6,%f2,%f2 C f2 = n1 times 2^32 plus n0 + ld [%fp-8],%f10 + cmp %i3,0 + bge L(250) + fitod %f10,%f4 C delay slot: signed conversion of d + faddd %f4,%f8,%f4 C same 2^32 correction for d +L(250): + fdivd %f2,%f4,%f2 C f2 = floating-point quotient estimate + +ifdef(`PIC', +` ldd [%o7+L(C1)-L(pc)],%f4', +` sethi %hi(L(C1)),%o7 + ldd [%o7+%lo(L(C1))],%f4') + + fcmped %f2,%f4 + nop + fbge,a L(251) C estimate is at least 2^31 + fsubd %f2,%f4,%f2 C annulled delay slot: subtract 2^31 only when the branch is taken + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L(252) + ld [%fp-8],%i4 C i4 = integer quotient estimate q +L(251): + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 C put the 2^31 bit back into q +L(252): + wr %g0,%i4,%y C Y = q + sra %i3,31,%g2 + and %i4,%g2,%g2 C g2 = q if top bit of d is set, else 0 + andcc %g0,0,%g1 C g1 = 0, N and V reset for the first mulscc + mulscc %g1,%i3,%g1 C 32 multiply steps forming q times d + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,%i3,%g1 + mulscc %g1,0,%g1 C final shift step + add %g1,%g2,%i0 C i0 = high word of q times d + rd %y,%g3 C g3 = low word of q times d + subcc %i2,%g3,%o7 C o7 = n0 minus low word + subxcc %i1,%i0,%g0 C compare n1 with high word plus borrow
+ be L(253) C high words agree + cmp %o7,%i3 C delay slot: compare remainder candidate with d + + add %i4,-1,%i0 C high words differ: return q-1 + add %o7,%i3,%o7 C and add d to the remainder + st %o7,[%i5] + ret + restore +L(253): + blu L(246) C remainder below d: return q unchanged + mov %i4,%i0 + add %i4,1,%i0 C otherwise return q plus 1 + sub %o7,%i3,%o7 C and subtract d from the remainder +L(246): + st %o7,[%i5] C store remainder through the saved rem_ptr + ret + restore +EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/udiv_nfp.asm b/rts/gmp/mpn/sparc32/udiv_nfp.asm new file mode 100644 index 0000000000..ae19f4c6e9 --- /dev/null +++ b/rts/gmp/mpn/sparc32/udiv_nfp.asm @@ -0,0 +1,193 @@ +dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h. +dnl This is for v7 CPUs without a floating-point unit. + +dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C rem_ptr o0 +C n1 o1 +C n0 o2 +C d o3 + +ASM_START() +PROLOGUE(mpn_udiv_qrnnd) + tst %o3 + bneg L(largedivisor) C top bit of d is set + mov 8,%g1 C delay slot: loop count, 8 passes of 4 unrolled steps + + b L(p1) + addxcc %o2,%o2,%o2 + +L(plop): + bcc L(n1) + addxcc %o2,%o2,%o2 +L(p1): addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc L(n2) + addxcc %o2,%o2,%o2 +L(p2): addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc L(n3) + addxcc %o2,%o2,%o2 +L(p3): addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc L(n4) + addxcc %o2,%o2,%o2 +L(p4): addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne L(plop) + subcc %o1,%o3,%o4 + bcc L(n5) + addxcc %o2,%o2,%o2 +L(p5): st %o1,[%o0] C store o1 through rem_ptr + retl + xnor %g0,%o2,%o0 C delay slot: return the bitwise complement of o2 + +L(nlop): + bcc L(p1) + addxcc %o2,%o2,%o2 +L(n1): addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc L(p2) + addxcc %o2,%o2,%o2 +L(n2): addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc L(p3) + addxcc %o2,%o2,%o2 +L(n3): addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc L(p4) + addxcc %o2,%o2,%o2 +L(n4): addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne L(nlop) + subcc %o4,%o3,%o1 + bcc L(p5) + addxcc %o2,%o2,%o2 +L(n5): st %o4,[%o0] C on this path the value stored through rem_ptr is in o4 + retl + xnor %g0,%o2,%o0 C delay slot: return the bitwise complement of o2 + +L(largedivisor): + and %o2,1,%o5 C %o5 = n0 & 1 + + srl %o2,1,%o2 + sll %o1,31,%g2 + or %g2,%o2,%o2 C %o2 = lo(n1n0 >> 1) + srl %o1,1,%o1 C %o1 = hi(n1n0 >> 1) + + and %o3,1,%g2 C %g2 = d & 1 + srl %o3,1,%g3 C %g3 = floor(d / 2) + add %g3,%g2,%g3 C %g3 = ceil(d / 2) + + b L(Lp1) + addxcc %o2,%o2,%o2 + +L(Lplop): + bcc L(Ln1) + addxcc %o2,%o2,%o2 +L(Lp1): addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc L(Ln2) + addxcc %o2,%o2,%o2 +L(Lp2): addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc L(Ln3) + addxcc %o2,%o2,%o2 +L(Lp3): addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc L(Ln4) + addxcc %o2,%o2,%o2 +L(Lp4): addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne L(Lplop) + subcc %o1,%g3,%o4 + bcc L(Ln5) + addxcc %o2,%o2,%o2 +L(Lp5): add %o1,%o1,%o1 C << 1 + tst %g2 C was d odd? + bne L(oddp) + add %o5,%o1,%o1 C delay slot: add back the n0 bit saved in o5 + st %o1,[%o0] + retl + xnor %g0,%o2,%o0 + +L(Lnlop): + bcc L(Lp1) + addxcc %o2,%o2,%o2 +L(Ln1): addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc L(Lp2) +
addxcc %o2,%o2,%o2 +L(Ln2): addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc L(Lp3) + addxcc %o2,%o2,%o2 +L(Ln3): addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc L(Lp4) + addxcc %o2,%o2,%o2 +L(Ln4): addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne L(Lnlop) + subcc %o4,%g3,%o1 + bcc L(Lp5) + addxcc %o2,%o2,%o2 +L(Ln5): add %o4,%o4,%o4 C << 1 + tst %g2 C was d odd? + bne L(oddn) + add %o5,%o4,%o4 C delay slot: add back the n0 bit saved in o5 + st %o4,[%o0] + retl + xnor %g0,%o2,%o0 + +L(oddp): + xnor %g0,%o2,%o2 + C q' in %o2. r' in %o1 + addcc %o1,%o2,%o1 + bcc L(Lp6) + addx %o2,0,%o2 + sub %o1,%o3,%o1 +L(Lp6): subcc %o1,%o3,%g0 + bcs L(Lp7) + subx %o2,-1,%o2 + sub %o1,%o3,%o1 +L(Lp7): st %o1,[%o0] + retl + mov %o2,%o0 C delay slot: return o2 + +L(oddn): + xnor %g0,%o2,%o2 + C q' in %o2. r' in %o4 + addcc %o4,%o2,%o4 + bcc L(Ln6) + addx %o2,0,%o2 + sub %o4,%o3,%o4 +L(Ln6): subcc %o4,%o3,%g0 + bcs L(Ln7) + subx %o2,-1,%o2 + sub %o4,%o3,%o4 +L(Ln7): st %o4,[%o0] + retl + mov %o2,%o0 C delay slot: return o2 +EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/umul.asm b/rts/gmp/mpn/sparc32/umul.asm new file mode 100644 index 0000000000..efa56851d6 --- /dev/null +++ b/rts/gmp/mpn/sparc32/umul.asm @@ -0,0 +1,68 @@ +dnl SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc. + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB.
If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_umul_ppmm) + wr %g0,%o1,%y C Y = operand in o1 + sra %o2,31,%g2 C Don't move this insn + and %o1,%g2,%g2 C Don't move this insn + andcc %g0,0,%g1 C Don't move this insn + mulscc %g1,%o2,%g1 C 32 multiply steps + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,%o2,%g1 + mulscc %g1,0,%g1 C final shift step + rd %y,%g3 C g3 = low word of the product + st %g3,[%o0] C store low word through the pointer in o0 + retl + add %g1,%g2,%o0 C delay slot: return high word plus sign compensation +EPILOGUE(mpn_umul_ppmm) diff --git a/rts/gmp/mpn/sparc32/v8/addmul_1.asm b/rts/gmp/mpn/sparc32/v8/addmul_1.asm new file mode 100644 index 0000000000..da44644b51 --- /dev/null +++ b/rts/gmp/mpn/sparc32/v8/addmul_1.asm @@ -0,0 +1,122 @@ +dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and +dnl add the result to a second limb vector. + +dnl Copyright (C) 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version.
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_addmul_1) + orcc %g0,%g0,%g2 C g2 = 0, carry flag cleared + ld [%o1+0],%o4 C 1 + + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 C g1 = 16 times (size mod 4): byte offset of an entry stub below +ifdef(`PIC', +` mov %o7,%g4 C Save return address register +0: call 1f + add %o7,L(1)-0b,%g3 +1: mov %g4,%o7 C Restore return address register +', +` sethi %hi(L(1)),%g3 + or %g3,%lo(L(1)),%g3 +') + jmp %g3+%g1 C enter at L00, L01, L10 or L11; each stub is four instructions + nop +L(1): +L(L00): add %o0,-4,%o0 + b L(loop00) C 4, 8, 12, ... + add %o1,-4,%o1 + nop +L(L01): b L(loop01) C 1, 5, 9, ... + nop + nop + nop +L(L10): add %o0,-12,%o0 C 2, 6, 10, ... + b L(loop10) + add %o1,4,%o1 + nop +L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ b L(loop11) + add %o1,-8,%o1 + nop + +L(loop): + addcc %g3,%g2,%g3 C 1 + ld [%o1+4],%o4 C 2 + rd %y,%g2 C 1 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 C 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] C 1 +L(loop00): + umul %o4,%o3,%g3 C 2 + ld [%o0+4],%g1 C 2 + addxcc %g3,%g2,%g3 C 2 + ld [%o1+8],%o4 C 3 + rd %y,%g2 C 2 + addx %g0,%g2,%g2 + nop + addcc %g1,%g3,%g3 + st %g3,[%o0+4] C 2 +L(loop11): + umul %o4,%o3,%g3 C 3 + addxcc %g3,%g2,%g3 C 3 + ld [%o1+12],%o4 C 4 + rd %y,%g2 C 3 + add %o1,16,%o1 + addx %g0,%g2,%g2 + ld [%o0+8],%g1 C 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+8] C 3 +L(loop10): + umul %o4,%o3,%g3 C 4 + addxcc %g3,%g2,%g3 C 4 + ld [%o1+0],%o4 C 1 + rd %y,%g2 C 4 + addx %g0,%g2,%g2 + ld [%o0+12],%g1 C 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+12] C 4 + add %o0,16,%o0 + addx %g0,%g2,%g2 +L(loop01): + addcc %o2,-4,%o2 + bg L(loop) + umul %o4,%o3,%g3 C 1 + + addcc %g3,%g2,%g3 C 4 + rd %y,%g2 C 4 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 C 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] C 4 + addx %g0,%g2,%o0 C return value = g2 plus the final carry + + retl + nop +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/v8/mul_1.asm b/rts/gmp/mpn/sparc32/v8/mul_1.asm new file mode 100644 index 0000000000..801247553a --- /dev/null +++ b/rts/gmp/mpn/sparc32/v8/mul_1.asm @@ -0,0 +1,103 @@ +dnl SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and +dnl store the product in a second limb vector. + +dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_mul_1) + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 C g1 = 16 times (size mod 4): byte offset into the entry stubs below +ifdef(`PIC', +` mov %o7,%g4 C Save return address register +0: call 1f + add %o7,L(1)-0b,%g3 +1: mov %g4,%o7 C Restore return address register +', +` sethi %hi(L(1)),%g3 + or %g3,%lo(L(1)),%g3 +') + jmp %g3+%g1 C enter the stub for size mod 4 + ld [%o1+0],%o4 C 1 +L(1): +L(L00): add %o0,-4,%o0 + add %o1,-4,%o1 + b L(loop00) C 4, 8, 12, ... + orcc %g0,%g0,%g2 C delay slot: g2 = 0, carry flag cleared +L(L01): b L(loop01) C 1, 5, 9, ... + orcc %g0,%g0,%g2 + nop + nop +L(L10): add %o0,-12,%o0 C 2, 6, 10, ... + add %o1,4,%o1 + b L(loop10) + orcc %g0,%g0,%g2 + nop C this stub has five insns: offset 48 (size mod 4 = 3) lands on this nop and falls through into L11 +L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ add %o1,-8,%o1 + b L(loop11) + orcc %g0,%g0,%g2 + +L(loop): + addcc %g3,%g2,%g3 C 1 + ld [%o1+4],%o4 C 2 + st %g3,[%o0+0] C 1 + rd %y,%g2 C 1 +L(loop00): + umul %o4,%o3,%g3 C 2 + addxcc %g3,%g2,%g3 C 2 + ld [%o1+8],%o4 C 3 + st %g3,[%o0+4] C 2 + rd %y,%g2 C 2 +L(loop11): + umul %o4,%o3,%g3 C 3 + addxcc %g3,%g2,%g3 C 3 + ld [%o1+12],%o4 C 4 + add %o1,16,%o1 + st %g3,[%o0+8] C 3 + rd %y,%g2 C 3 +L(loop10): + umul %o4,%o3,%g3 C 4 + addxcc %g3,%g2,%g3 C 4 + ld [%o1+0],%o4 C 1 + st %g3,[%o0+12] C 4 + add %o0,16,%o0 + rd %y,%g2 C 4 + addx %g0,%g2,%g2 +L(loop01): + addcc %o2,-4,%o2 + bg L(loop) + umul %o4,%o3,%g3 C 1 + + addcc %g3,%g2,%g3 C 4 + st %g3,[%o0+0] C 4 + rd %y,%g2 C 4 + + retl + addx %g0,%g2,%o0 C delay slot: return value = g2 plus the final carry +EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/sparc32/v8/submul_1.asm b/rts/gmp/mpn/sparc32/v8/submul_1.asm new file mode 100644 index 0000000000..9ed132f4c1 --- /dev/null +++ b/rts/gmp/mpn/sparc32/v8/submul_1.asm @@ -0,0 +1,58 @@ +dnl SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and +dnl subtract the result from a second limb vector. + +dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr o0 +C s1_ptr o1 +C size o2 +C s2_limb o3 + +ASM_START() +PROLOGUE(mpn_submul_1) + sub %g0,%o2,%o2 C negate ... + sll %o2,2,%o2 C ... and scale size + sub %o1,%o2,%o1 C o1 is offset s1_ptr + sub %o0,%o2,%g1 C g1 is offset res_ptr + + mov 0,%o0 C clear cy_limb + +L(loop): + ld [%o1+%o2],%o4 C o4 = s1 limb + ld [%g1+%o2],%g2 C g2 = res limb + umul %o4,%o3,%o5 C o5 = low word of s1 limb times s2_limb + rd %y,%g3 C g3 = high word of the product + addcc %o5,%o0,%o5 C add incoming cy_limb to the low word + addx %g3,0,%o0 C cy_limb = high word plus carry + subcc %g2,%o5,%g2 C res limb minus low word + addx %o0,0,%o0 C count the borrow in cy_limb + st %g2,[%g1+%o2] + + addcc %o2,4,%o2 C advance the negative index; zero when done + bne L(loop) + nop + + retl C cy_limb is returned in o0 + nop +EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm new file mode 100644 index 0000000000..0d5e8d415d --- /dev/null +++ b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm @@ -0,0 +1,122 @@ +dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h. +dnl This is for SuperSPARC only, to compensate for its semi-functional +dnl udiv instruction. + +dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA.
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C rem_ptr i0 +C n1 i1 +C n0 i2 +C d i3 + +ASM_START() + +ifdef(`PIC', +` TEXT +L(getpc): + retl + nop') + + TEXT + ALIGN(8) +L(C0): .double 0r4294967296 C 2^32 +L(C1): .double 0r2147483648 C 2^31 + +PROLOGUE(mpn_udiv_qrnnd) + save %sp,-104,%sp + st %i1,[%fp-8] C move n1 to the FPU through the stack + ld [%fp-8],%f10 + +ifdef(`PIC', +`L(pc): call L(getpc) C put address of this insn in %o7 + ldd [%o7+L(C0)-L(pc)],%f8', +` sethi %hi(L(C0)),%o7 + ldd [%o7+%lo(L(C0))],%f8') + + fitod %f10,%f4 C signed conversion of n1 + cmp %i1,0 + bge L(248) + mov %i0,%i5 C delay slot: keep rem_ptr in i5, i0 later receives the quotient + faddd %f4,%f8,%f4 C top bit of n1 was set: add 2^32 to undo the signed conversion +L(248): + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 C f6 = n1 times 2^32 + cmp %i2,0 + bge L(249) + fitod %f10,%f2 C delay slot: signed conversion of n0 + faddd %f2,%f8,%f2 C same 2^32 correction for n0 +L(249): + st %i3,[%fp-8] + faddd %f6,%f2,%f2 C f2 = n1 times 2^32 plus n0 + ld [%fp-8],%f10 + cmp %i3,0 + bge L(250) + fitod %f10,%f4 C delay slot: signed conversion of d + faddd %f4,%f8,%f4 C same 2^32 correction for d +L(250): + fdivd %f2,%f4,%f2 C f2 = floating-point quotient estimate + +ifdef(`PIC', +` ldd [%o7+L(C1)-L(pc)],%f4', +` sethi %hi(L(C1)),%o7 + ldd [%o7+%lo(L(C1))],%f4') + + fcmped %f2,%f4 + nop + fbge,a L(251) C estimate is at least 2^31 + fsubd %f2,%f4,%f2 C annulled delay slot: subtract 2^31 only when the branch is taken + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L(252) + ld [%fp-8],%i4 C i4 = integer quotient estimate q +L(251): + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 C put the 2^31 bit back into q +L(252): + umul %i3,%i4,%g3 C g3 = low word of q times d + rd %y,%i0 C i0 = high word of q times d + subcc %i2,%g3,%o7 C o7 = n0 minus low word + subxcc %i1,%i0,%g0 C compare n1 with high word plus borrow + be L(253) C high words agree + cmp %o7,%i3 C delay slot: compare remainder candidate with d + + add %i4,-1,%i0 C high words differ: return q-1 + add %o7,%i3,%o7 C and add d to the remainder + st %o7,[%i5] + ret + restore +L(253): + blu L(246) C remainder below d: return q unchanged + mov %i4,%i0 + add %i4,1,%i0 C otherwise return q plus 1 + sub %o7,%i3,%o7 C and subtract d from the remainder +L(246): + st %o7,[%i5] C store remainder through the saved rem_ptr + ret + restore +EPILOGUE(mpn_udiv_qrnnd) diff --git a/rts/gmp/mpn/sparc32/v8/umul.asm b/rts/gmp/mpn/sparc32/v8/umul.asm new file mode 100644 index 0000000000..ae8f692a0a --- /dev/null +++ b/rts/gmp/mpn/sparc32/v8/umul.asm @@ -0,0 +1,31 @@ +dnl SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc. + +dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library.
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +ASM_START() +PROLOGUE(mpn_umul_ppmm) + umul %o1,%o2,%g2 + st %g2,[%o0] + retl + rd %y,%o0 +EPILOGUE(mpn_umul_ppmm) diff --git a/rts/gmp/mpn/sparc32/v9/README b/rts/gmp/mpn/sparc32/v9/README new file mode 100644 index 0000000000..9b39713271 --- /dev/null +++ b/rts/gmp/mpn/sparc32/v9/README @@ -0,0 +1,4 @@ +Code for SPARC processors implementing version 9 of the SPARC architecture. +This code is for systems that do not preserve the full 64-bit contents of +integer registers across context switches. For other systems (such as Solaris 7 or +later) use the code in ../../sparc64. diff --git a/rts/gmp/mpn/sparc32/v9/addmul_1.asm b/rts/gmp/mpn/sparc32/v9/addmul_1.asm new file mode 100644 index 0000000000..c1762cc41f --- /dev/null +++ b/rts/gmp/mpn/sparc32/v9/addmul_1.asm @@ -0,0 +1,288 @@ +dnl SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and +dnl add the result to a second limb vector. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library.
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + + TEXT + ALIGN(4) +L(noll): + .word 0 C zero word, loaded into f10 in the prologue + +PROLOGUE(mpn_addmul_1) + save %sp,-256,%sp + +ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hi(L(noll)),%g1 + ld [%g1+%lo(L(noll))],%f10') + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 C o0 = low 16 bits of s2_limb + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f6 C f6 = low 16 bits of s2_limb as a double + + srl %i3,16,%o0 C o0 = high 16 bits of s2_limb + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f8 C f8 = high 16 bits of s2_limb as a double + + mov 0,%g3 C cy = 0 + + ld [%i1],%f11 + subcc %i2,1,%i2 + be,pn %icc,L(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end2) + std %f12,[%fp-16] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end3) + std %f12,[%fp-32] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,L(end4) + std %f12,[%fp-16] + + b,a L(loopm) + + .align 16 +C BEGIN LOOP +L(loop): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + subcc %i2,1,%i2 + be,pn %icc,L(loope) + add %i0,4,%i0 C res_ptr++ +L(loopm): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + subcc %i2,1,%i2 + bne,pt %icc,L(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP + + fxtod %f10,%f2 + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + b,a L(xxx) +L(loope): +L(end4): + fxtod %f10,%f2 + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + add %i0,4,%i0 C res_ptr++ + + add
%g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + b,a L(yyy) + +L(end3): + fxtod %f10,%f2 + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 +L(xxx): fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + add %i0,4,%i0 C res_ptr++ + + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 +L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end1): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ +
+L(ret): add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 C cy = bits above the low 32 + st %g4,[%i0-4] C store the low 32 bits of the sum + + ret + restore %g0,%g3,%o0 C sideeffect: put cy in retreg +EPILOGUE(mpn_addmul_1) diff --git a/rts/gmp/mpn/sparc32/v9/gmp-mparam.h b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h new file mode 100644 index 0000000000..f946b900f0 --- /dev/null +++ b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h @@ -0,0 +1,69 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +/* These values are for UltraSPARC I, II, and IIi. It is bogus that + this file lives in v9, but that will do for now. */ + +/* Variations in addmul_1 speed make the multiply and square thresholds + doubtful. TOOM3_SQR_THRESHOLD had to be estimated here. */ + +/* Generated by tuneup.c, 2000-07-06.
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 30 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 200 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 59 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 500 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 107 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 146 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 29 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 3 +#endif diff --git a/rts/gmp/mpn/sparc32/v9/mul_1.asm b/rts/gmp/mpn/sparc32/v9/mul_1.asm new file mode 100644 index 0000000000..f8f0fdd8c2 --- /dev/null +++ b/rts/gmp/mpn/sparc32/v9/mul_1.asm @@ -0,0 +1,267 @@ +dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and +dnl store the result in a second limb vector. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + + TEXT + ALIGN(4) +L(noll): + .word 0 + +PROLOGUE(mpn_mul_1) + save %sp,-256,%sp + +ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hi(L(noll)),%g1 + ld [%g1+%lo(L(noll))],%f10') + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f6 + + srl %i3,16,%o0 + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f8 + + mov 0,%g3 C cy = 0 + + ld [%i1],%f11 + subcc %i2,1,%i2 + be,pn %icc,L(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end2) + std %f12,[%fp-16] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end3) + std %f12,[%fp-32] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,L(end4) + std %f12,[%fp-16] + + b,a L(loopm) + + .align 16 +C BEGIN LOOP +L(loop): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + subcc %i2,1,%i2 + be,pn %icc,L(loope) + add %i0,4,%i0 C res_ptr++ +L(loopm): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + srlx 
%g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + subcc %i2,1,%i2 + bne,pt %icc,L(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP + + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + b,a L(xxx) +L(loope): +L(end4): + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + b,a L(yyy) + +L(end3): + fxtod %f10,%f2 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 +L(xxx): fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + ldx [%fp-24],%g2 C p16 + ldx 
[%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 +L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end1): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + +L(ret): add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + st %g4,[%i0-4] + + ret + restore %g0,%g3,%o0 C sideeffect: put cy in retreg +EPILOGUE(mpn_mul_1) diff --git a/rts/gmp/mpn/sparc32/v9/submul_1.asm b/rts/gmp/mpn/sparc32/v9/submul_1.asm new file mode 100644 index 0000000000..6195ea88ea --- /dev/null +++ b/rts/gmp/mpn/sparc32/v9/submul_1.asm @@ -0,0 +1,291 @@ +dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and +dnl subtract the result from a second limb vector. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + + TEXT + ALIGN(4) +L(noll): + .word 0 + +PROLOGUE(mpn_submul_1) + save %sp,-256,%sp + +ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hi(L(noll)),%g1 + ld [%g1+%lo(L(noll))],%f10') + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f6 + + srl %i3,16,%o0 + st %o0,[%fp-16] + ld [%fp-16],%f11 + fxtod %f10,%f8 + + mov 0,%g3 C cy = 0 + + ld [%i1],%f11 + subcc %i2,1,%i2 + be,pn %icc,L(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end2) + std %f12,[%fp-16] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,L(end3) + std %f12,[%fp-32] + + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,L(end4) + std %f12,[%fp-16] + + b,a L(loopm) + + .align 16 +C BEGIN LOOP +L(loop): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + subcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + addx %g3,0,%g3 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std 
%f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + subcc %i2,1,%i2 + be,pn %icc,L(loope) + add %i0,4,%i0 C res_ptr++ +L(loopm): + fxtod %f10,%f2 + ld [%i1],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + subcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + addx %g3,0,%g3 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + subcc %i2,1,%i2 + bne,pt %icc,L(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP + + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + subcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + b,a L(xxx) +L(loope): +L(end4): + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + subcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-32],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + b,a L(yyy) + +L(end3): + fxtod %f10,%f2 + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-16],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 +L(xxx): fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx 
%g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-40] + fdtox %f4,%f12 + std %f12,[%fp-32] + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 +L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + ld [%i0],%g5 + srlx %g4,32,%g3 + ldx [%fp-40],%g2 C p16 + ldx [%fp-32],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + b,a L(ret) + +L(end1): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-24] + fdtox %f4,%f12 + std %f12,[%fp-16] + + ld [%i0],%g5 + ldx [%fp-24],%g2 C p16 + ldx [%fp-16],%g1 C p0 + sllx %g2,16,%g2 C align p16 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + +L(ret): add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C subtract p0 from *res_ptr (ADD2) + srlx %g4,32,%g3 + st %l2,[%i0-4] + + addx %g3,%g0,%g3 + ret + restore %g0,%g3,%o0 C sideeffect: put cy in retreg +EPILOGUE(mpn_submul_1) diff --git a/rts/gmp/mpn/sparc64/README b/rts/gmp/mpn/sparc64/README new file mode 100644 index 0000000000..6923a133f3 --- /dev/null +++ b/rts/gmp/mpn/sparc64/README @@ -0,0 +1,48 @@ +This directory contains mpn functions for 64-bit V9 SPARC + +RELEVANT OPTIMIZATION ISSUES + +The Ultra I/II pipeline executes up to two simple integer arithmetic operations +per cycle. 
The 64-bit integer multiply instruction mulx takes from 5 cycles to +35 cycles, depending on the position of the most significant bit of the 1st +source operand. It cannot overlap with other instructions. For our use of +mulx, it will take from 5 to 20 cycles. + +Integer conditional move instructions cannot dual-issue with other integer +instructions. No conditional move can issue 1-5 cycles after a load. (Or +something similarly bizarre.) + +Integer branches can issue with two integer arithmetic instructions. Likewise +for integer loads. Four instructions may issue (arith, arith, ld/st, branch) +but only if the branch is last. + +(The V9 architecture manual recommends that the 2nd operand of a multiply +instruction be the smaller one. For UltraSPARC, they got things backwards and +optimize for the wrong operand! Really helpful, given that multiply +is incredibly slow on these CPUs!) + +STATUS + +There is new code in ~/prec/gmp-remote/sparc64. Not tested or completed, but +the pipelines are worked out. Here are the timings: + +* lshift, rshift: The code is well-optimized and runs at 2.0 cycles/limb. + +* add_n, sub_n: add3.s currently runs at 6 cycles/limb. We use a bizarre + scheme of compares and branches (with some nops and fnops to align things) + and carefully stay away from the instructions intended for this application + (i.e., movcs and movcc). + + Using movcc/movcs, even with deep unrolling, seems to get down to 7 + cycles/limb. + + The most promising approach is to split operands in 32-bit pieces using + srlx, then use two addccc, and finally combine the results with sllx+or. + The result could run at 5 cycles/limb, I think. It might be possible to + do without unrolling, or with minimal unrolling. + +* addmul_1/submul_1: Should optimize for when scalar operand < 2^32. +* addmul_1/submul_1: Since mulx is horrendously slow on UltraSPARC I/II, + Karatsuba's method should save up to 16 cycles (i.e. > 20%). 
+* mul_1 (and possibly the other multiply functions): Handle carry in the + same tricky way as add_n,sub_n. diff --git a/rts/gmp/mpn/sparc64/add_n.asm b/rts/gmp/mpn/sparc64/add_n.asm new file mode 100644 index 0000000000..72b3895a5b --- /dev/null +++ b/rts/gmp/mpn/sparc64/add_n.asm @@ -0,0 +1,172 @@ +! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +! sum in a third limb vector. + +! Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +include(`../config.m4') + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch +PROLOGUE(mpn_add_n) + +! 12 mem ops >= 12 cycles +! 8 shift insn >= 8 cycles +! 8 addccc, executing alone, +8 cycles +! Unrolling not mandatory...perhaps 2-way is best? +! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl +! 
All in all, it runs at 5 cycles/limb + + save %sp,-160,%sp + + addcc %g0,%g0,%g0 + + add %i3,-4,%i3 + brlz,pn %i3,L(there) + nop + + ldx [%i1+0],%l0 + ldx [%i2+0],%l4 + ldx [%i1+8],%l1 + ldx [%i2+8],%l5 + ldx [%i1+16],%l2 + ldx [%i2+16],%l6 + ldx [%i1+24],%l3 + ldx [%i2+24],%l7 + add %i1,32,%i1 + add %i2,32,%i2 + + add %i3,-4,%i3 + brlz,pn %i3,L(skip) + nop + b L(loop1) ! jump instead of executing many NOPs + nop + ALIGN(32) +!--------- Start main loop --------- +L(loop1): + addccc %l0,%l4,%g1 +!- + srlx %l0,32,%o0 + ldx [%i1+0],%l0 +!- + srlx %l4,32,%o4 + ldx [%i2+0],%l4 +!- + addccc %o0,%o4,%g0 +!- + addccc %l1,%l5,%g2 +!- + srlx %l1,32,%o1 + ldx [%i1+8],%l1 +!- + srlx %l5,32,%o5 + ldx [%i2+8],%l5 +!- + addccc %o1,%o5,%g0 +!- + addccc %l2,%l6,%g3 +!- + srlx %l2,32,%o2 + ldx [%i1+16],%l2 +!- + srlx %l6,32,%g5 ! asymmetry + ldx [%i2+16],%l6 +!- + addccc %o2,%g5,%g0 +!- + addccc %l3,%l7,%g4 +!- + srlx %l3,32,%o3 + ldx [%i1+24],%l3 + add %i1,32,%i1 +!- + srlx %l7,32,%o7 + ldx [%i2+24],%l7 + add %i2,32,%i2 +!- + addccc %o3,%o7,%g0 +!- + stx %g1,[%i0+0] +!- + stx %g2,[%i0+8] +!- + stx %g3,[%i0+16] + add %i3,-4,%i3 +!- + stx %g4,[%i0+24] + add %i0,32,%i0 + + brgez,pt %i3,L(loop1) + nop +!--------- End main loop --------- +L(skip): + addccc %l0,%l4,%g1 + srlx %l0,32,%o0 + srlx %l4,32,%o4 + addccc %o0,%o4,%g0 + addccc %l1,%l5,%g2 + srlx %l1,32,%o1 + srlx %l5,32,%o5 + addccc %o1,%o5,%g0 + addccc %l2,%l6,%g3 + srlx %l2,32,%o2 + srlx %l6,32,%g5 ! 
asymmetry + addccc %o2,%g5,%g0 + addccc %l3,%l7,%g4 + srlx %l3,32,%o3 + srlx %l7,32,%o7 + addccc %o3,%o7,%g0 + stx %g1,[%i0+0] + stx %g2,[%i0+8] + stx %g3,[%i0+16] + stx %g4,[%i0+24] + add %i0,32,%i0 + +L(there): + add %i3,4,%i3 + brz,pt %i3,L(end) + nop + +L(loop2): + ldx [%i1+0],%l0 + add %i1,8,%i1 + ldx [%i2+0],%l4 + add %i2,8,%i2 + srlx %l0,32,%g2 + srlx %l4,32,%g3 + addccc %l0,%l4,%g1 + addccc %g2,%g3,%g0 + stx %g1,[%i0+0] + add %i0,8,%i0 + add %i3,-1,%i3 + brgz,pt %i3,L(loop2) + nop + +L(end): addc %g0,%g0,%i0 + ret + restore +EPILOGUE(mpn_add_n) diff --git a/rts/gmp/mpn/sparc64/addmul1h.asm b/rts/gmp/mpn/sparc64/addmul1h.asm new file mode 100644 index 0000000000..96cb5f7369 --- /dev/null +++ b/rts/gmp/mpn/sparc64/addmul1h.asm @@ -0,0 +1,203 @@ +dnl SPARC 64-bit addmull/addmulu -- Helper for mpn_addmul_1 and mpn_mul_1. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ +ifdef(`LOWPART', +`addmull:', +`addmulu:') + save %sp,-256,%sp + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f6 + + srl %i3,16,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f8 + + mov 0,%g3 C cy = 0 + + ld [%i1+4],%f11 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-25] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,E(end2) + std %f12,[%fp-17] + + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end3) + std %f12,[%fp-33] + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + ld [%i0+DLO],%g5 + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,E(end4) + std %f12,[%fp-17] + + b,a E(loop) + nop C nop is cheap to nullify + + ALIGN(16) +C BEGIN LOOP +E(loop): + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0+DHI],%g5 + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + sub %i2,2,%i2 + add %i0,4,%i0 C res_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0+DLO],%g5 + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld 
%f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DHI] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + std %f12,[%fp-17] + brnz,pt %i2,E(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP +E(loope): +E(end4): + fxtod %f10,%f2 + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0+DHI],%g5 + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + add %i0,4,%i0 C res_ptr++ + + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy + ld [%i0+DLO],%g5 + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DHI] + b,a E(yyy) + +E(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + ld [%i0+DLO],%g5 + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 +E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy +ifdef(`LOWPART', +` ld [%i0+DHI],%g5') + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + ldx [%fp-33],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + +ifdef(`LOWPART', +` add %g5,%g1,%g1') C add *res_ptr to p0 (ADD2) + add %g3,%g1,%g4 C p += cy +ifdef(`LOWPART', +` st %g4,[%i0-4+DHI] + srlx %g4,32,%g4') + + ret + restore %g0,%g4,%o0 C sideeffect: put cy in retreg +ifdef(`LOWPART', +`EPILOGUE(addmull)', +`EPILOGUE(addmulu)') diff --git a/rts/gmp/mpn/sparc64/addmul_1.asm b/rts/gmp/mpn/sparc64/addmul_1.asm new file mode 100644 index 0000000000..c3f04cea6a --- /dev/null +++ b/rts/gmp/mpn/sparc64/addmul_1.asm @@ -0,0 
+1,114 @@ +dnl SPARC 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and +dnl add the result to a second limb vector. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch + +PROLOGUE(mpn_addmul_1) + save %sp,-256,%sp + +C We store 0.0 in f10 and keep it invariant across the two +C function calls below. Note that this is not ABI conformant, +C but since the functions are local, that's acceptable. 
+ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hh(L(noll)),%g2 + sethi %lm(L(noll)),%g1 + or %g2,%hm(L(noll)),%g2 + or %g1,%lo(L(noll)),%g1 + sllx %g2,32,%g2 + ld [%g1+%g2],%f10') + + sub %i1,%i0,%g1 + srlx %g1,3,%g1 + cmp %g1,%i2 + bcc,pt %xcc,L(nooverlap) + nop + + sllx %i2,3,%g2 C compute stack allocation byte count + add %g2,15,%o0 + and %o0,-16,%o0 + sub %sp,%o0,%sp + add %sp,2223,%o0 + + mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp + call mpn_copyi + mov %i2,%o2 C copy n to mpn_copyi's count parameter + + add %sp,2223,%i1 + +L(nooverlap): +C First multiply-add with low 32 bits of s2_limb + mov %i0,%o0 + mov %i1,%o1 + add %i2,%i2,%o2 + call addmull + srl %i3,0,%o3 + + mov %o0,%l0 C keep carry-out from addmull + +C Now multiply-add with high 32 bits of s2_limb, unless it is zero. + srlx %i3,32,%o3 + brz,a,pn %o3,L(small) + mov %o0,%i0 + mov %i1,%o1 + add %i2,%i2,%o2 + call addmulu + add %i0,4,%o0 + + add %l0,%o0,%i0 +L(small): + ret + restore %g0,%g0,%g0 +EPILOGUE(mpn_addmul_1) + +C Put a zero in the text segment to allow us to get the address +C quickly when compiling for PIC + TEXT + ALIGN(4) +L(noll): + .word 0 + +define(`LO',`(+4)') +define(`HI',`(-4)') + +define(`DLO',`(+4)') +define(`DHI',`(-4)') +define(`LOWPART') +define(`E',`L(l.$1)') +include_mpn(`sparc64/addmul1h.asm') + +define(`DLO',`(-4)') +define(`DHI',`(+4)') +undefine(`LOWPART') +define(`E',`L(u.$1)') +include_mpn(`sparc64/addmul1h.asm') diff --git a/rts/gmp/mpn/sparc64/copyi.asm b/rts/gmp/mpn/sparc64/copyi.asm new file mode 100644 index 0000000000..d9957e3c90 --- /dev/null +++ b/rts/gmp/mpn/sparc64/copyi.asm @@ -0,0 +1,79 @@ +! SPARC v9 __gmpn_copyi -- Copy a limb vector. + +! Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! 
the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! rptr %o0 +! sptr %o1 +! n %o2 + +include(`../config.m4') + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch +PROLOGUE(mpn_copyi) + add %o2,-8,%o2 + brlz,pn %o2,L(skip) + nop + b,a L(loop1) + nop + + ALIGN(16) +L(loop1): + ldx [%o1+0],%g1 + ldx [%o1+8],%g2 + ldx [%o1+16],%g3 + ldx [%o1+24],%g4 + ldx [%o1+32],%g5 + ldx [%o1+40],%o3 + ldx [%o1+48],%o4 + ldx [%o1+56],%o5 + add %o1,64,%o1 + stx %g1,[%o0+0] + stx %g2,[%o0+8] + stx %g3,[%o0+16] + stx %g4,[%o0+24] + stx %g5,[%o0+32] + stx %o3,[%o0+40] + stx %o4,[%o0+48] + stx %o5,[%o0+56] + add %o2,-8,%o2 + brgez,pt %o2,L(loop1) + add %o0,64,%o0 + +L(skip): + add %o2,8,%o2 + brz,pt %o2,L(end) + nop + +L(loop2): + ldx [%o1],%g1 + add %o1,8,%o1 + add %o2,-1,%o2 + stx %g1,[%o0] + add %o0,8,%o0 + brgz,pt %o2,L(loop2) + nop + +L(end): retl + nop +EPILOGUE(mpn_copyi) diff --git a/rts/gmp/mpn/sparc64/gmp-mparam.h b/rts/gmp/mpn/sparc64/gmp-mparam.h new file mode 100644 index 0000000000..74f61661c1 --- /dev/null +++ b/rts/gmp/mpn/sparc64/gmp-mparam.h @@ -0,0 +1,88 @@ +/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 +#define BITS_PER_LONGINT 64 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + +/* Tell the toom3 multiply implementation to call low-level mpn + functions instead of open-coding operations in C. */ +#define USE_MORE_MPN 1 + + +/* Run on sun workshop cc. */ +/* Generated by tuneup.c, 2000-07-30. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 12 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 95 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 33 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 125 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 27 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 107 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 12 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 199 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 304, 608, 1344, 2304, 7168, 20480, 49152, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 320 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 1664 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 304, 608, 1344, 2816, 7168, 20480, 49152, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 320 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 1664 +#endif diff --git a/rts/gmp/mpn/sparc64/lshift.asm b/rts/gmp/mpn/sparc64/lshift.asm new file mode 100644 index 0000000000..2d2edc50a7 --- /dev/null +++ b/rts/gmp/mpn/sparc64/lshift.asm @@ -0,0 +1,97 @@ +! SPARC v9 __gmpn_lshift -- + +! Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! 
You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +include(`../config.m4') + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch +PROLOGUE(mpn_lshift) + sllx %o2,3,%g1 + add %o1,%g1,%o1 ! make %o1 point at end of src + ldx [%o1-8],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o0,%g1,%o0 ! make %o0 point at end of res + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + srlx %g2,%o5,%g1 ! compute function result + brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 + + sub %o2,%g4,%o2 ! adjust count for main loop + +L(loop0): + ldx [%o1-16],%g3 + add %o0,-8,%o0 + add %o1,-8,%o1 + add %g4,-1,%g4 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,L(loop0) + stx %o4,[%o0+0] + +L(0): brz,pn %o2,L(end) + nop + +L(loop1): + ldx [%o1-16],%g3 + add %o0,-32,%o0 + add %o2,-4,%o2 + sllx %g2,%o3,%o4 + srlx %g3,%o5,%g1 + + ldx [%o1-24],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+24] + srlx %g2,%o5,%g1 + + ldx [%o1-32],%g3 + sllx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0+16] + srlx %g3,%o5,%g1 + + ldx [%o1-40],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+8] + srlx %g2,%o5,%g1 + + add %o1,-32,%o1 + or %g4,%g1,%g4 + brnz,pt %o2,L(loop1) + stx %g4,[%o0+0] + +L(end): sllx %g2,%o3,%g2 + stx %g2,[%o0-8] + retl + mov %g5,%o0 +EPILOGUE(mpn_lshift) diff --git a/rts/gmp/mpn/sparc64/mul_1.asm b/rts/gmp/mpn/sparc64/mul_1.asm new file mode 100644 index 0000000000..f2f2821d51 --- /dev/null +++ b/rts/gmp/mpn/sparc64/mul_1.asm @@ -0,0 +1,113 @@ +dnl SPARC 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and +dnl store the result to a second limb vector. 
+ +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch + +PROLOGUE(mpn_mul_1) + save %sp,-256,%sp + +C We store 0.0 in f10 and keep it invariant across the two +C function calls below. Note that this is not ABI conformant, +C but since the functions are local, that's acceptable. 
+ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hh(L(noll)),%g2 + sethi %lm(L(noll)),%g1 + or %g2,%hm(L(noll)),%g2 + or %g1,%lo(L(noll)),%g1 + sllx %g2,32,%g2 + ld [%g1+%g2],%f10') + + sub %i1,%i0,%g1 + srlx %g1,3,%g1 + cmp %g1,%i2 + bcc,pt %xcc,L(nooverlap) + nop + + sllx %i2,3,%g2 C compute stack allocation byte count + add %g2,15,%o0 + and %o0,-16,%o0 + sub %sp,%o0,%sp + add %sp,2223,%o0 + + mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp + call mpn_copyi + mov %i2,%o2 C copy n to mpn_copyi's count parameter + + add %sp,2223,%i1 + +L(nooverlap): +C First multiply with low 32 bits of s2_limb + mov %i0,%o0 + mov %i1,%o1 + add %i2,%i2,%o2 + call mull + srl %i3,0,%o3 + + mov %o0,%l0 C keep carry-out from mull + +C Now multiply-add with high 32 bits of s2_limb, unless it is zero. + srlx %i3,32,%o3 + brz,a,pn %o3,L(small) + mov %o0,%i0 + mov %i1,%o1 + add %i2,%i2,%o2 + call addmulu + add %i0,4,%o0 + + add %l0,%o0,%i0 +L(small): + ret + restore %g0,%g0,%g0 +EPILOGUE(mpn_mul_1) + +C Put a zero in the text segment to allow us to get the address +C quickly when compiling for PIC + TEXT + ALIGN(4) +L(noll): + .word 0 + +define(`LO',`(+4)') +define(`HI',`(-4)') + +define(`DLO',`(+4)') +define(`DHI',`(-4)') +define(`E',`L($1)') +include_mpn(`sparc64/mul_1h.asm') + +define(`DLO',`(-4)') +define(`DHI',`(+4)') +undefine(`LOWPART') +define(`E',`L(u.$1)') +include_mpn(`sparc64/addmul1h.asm') diff --git a/rts/gmp/mpn/sparc64/mul_1h.asm b/rts/gmp/mpn/sparc64/mul_1h.asm new file mode 100644 index 0000000000..5078c01c3f --- /dev/null +++ b/rts/gmp/mpn/sparc64/mul_1h.asm @@ -0,0 +1,183 @@ +dnl SPARC 64-bit mull -- Helper for mpn_mul_1. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +mull: + save %sp,-256,%sp + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f6 + + srl %i3,16,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f8 + + mov 0,%g3 C cy = 0 + + ld [%i1+4],%f11 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-25] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,E(end2) + std %f12,[%fp-17] + + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end3) + std %f12,[%fp-33] + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,E(end4) + std %f12,[%fp-17] + + b,a E(loop) + nop C nop is cheap to nullify + + ALIGN(16) +C 
BEGIN LOOP +E(loop): + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + sub %i2,2,%i2 + add %i0,4,%i0 C res_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DHI] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + std %f12,[%fp-17] + brnz,pt %i2,E(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP +E(loope): +E(end4): + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DHI] + b,a E(yyy) + +E(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 +E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + ldx [%fp-33],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %g4,[%i0-4+DLO] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + st %g4,[%i0-4+DHI] + srlx %g4,32,%g4 + + ret + restore 
%g0,%g4,%o0 C sideeffect: put cy in retreg +EPILOGUE(mull) diff --git a/rts/gmp/mpn/sparc64/rshift.asm b/rts/gmp/mpn/sparc64/rshift.asm new file mode 100644 index 0000000000..baf7920efb --- /dev/null +++ b/rts/gmp/mpn/sparc64/rshift.asm @@ -0,0 +1,94 @@ +! SPARC v9 __gmpn_rshift -- + +! Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +include(`../config.m4') + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch +PROLOGUE(mpn_rshift) + ldx [%o1],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o2,-1,%o2 + and %o2,4-1,%g4 ! number of limbs in first loop + sllx %g2,%o5,%g1 ! compute function result + brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 + + sub %o2,%g4,%o2 ! 
adjust count for main loop + +L(loop0): + ldx [%o1+8],%g3 + add %o0,8,%o0 + add %o1,8,%o1 + add %g4,-1,%g4 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + brnz,pt %g4,L(loop0) + stx %o4,[%o0-8] + +L(0): brz,pn %o2,L(end) + nop + +L(loop1): + ldx [%o1+8],%g3 + add %o0,32,%o0 + add %o2,-4,%o2 + srlx %g2,%o3,%o4 + sllx %g3,%o5,%g1 + + ldx [%o1+16],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-32] + sllx %g2,%o5,%g1 + + ldx [%o1+24],%g3 + srlx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0-24] + sllx %g3,%o5,%g1 + + ldx [%o1+32],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-16] + sllx %g2,%o5,%g1 + + add %o1,32,%o1 + or %g4,%g1,%g4 + brnz %o2,L(loop1) + stx %g4,[%o0-8] + +L(end): srlx %g2,%o3,%g2 + stx %g2,[%o0-0] + retl + mov %g5,%o0 +EPILOGUE(mpn_rshift) diff --git a/rts/gmp/mpn/sparc64/sub_n.asm b/rts/gmp/mpn/sparc64/sub_n.asm new file mode 100644 index 0000000000..61547138e0 --- /dev/null +++ b/rts/gmp/mpn/sparc64/sub_n.asm @@ -0,0 +1,172 @@ +! SPARC v9 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! 
MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr %o0 +! s1_ptr %o1 +! s2_ptr %o2 +! size %o3 + +include(`../config.m4') + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch +PROLOGUE(mpn_sub_n) + +! 12 mem ops >= 12 cycles +! 8 shift insn >= 8 cycles +! 8 addccc, executing alone, +8 cycles +! Unrolling not mandatory...perhaps 2-way is best? +! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl +! All in all, it runs at 5 cycles/limb + + save %sp,-160,%sp + + addcc %g0,%g0,%g0 + + add %i3,-4,%i3 + brlz,pn %i3,L(there) + nop + + ldx [%i1+0],%l0 + ldx [%i2+0],%l4 + ldx [%i1+8],%l1 + ldx [%i2+8],%l5 + ldx [%i1+16],%l2 + ldx [%i2+16],%l6 + ldx [%i1+24],%l3 + ldx [%i2+24],%l7 + add %i1,32,%i1 + add %i2,32,%i2 + + add %i3,-4,%i3 + brlz,pn %i3,L(skip) + nop + b L(loop1) ! jump instead of executing many NOPs + nop + ALIGN(32) +!--------- Start main loop --------- +L(loop1): + subccc %l0,%l4,%g1 +!- + srlx %l0,32,%o0 + ldx [%i1+0],%l0 +!- + srlx %l4,32,%o4 + ldx [%i2+0],%l4 +!- + subccc %o0,%o4,%g0 +!- + subccc %l1,%l5,%g2 +!- + srlx %l1,32,%o1 + ldx [%i1+8],%l1 +!- + srlx %l5,32,%o5 + ldx [%i2+8],%l5 +!- + subccc %o1,%o5,%g0 +!- + subccc %l2,%l6,%g3 +!- + srlx %l2,32,%o2 + ldx [%i1+16],%l2 +!- + srlx %l6,32,%g5 ! asymmetry + ldx [%i2+16],%l6 +!- + subccc %o2,%g5,%g0 +!- + subccc %l3,%l7,%g4 +!- + srlx %l3,32,%o3 + ldx [%i1+24],%l3 + add %i1,32,%i1 +!- + srlx %l7,32,%o7 + ldx [%i2+24],%l7 + add %i2,32,%i2 +!- + subccc %o3,%o7,%g0 +!- + stx %g1,[%i0+0] +!- + stx %g2,[%i0+8] +!- + stx %g3,[%i0+16] + add %i3,-4,%i3 +!- + stx %g4,[%i0+24] + add %i0,32,%i0 + + brgez,pt %i3,L(loop1) + nop +!--------- End main loop --------- +L(skip): + subccc %l0,%l4,%g1 + srlx %l0,32,%o0 + srlx %l4,32,%o4 + subccc %o0,%o4,%g0 + subccc %l1,%l5,%g2 + srlx %l1,32,%o1 + srlx %l5,32,%o5 + subccc %o1,%o5,%g0 + subccc %l2,%l6,%g3 + srlx %l2,32,%o2 + srlx %l6,32,%g5 ! 
asymmetry + subccc %o2,%g5,%g0 + subccc %l3,%l7,%g4 + srlx %l3,32,%o3 + srlx %l7,32,%o7 + subccc %o3,%o7,%g0 + stx %g1,[%i0+0] + stx %g2,[%i0+8] + stx %g3,[%i0+16] + stx %g4,[%i0+24] + add %i0,32,%i0 + +L(there): + add %i3,4,%i3 + brz,pt %i3,L(end) + nop + +L(loop2): + ldx [%i1+0],%l0 + add %i1,8,%i1 + ldx [%i2+0],%l4 + add %i2,8,%i2 + srlx %l0,32,%g2 + srlx %l4,32,%g3 + subccc %l0,%l4,%g1 + subccc %g2,%g3,%g0 + stx %g1,[%i0+0] + add %i0,8,%i0 + add %i3,-1,%i3 + brgz,pt %i3,L(loop2) + nop + +L(end): addc %g0,%g0,%i0 + ret + restore +EPILOGUE(mpn_sub_n) diff --git a/rts/gmp/mpn/sparc64/submul1h.asm b/rts/gmp/mpn/sparc64/submul1h.asm new file mode 100644 index 0000000000..7f51ba59c6 --- /dev/null +++ b/rts/gmp/mpn/sparc64/submul1h.asm @@ -0,0 +1,204 @@ +dnl SPARC 64-bit submull/submulu -- Helper for mpn_submul_1 and mpn_mul_1. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. 
+ +ifdef(`LOWPART', +`submull:', +`submulu:') + save %sp,-256,%sp + + sethi %hi(0xffff0000),%o0 + andn %i3,%o0,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f6 + + srl %i3,16,%o0 + st %o0,[%fp-17] + ld [%fp-17],%f11 + fxtod %f10,%f8 + + mov 0,%g3 C cy = 0 + + ld [%i1+4],%f11 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end1) + add %i1,4,%i1 C s1_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-25] + fdtox %f4,%f12 + subcc %i2,1,%i2 + be,pn %icc,E(end2) + std %f12,[%fp-17] + + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + subcc %i2,1,%i2 +dnl be,pn %icc,E(end3) + std %f12,[%fp-33] + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + ld [%i0+DLO],%g5 + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + add %i0,4,%i0 C res_ptr++ + subcc %i2,1,%i2 + be,pn %icc,E(end4) + std %f12,[%fp-17] + + b,a E(loop) + nop C nop is cheap to nullify + + ALIGN(16) +C BEGIN LOOP +E(loop): + fxtod %f10,%f2 + ld [%i1+4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) + ld [%i0+DHI],%g5 + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + sub %i2,2,%i2 + add %i0,4,%i0 C res_ptr++ + + fxtod %f10,%f2 + ld [%i1-4],%f11 + add %i1,4,%i1 C s1_ptr++ + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) + ld [%i0+DLO],%g5 + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-17],%g1 C p0 + fmuld 
%f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4+DHI] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-25] + fdtox %f4,%f12 + std %f12,[%fp-17] + brnz,pt %i2,E(loop) + add %i0,4,%i0 C res_ptr++ +C END LOOP +E(loope): +E(end4): + fxtod %f10,%f2 + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) + ld [%i0+DHI],%g5 + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + fmuld %f2,%f8,%f16 + ldx [%fp-33],%g1 C p0 + fmuld %f2,%f6,%f4 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4+DLO] + fdtox %f16,%f14 + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) + ld [%i0+DLO],%g5 + srlx %g4,32,%g3 + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4+DHI] + b,a E(yyy) + +E(end2): + fxtod %f10,%f2 + fmuld %f2,%f8,%f16 + fmuld %f2,%f6,%f4 + fdtox %f16,%f14 + std %f14,[%fp-41] + fdtox %f4,%f12 + std %f12,[%fp-33] + ld [%i0+DLO],%g5 + ldx [%fp-25],%g2 C p16 + ldx [%fp-17],%g1 C p0 + sllx %g2,16,%g2 C align p16 +E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy + subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2) +ifdef(`LOWPART', +` ld [%i0+DHI],%g5') + srlx %g4,32,%g3 + ldx [%fp-41],%g2 C p16 + ldx [%fp-33],%g1 C p0 + sllx %g2,16,%g2 C align p16 + st %l2,[%i0-4+DLO] + add %g2,%g1,%g1 C add p16 to p0 (ADD1) + add %i0,4,%i0 C res_ptr++ + + add %g3,%g1,%g4 C p += cy +ifdef(`LOWPART', +` subxcc %g5,%g4,%l2') C add *res_ptr to p0 (ADD2) +ifdef(`LOWPART', +` st %l2,[%i0-4+DHI] + srlx %g4,32,%g4') + + addx %g4,0,%g4 + ret + restore %g0,%g4,%o0 C sideeffect: put cy in retreg +ifdef(`LOWPART', +`EPILOGUE(submull)', +`EPILOGUE(submulu)') diff --git a/rts/gmp/mpn/sparc64/submul_1.asm b/rts/gmp/mpn/sparc64/submul_1.asm new file mode 100644 index 0000000000..7c6af0a98b --- /dev/null +++ 
b/rts/gmp/mpn/sparc64/submul_1.asm @@ -0,0 +1,114 @@ +dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and +dnl subtract the result from a second limb vector. + +dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +C INPUT PARAMETERS +C res_ptr i0 +C s1_ptr i1 +C size i2 +C s2_limb i3 + +ASM_START() + .register %g2,#scratch + .register %g3,#scratch + +PROLOGUE(mpn_submul_1) + save %sp,-256,%sp + +C We store 0.0 in f10 and keep it invariant across the two +C function calls below. Note that this is not ABI conformant, +C but since the functions are local, that's acceptable. 
+ifdef(`PIC', +`L(pc): rd %pc,%o7 + ld [%o7+L(noll)-L(pc)],%f10', +` sethi %hh(L(noll)),%g2 + sethi %lm(L(noll)),%g1 + or %g2,%hm(L(noll)),%g2 + or %g1,%lo(L(noll)),%g1 + sllx %g2,32,%g2 + ld [%g1+%g2],%f10') + + sub %i1,%i0,%g1 + srlx %g1,3,%g1 + cmp %g1,%i2 + bcc,pt %xcc,L(nooverlap) + nop + + sllx %i2,3,%g2 C compute stack allocation byte count + add %g2,15,%o0 + and %o0,-16,%o0 + sub %sp,%o0,%sp + add %sp,2223,%o0 + + mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp + call mpn_copyi + mov %i2,%o2 C copy n to mpn_copyi's count parameter + + add %sp,2223,%i1 + +L(nooverlap): +C First multiply-subtract with low 32 bits of s2_limb + mov %i0,%o0 + mov %i1,%o1 + add %i2,%i2,%o2 + call submull + srl %i3,0,%o3 + + mov %o0,%l0 C keep carry-out from submull + +C Now multiply-subtract with high 32 bits of s2_limb, unless it is zero. + srlx %i3,32,%o3 + brz,a,pn %o3,L(small) + mov %o0,%i0 + mov %i1,%o1 + add %i2,%i2,%o2 + call submulu + add %i0,4,%o0 + + add %l0,%o0,%i0 +L(small): + ret + restore %g0,%g0,%g0 +EPILOGUE(mpn_submul_1) + +C Put a zero in the text segment to allow us to get the address +C quickly when compiling for PIC + TEXT + ALIGN(4) +L(noll): + .word 0 + +define(`LO',`(+4)') +define(`HI',`(-4)') + +define(`DLO',`(+4)') +define(`DHI',`(-4)') +define(`LOWPART') +define(`E',`L(l.$1)') +include_mpn(`sparc64/submul1h.asm') + +define(`DLO',`(-4)') +define(`DHI',`(+4)') +undefine(`LOWPART') +define(`E',`L(u.$1)') +include_mpn(`sparc64/submul1h.asm') diff --git a/rts/gmp/mpn/thumb/add_n.s b/rts/gmp/mpn/thumb/add_n.s new file mode 100644 index 0000000000..c1eeb6ca87 --- /dev/null +++ b/rts/gmp/mpn/thumb/add_n.s @@ -0,0 +1,50 @@ +@ ARM/Thumb __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +@ sum in a third limb vector. + +@ Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. 
+ +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +@ License for more details. + +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. + + +@ INPUT PARAMETERS +@ RES_ptr r0 +@ S1_ptr r1 +@ S2_ptr r2 +@ SIZE r3 + +@ NOT TESTED CODE + + .text + .thumb + .align 0 + .global ___gmpn_add_n +___gmpn_add_n: + push {r4, r5, r6, lr} + mov r6, #1 @ init carry save register + +Loop: sub r6, #1 @ restore carry (set iff r6 was 0) + ldmia r1!, {r4} @ load next limb from S1 + ldmia r2!, {r5} @ load next limb from S2 + adc r4, r5 + stmia r0!, {r4} @ store result limb to RES + sbc r6, r6 @ save negated carry + sub r3, #1 + bge Loop @ loop back while decremented count >= 0 (NOTE(review): that is SIZE+1 passes; bgt intended? confirm) + + mov r0, r6 + pop {r4, r5, r6, pc} diff --git a/rts/gmp/mpn/thumb/sub_n.s b/rts/gmp/mpn/thumb/sub_n.s new file mode 100644 index 0000000000..53c292375f --- /dev/null +++ b/rts/gmp/mpn/thumb/sub_n.s @@ -0,0 +1,50 @@ +@ ARM/Thumb __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +@ store difference in a third limb vector. + +@ Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +@ This file is part of the GNU MP Library. 
+ +@ The GNU MP Library is free software; you can redistribute it and/or modify +@ it under the terms of the GNU Lesser General Public License as published by +@ the Free Software Foundation; either version 2.1 of the License, or (at your +@ option) any later version. + +@ The GNU MP Library is distributed in the hope that it will be useful, but +@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +@ License for more details. + +@ You should have received a copy of the GNU Lesser General Public License +@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to +@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +@ MA 02111-1307, USA. + + +@ INPUT PARAMETERS +@ RES_ptr r0 +@ S1_ptr r1 +@ S2_ptr r2 +@ SIZE r3 + +@ NOT TESTED CODE + + .text + .thumb + .align 0 + .global ___gmpn_sub_n +___gmpn_sub_n: + push {r4, r5, r6, lr} + mov r6, #1 @ init carry save register + +Loop: sub r6, #1 @ restore carry (set iff r6 was 0) + ldmia r1!, {r4} @ load next limb from S1 + ldmia r2!, {r5} @ load next limb from S2 + sbc r4, r5 + stmia r0!, {r4} @ store result limb to RES + sbc r6, r6 @ save negated carry + sub r3, #1 + bge Loop @ loop back while decremented count >= 0 (NOTE(review): that is SIZE+1 passes; bgt intended? confirm) + + mov r0, r6 + pop {r4, r5, r6, pc} diff --git a/rts/gmp/mpn/underscore.h b/rts/gmp/mpn/underscore.h new file mode 100644 index 0000000000..240dae0f63 --- /dev/null +++ b/rts/gmp/mpn/underscore.h @@ -0,0 +1,26 @@ +/* +Copyright (C) 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#if __STDC__ +#define C_SYMBOL_NAME(name) _##name +#else +#define C_SYMBOL_NAME(name) _/**/name +#endif diff --git a/rts/gmp/mpn/vax/add_n.s b/rts/gmp/mpn/vax/add_n.s new file mode 100644 index 0000000000..cf4060f521 --- /dev/null +++ b/rts/gmp/mpn/vax/add_n.s @@ -0,0 +1,61 @@ +# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store +# sum in a third limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# s2_ptr (sp + 12) +# size (sp + 16) + +.text + .align 1 +.globl ___gmpn_add_n +___gmpn_add_n: + .word 0x0 + movl 16(ap),r0 + movl 12(ap),r1 + movl 8(ap),r2 + movl 4(ap),r3 + mnegl r0,r5 + addl2 $3,r0 + ashl $-2,r0,r0 # unroll loop count + bicl2 $-4,r5 # mask out low 2 bits + movaq (r5)[r5],r5 # 9x + jmp Loop(r5) + +Loop: movl (r2)+,r4 + adwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + adwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + adwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + adwc (r1)+,r4 + movl r4,(r3)+ + sobgtr r0,Loop + + adwc r0,r0 + ret diff --git a/rts/gmp/mpn/vax/addmul_1.s b/rts/gmp/mpn/vax/addmul_1.s new file mode 100644 index 0000000000..379061dcb7 --- /dev/null +++ b/rts/gmp/mpn/vax/addmul_1.s @@ -0,0 +1,126 @@ +# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add +# the result to a second limb vector. + +# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# size (sp + 12) +# s2_limb (sp + 16) + +.text + .align 1 +.globl ___gmpn_addmul_1 +___gmpn_addmul_1: + .word 0xfc0 + movl 12(ap),r4 + movl 8(ap),r8 + movl 4(ap),r9 + movl 16(ap),r6 + jlss s2_big + + clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L1 + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc $0,r3 + addl2 r2,(r9)+ + adwc $0,r3 +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc $0,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop1 + movl r11,r0 + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 + addl2 r2,(r9)+ + adwc $0,r3 + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 + addl2 r2,(r9)+ + adwc $0,r3 +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + addl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 + addl2 r2,(r9)+ + adwc r1,r3 + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + addl2 r10,(r9)+ + adwc r1,r11 + + sobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/rts/gmp/mpn/vax/lshift.s b/rts/gmp/mpn/vax/lshift.s new file mode 100644 index 0000000000..fd311a9782 --- /dev/null +++ b/rts/gmp/mpn/vax/lshift.s @@ -0,0 +1,58 @@ +# VAX __gmpn_lshift -- left shift. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# rptr (sp + 4) +# sptr (sp + 8) +# size (sp + 12) +# cnt (sp + 16) +# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers +# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers + +.text + .align 1 +.globl ___gmpn_lshift +___gmpn_lshift: + .word 0x1c0 + movl 4(ap),r7 + movl 8(ap),r6 + movl 12(ap),r1 + movl 16(ap),r8 + + moval (r6)[r1],r6 + moval (r7)[r1],r7 + clrl r3 + movl -(r6),r2 + ashq r8,r2,r4 + movl r5,r0 + movl r2,r3 + decl r1 + jeql Lend + +Loop: movl -(r6),r2 + ashq r8,r2,r4 + movl r5,-(r7) + movl r2,r3 + jsobgtr r1,Loop + +Lend: movl r4,-4(r7) + ret diff --git a/rts/gmp/mpn/vax/mul_1.s b/rts/gmp/mpn/vax/mul_1.s new file mode 100644 index 0000000000..708e8ca6ca --- /dev/null +++ b/rts/gmp/mpn/vax/mul_1.s @@ -0,0 +1,123 @@ +# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store +# the result in a second limb vector. + +# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. 
+ +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# size (sp + 12) +# s2_limb (sp + 16) + +.text + .align 1 +.globl ___gmpn_mul_1 +___gmpn_mul_1: + .word 0xfc0 + movl 12(ap),r4 + movl 8(ap),r8 + movl 4(ap),r9 + movl 16(ap),r6 + jlss s2_big + +# One might want to combine the addl2 and the store below, but that +# is actually just slower according to my timing tests. 
(VAX 3600) + + clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L1 + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc $0,r3 + movl r2,(r9)+ +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc $0,r11 + movl r10,(r9)+ + + sobgtr r7,Loop1 + movl r11,r0 + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 + movl r2,(r9)+ + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + movl r10,(r9)+ + + sobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 + movl r2,(r9)+ +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + movl r10,(r9)+ + + sobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r1,r3 + addl2 r11,r2 + adwc r6,r3 + movl r2,(r9)+ + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r1,r11 + addl2 r3,r10 + adwc r6,r11 + movl r10,(r9)+ + + sobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/rts/gmp/mpn/vax/rshift.s b/rts/gmp/mpn/vax/rshift.s new file mode 100644 index 0000000000..515813208d --- /dev/null +++ b/rts/gmp/mpn/vax/rshift.s @@ -0,0 +1,56 @@ +# VAX __gmpn_rshift -- right shift. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# rptr (sp + 4) +# sptr (sp + 8) +# size (sp + 12) +# cnt (sp + 16) +# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers +# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers + +.text + .align 1 +.globl ___gmpn_rshift +___gmpn_rshift: + .word 0x1c0 + movl 4(ap),r7 + movl 8(ap),r6 + movl 12(ap),r1 + movl 16(ap),r8 + + movl (r6)+,r2 + subl3 r8,$32,r8 + ashl r8,r2,r0 + decl r1 + jeql Lend + +Loop: movl (r6)+,r3 + ashq r8,r2,r4 + movl r5,(r7)+ + movl r3,r2 + jsobgtr r1,Loop + +Lend: clrl r3 + ashq r8,r2,r4 + movl r5,(r7) + ret diff --git a/rts/gmp/mpn/vax/sub_n.s b/rts/gmp/mpn/vax/sub_n.s new file mode 100644 index 0000000000..eff4b1c044 --- /dev/null +++ b/rts/gmp/mpn/vax/sub_n.s @@ -0,0 +1,61 @@ +# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store +# difference in a third limb vector. + +# Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# s2_ptr (sp + 12) +# size (sp + 16) + +.text + .align 1 +.globl ___gmpn_sub_n +___gmpn_sub_n: + .word 0x0 + movl 16(ap),r0 + movl 12(ap),r1 + movl 8(ap),r2 + movl 4(ap),r3 + mnegl r0,r5 + addl2 $3,r0 + ashl $-2,r0,r0 # unroll loop count + bicl2 $-4,r5 # mask out low 2 bits + movaq (r5)[r5],r5 # 9x + jmp Loop(r5) + +Loop: movl (r2)+,r4 + sbwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + sbwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + sbwc (r1)+,r4 + movl r4,(r3)+ + movl (r2)+,r4 + sbwc (r1)+,r4 + movl r4,(r3)+ + sobgtr r0,Loop + + adwc r0,r0 + ret diff --git a/rts/gmp/mpn/vax/submul_1.s b/rts/gmp/mpn/vax/submul_1.s new file mode 100644 index 0000000000..be42286935 --- /dev/null +++ b/rts/gmp/mpn/vax/submul_1.s @@ -0,0 +1,126 @@ +# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract +# the result from a second limb vector. + +# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc. + +# This file is part of the GNU MP Library. + +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. + +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +# INPUT PARAMETERS +# res_ptr (sp + 4) +# s1_ptr (sp + 8) +# size (sp + 12) +# s2_limb (sp + 16) + +.text + .align 1 +.globl ___gmpn_submul_1 +___gmpn_submul_1: + .word 0xfc0 + movl 12(ap),r4 + movl 8(ap),r8 + movl 4(ap),r9 + movl 16(ap),r6 + jlss s2_big + + clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L1 + clrl r11 + +# Loop for S2_LIMB < 0x80000000 +Loop1: movl (r8)+,r1 + jlss L1n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc $0,r3 + subl2 r2,(r9)+ + adwc $0,r3 +L1: movl (r8)+,r1 + jlss L1n1 +L1p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc $0,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop1 + movl r11,r0 + ret + +L1n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 + subl2 r2,(r9)+ + adwc $0,r3 + movl (r8)+,r1 + jgeq L1p1 +L1n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop1 + movl r11,r0 + ret + + +s2_big: clrl r3 + incl r4 + ashl $-1,r4,r7 + jlbc r4,L2 + clrl r11 + +# Loop for S2_LIMB >= 0x80000000 +Loop2: movl (r8)+,r1 + jlss L2n0 + emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r1,r3 + subl2 r2,(r9)+ + adwc $0,r3 +L2: movl (r8)+,r1 + jlss L2n1 +L2p1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r1,r11 + subl2 r10,(r9)+ + adwc $0,r11 + + sobgtr r7,Loop2 + movl r11,r0 + ret + +L2n0: emul r1,r6,$0,r2 + addl2 r11,r2 + adwc r6,r3 + subl2 r2,(r9)+ + adwc r1,r3 + movl (r8)+,r1 + jgeq L2p1 +L2n1: emul r1,r6,$0,r10 + addl2 r3,r10 + adwc r6,r11 + subl2 r10,(r9)+ + adwc r1,r11 + + sobgtr r7,Loop2 + movl r11,r0 + ret diff --git a/rts/gmp/mpn/x86/README b/rts/gmp/mpn/x86/README new file mode 100644 index 0000000000..3507548b8c --- /dev/null +++ b/rts/gmp/mpn/x86/README @@ -0,0 +1,40 @@ + + X86 MPN SUBROUTINES + + +This directory contains mpn functions for various 80x86 chips. 
+ + +CODE ORGANIZATION + + x86 i386, i486, generic + x86/pentium Intel Pentium (P5, P54) + x86/pentium/mmx Intel Pentium with MMX (P55) + x86/p6 Intel Pentium Pro + x86/p6/mmx Intel Pentium II, III + x86/p6/p3mmx Intel Pentium III + x86/k6 AMD K6, K6-2, K6-3 + x86/k6/mmx + x86/k6/k62mmx AMD K6-2 + x86/k7 AMD Athlon + x86/k7/mmx + + +The x86 directory is also the main support for P6 at the moment, and +is something of a blended style, meant to be reasonable on all x86s. + + + +STATUS + +The code is well-optimized for AMD and Intel chips, but not so well +optimized for Cyrix chips. + + + +RELEVANT OPTIMIZATION ISSUES + +For implementations with slow double shift instructions (SHLD and +SHRD), it might be better to mimic their operation with SHL+SHR+OR. +(M2 is likely to benefit from that, but not Pentium due to its slow +plain SHL and SHR.) diff --git a/rts/gmp/mpn/x86/README.family b/rts/gmp/mpn/x86/README.family new file mode 100644 index 0000000000..3bc73f58b0 --- /dev/null +++ b/rts/gmp/mpn/x86/README.family @@ -0,0 +1,333 @@ + + X86 CPU FAMILY MPN SUBROUTINES + + +This file has some notes on things common to all the x86 family code. + + + +ASM FILES + +The x86 .asm files are BSD style x86 assembler code, first put through m4 +for macro processing. The generic mpn/asm-defs.m4 is used, together with +mpn/x86/x86-defs.m4. Detailed notes are in those files. + +The code is meant for use with GNU "gas" or a system "as". There's no +support for assemblers that demand Intel style, and with gas freely +available and easy to use that shouldn't be a problem. + + + +STACK FRAME + +m4 macros are used to define the parameters passed on the stack, and these +act like comments on what the stack frame looks like too. For example, +mpn_mul_1() has the following. + + defframe(PARAM_MULTIPLIER, 16) + defframe(PARAM_SIZE, 12) + defframe(PARAM_SRC, 8) + defframe(PARAM_DST, 4) + +Here PARAM_MULTIPLIER gets defined as `FRAME+16(%esp)', and the others +similarly. 
The return address is at offset 0, but there's not normally any +need to access that. + +FRAME is redefined as necessary through the code so it's the number of bytes +pushed on the stack, and hence the offsets in the parameter macros stay +correct. At the start of a routine FRAME should be zero. + + deflit(`FRAME',0) + ... + deflit(`FRAME',4) + ... + deflit(`FRAME',8) + ... + +Helper macros FRAME_pushl(), FRAME_popl(), FRAME_addl_esp() and +FRAME_subl_esp() exist to adjust FRAME for the effect of those instructions, +and can be used instead of explicit definitions if preferred. +defframe_pushl() is a combination of FRAME_pushl() and defframe(). + +There's generally some slackness in redefining FRAME. If new values aren't +going to get used, then the redefinitions are omitted to keep from +cluttering up the code. This happens for instance at the end of a routine, +where there might be just four register pops and then a ret, so FRAME isn't +getting used. + +Local variables and saved registers can be similarly defined, with negative +offsets representing stack space below the initial stack pointer. For +example, + + defframe(SAVE_ESI, -4) + defframe(SAVE_EDI, -8) + defframe(VAR_COUNTER,-12) + + deflit(STACK_SPACE, 12) + +Here STACK_SPACE gets used in a "subl $STACK_SPACE, %esp" to allocate the +space, and that instruction must be followed by a redefinition of FRAME +(setting it equal to STACK_SPACE) to reflect the change in %esp. + +Definitions for pushed registers are only put in when they're going to be +used. If registers are just saved and restored with pushes and pops then +definitions aren't made. + + + +ASSEMBLER EXPRESSIONS + +Only addition and subtraction seem to be universally available, certainly +that's all the Solaris 8 "as" seems to accept. If expressions are wanted +then m4 eval() should be used. + +In particular note that a "/" anywhere in a line starts a comment in Solaris +"as", and in some configurations of gas too.
+ + addl $32/2, %eax <-- wrong + + addl $eval(32/2), %eax <-- right + +Binutils gas/config/tc-i386.c has a choice between "/" being a comment +anywhere in a line, or only at the start. FreeBSD patches 2.9.1 to select +the latter, and as of 2.9.5 it's the default for GNU/Linux too. + + + +ASSEMBLER COMMENTS + +Solaris "as" doesn't support "#" commenting, using /* */ instead, +unfortunately. For that reason "C" commenting is used (see asm-defs.m4) and +the intermediate ".s" files have no comments. + + + +ZERO DISPLACEMENTS + +In a couple of places addressing modes like 0(%ebx) with a byte-sized zero +displacement are wanted, rather than (%ebx) with no displacement. These are +either for computed jumps or to get desirable code alignment. Explicit +.byte sequences are used to ensure the assembler doesn't turn 0(%ebx) into +(%ebx). The Zdisp() macro in x86-defs.m4 is used for this. + +Current gas 2.9.5 or recent 2.9.1 leave 0(%ebx) as written, but old gas +1.92.3 changes it. In general changing would be the sort of "optimization" +an assembler might perform, hence explicit ".byte"s are used where +necessary. + + + +SHLD/SHRD INSTRUCTIONS + +The %cl count forms of double shift instructions like "shldl %cl,%eax,%ebx" +must be written "shldl %eax,%ebx" for some assemblers. gas takes either, +Solaris "as" doesn't allow %cl, gcc generates %cl for gas and NeXT (which is +gas), and omits %cl elsewhere. + +For GMP an autoconf test is used to determine whether %cl should be used and +the macros shldl, shrdl, shldw and shrdw in mpn/x86/x86-defs.m4 then pass +through or omit %cl as necessary. See comments with those macros for usage. + + + +DIRECTION FLAG + +The x86 calling conventions say that the direction flag should be clear at +function entry and exit. (See iBCS2 and SVR4 ABI books, references below.) + +Although this has been so since the year dot, it's not absolutely clear +whether it's universally respected. 
Since it's better to be safe than +sorry, gmp follows glibc and does a "cld" if it depends on the direction +flag being clear. This happens only in a few places. + + + +POSITION INDEPENDENT CODE + +Defining the symbol PIC in m4 processing selects position independent code. +This mainly affects computed jumps, and these are implemented in a +self-contained fashion (without using the global offset table). The few +calls from assembly code to global functions use the normal procedure +linkage table. + +PIC is necessary for ELF shared libraries because they can be mapped into +different processes at different virtual addresses. Text relocations in +shared libraries are allowed, but that presumably means a page with such a +relocation isn't shared. The use of the PLT for PIC adds a fixed cost to +every function call, which is small but might be noticeable when working with +small operands. + +Calls from one library function to another don't need to go through the PLT, +since of course the call instruction uses a displacement, not an absolute +address, and the relative locations of object files are known when libgmp.so +is created. "ld -Bsymbolic" (or "gcc -Wl,-Bsymbolic") will resolve calls +this way, so that there's no jump through the PLT, but of course leaving +setups of the GOT address in %ebx that may be unnecessary. + +The %ebx setup could be avoided in assembly if a separate option controlled +PIC for calls as opposed to computed jumps etc. But there's only ever +likely to be a handful of calls out of assembler, and getting the same +optimization for C intra-library calls would be more important. There seems +no easy way to tell gcc that certain functions can be called non-PIC, and +unfortunately many gmp functions use the global memory allocation variables, +so they need the GOT anyway. Object files with no global data references +and only intra-library calls could go into the library as non-PIC under +-Bsymbolic. 
Integrating this into libtool and automake is left as an +exercise for the reader. + + + +SIMPLE LOOPS + +The overheads in setting up for an unrolled loop can mean that at small +sizes a simple loop is faster. Making small sizes go fast is important, +even if it adds a cycle or two to bigger sizes. To this end various +routines choose between a simple loop and an unrolled loop according to +operand size. The path to the simple loop, or to special case code for +small sizes, is always as fast as possible. + +Adding a simple loop requires a conditional jump to choose between the +simple and unrolled code. The size of a branch misprediction penalty +affects whether a simple loop is worthwhile. + +The convention is for an m4 definition UNROLL_THRESHOLD to set the crossover +point, with sizes < UNROLL_THRESHOLD using the simple loop, sizes >= +UNROLL_THRESHOLD using the unrolled loop. If position independent code adds +a couple of cycles to an unrolled loop setup, the threshold will vary with +PIC or non-PIC. Something like the following is typical. + + ifdef(`PIC',` + deflit(UNROLL_THRESHOLD, 10) + ',` + deflit(UNROLL_THRESHOLD, 8) + ') + +There's no automated way to determine the threshold. Setting it to a small +value and then to a big value makes it possible to measure the simple and +unrolled loops each over a range of sizes, from which the crossover point +can be determined. Alternatively, just adjust the threshold up or down until +there are no more speedups. + + + +UNROLLED LOOP CODING + +The x86 addressing modes allow a byte displacement of -128 to +127, making +it possible to access 256 bytes, which is 64 limbs, without adjusting +pointer registers within the loop. Dword sized displacements can be used +too, but they increase code size, and unrolling to 64 ought to be enough. + +When unrolling to the full 64 limbs/loop, the limb at the top of the loop +will have a displacement of -128, so pointers have to have a corresponding ++128 added before entering the loop.
When unrolling to 32 limbs/loop +displacements 0 to 127 can be used with 0 at the top of the loop and no +adjustment needed to the pointers. + +Where 64 limbs/loop is supported, the +128 adjustment is done only when 64 +limbs/loop is selected. Usually the gain in speed using 64 instead of 32 or +16 is small, so support for 64 limbs/loop is generally only for comparison. + + + +COMPUTED JUMPS + +When working from least significant limb to most significant limb (most +routines) the computed jump and pointer calculations in preparation for an +unrolled loop are as follows. + + S = operand size in limbs + N = number of limbs per loop (UNROLL_COUNT) + L = log2 of unrolling (UNROLL_LOG2) + M = mask for unrolling (UNROLL_MASK) + C = code bytes per limb in the loop + B = bytes per limb (4 for x86) + + computed jump (-S & M) * C + entrypoint + subtract from pointers (-S & M) * B + initial loop counter (S-1) >> L + displacements 0 to B*(N-1) + +The loop counter is decremented at the end of each loop, and the looping +stops when the decrement takes the counter to -1. The displacements are those +used when addressing each limb, e.g. a load with "movl disp(%ebx), %eax". + +Usually the multiply by "C" can be handled without an imul, using instead an +leal, or a shift and subtract. + +When working from most significant to least significant limb (e.g. mpn_lshift +and mpn_copyd), the calculations change as follows. + + add to pointers (-S & M) * B + displacements 0 to -B*(N-1) + + + +OLD GAS 1.92.3 + +This version comes with FreeBSD 2.2.8 and has a couple of gremlins that +affect gmp code. + +Firstly, an expression involving two forward references to labels comes out +as zero. For example, + + addl $bar-foo, %eax + foo: + nop + bar: + +This should lead to "addl $1, %eax", but it comes out as "addl $0, %eax".
+When only one forward reference is involved, it works correctly, as for +example, + + foo: + addl $bar-foo, %eax + nop + bar: + +Secondly, an expression involving two labels can't be used as the +displacement for an leal. For example, + + foo: + nop + bar: + leal bar-foo(%eax,%ebx,8), %ecx + +A slightly cryptic error is given, "Unimplemented segment type 0 in +parse_operand". When only one label is used it's ok, and the label can be a +forward reference too, as for example, + + leal foo(%eax,%ebx,8), %ecx + nop + foo: + +These problems only affect PIC computed jump calculations. The workarounds +are just to do an leal without a displacement and then an addl, and to make +sure the code is placed so that there's at most one forward reference in the +addl. + + + +REFERENCES + +"Intel Architecture Software Developer's Manual", volumes 1 to 3, 1999, +order numbers 243190, 243191 and 243192. Available on-line, + + ftp://download.intel.com/design/PentiumII/manuals/243190.htm + ftp://download.intel.com/design/PentiumII/manuals/243191.htm + ftp://download.intel.com/design/PentiumII/manuals/243192.htm + +"Intel386 Family Binary Compatibility Specification 2", Intel Corporation, +published by McGraw-Hill, 1991, ISBN 0-07-031219-2. + +"System V Application Binary Interface", Unix System Laboratories Inc, 1992, +published by Prentice Hall, ISBN 0-13-880410-9. And the "Intel386 Processor +Supplement", AT&T, 1991, ISBN 0-13-877689-X. (These have details of ELF +shared library PIC coding.) + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/mpn/x86/addsub_n.S b/rts/gmp/mpn/x86/addsub_n.S new file mode 100644 index 0000000000..fe6f648f53 --- /dev/null +++ b/rts/gmp/mpn/x86/addsub_n.S @@ -0,0 +1,174 @@ +/* Currently not working and not used. */ + +/* +Copyright (C) 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + + +#define SAVE_BORROW_RESTORE_CARRY(r) adcl r,r; shll $31,r +#define SAVE_CARRY_RESTORE_BORROW(r) adcl r,r + + .globl mpn_addsub_n_0 + .globl mpn_addsub_n_1 + +/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s2,r2==s1. + We let subtraction and addition alternate in being two limbs + ahead of the other, thereby avoiding some SAVE_RESTORE. 
*/ +// r1 = r2 + r1 edi = esi + edi +// r2 = r2 - r1 esi = esi - edi +// s1 s2 +// r2 r1 +// eax,ebx,ecx,edx,esi,edi,ebp +mpn_addsub_n_0: + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 36(%esp),%ebp /* size */ + + shrl $2,%ebp + xorl %edx,%edx + .align 4 +Loop0: // L=load E=execute S=store + movl (%esi),%ebx // sub 0 L + movl 4(%esi),%ecx // sub 1 L + sbbl (%edi),%ebx // sub 0 LE + sbbl 4(%edi),%ecx // sub 1 LE +// SAVE_BORROW_RESTORE_CARRY(%edx) + movl (%esi),%eax // add 0 L + adcl %eax,(%edi) // add 0 LES + movl 4(%esi),%eax // add 1 L + adcl %eax,4(%edi) // add 1 LES + movl %ebx,(%esi) // sub 0 S + movl %ecx,4(%esi) // sub 1 S + movl 8(%esi),%ebx // add 2 L + adcl 8(%edi),%ebx // add 2 LE + movl 12(%esi),%ecx // add 3 L + adcl 12(%edi),%ecx // add 3 LE +// SAVE_CARRY_RESTORE_BORROW(%edx) + movl 8(%edi),%eax // sub 2 L + sbbl %eax,8(%esi) // sub 2 LES + movl 12(%edi),%eax // sub 3 L + sbbl %eax,12(%esi) // sub 3 LES + movl %ebx,8(%edi) // add 2 S + movl %ecx,12(%edi) // add 3 S + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ebp + jnz Loop0 + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s1,r2==s2. + We let subtraction and addition alternate in being two limbs + ahead of the other, thereby avoiding some SAVE_RESTORE. 
*/ +// r1 = r1 + r2 edi = edi + esi +// r2 = r1 - r2 esi = edi - esi +// s2 s1 +// r2 r1 +// eax,ebx,ecx,edx,esi,edi,ebp +mpn_addsub_n_1: + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 36(%esp),%ebp /* size */ + + shrl $2,%ebp + xorl %edx,%edx + .align 4 +Loop1: // L=load E=execute S=store + movl (%edi),%ebx // sub 0 L + sbbl (%esi),%ebx // sub 0 LE + movl 4(%edi),%ecx // sub 1 L + sbbl 4(%esi),%ecx // sub 1 LE +// SAVE_BORROW_RESTORE_CARRY(%edx) + movl (%esi),%eax // add 0 L + adcl %eax,(%edi) // add 0 LES + movl 4(%esi),%eax // add 1 L + adcl %eax,4(%edi) // add 1 LES + movl %ebx,(%esi) // sub 0 S + movl %ecx,4(%esi) // sub 1 S + movl 8(%esi),%ebx // add 2 L + adcl 8(%edi),%ebx // add 2 LE + movl 12(%esi),%ecx // add 3 L + adcl 12(%edi),%ecx // add 3 LE +// SAVE_CARRY_RESTORE_BORROW(%edx) + movl 8(%edi),%eax // sub 2 L + sbbl 8(%esi),%eax // sub 2 LES + movl %eax,8(%esi) // sub 2 S + movl 12(%edi),%eax // sub 3 L + sbbl 12(%esi),%eax // sub 3 LE + movl %eax,12(%esi) // sub 3 S + movl %ebx,8(%edi) // add 2 S + movl %ecx,12(%edi) // add 3 S + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ebp + jnz Loop1 + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + .globl mpn_copy +mpn_copy: + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* size */ + + shrl $2,%ebp + .align 4 +Loop2: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl %eax,8(%edi) + movl %ebx,12(%edi) + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ebp + jnz Loop2 + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret diff --git a/rts/gmp/mpn/x86/aors_n.asm b/rts/gmp/mpn/x86/aors_n.asm new file mode 100644 index 0000000000..18ef816b4d --- /dev/null +++ b/rts/gmp/mpn/x86/aors_n.asm @@ -0,0 +1,187 @@ +dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and 
subtraction. + +dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +ifdef(`OPERATION_add_n',` + define(M4_inst, adcl) + define(M4_function_n, mpn_add_n) + define(M4_function_nc, mpn_add_nc) + +',`ifdef(`OPERATION_sub_n',` + define(M4_inst, sbbl) + define(M4_function_n, mpn_sub_n) + define(M4_function_nc, mpn_sub_nc) + +',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n +')')') + +MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) + + +C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); + +defframe(PARAM_CARRY,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC2, 12) +defframe(PARAM_SRC1, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) + +PROLOGUE(M4_function_nc) +deflit(`FRAME',0) + + pushl %edi FRAME_pushl() + pushl %esi FRAME_pushl() + + movl PARAM_DST,%edi + movl PARAM_SRC1,%esi + movl PARAM_SRC2,%edx + movl PARAM_SIZE,%ecx + + movl %ecx,%eax + shrl $3,%ecx C compute count for unrolled 
loop + negl %eax + andl $7,%eax C get index where to start loop + jz LF(M4_function_n,oopgo) C necessary special case for 0 + incl %ecx C adjust loop count + shll $2,%eax C adjustment for pointers... + subl %eax,%edi C ... since they are offset ... + subl %eax,%esi C ... by a constant when we ... + subl %eax,%edx C ... enter the loop + shrl $2,%eax C restore previous value + +ifdef(`PIC',` + C Calculate start address in loop for PIC. Due to limitations in + C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal + call L(0a) +L(0a): leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $LF(M4_function_n,oop)-L(0a)-3,%eax + addl $4,%esp +',` + C Calculate start address in loop for non-PIC. + leal LF(M4_function_n,oop)-3(%eax,%eax,8),%eax +') + + C These lines initialize carry from the 5th parameter. Should be + C possible to simplify. + pushl %ebp FRAME_pushl() + movl PARAM_CARRY,%ebp + shrl $1,%ebp C shift bit 0 into carry + popl %ebp FRAME_popl() + + jmp *%eax C jump into loop + +EPILOGUE() + + + ALIGN(8) +PROLOGUE(M4_function_n) +deflit(`FRAME',0) + + pushl %edi FRAME_pushl() + pushl %esi FRAME_pushl() + + movl PARAM_DST,%edi + movl PARAM_SRC1,%esi + movl PARAM_SRC2,%edx + movl PARAM_SIZE,%ecx + + movl %ecx,%eax + shrl $3,%ecx C compute count for unrolled loop + negl %eax + andl $7,%eax C get index where to start loop + jz L(oop) C necessary special case for 0 + incl %ecx C adjust loop count + shll $2,%eax C adjustment for pointers... + subl %eax,%edi C ... since they are offset ... + subl %eax,%esi C ... by a constant when we ... + subl %eax,%edx C ... enter the loop + shrl $2,%eax C restore previous value + +ifdef(`PIC',` + C Calculate start address in loop for PIC. Due to limitations in + C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal + call L(0b) +L(0b): leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $L(oop)-L(0b)-3,%eax + addl $4,%esp +',` + C Calculate start address in loop for non-PIC. 
+ leal L(oop)-3(%eax,%eax,8),%eax +') + jmp *%eax C jump into loop + +L(oopgo): + pushl %ebp FRAME_pushl() + movl PARAM_CARRY,%ebp + shrl $1,%ebp C shift bit 0 into carry + popl %ebp FRAME_popl() + + ALIGN(8) +L(oop): movl (%esi),%eax + M4_inst (%edx),%eax + movl %eax,(%edi) + movl 4(%esi),%eax + M4_inst 4(%edx),%eax + movl %eax,4(%edi) + movl 8(%esi),%eax + M4_inst 8(%edx),%eax + movl %eax,8(%edi) + movl 12(%esi),%eax + M4_inst 12(%edx),%eax + movl %eax,12(%edi) + movl 16(%esi),%eax + M4_inst 16(%edx),%eax + movl %eax,16(%edi) + movl 20(%esi),%eax + M4_inst 20(%edx),%eax + movl %eax,20(%edi) + movl 24(%esi),%eax + M4_inst 24(%edx),%eax + movl %eax,24(%edi) + movl 28(%esi),%eax + M4_inst 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz L(oop) + + sbbl %eax,%eax + negl %eax + + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/aorsmul_1.asm b/rts/gmp/mpn/x86/aorsmul_1.asm new file mode 100644 index 0000000000..f32ad83989 --- /dev/null +++ b/rts/gmp/mpn/x86/aorsmul_1.asm @@ -0,0 +1,134 @@ +dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a +dnl limb and add the result to a second limb vector. + + +dnl Copyright (C) 1992, 1994, 1997, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +ifdef(`OPERATION_addmul_1',` + define(M4_inst, addl) + define(M4_function_1, mpn_addmul_1) + +',`ifdef(`OPERATION_submul_1',` + define(M4_inst, subl) + define(M4_function_1, mpn_submul_1) + +',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 +')')') + +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) + + +C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); + +define(PARAM_MULTIPLIER, `FRAME+16(%esp)') +define(PARAM_SIZE, `FRAME+12(%esp)') +define(PARAM_SRC, `FRAME+8(%esp)') +define(PARAM_DST, `FRAME+4(%esp)') + + TEXT + ALIGN(8) + +PROLOGUE(M4_function_1) +deflit(`FRAME',0) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%ecx + + xorl %ebx,%ebx + andl $3,%ecx + jz L(end0) + +L(oop0): + movl (%esi),%eax + mull PARAM_MULTIPLIER + leal 4(%esi),%esi + addl %ebx,%eax + movl $0,%ebx + adcl %ebx,%edx + M4_inst %eax,(%edi) + adcl %edx,%ebx C propagate carry into cylimb + + leal 4(%edi),%edi + decl %ecx + jnz L(oop0) + +L(end0): + movl PARAM_SIZE,%ecx + shrl $2,%ecx + jz L(end) + + ALIGN(8) +L(oop): movl (%esi),%eax + mull PARAM_MULTIPLIER + addl %eax,%ebx + movl $0,%ebp + adcl %edx,%ebp + + movl 4(%esi),%eax + mull PARAM_MULTIPLIER + M4_inst %ebx,(%edi) + adcl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + movl 8(%esi),%eax + mull PARAM_MULTIPLIER + M4_inst %ebp,4(%edi) + adcl %eax,%ebx C new lo + cylimb + movl $0,%ebp + adcl %edx,%ebp + + movl 12(%esi),%eax + mull PARAM_MULTIPLIER + M4_inst %ebx,8(%edi) + adcl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + M4_inst %ebp,12(%edi) + adcl $0,%ebx C propagate carry into 
cylimb + + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ecx + jnz L(oop) + +L(end): movl %ebx,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/copyd.asm b/rts/gmp/mpn/x86/copyd.asm new file mode 100644 index 0000000000..439640e836 --- /dev/null +++ b/rts/gmp/mpn/x86/copyd.asm @@ -0,0 +1,80 @@ +dnl x86 mpn_copyd -- copy limb vector, decrementing. +dnl +dnl Future: On P6 an MMX loop should be able to go faster than this code. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Copy src,size to dst,size, working from high to low addresses. +C +C The code here is very generic and can be expected to be reasonable on all +C the x86 family. +C +C P5 - 1.0 cycles/limb. +C +C P6 - 2.4 cycles/limb, approx 40 cycles startup. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) + +PROLOGUE(mpn_copyd) + C eax saved esi + C ebx + C ecx counter + C edx saved edi + C esi src + C edi dst + C ebp + + movl PARAM_SIZE, %ecx + movl %esi, %eax + + movl PARAM_SRC, %esi + movl %edi, %edx + + movl PARAM_DST, %edi + leal -4(%esi,%ecx,4), %esi + + leal -4(%edi,%ecx,4), %edi + + std + + rep + movsl + + cld + + movl %eax, %esi + movl %edx, %edi + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/copyi.asm b/rts/gmp/mpn/x86/copyi.asm new file mode 100644 index 0000000000..5bc4e36689 --- /dev/null +++ b/rts/gmp/mpn/x86/copyi.asm @@ -0,0 +1,79 @@ +dnl x86 mpn_copyi -- copy limb vector, incrementing. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Copy src,size to dst,size, working from low to high addresses. +C +C The code here is very generic and can be expected to be reasonable on all +C the x86 family. +C +C P5 - 1.0 cycles/limb. +C +C P6 - 0.75 cycles/limb. 
An MMX based copy was tried, but was found to be +C slower than a rep movs in all cases. The fastest MMX found was 0.8 +C cycles/limb (when fully aligned). A rep movs seems to have a startup +C time of about 15 cycles, but doing something special for small sizes +C could lead to a branch misprediction that would destroy any saving. +C For now a plain rep movs seems ok for P6. + +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) + + C eax saved esi + C ebx + C ecx counter + C edx saved edi + C esi src + C edi dst + C ebp + +PROLOGUE(mpn_copyi) + + movl PARAM_SIZE, %ecx + movl %esi, %eax + + movl PARAM_SRC, %esi + movl %edi, %edx + + movl PARAM_DST, %edi + + cld C better safe than sorry, see mpn/x86/README.family + + rep + movsl + + movl %eax, %esi + movl %edx, %edi + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/diveby3.asm b/rts/gmp/mpn/x86/diveby3.asm new file mode 100644 index 0000000000..df879da9e1 --- /dev/null +++ b/rts/gmp/mpn/x86/diveby3.asm @@ -0,0 +1,115 @@ +dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. 
If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl The following all have their own optimized versions of this routine, +dnl but for reference the code here runs as follows. +dnl +dnl cycles/limb +dnl P54 18.0 +dnl P55 17.0 +dnl P6 14.5 +dnl K6 14.0 +dnl K7 10.0 + + +include(`../config.m4') + + +C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t carry); + +defframe(PARAM_CARRY,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl multiplicative inverse of 3, modulo 2^32 +deflit(INVERSE_3, 0xAAAAAAAB) + +dnl ceil(b/3) and ceil(b*2/3) where b=2^32 +deflit(ONE_THIRD_CEIL, 0x55555556) +deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB) + + .text + ALIGN(8) + +PROLOGUE(mpn_divexact_by3c) +deflit(`FRAME',0) + + movl PARAM_SRC, %ecx + pushl %ebp FRAME_pushl() + + movl PARAM_SIZE, %ebp + pushl %edi FRAME_pushl() + + movl PARAM_DST, %edi + pushl %esi FRAME_pushl() + + movl $INVERSE_3, %esi + pushl %ebx FRAME_pushl() + + leal (%ecx,%ebp,4), %ecx + movl PARAM_CARRY, %ebx + + leal (%edi,%ebp,4), %edi + negl %ebp + + + ALIGN(8) +L(top): + C eax scratch, low product + C ebx carry limb (0 to 3) + C ecx &src[size] + C edx scratch, high product + C esi multiplier + C edi &dst[size] + C ebp counter, limbs, negative + + movl (%ecx,%ebp,4), %eax + + subl %ebx, %eax + + setc %bl + + imull %esi + + cmpl $ONE_THIRD_CEIL, %eax + movl %eax, (%edi,%ebp,4) + + sbbl $-1, %ebx C +1 if eax>=ceil(b/3) + cmpl $TWO_THIRDS_CEIL, %eax + + sbbl $-1, %ebx C +1 if eax>=ceil(b*2/3) + incl %ebp + + jnz L(top) + + + movl %ebx, %eax + popl %ebx + popl %esi + popl %edi + popl %ebp + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/divrem_1.asm b/rts/gmp/mpn/x86/divrem_1.asm new file mode 100644 index 0000000000..12f14676d6 --- /dev/null +++ b/rts/gmp/mpn/x86/divrem_1.asm @@ -0,0 +1,232 @@ +dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient. 
+ +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl cycles/limb +dnl K6 20 +dnl P5 44 +dnl P6 39 +dnl 486 approx 43 maybe +dnl +dnl +dnl The following have their own optimized divrem_1 implementations, but +dnl for reference the code here runs as follows. +dnl +dnl cycles/limb +dnl P6MMX 39 +dnl K7 42 + + +include(`../config.m4') + + +C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C +C Divide src,size by divisor and store the quotient in dst+xsize,size. +C Extend the division to fractional quotient limbs in dst,xsize. Return the +C remainder. Either or both xsize and size can be 0. +C +C mpn_divrem_1c takes a carry parameter which is an initial high limb, +C effectively one extra limb at the top of src,size. Must have +C carry<divisor. +C +C +C Essentially the code is the same as the division based part of +C mpn/generic/divrem_1.c, but has the following advantages. 
+C +C - If gcc isn't being used then divrem_1.c will get the generic C +C udiv_qrnnd() and be rather slow. +C +C - On K6, using the loop instruction is a 10% speedup, but gcc doesn't +C generate that instruction (as of gcc 2.95.2 at least). +C +C A test is done to see if the high limb is less than the divisor, and if so +C one less div is done. A div is between 20 and 40 cycles on the various +C x86s, so assuming high<divisor about half the time, then this test saves +C half that amount. The branch misprediction penalty on each chip is less +C than half a div. +C +C +C K6: Back-to-back div instructions run at 20 cycles, the same as the loop +C here, so it seems there's nothing to gain by rearranging the loop. +C Pairing the mov and loop instructions was found to gain nothing. (The +C same is true of the mpn/x86/mod_1.asm loop.) +C +C With a "decl/jnz" rather than a "loop" this code runs at 22 cycles. +C The loop_or_decljnz macro is an easy way to get a 10% speedup. +C +C The fast K6 multiply might be thought to suit a multiply-by-inverse, +C but that algorithm has been found to suffer from the relatively poor +C carry handling on K6 and too many auxiliary instructions. The +C fractional part however could be done at about 13 c/l. +C +C P5: Moving the load down to pair with the store might save 1 cycle, but +C that doesn't seem worth bothering with, since it'd be only a 2.2% +C saving. 
+C +C Again here the auxiliary instructions hinder a multiply-by-inverse, +C though there might be a 10-15% speedup available + + +defframe(PARAM_CARRY, 24) +defframe(PARAM_DIVISOR,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC, 12) +defframe(PARAM_XSIZE, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(16) + +PROLOGUE(mpn_divrem_1c) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %edi FRAME_pushl() + + movl PARAM_SRC, %edi + pushl %esi FRAME_pushl() + + movl PARAM_DIVISOR, %esi + pushl %ebx FRAME_pushl() + + movl PARAM_DST, %ebx + pushl %ebp FRAME_pushl() + + movl PARAM_XSIZE, %ebp + orl %ecx, %ecx + + movl PARAM_CARRY, %edx + jz LF(mpn_divrem_1,fraction) + + leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part + jmp LF(mpn_divrem_1,integer_top) + +EPILOGUE() + + +PROLOGUE(mpn_divrem_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %edi FRAME_pushl() + + movl PARAM_SRC, %edi + pushl %esi FRAME_pushl() + + movl PARAM_DIVISOR, %esi + orl %ecx,%ecx + + jz L(size_zero) + pushl %ebx FRAME_pushl() + + movl -4(%edi,%ecx,4), %eax C src high limb + xorl %edx, %edx + + movl PARAM_DST, %ebx + pushl %ebp FRAME_pushl() + + movl PARAM_XSIZE, %ebp + cmpl %esi, %eax + + leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part + jae L(integer_entry) + + + C high<divisor, so high of dst is zero, and avoid one div + + movl %edx, (%ebx,%ecx,4) + decl %ecx + + movl %eax, %edx + jz L(fraction) + + +L(integer_top): + C eax scratch (quotient) + C ebx dst+4*xsize-4 + C ecx counter + C edx scratch (remainder) + C esi divisor + C edi src + C ebp xsize + + movl -4(%edi,%ecx,4), %eax +L(integer_entry): + + divl %esi + + movl %eax, (%ebx,%ecx,4) + loop_or_decljnz L(integer_top) + + +L(fraction): + orl %ebp, %ecx + jz L(done) + + movl PARAM_DST, %ebx + + +L(fraction_top): + C eax scratch (quotient) + C ebx dst + C ecx counter + C edx scratch (remainder) + C esi divisor + C edi + C ebp + + xorl %eax, %eax + + divl %esi + + movl %eax, -4(%ebx,%ecx,4) + loop_or_decljnz 
L(fraction_top) + + +L(done): + popl %ebp + movl %edx, %eax + popl %ebx + popl %esi + popl %edi + ret + + +L(size_zero): +deflit(`FRAME',8) + movl PARAM_XSIZE, %ecx + xorl %eax, %eax + + movl PARAM_DST, %edi + + cld C better safe than sorry, see mpn/x86/README.family + + rep + stosl + + popl %esi + popl %edi + ret +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/README b/rts/gmp/mpn/x86/k6/README new file mode 100644 index 0000000000..3ad96c8b89 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/README @@ -0,0 +1,237 @@ + + AMD K6 MPN SUBROUTINES + + + +This directory contains code optimized for AMD K6 CPUs, meaning K6, K6-2 and +K6-3. + +The mmx and k62mmx subdirectories have routines using MMX instructions. All +K6s have MMX, the separate directories are just so that ./configure can omit +them if the assembler doesn't support MMX. + + + + +STATUS + +Times for the loops, with all code and data in L1 cache, are as follows. + + cycles/limb + + mpn_add_n/sub_n 3.25 normal, 2.75 in-place + + mpn_mul_1 6.25 + mpn_add/submul_1 7.65-8.4 (varying with data values) + + mpn_mul_basecase 9.25 cycles/crossproduct (approx) + mpn_sqr_basecase 4.7 cycles/crossproduct (approx) + or 9.2 cycles/triangleproduct (approx) + + mpn_divrem_1 20.0 + mpn_mod_1 20.0 + mpn_divexact_by3 11.0 + + mpn_l/rshift 3.0 + + mpn_copyi/copyd 1.0 + + mpn_com_n 1.5-1.85 \ + mpn_and/andn/ior/xor_n 1.5-1.75 | varying with + mpn_iorn/xnor_n 2.0-2.25 | data alignment + mpn_nand/nior_n 2.0-2.25 / + + mpn_popcount 12.5 + mpn_hamdist 13.0 + + +K6-2 and K6-3 have dual-issue MMX and get the following improvements. + + mpn_l/rshift 1.75 + + mpn_copyi/copyd 0.56 or 1.0 \ + | + mpn_com_n 1.0-1.2 | varying with + mpn_and/andn/ior/xor_n 1.2-1.5 | data alignment + mpn_iorn/xnor_n 1.5-2.0 | + mpn_nand/nior_n 1.75-2.0 / + + mpn_popcount 9.0 + mpn_hamdist 11.5 + + +Prefetching of sources hasn't yet given any joy. With the 3DNow "prefetch" +instruction, code seems to run slower, and with just "mov" loads it doesn't +seem faster. 
Results so far are inconsistent. The K6 does a hardware +prefetch of the second cache line in a sector, so the penalty for not +prefetching in software is reduced. + + + + +NOTES + +All K6 family chips have MMX, but only K6-2 and K6-3 have 3DNow. + +Plain K6 executes MMX instructions only in the X pipe, but K6-2 and K6-3 can +execute them in both X and Y (and together). + +Branch misprediction penalty is 1 to 4 cycles (Optimization Manual +chapter 6 table 12). + +Write-allocate L1 data cache means prefetching of destinations is unnecessary. +Store queue is 7 entries of 64 bits each. + +Floating point multiplications can be done in parallel with integer +multiplications, but there doesn't seem to be any way to make use of this. + + + +OPTIMIZATIONS + +Unrolled loops are used to reduce looping overhead. The unrolling is +configurable up to 32 limbs/loop for most routines, up to 64 for some. + +Sometimes computed jumps into the unrolling are used to handle sizes not a +multiple of the unrolling. An attractive feature of this is that times +smoothly increase with operand size, but an indirect jump is about 6 cycles +and the setups about another 6, so it depends on how much the unrolled code +is faster than a simple loop as to whether a computed jump ought to be used. + +Position independent code is implemented using a call to get eip for +computed jumps and a ret is always done, rather than an addl $4,%esp or a +popl, so the CPU return address branch prediction stack stays synchronised +with the actual stack in memory. Such a call however still costs 4 to 7 +cycles. + +Branch prediction, in absence of any history, will guess forward jumps are +not taken and backward jumps are taken. Where possible it's arranged that +the less likely or less important case is under a taken forward jump. + + + +MMX + +Putting emms or femms as late as possible in a routine seems to be fastest. 
+Perhaps an emms or femms stalls until all outstanding MMX instructions have +completed, so putting it later gives them a chance to complete on their own, +in parallel with other operations (like register popping). + +The Optimization Manual chapter 5 recommends using a femms on K6-2 and K6-3 +at the start of a routine, in case it's been preceded by x87 floating point +operations. This isn't done because in gmp programs it's expected that x87 +floating point won't be much used and that chances are an mpn routine won't +have been preceded by any x87 code. + + + +CODING + +Instructions in general code are shown paired if they can decode and execute +together, meaning two short decode instructions with the second not +depending on the first, only the first using the shifter, no more than one +load, and no more than one store. + +K6 does some out of order execution so the pairings aren't essential, they +just show what slots might be available. When decoding is the limiting +factor things can be scheduled that might not execute until later. + + + +NOTES + +Code alignment + +- if an opcode/modrm or 0Fh/opcode/modrm crosses a cache line boundary, + short decode is inhibited. The cross.pl script detects this. + +- loops and branch targets should be aligned to 16 bytes, or ensure at least + 2 instructions before a 32 byte boundary. This makes use of the 16 byte + cache in the BTB. + +Addressing modes + +- (%esi) degrades decoding from short to vector. 0(%esi) doesn't have this + problem, and can be used as an equivalent, or easier is just to use a + different register, like %ebx. + +- K6 and pre-CXT core K6-2 have the following problem. (K6-2 CXT and K6-3 + have it fixed, these being cpuid function 1 signatures 0x588 to 0x58F). + + If more than 3 bytes are needed to determine instruction length then + decoding degrades from direct to long, or from long to vector. 
This + happens with forms like "0F opcode mod/rm" with mod/rm=00-xxx-100 since + with mod=00 the sib determines whether there's a displacement. + + This affects all MMX and 3DNow instructions, and others with an 0F prefix + like movzbl. The modes affected are anything with an index and no + displacement, or an index but no base, and this includes (%esp) which is + really (,%esp,1). + + The cross.pl script detects problem cases. The workaround is to always + use a displacement, and to do this with Zdisp if it's zero so the + assembler doesn't discard it. + + See Optimization Manual rev D page 67 and 3DNow Porting Guide rev B pages + 13-14 and 36-37. + +Calls + +- indirect jumps and calls are not branch predicted, they measure about 6 + cycles. + +Various + +- adcl 2 cycles of decode, maybe 2 cycles executing in the X pipe +- bsf 12-27 cycles +- emms 5 cycles +- femms 3 cycles +- jecxz 2 cycles taken, 13 not taken (optimization manual says 7 not taken) +- divl 20 cycles back-to-back +- imull 2 decode, 2 execute +- mull 2 decode, 3 execute (optimization manual decoding sample) +- prefetch 2 cycles +- rcll/rcrl implicit by one bit: 2 cycles + immediate or %cl count: 11 + 2 per bit for dword + 13 + 4 per bit for byte +- setCC 2 cycles +- xchgl %eax,reg 1.5 cycles, back-to-back (strange) + reg,reg 2 cycles, back-to-back + + + + +REFERENCES + +"AMD-K6 Processor Code Optimization Application Note", AMD publication +number 21924, revision D amendment 0, January 2000. This describes K6-2 and +K6-3. Available on-line, + + http://www.amd.com/K6/k6docs/pdf/21924.pdf + +"AMD-K6 MMX Enhanced Processor x86 Code Optimization Application Note", AMD +publication number 21828, revision A amendment 0, August 1997. This is an +older edition of the above document, describing plain K6. Available +on-line, + + http://www.amd.com/K6/k6docs/pdf/21828.pdf + +"3DNow Technology Manual", AMD publication number 21928F/0-August 1999. 
+This describes the femms and prefetch instructions, but nothing else from +3DNow has been used. Available on-line, + + http://www.amd.com/K6/k6docs/pdf/21928.pdf + +"3DNow Instruction Porting Guide", AMD publication number 22621, revision B, +August 1999. This has some notes on general K6 optimizations as well as +3DNow. Available on-line, + + http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/mpn/x86/k6/aors_n.asm b/rts/gmp/mpn/x86/k6/aors_n.asm new file mode 100644 index 0000000000..31b05ada51 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/aors_n.asm @@ -0,0 +1,329 @@ +dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction. +dnl +dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. 
+ + +include(`../config.m4') + + +ifdef(`OPERATION_add_n', ` + define(M4_inst, adcl) + define(M4_function_n, mpn_add_n) + define(M4_function_nc, mpn_add_nc) + define(M4_description, add) +',`ifdef(`OPERATION_sub_n', ` + define(M4_inst, sbbl) + define(M4_function_n, mpn_sub_n) + define(M4_function_nc, mpn_sub_nc) + define(M4_description, subtract) +',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n +')')') + +MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) + + +C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); +C +C Calculate src1,size M4_description src2,size, and store the result in +C dst,size. The return value is the carry bit from the top of the result +C (1 or 0). +C +C The _nc version accepts 1 or 0 for an initial carry into the low limb of +C the calculation. Note values other than 1 or 0 here will lead to garbage +C results. +C +C Instruction decoding limits a normal dst=src1+src2 operation to 3 c/l, and +C an in-place dst+=src to 2.5 c/l. The unrolled loops have 1 cycle/loop of +C loop control, which with 4 limbs/loop means an extra 0.25 c/l. 
+ +define(PARAM_CARRY, `FRAME+20(%esp)') +define(PARAM_SIZE, `FRAME+16(%esp)') +define(PARAM_SRC2, `FRAME+12(%esp)') +define(PARAM_SRC1, `FRAME+8(%esp)') +define(PARAM_DST, `FRAME+4(%esp)') +deflit(`FRAME',0) + +dnl minimum 5 because the unrolled code can't handle less +deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(32) + +PROLOGUE(M4_function_nc) + movl PARAM_CARRY, %eax + jmp LF(M4_function_n,start) +EPILOGUE() + + +PROLOGUE(M4_function_n) + xorl %eax, %eax +L(start): + movl PARAM_SIZE, %ecx + pushl %ebx +FRAME_pushl() + + movl PARAM_SRC1, %ebx + pushl %edi +FRAME_pushl() + + movl PARAM_SRC2, %edx + cmpl $UNROLL_THRESHOLD, %ecx + + movl PARAM_DST, %edi + jae L(unroll) + + + shrl %eax C initial carry flag + + C offset 0x21 here, close enough to aligned +L(simple): + C eax scratch + C ebx src1 + C ecx counter + C edx src2 + C esi + C edi dst + C ebp + C + C The store to (%edi) could be done with a stosl; it'd be smaller + C code, but there's no speed gain and a cld would have to be added + C (per mpn/x86/README.family). 
+ + movl (%ebx), %eax + leal 4(%ebx), %ebx + + M4_inst (%edx), %eax + + movl %eax, (%edi) + leal 4(%edi), %edi + + leal 4(%edx), %edx + loop L(simple) + + + movl $0, %eax + popl %edi + + setc %al + + popl %ebx + ret + + +C ----------------------------------------------------------------------------- +L(unroll): + C eax carry + C ebx src1 + C ecx counter + C edx src2 + C esi + C edi dst + C ebp + + cmpl %edi, %ebx + pushl %esi + + je L(inplace) + +ifdef(`OPERATION_add_n',` + cmpl %edi, %edx + + je L(inplace_reverse) +') + + movl %ecx, %esi + + andl $-4, %ecx + andl $3, %esi + + leal (%ebx,%ecx,4), %ebx + leal (%edx,%ecx,4), %edx + leal (%edi,%ecx,4), %edi + + negl %ecx + shrl %eax + + ALIGN(32) +L(normal_top): + C eax scratch + C ebx src1 + C ecx counter, limbs, negative + C edx src2 + C esi + C edi dst + C ebp + + movl (%ebx,%ecx,4), %eax + leal 5(%ecx), %ecx + M4_inst -20(%edx,%ecx,4), %eax + movl %eax, -20(%edi,%ecx,4) + + movl 4-20(%ebx,%ecx,4), %eax + M4_inst 4-20(%edx,%ecx,4), %eax + movl %eax, 4-20(%edi,%ecx,4) + + movl 8-20(%ebx,%ecx,4), %eax + M4_inst 8-20(%edx,%ecx,4), %eax + movl %eax, 8-20(%edi,%ecx,4) + + movl 12-20(%ebx,%ecx,4), %eax + M4_inst 12-20(%edx,%ecx,4), %eax + movl %eax, 12-20(%edi,%ecx,4) + + loop L(normal_top) + + + decl %esi + jz L(normal_finish_one) + js L(normal_done) + + C two or three more limbs + + movl (%ebx), %eax + M4_inst (%edx), %eax + movl %eax, (%edi) + + movl 4(%ebx), %eax + M4_inst 4(%edx), %eax + decl %esi + movl %eax, 4(%edi) + + jz L(normal_done) + movl $2, %ecx + +L(normal_finish_one): + movl (%ebx,%ecx,4), %eax + M4_inst (%edx,%ecx,4), %eax + movl %eax, (%edi,%ecx,4) + +L(normal_done): + popl %esi + popl %edi + + movl $0, %eax + popl %ebx + + setc %al + + ret + + +C ----------------------------------------------------------------------------- + +ifdef(`OPERATION_add_n',` +L(inplace_reverse): + C dst==src2 + + movl %ebx, %edx +') + +L(inplace): + C eax initial carry + C ebx + C ecx size + C edx src + C esi + C edi dst + 
C ebp + + leal -1(%ecx), %esi + decl %ecx + + andl $-4, %ecx + andl $3, %esi + + movl (%edx), %ebx C src low limb + leal (%edx,%ecx,4), %edx + + leal (%edi,%ecx,4), %edi + negl %ecx + + shrl %eax + + + ALIGN(32) +L(inplace_top): + C eax + C ebx next src limb + C ecx size + C edx src + C esi + C edi dst + C ebp + + M4_inst %ebx, (%edi,%ecx,4) + + movl 4(%edx,%ecx,4), %eax + leal 5(%ecx), %ecx + + M4_inst %eax, 4-20(%edi,%ecx,4) + + movl 8-20(%edx,%ecx,4), %eax + movl 12-20(%edx,%ecx,4), %ebx + + M4_inst %eax, 8-20(%edi,%ecx,4) + M4_inst %ebx, 12-20(%edi,%ecx,4) + + movl 16-20(%edx,%ecx,4), %ebx + loop L(inplace_top) + + + C now %esi is 0 to 3 representing respectively 1 to 4 limbs more + + M4_inst %ebx, (%edi) + + decl %esi + jz L(inplace_finish_one) + js L(inplace_done) + + C two or three more limbs + + movl 4(%edx), %eax + movl 8(%edx), %ebx + M4_inst %eax, 4(%edi) + M4_inst %ebx, 8(%edi) + + decl %esi + movl $2, %ecx + + jz L(normal_done) + +L(inplace_finish_one): + movl 4(%edx,%ecx,4), %eax + M4_inst %eax, 4(%edi,%ecx,4) + +L(inplace_done): + popl %esi + popl %edi + + movl $0, %eax + popl %ebx + + setc %al + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/aorsmul_1.asm b/rts/gmp/mpn/x86/k6/aorsmul_1.asm new file mode 100644 index 0000000000..da4120fe2f --- /dev/null +++ b/rts/gmp/mpn/x86/k6/aorsmul_1.asm @@ -0,0 +1,372 @@ +dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. +dnl +dnl K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data), +dnl PIC adds about 6 cycles at the start. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K6: large multipliers small multipliers +dnl UNROLL_COUNT cycles/limb cycles/limb +dnl 4 9.5 7.78 +dnl 8 9.0 7.78 +dnl 16 8.4 7.65 +dnl 32 8.4 8.2 +dnl +dnl Maximum possible unrolling with the current code is 32. +dnl +dnl Unrolling to 16 limbs/loop makes the unrolled loop fit exactly in a 256 +dnl byte block, which might explain the good speed at that unrolling. + +deflit(UNROLL_COUNT, 16) + + +ifdef(`OPERATION_addmul_1', ` + define(M4_inst, addl) + define(M4_function_1, mpn_addmul_1) + define(M4_function_1c, mpn_addmul_1c) + define(M4_description, add it to) + define(M4_desc_retval, carry) +',`ifdef(`OPERATION_submul_1', ` + define(M4_inst, subl) + define(M4_function_1, mpn_submul_1) + define(M4_function_1c, mpn_submul_1c) + define(M4_description, subtract it from) + define(M4_desc_retval, borrow) +',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 +')')') + +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) + + +C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +C Calculate src,size multiplied by mult and M4_description dst,size. +C Return the M4_desc_retval limb from the top of the result. +C +C The jadcl0()s in the unrolled loop make the speed data dependent.
Small +C multipliers (most significant few bits clear) result in few carry bits and +C speeds up to 7.65 cycles/limb are attained. Large multipliers (most +C significant few bits set) make the carry bits 50/50 and lead to something +C more like 8.4 c/l. (With adcl's both of these would be 9.3 c/l.) +C +C It's important that the gains for jadcl0 on small multipliers don't come +C at the cost of slowing down other data. Tests on uniformly distributed +C random data, designed to confound branch prediction, show about a 7% +C speed-up using jadcl0 over adcl (8.93 versus 9.57 cycles/limb, with all +C overheads included). +C +C In the simple loop, jadcl0() measures slower than adcl (11.9-14.7 versus +C 11.0 cycles/limb), and hence isn't used. +C +C In the simple loop, note that running ecx from negative to zero and using +C it as an index in the two movs wouldn't help. It would save one +C instruction (2*addl+loop becoming incl+jnz), but there's nothing unpaired +C that would be collapsed by this. +C +C +C jadcl0 +C ------ +C +C jadcl0() being faster than adcl $0 seems to be an artifact of two things, +C firstly the instruction decoding and secondly the fact that there's a +C carry bit for the jadcl0 only on average about 1/4 of the time. +C +C The code in the unrolled loop decodes something like the following. +C +C decode cycles +C mull %ebp 2 +C M4_inst %esi, disp(%edi) 1 +C adcl %eax, %ecx 2 +C movl %edx, %esi \ 1 +C jnc 1f / +C incl %esi \ 1 +C 1: movl disp(%ebx), %eax / +C --- +C 7 +C +C In a back-to-back style test this measures 7 with the jnc not taken, or 8 +C with it taken (both when correctly predicted). This is opposite to the +C measurements showing small multipliers running faster than large ones. +C Watch this space for more info ... +C +C It's not clear how much branch misprediction might be costing. The K6 +C doco says it will be 1 to 4 cycles, but presumably it's near the low end +C of that range to get the measured results. 
+C +C +C In the code the two carries are more or less the preceding mul product and +C the calculation is roughly +C +C x*y + u*b+v +C +C where b=2^32 is the size of a limb, x*y is the two carry limbs, and u and +C v are the two limbs it's added to (being the low of the next mul, and a +C limb from the destination). +C +C To get a carry requires x*y+u*b+v >= b^2, which is u*b+v >= b^2-x*y, and +C there are b^2-(b^2-x*y) = x*y many such values, giving a probability of +C x*y/b^2. If x, y, u and v are random and uniformly distributed between 0 +C and b-1, then the total probability can be summed over x and y, +C +C \frac{1}{b^2} \sum_{x=0}^{b-1} \sum_{y=1}^{b-1} \frac{x y}{b^2} = \frac{1}{b^4} \cdot \frac{b(b-1)}{2} \cdot \frac{b(b-1)}{2} = 1/4 +C +C Actually it's a very tiny bit less than 1/4 of course. If y is fixed, +C then the probability is 1/2*y/b thus varying linearly between 0 and 1/2. + + +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 9) +',` +deflit(UNROLL_THRESHOLD, 6) +') + +defframe(PARAM_CARRY, 20) +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) + +PROLOGUE(M4_function_1c) + pushl %esi +deflit(`FRAME',4) + movl PARAM_CARRY, %esi + jmp LF(M4_function_1,start_nc) +EPILOGUE() + +PROLOGUE(M4_function_1) + push %esi +deflit(`FRAME',4) + xorl %esi, %esi C initial carry + +L(start_nc): + movl PARAM_SIZE, %ecx + pushl %ebx +deflit(`FRAME',8) + + movl PARAM_SRC, %ebx + pushl %edi +deflit(`FRAME',12) + + cmpl $UNROLL_THRESHOLD, %ecx + movl PARAM_DST, %edi + + pushl %ebp +deflit(`FRAME',16) + jae L(unroll) + + + C simple loop + + movl PARAM_MULTIPLIER, %ebp + +L(simple): + C eax scratch + C ebx src + C ecx counter + C edx scratch + C esi carry + C edi dst + C ebp multiplier + + movl (%ebx), %eax + addl $4, %ebx + + mull %ebp + + addl $4, %edi + addl %esi, %eax + + adcl $0, %edx + + M4_inst %eax, -4(%edi) + + adcl $0, %edx + + movl %edx, %esi + loop L(simple) + + + popl %ebp + popl %edi + + popl %ebx + movl %esi,
%eax + + popl %esi + ret + + + +C ----------------------------------------------------------------------------- +C The unrolled loop uses a "two carry limbs" scheme. At the top of the loop +C the carries are ecx=lo, esi=hi, then they swap for each limb processed. +C For the computed jump an odd size means they start one way around, an even +C size the other. +C +C VAR_JUMP holds the computed jump temporarily because there aren't enough +C registers at the point of doing the mul for the initial two carry limbs. +C +C The add/adc for the initial carry in %esi is necessary only for the +C mpn_addmul/submul_1c entry points. Duplicating the startup code to +C eliminate this for the plain mpn_addmul/submul_1 doesn't seem like a good +C idea. + +dnl overlapping with parameters already fetched +define(VAR_COUNTER, `PARAM_SIZE') +define(VAR_JUMP, `PARAM_DST') + +L(unroll): + C eax + C ebx src + C ecx size + C edx + C esi initial carry + C edi dst + C ebp + + movl %ecx, %edx + decl %ecx + + subl $2, %edx + negl %ecx + + shrl $UNROLL_LOG2, %edx + andl $UNROLL_MASK, %ecx + + movl %edx, VAR_COUNTER + movl %ecx, %edx + + shll $4, %edx + negl %ecx + + C 15 code bytes per limb +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(entry) (%edx,%ecx,1), %edx +') + movl (%ebx), %eax C src low limb + + movl PARAM_MULTIPLIER, %ebp + movl %edx, VAR_JUMP + + mull %ebp + + addl %esi, %eax C initial carry (from _1c) + jadcl0( %edx) + + + leal 4(%ebx,%ecx,4), %ebx + movl %edx, %esi C high carry + + movl VAR_JUMP, %edx + leal (%edi,%ecx,4), %edi + + testl $1, %ecx + movl %eax, %ecx C low carry + + jz L(noswap) + movl %esi, %ecx C high,low carry other way around + + movl %eax, %esi +L(noswap): + + jmp *%edx + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%edx,%ecx,1), %edx + addl $L(entry)-L(here), %edx + addl (%esp), %edx + ret +') + + +C ----------------------------------------------------------- + ALIGN(32) +L(top): +deflit(`FRAME',16) + C eax scratch + C
ebx src + C ecx carry lo + C edx scratch + C esi carry hi + C edi dst + C ebp multiplier + C + C 15 code bytes per limb + + leal UNROLL_BYTES(%edi), %edi + +L(entry): +forloop(`i', 0, UNROLL_COUNT/2-1, ` + deflit(`disp0', eval(2*i*4)) + deflit(`disp1', eval(disp0 + 4)) + +Zdisp( movl, disp0,(%ebx), %eax) + mull %ebp +Zdisp( M4_inst,%ecx, disp0,(%edi)) + adcl %eax, %esi + movl %edx, %ecx + jadcl0( %ecx) + + movl disp1(%ebx), %eax + mull %ebp + M4_inst %esi, disp1(%edi) + adcl %eax, %ecx + movl %edx, %esi + jadcl0( %esi) +') + + decl VAR_COUNTER + leal UNROLL_BYTES(%ebx), %ebx + + jns L(top) + + + popl %ebp + M4_inst %ecx, UNROLL_BYTES(%edi) + + popl %edi + movl %esi, %eax + + popl %ebx + jadcl0( %eax) + + popl %esi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/cross.pl b/rts/gmp/mpn/x86/k6/cross.pl new file mode 100644 index 0000000000..21734f3e52 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/cross.pl @@ -0,0 +1,141 @@ +#! /usr/bin/perl + +# Copyright (C) 2000 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 2.1 of the License, or (at +# your option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +# Usage: cross.pl [filename.o]... 
+# +# Produce an annotated disassembly of the given object files, indicating +# certain code alignment and addressing mode problems afflicting K6 chips. +# "ZZ" is used on all annotations, so this can be searched for. +# +# With no arguments, all .o files corresponding to .asm files are processed. +# This is good in the mpn object directory of a k6*-*-* build. +# +# As far as fixing problems goes, any cache line crossing problems in loops +# get attention, but as a rule it's too tedious to rearrange code or slip in +# nops to fix every problem in setup or finishup code. +# +# Bugs: +# +# Instructions without mod/rm bytes or which are already vector decoded are +# unaffected by cache line boundary crossing, but not all of these have yet +# been put in as exceptions. All that occur in practice in GMP are present +# though. +# +# There's no messages for using the vector decoded addressing mode (%esi), +# but that mode is easy to avoid when coding. + +use strict; + +sub disassemble { + my ($file) = @_; + my ($addr,$b1,$b2,$b3, $prefix,$opcode,$modrm); + + open (IN, "objdump -Srfh $file |") + || die "Cannot open pipe from objdump\n"; + while (<IN>) { + print; + + if (/^[ \t]*[0-9]+[ \t]+\.text[ \t]/ && /2\*\*([0-9]+)$/) { + if ($1 < 5) { + print "ZZ need at least 2**5 for predictable cache line crossing\n"; + } + } + + if (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)[ \t]+([0-9a-f]+)/) { + ($addr,$b1,$b2,$b3) = ($1,$2,$3,$4); + + } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)/) { + ($addr,$b1,$b2,$b3) = ($1,$2,$3,''); + + } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)/) { + ($addr,$b1,$b2,$b3) = ($1,$2,'',''); + + } else { + next; + } + + if ($b1 =~ /0f/) { + $prefix = $b1; + $opcode = $b2; + $modrm = $b3; + } else { + $prefix = ''; + $opcode = $b1; + $modrm = $b2; + } + + # modrm of the form 00-xxx-100 with an 0F prefix is the problem case + # for K6 and pre-CXT K6-2 + if ($prefix =~ /0f/ + && $opcode !~ /^8/ # jcond disp32 + && $modrm =~ 
/^[0-3][4c]/) { + print "ZZ ($file) >3 bytes to determine instruction length\n"; + } + + # with just an opcode, starting 1f mod 20h + if ($addr =~ /[13579bdf]f$/ + && $prefix !~ /0f/ + && $opcode !~ /1[012345]/ # adc + && $opcode !~ /1[89abcd]/ # sbb + && $opcode !~ /68/ # push $imm32 + && $opcode !~ /^7/ # jcond disp8 + && $opcode !~ /a[89]/ # test+imm + && $opcode !~ /a[a-f]/ # stos/lods/scas + && $opcode !~ /b8/ # movl $imm32,%eax + && $opcode !~ /e[0123]/ # loop/loopz/loopnz/jcxz + && $opcode !~ /e[b9]/ # jmp disp8/disp32 + && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std + && !($opcode =~ /f[67]/ # grp 1 + && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv + && $modrm !~ /^$/) { + print "ZZ ($file) opcode/modrm cross 32-byte boundary\n"; + } + + # with an 0F prefix, anything starting at 1f mod 20h + if ($addr =~ /[13579bdf][f]$/ + && $prefix =~ /0f/) { + print "ZZ ($file) prefix/opcode cross 32-byte boundary\n"; + } + + # with an 0F prefix, anything with mod/rm starting at 1e mod 20h + if ($addr =~ /[13579bdf][e]$/ + && $prefix =~ /0f/ + && $opcode !~ /^8/ # jcond disp32 + && $modrm !~ /^$/) { + print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n"; + } + } + close IN || die "Error from objdump (or objdump not available)\n"; +} + + +my @files; +if ($#ARGV >= 0) { + @files = @ARGV; +} else { + @files = glob "*.asm"; + map {s/.asm/.o/} @files; +} + +foreach (@files) { + disassemble($_); +} diff --git a/rts/gmp/mpn/x86/k6/diveby3.asm b/rts/gmp/mpn/x86/k6/diveby3.asm new file mode 100644 index 0000000000..ffb97bc380 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/diveby3.asm @@ -0,0 +1,110 @@ +dnl AMD K6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. +dnl +dnl K6: 11.0 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t carry); +C +C Using %esi in (%esi,%ecx,4) or 0(%esi,%ecx,4) addressing modes doesn't +C lead to vector decoding, unlike plain (%esi) does. 
+ +defframe(PARAM_CARRY,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl multiplicative inverse of 3, modulo 2^32 +deflit(INVERSE_3, 0xAAAAAAAB) + + .text + ALIGN(32) + +PROLOGUE(mpn_divexact_by3c) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %esi defframe_pushl(SAVE_ESI) + + movl PARAM_SRC, %esi + pushl %edi defframe_pushl(SAVE_EDI) + + movl PARAM_DST, %edi + pushl %ebx defframe_pushl(SAVE_EBX) + + movl PARAM_CARRY, %ebx + leal (%esi,%ecx,4), %esi + + pushl $3 defframe_pushl(VAR_THREE) + leal (%edi,%ecx,4), %edi + + negl %ecx + + + C Need 32 alignment for claimed speed, to avoid the movl store + C opcode/modrm crossing a cache line boundary + + ALIGN(32) +L(top): + C eax scratch, low product + C ebx carry limb (0 to 3) + C ecx counter, limbs, negative + C edx scratch, high product + C esi &src[size] + C edi &dst[size] + C ebp + C + C The 0(%esi,%ecx,4) form pads so the finishup "movl %ebx, %eax" + C doesn't cross a 32 byte boundary, saving a couple of cycles + C (that's a fixed couple, not per loop). + +Zdisp( movl, 0,(%esi,%ecx,4), %eax) + subl %ebx, %eax + + setc %bl + + imull $INVERSE_3, %eax + + movl %eax, (%edi,%ecx,4) + addl $2, %ecx + + mull VAR_THREE + + addl %edx, %ebx + loop L(top) + + + movl SAVE_ESI, %esi + movl %ebx, %eax + + movl SAVE_EBX, %ebx + + movl SAVE_EDI, %edi + addl $FRAME, %esp + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/gmp-mparam.h b/rts/gmp/mpn/x86/k6/gmp-mparam.h new file mode 100644 index 0000000000..77f3948d77 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/gmp-mparam.h @@ -0,0 +1,97 @@ +/* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +#ifndef UMUL_TIME +#define UMUL_TIME 3 /* cycles */ +#endif + +#ifndef UDIV_TIME +#define UDIV_TIME 20 /* cycles */ +#endif + +/* bsfl takes 12-27 cycles, put an average for uniform random numbers */ +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 14 /* cycles */ +#endif + + +/* Generated by tuneup.c, 2000-07-04. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 18 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 130 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 34 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 116 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 68 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 98 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 13 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 67 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 472 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 4352 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 544 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 4352 +#endif diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm new file mode 100644 index 0000000000..20a33e6ccf --- /dev/null +++ b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm @@ -0,0 +1,179 @@ +dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing. +dnl +dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data +dnl alignment. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K6-2 aligned: +dnl UNROLL_COUNT cycles/limb +dnl 8 0.75 +dnl 16 0.625 +dnl 32 0.5625 +dnl 64 0.53 +dnl Maximum possible with the current code is 64, the minimum is 2. + +deflit(UNROLL_COUNT, 32) + + +C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Copy src,size to dst,size, processing limbs from high to low addresses. +C +C The comments in copyi.asm apply here too. + + +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) + +PROLOGUE(mpn_copyd) + movl PARAM_SIZE, %ecx + movl %esi, %eax + + movl PARAM_SRC, %esi + movl %edi, %edx + + std + + movl PARAM_DST, %edi + cmpl $UNROLL_COUNT, %ecx + + leal -4(%esi,%ecx,4), %esi + + leal -4(%edi,%ecx,4), %edi + ja L(unroll) + +L(simple): + rep + movsl + + cld + + movl %eax, %esi + movl %edx, %edi + + ret + + +L(unroll): + C if src and dst are different alignments mod8, then use rep movs + C if src and dst are both 4mod8 then process one limb to get 0mod8 + + pushl %ebx + leal (%esi,%edi), %ebx + + testb $4, %bl + popl %ebx + + jnz L(simple) + testl $4, %esi + + leal -UNROLL_COUNT(%ecx), %ecx + jnz L(already_aligned) + + movsl + + decl %ecx +L(already_aligned): + + +ifelse(UNROLL_BYTES,256,` + subl $128, %esi + subl $128, %edi +') + + C offset 0x3D here, but gets full speed without further alignment +L(top): + C eax saved esi + C ebx + C ecx counter, limbs + C edx saved edi + C esi src, 
decrementing + C edi dst, decrementing + C ebp + C + C `disp' is never 0, so don't need to force 0(%esi). + +deflit(CHUNK_COUNT, 2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128))) + movq disp(%esi), %mm0 + movq %mm0, disp(%edi) +') + + leal -UNROLL_BYTES(%esi), %esi + subl $UNROLL_COUNT, %ecx + + leal -UNROLL_BYTES(%edi), %edi + jns L(top) + + + C now %ecx is -UNROLL_COUNT to -1 representing respectively 0 to + C UNROLL_COUNT-1 limbs remaining + + testb $eval(UNROLL_COUNT/2), %cl + + leal UNROLL_COUNT(%ecx), %ecx + jz L(not_half) + + + C at an unroll count of 32 this block of code is 16 cycles faster than + C the rep movs, less 3 or 4 to test whether to do it + +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, ` + deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128))) + movq disp(%esi), %mm0 + movq %mm0, disp(%edi) +') + + subl $eval(UNROLL_BYTES/2), %esi + subl $eval(UNROLL_BYTES/2), %edi + + subl $eval(UNROLL_COUNT/2), %ecx +L(not_half): + + +ifelse(UNROLL_BYTES,256,` + addl $128, %esi + addl $128, %edi +') + + rep + movsl + + cld + + movl %eax, %esi + movl %edx, %edi + + femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm new file mode 100644 index 0000000000..215d805f2e --- /dev/null +++ b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm @@ -0,0 +1,196 @@ +dnl AMD K6-2 mpn_copyi -- copy limb vector, incrementing. +dnl +dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data +dnl alignment. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version.
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K6-2 aligned: +dnl UNROLL_COUNT cycles/limb +dnl 8 0.75 +dnl 16 0.625 +dnl 32 0.5625 +dnl 64 0.53 +dnl Maximum possible with the current code is 64, the minimum is 2. + +deflit(UNROLL_COUNT, 32) + + +C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C The MMX loop is faster than a rep movs when src and dst are both 0mod8. +C With one 0mod8 and one 4mod8 it's 1.056 c/l and the rep movs at 1.0 c/l is +C used instead. +C +C mod8 +C src dst +C 0 0 both aligned, use mmx +C 0 4 unaligned, use rep movs +C 4 0 unaligned, use rep movs +C 4 4 do one movs, then both aligned, use mmx +C +C The MMX code on aligned data is 0.5 c/l, plus loop overhead of 2 +C cycles/loop, which is 0.0625 c/l at 32 limbs/loop. +C +C A pattern of two movq loads and two movq stores (or four and four) was +C tried, but found to be the same speed as just one of each. +C +C Note that this code only suits K6-2 and K6-3. Plain K6 does only one mmx +C instruction per cycle, so "movq"s are no faster than the simple 1 c/l rep +C movs. +C +C Enhancement: +C +C Addressing modes like disp(%esi,%ecx,4) aren't currently used. They'd +C make it possible to avoid incrementing %esi and %edi in the loop and hence +C get loop overhead down to 1 cycle. Care would be needed to avoid bad +C cache line crossings since the "movq"s would then be 5 code bytes rather +C than 4. 
+ + +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) + +PROLOGUE(mpn_copyi) + movl PARAM_SIZE, %ecx + movl %esi, %eax + + movl PARAM_SRC, %esi + movl %edi, %edx + + cld + + movl PARAM_DST, %edi + cmpl $UNROLL_COUNT, %ecx + + ja L(unroll) + +L(simple): + rep + movsl + + movl %eax, %esi + movl %edx, %edi + + ret + + +L(unroll): + C if src and dst are different alignments mod8, then use rep movs + C if src and dst are both 4mod8 then process one limb to get 0mod8 + + pushl %ebx + leal (%esi,%edi), %ebx + + testb $4, %bl + popl %ebx + + jnz L(simple) + testl $4, %esi + + leal -UNROLL_COUNT(%ecx), %ecx + jz L(already_aligned) + + decl %ecx + + movsl +L(already_aligned): + + +ifelse(UNROLL_BYTES,256,` + addl $128, %esi + addl $128, %edi +') + + C this is offset 0x34, no alignment needed +L(top): + C eax saved esi + C ebx + C ecx counter, limbs + C edx saved edi + C esi src, incrementing + C edi dst, incrementing + C ebp + C + C Zdisp gets 0(%esi) left that way to avoid vector decode, and with + C 0(%edi) keeps code aligned to 16 byte boundaries. 
+ +deflit(CHUNK_COUNT, 2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) +Zdisp( movq, disp,(%esi), %mm0) +Zdisp( movq, %mm0, disp,(%edi)) +') + + addl $UNROLL_BYTES, %esi + subl $UNROLL_COUNT, %ecx + + leal UNROLL_BYTES(%edi), %edi + jns L(top) + + + C now %ecx is -UNROLL_COUNT to -1 representing respectively 0 to + C UNROLL_COUNT-1 limbs remaining + + testb $eval(UNROLL_COUNT/2), %cl + + leal UNROLL_COUNT(%ecx), %ecx + jz L(not_half) + + C at an unroll count of 32 this block of code is 16 cycles faster than + C the rep movs, less 3 or 4 to test whether to do it + +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, ` + deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + movq disp(%esi), %mm0 + movq %mm0, disp(%edi) +') + addl $eval(UNROLL_BYTES/2), %esi + addl $eval(UNROLL_BYTES/2), %edi + + subl $eval(UNROLL_COUNT/2), %ecx +L(not_half): + + +ifelse(UNROLL_BYTES,256,` + subl $128, %esi + subl $128, %edi +') + + rep + movsl + + movl %eax, %esi + movl %edx, %edi + + femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm new file mode 100644 index 0000000000..f6d54f97a8 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm @@ -0,0 +1,286 @@ +dnl AMD K6-2 mpn_lshift -- mpn left shift. +dnl +dnl K6-2: 1.75 cycles/limb + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +dnl used after src has been fetched +define(VAR_RETVAL,`PARAM_SRC') + +dnl minimum 9, because unrolled loop can't handle less +deflit(UNROLL_THRESHOLD, 9) + + .text + ALIGN(32) + +PROLOGUE(mpn_lshift) +deflit(`FRAME',0) + + C The 1 limb case can be done without the push %ebx, but it's then + C still the same speed. The push is left as a free helping hand for + C the two_or_more code. + + movl PARAM_SIZE, %eax + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + decl %eax + + movl PARAM_SHIFT, %ecx + jnz L(two_or_more) + + movl (%ebx), %edx C src limb + movl PARAM_DST, %ebx + + shldl( %cl, %edx, %eax) C return value + + shll %cl, %edx + + movl %edx, (%ebx) C dst limb + popl %ebx + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) C avoid offset 0x1f +L(two_or_more): + C eax size-1 + C ebx src + C ecx shift + C edx + + movl (%ebx,%eax,4), %edx C src high limb + negl %ecx + + movd PARAM_SHIFT, %mm6 + addl $32, %ecx C 32-shift + + shrl %cl, %edx + cmpl $UNROLL_THRESHOLD-1, %eax + + movl %edx, VAR_RETVAL + jae L(unroll) + + + movd %ecx, %mm7 + movl %eax, %ecx + + movl PARAM_DST, %eax + +L(simple): + C eax dst + C ebx src + C ecx counter, size-1 to 1 + C edx retval + C + C mm0 scratch + C mm6 shift + C mm7 32-shift + + movq -4(%ebx,%ecx,4), %mm0 + + psrlq %mm7, %mm0 + +Zdisp( movd, %mm0, 0,(%eax,%ecx,4)) + loop L(simple) + + + movd (%ebx), %mm0 + popl 
%ebx + + psllq %mm6, %mm0 + + movd %mm0, (%eax) + movl %edx, %eax + + femms + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll): + C eax size-1 + C ebx src + C ecx 32-shift + C edx retval (but instead VAR_RETVAL is used) + C + C mm6 shift + + addl $32, %ecx + movl PARAM_DST, %edx + + movd %ecx, %mm7 + subl $7, %eax C size-8 + + leal (%edx,%eax,4), %ecx C alignment of dst + + movq 32-8(%ebx,%eax,4), %mm2 C src high qword + testb $4, %cl + + jz L(dst_aligned) + psllq %mm6, %mm2 + + psrlq $32, %mm2 + decl %eax + + movd %mm2, 32(%edx,%eax,4) C dst high limb + movq 32-8(%ebx,%eax,4), %mm2 C new src high qword +L(dst_aligned): + + movq 32-16(%ebx,%eax,4), %mm0 C src second highest qword + + + C This loop is the important bit, the rest is just support for it. + C Four src limbs are held at the start, and four more will be read. + C Four dst limbs will be written. This schedule seems necessary for + C full speed. + C + C The use of size-8 lets the loop stop when %eax goes negative and + C leaves -4 to -1 which can be tested with test $1 and $2. + +L(top): + C eax counter, size-8 step by -4 until <0 + C ebx src + C ecx + C edx dst + C + C mm0 src next qword + C mm1 scratch + C mm2 src prev qword + C mm6 shift + C mm7 64-shift + + psllq %mm6, %mm2 + subl $4, %eax + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm0, %mm2 + movq 24(%ebx,%eax,4), %mm0 + + psllq %mm6, %mm1 + movq %mm2, 40(%edx,%eax,4) + + movq %mm0, %mm2 + psrlq %mm7, %mm0 + + por %mm0, %mm1 + movq 16(%ebx,%eax,4), %mm0 + + movq %mm1, 32(%edx,%eax,4) + jnc L(top) + + + C Now have four limbs in mm2 (prev) and mm0 (next), plus eax mod 4. + C + C 8(%ebx) is the next source, and 24(%edx) is the next destination. + C %eax is between -4 and -1, representing respectively 0 to 3 extra + C limbs that must be read. 
+ + + testl $2, %eax C testl to avoid bad cache line crossing + jz L(finish_nottwo) + + C Two more limbs: lshift mm2, OR it with rshifted mm0, mm0 becomes + C new mm2 and a new mm0 is loaded. + + psllq %mm6, %mm2 + movq %mm0, %mm1 + + psrlq %mm7, %mm0 + subl $2, %eax + + por %mm0, %mm2 + movq 16(%ebx,%eax,4), %mm0 + + movq %mm2, 32(%edx,%eax,4) + movq %mm1, %mm2 +L(finish_nottwo): + + + C lshift mm2, OR with rshifted mm0, mm1 becomes lshifted mm0 + + testb $1, %al + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm0, %mm2 + psllq %mm6, %mm1 + + movq %mm2, 24(%edx,%eax,4) + jz L(finish_even) + + + C Size is odd, so mm1 and one extra limb to process. + + movd (%ebx), %mm0 C src[0] + popl %ebx +deflit(`FRAME',0) + + movq %mm0, %mm2 + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + psllq %mm6, %mm2 + por %mm0, %mm1 + + movq %mm1, 4(%edx) C dst[1,2] + movd %mm2, (%edx) C dst[0] + + movl VAR_RETVAL, %eax + + femms + ret + + + nop C avoid bad cache line crossing +L(finish_even): +deflit(`FRAME',4) + C Size is even, so only mm1 left to process. + + movq %mm1, (%edx) C dst[0,1] + movl VAR_RETVAL, %eax + + popl %ebx + femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm new file mode 100644 index 0000000000..8a8c144241 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm @@ -0,0 +1,285 @@ +dnl AMD K6-2 mpn_rshift -- mpn right shift. +dnl +dnl K6-2: 1.75 cycles/limb + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +dnl Minimum 9, because the unrolled loop can't handle less. +dnl +deflit(UNROLL_THRESHOLD, 9) + + .text + ALIGN(32) + +PROLOGUE(mpn_rshift) +deflit(`FRAME',0) + + C The 1 limb case can be done without the push %ebx, but it's then + C still the same speed. The push is left as a free helping hand for + C the two_or_more code. 
+ + movl PARAM_SIZE, %eax + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + decl %eax + + movl PARAM_SHIFT, %ecx + jnz L(two_or_more) + + movl (%ebx), %edx C src limb + movl PARAM_DST, %ebx + + shrdl( %cl, %edx, %eax) C return value + + shrl %cl, %edx + + movl %edx, (%ebx) C dst limb + popl %ebx + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) C avoid offset 0x1f +L(two_or_more): + C eax size-1 + C ebx src + C ecx shift + C edx + + movl (%ebx), %edx C src low limb + negl %ecx + + addl $32, %ecx + movd PARAM_SHIFT, %mm6 + + shll %cl, %edx + cmpl $UNROLL_THRESHOLD-1, %eax + + jae L(unroll) + + + C eax size-1 + C ebx src + C ecx 32-shift + C edx retval + C + C mm6 shift + + movl PARAM_DST, %ecx + leal (%ebx,%eax,4), %ebx + + leal -4(%ecx,%eax,4), %ecx + negl %eax + + C This loop runs at about 3 cycles/limb, which is the amount of + C decoding, and this is despite every second access being unaligned. + +L(simple): + C eax counter, -(size-1) to -1 + C ebx &src[size-1] + C ecx &dst[size-1] + C edx retval + C + C mm0 scratch + C mm6 shift + +Zdisp( movq, 0,(%ebx,%eax,4), %mm0) + incl %eax + + psrlq %mm6, %mm0 + +Zdisp( movd, %mm0, 0,(%ecx,%eax,4)) + jnz L(simple) + + + movq %mm0, (%ecx) + movl %edx, %eax + + popl %ebx + + femms + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll): + C eax size-1 + C ebx src + C ecx 32-shift + C edx retval + C + C mm6 shift + + addl $32, %ecx + subl $7, %eax C size-8 + + movd %ecx, %mm7 + movl PARAM_DST, %ecx + + movq (%ebx), %mm2 C src low qword + leal (%ebx,%eax,4), %ebx C src end - 32 + + testb $4, %cl + leal (%ecx,%eax,4), %ecx C dst end - 32 + + notl %eax C -(size-7) + jz L(dst_aligned) + + psrlq %mm6, %mm2 + incl %eax + +Zdisp( movd, %mm2, 0,(%ecx,%eax,4)) C dst low limb + movq 4(%ebx,%eax,4), %mm2 C new src low qword +L(dst_aligned): + + movq 12(%ebx,%eax,4), %mm0 C src second lowest qword + nop C avoid bad cache 
line crossing + + + C This loop is the important bit, the rest is just support for it. + C Four src limbs are held at the start, and four more will be read. + C Four dst limbs will be written. This schedule seems necessary for + C full speed. + C + C The use of -(size-7) lets the loop stop when %eax becomes >= 0 and + C leaves 0 to 3 which can be tested with test $1 and $2. + +L(top): + C eax counter, -(size-7) step by +4 until >=0 + C ebx src end - 32 + C ecx dst end - 32 + C edx retval + C + C mm0 src next qword + C mm1 scratch + C mm2 src prev qword + C mm6 shift + C mm7 64-shift + + psrlq %mm6, %mm2 + addl $4, %eax + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + por %mm0, %mm2 + movq 4(%ebx,%eax,4), %mm0 + + psrlq %mm6, %mm1 + movq %mm2, -12(%ecx,%eax,4) + + movq %mm0, %mm2 + psllq %mm7, %mm0 + + por %mm0, %mm1 + movq 12(%ebx,%eax,4), %mm0 + + movq %mm1, -4(%ecx,%eax,4) + ja L(top) C jump if no carry and not zero + + + + C Now have the four limbs in mm2 (low) and mm0 (high), and %eax is 0 + C to 3 representing respectively 3 to 0 further limbs. + + testl $2, %eax C testl to avoid bad cache line crossings + jnz L(finish_nottwo) + + C Two or three extra limbs: rshift mm2, OR it with lshifted mm0, mm0 + C becomes new mm2 and a new mm0 is loaded.
+ + psrlq %mm6, %mm2 + movq %mm0, %mm1 + + psllq %mm7, %mm0 + addl $2, %eax + + por %mm0, %mm2 + movq 12(%ebx,%eax,4), %mm0 + + movq %mm2, -4(%ecx,%eax,4) + movq %mm1, %mm2 +L(finish_nottwo): + + + testb $1, %al + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + por %mm0, %mm2 + psrlq %mm6, %mm1 + + movq %mm2, 4(%ecx,%eax,4) + jnz L(finish_even) + + + C one further extra limb to process + + movd 32-4(%ebx), %mm0 C src[size-1], most significant limb + popl %ebx + + movq %mm0, %mm2 + psllq %mm7, %mm0 + + por %mm0, %mm1 + psrlq %mm6, %mm2 + + movq %mm1, 32-12(%ecx) C dst[size-3,size-2] + movd %mm2, 32-4(%ecx) C dst[size-1] + + movl %edx, %eax C retval + + femms + ret + + + nop C avoid bad cache line crossing +L(finish_even): + C no further extra limbs + + movq %mm1, 32-8(%ecx) C dst[size-2,size-1] + movl %edx, %eax C retval + + popl %ebx + + femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mmx/com_n.asm b/rts/gmp/mpn/x86/k6/mmx/com_n.asm new file mode 100644 index 0000000000..8915080f0f --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mmx/com_n.asm @@ -0,0 +1,91 @@ +dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement. +dnl +dnl alignment dst/src, A=0mod8 N=4mod8 +dnl A/A A/N N/A N/N +dnl K6-2 1.0 1.18 1.18 1.18 cycles/limb +dnl K6 1.5 1.85 1.75 1.85 + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Take the bitwise ones-complement of src,size and write it to dst,size. + +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_com_n) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl PARAM_SRC, %eax + movl PARAM_DST, %edx + shrl %ecx + jnz L(two_or_more) + + movl (%eax), %eax + notl %eax + movl %eax, (%edx) + ret + + +L(two_or_more): + pushl %ebx +FRAME_pushl() + movl %ecx, %ebx + + pcmpeqd %mm7, %mm7 C all ones + + + ALIGN(16) +L(top): + C eax src + C ebx floor(size/2) + C ecx counter + C edx dst + C esi + C edi + C ebp + + movq -8(%eax,%ecx,8), %mm0 + pxor %mm7, %mm0 + movq %mm0, -8(%edx,%ecx,8) + loop L(top) + + + jnc L(no_extra) + movl (%eax,%ebx,8), %eax + notl %eax + movl %eax, (%edx,%ebx,8) +L(no_extra): + + popl %ebx + emms_or_femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mmx/logops_n.asm b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm new file mode 100644 index 0000000000..46cb3b7ea5 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm @@ -0,0 +1,212 @@ +dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n, +dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations. 
+dnl +dnl alignment dst/src1/src2, A=0mod8, N=4mod8 +dnl A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N +dnl +dnl K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor +dnl K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor +dnl K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior +dnl +dnl K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor +dnl K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor +dnl K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. 
+ + +include(`../config.m4') + + +dnl M4_p and M4_i are the MMX and integer instructions +dnl M4_*_neg_dst means whether to negate the final result before writing +dnl M4_*_neg_src2 means whether to negate the src2 values before using them + +define(M4_choose_op, +m4_assert_numargs(7) +`ifdef(`OPERATION_$1',` +define(`M4_function', `mpn_$1') +define(`M4_operation', `$1') +define(`M4_p', `$2') +define(`M4_p_neg_dst', `$3') +define(`M4_p_neg_src2',`$4') +define(`M4_i', `$5') +define(`M4_i_neg_dst', `$6') +define(`M4_i_neg_src2',`$7') +')') + +dnl xnor is done in "iorn" style because it's a touch faster than "nior" +dnl style (the two are equivalent for xor). + +M4_choose_op( and_n, pand,0,0, andl,0,0) +M4_choose_op( andn_n, pandn,0,0, andl,0,1) +M4_choose_op( nand_n, pand,1,0, andl,1,0) +M4_choose_op( ior_n, por,0,0, orl,0,0) +M4_choose_op( iorn_n, por,0,1, orl,0,1) +M4_choose_op( nior_n, por,1,0, orl,1,0) +M4_choose_op( xor_n, pxor,0,0, xorl,0,0) +M4_choose_op( xnor_n, pxor,0,1, xorl,0,1) + +ifdef(`M4_function',, +`m4_error(`Unrecognised or undefined OPERATION symbol +')') + +MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n) + + +C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C +C Do src1,size M4_operation src2,size, storing the result in dst,size. +C +C Unaligned movq loads and stores are a bit slower than aligned ones. The +C test at the start of the routine checks the alignment of src1 and if +C necessary processes one limb separately at the low end to make it aligned. +C +C The raw speeds without this alignment switch are as follows. +C +C alignment dst/src1/src2, A=0mod8, N=4mod8 +C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N +C +C K6 1.5 2.0 1.5 2.0 and,andn,ior,xor +C K6 1.75 2.2 2.0 2.28 iorn,xnor +C K6 2.0 2.25 2.35 2.28 nand,nior +C +C +C Future: +C +C K6 can do one 64-bit load per cycle so each of these routines should be +C able to approach 1.0 c/l, if aligned. 
The basic and/andn/ior/xor might be +C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs. +C The others are 4 instructions per 2 limbs, and so can only approach 1.0 +C because there's nowhere to hide some loop control. + +defframe(PARAM_SIZE,16) +defframe(PARAM_SRC2,12) +defframe(PARAM_SRC1,8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) +PROLOGUE(M4_function) + movl PARAM_SIZE, %ecx + pushl %ebx + FRAME_pushl() + movl PARAM_SRC1, %eax + movl PARAM_SRC2, %ebx + cmpl $1, %ecx + movl PARAM_DST, %edx + ja L(two_or_more) + + + movl (%ebx), %ecx + popl %ebx +ifelse(M4_i_neg_src2,1,`notl %ecx') + M4_i (%eax), %ecx +ifelse(M4_i_neg_dst,1,` notl %ecx') + movl %ecx, (%edx) + + ret + + +L(two_or_more): + C eax src1 + C ebx src2 + C ecx size + C edx dst + C esi + C edi + C ebp + C + C carry is clear on entry here, the low bit of size is only put into + C carry by the shrl below + + pushl %esi + FRAME_pushl() + testl $4, %eax + jz L(alignment_ok) + + movl (%ebx), %esi + addl $4, %ebx +ifelse(M4_i_neg_src2,1,`notl %esi') + M4_i (%eax), %esi + addl $4, %eax +ifelse(M4_i_neg_dst,1,` notl %esi') + movl %esi, (%edx) + addl $4, %edx + decl %ecx + +L(alignment_ok): + movl %ecx, %esi + shrl %ecx + jnz L(still_two_or_more) + + movl (%ebx), %ecx + popl %esi +ifelse(M4_i_neg_src2,1,`notl %ecx') + M4_i (%eax), %ecx +ifelse(M4_i_neg_dst,1,` notl %ecx') + popl %ebx + movl %ecx, (%edx) + ret + + +L(still_two_or_more): +ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,` + pcmpeqd %mm7, %mm7 C all ones +') + + ALIGN(16) +L(top): + C eax src1 + C ebx src2 + C ecx counter + C edx dst + C esi + C edi + C ebp + C + C carry bit is low of size + + movq -8(%ebx,%ecx,8), %mm0 +ifelse(M4_p_neg_src2,1,`pxor %mm7, %mm0') + M4_p -8(%eax,%ecx,8), %mm0 +ifelse(M4_p_neg_dst,1,` pxor %mm7, %mm0') + movq %mm0, -8(%edx,%ecx,8) + + loop L(top) + + + jnc L(no_extra) + + movl -4(%ebx,%esi,4), %ebx +ifelse(M4_i_neg_src2,1,`notl %ebx') + M4_i -4(%eax,%esi,4), %ebx +ifelse(M4_i_neg_dst,1,` notl %ebx') + movl %ebx, -4(%edx,%esi,4) +L(no_extra): +
+ popl %esi + popl %ebx + emms_or_femms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mmx/lshift.asm b/rts/gmp/mpn/x86/k6/mmx/lshift.asm new file mode 100644 index 0000000000..f1dc83db46 --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mmx/lshift.asm @@ -0,0 +1,122 @@ +dnl AMD K6 mpn_lshift -- mpn left shift. +dnl +dnl K6: 3.0 cycles/limb + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx +C instructions. This is despite every second fetch being unaligned. + + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) + +PROLOGUE(mpn_lshift) +deflit(`FRAME',0) + + C The 1 limb case can be done without the push %ebx, but it's then + C still the same speed. The push is left as a free helping hand for + C the two_or_more code. 
+ + movl PARAM_SIZE, %eax + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + decl %eax + + movl PARAM_SHIFT, %ecx + jnz L(two_or_more) + + movl (%ebx), %edx C src limb + movl PARAM_DST, %ebx + + shldl( %cl, %edx, %eax) C return value + + shll %cl, %edx + + movl %edx, (%ebx) C dst limb + popl %ebx + + ret + + + ALIGN(16) C avoid offset 0x1f + nop C avoid bad cache line crossing +L(two_or_more): + C eax size-1 + C ebx src + C ecx shift + C edx + + movl (%ebx,%eax,4), %edx C src high limb + negl %ecx + + movd PARAM_SHIFT, %mm6 + addl $32, %ecx C 32-shift + + shrl %cl, %edx + + movd %ecx, %mm7 + movl PARAM_DST, %ecx + +L(top): + C eax counter, size-1 to 1 + C ebx src + C ecx dst + C edx retval + C + C mm0 scratch + C mm6 shift + C mm7 32-shift + + movq -4(%ebx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + movd %mm0, 4(%ecx,%eax,4) + jnz L(top) + + + movd (%ebx), %mm0 + popl %ebx + + psllq %mm6, %mm0 + movl %edx, %eax + + movd %mm0, (%ecx) + + emms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mmx/popham.asm b/rts/gmp/mpn/x86/k6/mmx/popham.asm new file mode 100644 index 0000000000..2c619252bb --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mmx/popham.asm @@ -0,0 +1,238 @@ +dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and +dnl hamming distance. +dnl +dnl popcount hamdist +dnl K6-2: 9.0 11.5 cycles/limb +dnl K6: 12.5 13.0 + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); +C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); +C +C The code here isn't optimal, but it's already a 2x speedup over the plain +C integer mpn/generic/popcount.c,hamdist.c. + + +ifdef(`OPERATION_popcount',, +`ifdef(`OPERATION_hamdist',, +`m4_error(`Need OPERATION_popcount or OPERATION_hamdist +')m4exit(1)')') + +define(HAM, +m4_assert_numargs(1) +`ifdef(`OPERATION_hamdist',`$1')') + +define(POP, +m4_assert_numargs(1) +`ifdef(`OPERATION_popcount',`$1')') + +HAM(` +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC2, 8) +defframe(PARAM_SRC, 4) +define(M4_function,mpn_hamdist) +') +POP(` +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) +define(M4_function,mpn_popcount) +') + +MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) + + +ifdef(`PIC',,` + dnl non-PIC + + DATA + ALIGN(8) + +define(LS, +m4_assert_numargs(1) +`LF(M4_function,`$1')') + +LS(rodata_AAAAAAAAAAAAAAAA): + .long 0xAAAAAAAA + .long 0xAAAAAAAA + +LS(rodata_3333333333333333): + .long 0x33333333 + .long 0x33333333 + +LS(rodata_0F0F0F0F0F0F0F0F): + .long 0x0F0F0F0F + .long 0x0F0F0F0F + +LS(rodata_000000FF000000FF): + .long 0x000000FF + .long 0x000000FF +') + + .text + ALIGN(32) + +POP(`ifdef(`PIC', ` + C avoid shrl crossing a 32-byte boundary + nop')') + +PROLOGUE(M4_function) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + orl %ecx, %ecx + jz L(zero) + +ifdef(`PIC',` + movl $0xAAAAAAAA, %eax + movl $0x33333333, %edx + + movd %eax, %mm7 + movd %edx, %mm6 + + movl $0x0F0F0F0F, %eax + movl $0x000000FF, %edx + + punpckldq %mm7, %mm7 + punpckldq %mm6, %mm6 + + movd %eax, %mm5 + 
movd %edx, %mm4 + + punpckldq %mm5, %mm5 + punpckldq %mm4, %mm4 +',` + + movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7 + movq LS(rodata_3333333333333333), %mm6 + movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5 + movq LS(rodata_000000FF000000FF), %mm4 +') + +define(REG_AAAAAAAAAAAAAAAA, %mm7) +define(REG_3333333333333333, %mm6) +define(REG_0F0F0F0F0F0F0F0F, %mm5) +define(REG_000000FF000000FF, %mm4) + + + movl PARAM_SRC, %eax +HAM(` movl PARAM_SRC2, %edx') + + pxor %mm2, %mm2 C total + + shrl %ecx + jnc L(top) + +Zdisp( movd, 0,(%eax,%ecx,8), %mm1) + +HAM(` +Zdisp( movd, 0,(%edx,%ecx,8), %mm0) + pxor %mm0, %mm1 +') + + incl %ecx + jmp L(loaded) + + + ALIGN(16) +POP(` nop C alignment to avoid crossing 32-byte boundaries') + +L(top): + C eax src + C ebx + C ecx counter, qwords, decrementing + C edx [hamdist] src2 + C + C mm0 (scratch) + C mm1 (scratch) + C mm2 total (low dword) + C mm3 + C mm4 \ + C mm5 | special constants + C mm6 | + C mm7 / + + movq -8(%eax,%ecx,8), %mm1 +HAM(` pxor -8(%edx,%ecx,8), %mm1') + +L(loaded): + movq %mm1, %mm0 + pand REG_AAAAAAAAAAAAAAAA, %mm1 + + psrlq $1, %mm1 +HAM(` nop C code alignment') + + psubd %mm1, %mm0 C bit pairs +HAM(` nop C code alignment') + + + movq %mm0, %mm1 + psrlq $2, %mm0 + + pand REG_3333333333333333, %mm0 + pand REG_3333333333333333, %mm1 + + paddd %mm1, %mm0 C nibbles + + + movq %mm0, %mm1 + psrlq $4, %mm0 + + pand REG_0F0F0F0F0F0F0F0F, %mm0 + pand REG_0F0F0F0F0F0F0F0F, %mm1 + + paddd %mm1, %mm0 C bytes + + movq %mm0, %mm1 + psrlq $8, %mm0 + + + paddb %mm1, %mm0 C words + + + movq %mm0, %mm1 + psrlq $16, %mm0 + + paddd %mm1, %mm0 C dwords + + pand REG_000000FF000000FF, %mm0 + + paddd %mm0, %mm2 C low to total + psrlq $32, %mm0 + + paddd %mm0, %mm2 C high to total + loop L(top) + + + + movd %mm2, %eax + emms_or_femms + ret + +L(zero): + movl $0, %eax + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mmx/rshift.asm b/rts/gmp/mpn/x86/k6/mmx/rshift.asm new file mode 100644 index 0000000000..cc5948f26c --- /dev/null +++ 
b/rts/gmp/mpn/x86/k6/mmx/rshift.asm @@ -0,0 +1,122 @@ +dnl AMD K6 mpn_rshift -- mpn right shift. +dnl +dnl K6: 3.0 cycles/limb + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx +C instructions. This is despite every second fetch being unaligned. + + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + + .text + ALIGN(32) + +PROLOGUE(mpn_rshift) +deflit(`FRAME',0) + + C The 1 limb case can be done without the push %ebx, but it's then + C still the same speed. The push is left as a free helping hand for + C the two_or_more code. 
+ + movl PARAM_SIZE, %eax + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + decl %eax + + movl PARAM_SHIFT, %ecx + jnz L(two_or_more) + + movl (%ebx), %edx C src limb + movl PARAM_DST, %ebx + + shrdl( %cl, %edx, %eax) C return value + + shrl %cl, %edx + + movl %edx, (%ebx) C dst limb + popl %ebx + + ret + + + ALIGN(16) C avoid offset 0x1f +L(two_or_more): + C eax size-1 + C ebx src + C ecx shift + C edx + + movl (%ebx), %edx C src low limb + negl %ecx + + addl $32, %ecx C 32-shift + movd PARAM_SHIFT, %mm6 + + shll %cl, %edx C retval + movl PARAM_DST, %ecx + + leal (%ebx,%eax,4), %ebx + + leal -4(%ecx,%eax,4), %ecx + negl %eax + + +L(simple): + C eax counter (negative) + C ebx &src[size-1] + C ecx &dst[size-1] + C edx retval + C + C mm0 scratch + C mm6 shift + +Zdisp( movq, 0,(%ebx,%eax,4), %mm0) + incl %eax + + psrlq %mm6, %mm0 + +Zdisp( movd, %mm0, 0,(%ecx,%eax,4)) + jnz L(simple) + + + movq %mm0, (%ecx) + movl %edx, %eax + + popl %ebx + + emms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mul_1.asm b/rts/gmp/mpn/x86/k6/mul_1.asm new file mode 100644 index 0000000000..c2220fe4ca --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mul_1.asm @@ -0,0 +1,272 @@ +dnl AMD K6 mpn_mul_1 -- mpn by limb multiply. +dnl +dnl K6: 6.25 cycles/limb. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier); +C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier, mp_limb_t carry); +C +C Multiply src,size by mult and store the result in dst,size. +C Return the carry limb from the top of the result. +C +C mpn_mul_1c() accepts an initial carry for the calculation, it's added into +C the low limb of the result. + +defframe(PARAM_CARRY, 20) +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl minimum 5 because the unrolled code can't handle less +deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(32) + +PROLOGUE(mpn_mul_1c) + pushl %esi +deflit(`FRAME',4) + movl PARAM_CARRY, %esi + jmp LF(mpn_mul_1,start_nc) +EPILOGUE() + + +PROLOGUE(mpn_mul_1) + push %esi +deflit(`FRAME',4) + xorl %esi, %esi C initial carry + +L(start_nc): + mov PARAM_SIZE, %ecx + push %ebx +FRAME_pushl() + + movl PARAM_SRC, %ebx + push %edi +FRAME_pushl() + + movl PARAM_DST, %edi + pushl %ebp +FRAME_pushl() + + cmpl $UNROLL_THRESHOLD, %ecx + movl PARAM_MULTIPLIER, %ebp + + jae L(unroll) + + + C code offset 0x22 here, close enough to aligned +L(simple): + C eax scratch + C ebx src + C ecx counter + C edx scratch + C esi carry + C edi dst + C ebp multiplier + C + C this loop 8 cycles/limb + + movl (%ebx), %eax + addl $4, %ebx + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, (%edi) + addl $4, %edi + + loop L(simple) + + + popl %ebp + + popl %edi + popl %ebx + + movl %esi, %eax + popl %esi + + ret + + +C ----------------------------------------------------------------------------- +C The code for each 
limb is 6 cycles, with instruction decoding being the +C limiting factor. At 4 limbs/loop and 1 cycle/loop of overhead it's 6.25 +C cycles/limb in total. +C +C The secret ingredient to get 6.25 is to start the loop with the mul and +C have the load/store pair at the end. Rotating the load/store to the top +C is an 0.5 c/l slowdown. (Some address generation effect probably.) +C +C The whole unrolled loop fits nicely in exactly 80 bytes. + + + ALIGN(16) C already aligned to 16 here actually +L(unroll): + movl (%ebx), %eax + leal -16(%ebx,%ecx,4), %ebx + + leal -16(%edi,%ecx,4), %edi + subl $4, %ecx + + negl %ecx + + + ALIGN(16) C one byte nop for this alignment +L(top): + C eax scratch + C ebx &src[size-4] + C ecx counter + C edx scratch + C esi carry + C edi &dst[size-4] + C ebp multiplier + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, (%edi,%ecx,4) + movl 4(%ebx,%ecx,4), %eax + + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, 4(%edi,%ecx,4) + movl 8(%ebx,%ecx,4), %eax + + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, 8(%edi,%ecx,4) + movl 12(%ebx,%ecx,4), %eax + + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, 12(%edi,%ecx,4) + movl 16(%ebx,%ecx,4), %eax + + + addl $4, %ecx + js L(top) + + + + C eax next src limb + C ebx &src[size-4] + C ecx 0 to 3 representing respectively 4 to 1 further limbs + C edx + C esi carry + C edi &dst[size-4] + + testb $2, %cl + jnz L(finish_not_two) + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, (%edi,%ecx,4) + movl 4(%ebx,%ecx,4), %eax + + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, 4(%edi,%ecx,4) + movl 8(%ebx,%ecx,4), %eax + + addl $2, %ecx +L(finish_not_two): + + + testb $1, %cl + jnz L(finish_not_one) + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, 8(%edi) + movl 12(%ebx), 
%eax +L(finish_not_one): + + + mull %ebp + + addl %esi, %eax + popl %ebp + + adcl $0, %edx + + movl %eax, 12(%edi) + popl %edi + + popl %ebx + movl %edx, %eax + + popl %esi + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/mul_basecase.asm b/rts/gmp/mpn/x86/k6/mul_basecase.asm new file mode 100644 index 0000000000..1f5a3a4b4b --- /dev/null +++ b/rts/gmp/mpn/x86/k6/mul_basecase.asm @@ -0,0 +1,600 @@ +dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers. +dnl +dnl K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop +dnl unrolling). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K6: UNROLL_COUNT cycles/product (approx) +dnl 8 9.75 +dnl 16 9.3 +dnl 32 9.3 +dnl Maximum possible with the current code is 32. +dnl +dnl With 16 the inner unrolled loop fits exactly in a 256 byte block, which +dnl might explain its good performance.
+ +deflit(UNROLL_COUNT, 16) + + +C void mpn_mul_basecase (mp_ptr wp, +C mp_srcptr xp, mp_size_t xsize, +C mp_srcptr yp, mp_size_t ysize); +C +C Calculate xp,xsize multiplied by yp,ysize, storing the result in +C wp,xsize+ysize. +C +C This routine is essentially the same as mpn/generic/mul_basecase.c, but +C it's faster because it does most of the mpn_addmul_1() entry code only +C once. The saving is about 10-20% on typical sizes coming from the +C Karatsuba multiply code. +C +C Future: +C +C The unrolled loop could be shared by mpn_addmul_1, with some extra stack +C setups and maybe 2 or 3 wasted cycles at the end. Code saving would be +C 256 bytes. + +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 8) +',` +deflit(UNROLL_THRESHOLD, 8) +') + +defframe(PARAM_YSIZE,20) +defframe(PARAM_YP, 16) +defframe(PARAM_XSIZE,12) +defframe(PARAM_XP, 8) +defframe(PARAM_WP, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_mul_basecase) +deflit(`FRAME',0) + + movl PARAM_XSIZE, %ecx + movl PARAM_YP, %eax + + movl PARAM_XP, %edx + movl (%eax), %eax C yp low limb + + cmpl $2, %ecx + ja L(xsize_more_than_two_limbs) + je L(two_by_something) + + + C one limb by one limb + + movl (%edx), %edx C xp low limb + movl PARAM_WP, %ecx + + mull %edx + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + ret + + +C ----------------------------------------------------------------------------- +L(two_by_something): + decl PARAM_YSIZE + pushl %ebx +deflit(`FRAME',4) + + movl PARAM_WP, %ebx + pushl %esi +deflit(`FRAME',8) + + movl %eax, %ecx C yp low limb + movl (%edx), %eax C xp low limb + + movl %edx, %esi C xp + jnz L(two_by_two) + + + C two limbs by one limb + + mull %ecx + + movl %eax, (%ebx) + movl 4(%esi), %eax + + movl %edx, %esi C carry + + mull %ecx + + addl %eax, %esi + movl %esi, 4(%ebx) + + adcl $0, %edx + + movl %edx, 8(%ebx) + popl %esi + + popl %ebx + ret + + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(two_by_two): + C eax xp low limb + C ebx wp + C ecx yp low 
limb + C edx + C esi xp + C edi + C ebp +deflit(`FRAME',8) + + mull %ecx C xp[0] * yp[0] + + push %edi +deflit(`FRAME',12) + movl %eax, (%ebx) + + movl 4(%esi), %eax + movl %edx, %edi C carry, for wp[1] + + mull %ecx C xp[1] * yp[0] + + addl %eax, %edi + movl PARAM_YP, %ecx + + adcl $0, %edx + + movl %edi, 4(%ebx) + movl 4(%ecx), %ecx C yp[1] + + movl 4(%esi), %eax C xp[1] + movl %edx, %edi C carry, for wp[2] + + mull %ecx C xp[1] * yp[1] + + addl %eax, %edi + + adcl $0, %edx + + movl (%esi), %eax C xp[0] + movl %edx, %esi C carry, for wp[3] + + mull %ecx C xp[0] * yp[1] + + addl %eax, 4(%ebx) + adcl %edx, %edi + adcl $0, %esi + + movl %edi, 8(%ebx) + popl %edi + + movl %esi, 12(%ebx) + popl %esi + + popl %ebx + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(xsize_more_than_two_limbs): + +C The first limb of yp is processed with a simple mpn_mul_1 style loop +C inline. Unrolling this doesn't seem worthwhile since it's only run once +C (whereas the addmul below is run ysize-1 many times). A call to the +C actual mpn_mul_1 will be slowed down by the call and parameter pushing and +C popping, and doesn't seem likely to be worthwhile on the typical 10-20 +C limb operations the Karatsuba code calls here with. 
+ + C eax yp[0] + C ebx + C ecx xsize + C edx xp + C esi + C edi + C ebp +deflit(`FRAME',0) + + pushl %edi defframe_pushl(SAVE_EDI) + pushl %ebp defframe_pushl(SAVE_EBP) + + movl PARAM_WP, %edi + pushl %esi defframe_pushl(SAVE_ESI) + + movl %eax, %ebp + pushl %ebx defframe_pushl(SAVE_EBX) + + leal (%edx,%ecx,4), %ebx C xp end + xorl %esi, %esi + + leal (%edi,%ecx,4), %edi C wp end of mul1 + negl %ecx + + +L(mul1): + C eax scratch + C ebx xp end + C ecx counter, negative + C edx scratch + C esi carry + C edi wp end of mul1 + C ebp multiplier + + movl (%ebx,%ecx,4), %eax + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, (%edi,%ecx,4) + incl %ecx + + jnz L(mul1) + + + movl PARAM_YSIZE, %edx + movl %esi, (%edi) C final carry + + movl PARAM_XSIZE, %ecx + decl %edx + + jnz L(ysize_more_than_one_limb) + + popl %ebx + popl %esi + popl %ebp + popl %edi + ret + + +L(ysize_more_than_one_limb): + cmpl $UNROLL_THRESHOLD, %ecx + movl PARAM_YP, %eax + + jae L(unroll) + + +C ----------------------------------------------------------------------------- +C Simple addmul loop. +C +C Using ebx and edi pointing at the ends of their respective locations saves +C a couple of instructions in the outer loop. The inner loop is still 11 +C cycles, the same as the simple loop in aorsmul_1.asm. 
+ + C eax yp + C ebx xp end + C ecx xsize + C edx ysize-1 + C esi + C edi wp end of mul1 + C ebp + + movl 4(%eax), %ebp C multiplier + negl %ecx + + movl %ecx, PARAM_XSIZE C -xsize + xorl %esi, %esi C initial carry + + leal 4(%eax,%edx,4), %eax C yp end + negl %edx + + movl %eax, PARAM_YP + movl %edx, PARAM_YSIZE + + jmp L(simple_outer_entry) + + + C aligning here saves a couple of cycles + ALIGN(16) +L(simple_outer_top): + C edx ysize counter, negative + + movl PARAM_YP, %eax C yp end + xorl %esi, %esi C carry + + movl PARAM_XSIZE, %ecx C -xsize + movl %edx, PARAM_YSIZE + + movl (%eax,%edx,4), %ebp C yp limb multiplier +L(simple_outer_entry): + addl $4, %edi + + +L(simple_inner): + C eax scratch + C ebx xp end + C ecx counter, negative + C edx scratch + C esi carry + C edi wp end of this addmul + C ebp multiplier + + movl (%ebx,%ecx,4), %eax + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl $0, %edx + addl %eax, (%edi,%ecx,4) + adcl %edx, %esi + + incl %ecx + jnz L(simple_inner) + + + movl PARAM_YSIZE, %edx + movl %esi, (%edi) + + incl %edx + jnz L(simple_outer_top) + + + popl %ebx + popl %esi + popl %ebp + popl %edi + ret + + +C ----------------------------------------------------------------------------- +C Unrolled loop. +C +C The unrolled inner loop is the same as in aorsmul_1.asm, see that code for +C some comments. +C +C VAR_COUNTER is for the inner loop, running from VAR_COUNTER_INIT down to +C 0, inclusive. +C +C VAR_JMP is the computed jump into the unrolled loop. +C +C PARAM_XP and PARAM_WP get offset appropriately for where the unrolled loop +C is entered. +C +C VAR_XP_LOW is the least significant limb of xp, which is needed at the +C start of the unrolled loop. This can't just be fetched through the xp +C pointer because of the offset applied to it. +C +C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1, +C inclusive. 
+C +C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be +C added to give the location of the next limb of yp, which is the multiplier +C in the unrolled loop. +C +C PARAM_WP is similarly offset so that the PARAM_YSIZE counter can be added +C to give the starting point in the destination for each unrolled loop (this +C point is one limb upwards for each limb of yp processed). +C +C Having PARAM_YSIZE count negative to zero means it's not necessary to +C store new values of PARAM_YP and PARAM_WP on each loop. Those values on +C the stack remain constant and on each loop an leal adjusts them with the +C PARAM_YSIZE counter value. + + +defframe(VAR_COUNTER, -20) +defframe(VAR_COUNTER_INIT, -24) +defframe(VAR_JMP, -28) +defframe(VAR_XP_LOW, -32) +deflit(VAR_STACK_SPACE, 16) + +dnl For some strange reason using (%esp) instead of 0(%esp) is a touch +dnl slower in this code, hence the defframe empty-if-zero feature is +dnl disabled. +dnl +dnl If VAR_COUNTER is at (%esp), the effect is worse. In this case the +dnl unrolled loop is 255 instead of 256 bytes, but quite how this affects +dnl anything isn't clear. 
+dnl +define(`defframe_empty_if_zero_disabled',1) + +L(unroll): + C eax yp (not used) + C ebx xp end (not used) + C ecx xsize + C edx ysize-1 + C esi + C edi wp end of mul1 (not used) + C ebp +deflit(`FRAME', 16) + + leal -2(%ecx), %ebp C one limb processed at start, + decl %ecx C and ebp is one less + + shrl $UNROLL_LOG2, %ebp + negl %ecx + + subl $VAR_STACK_SPACE, %esp +deflit(`FRAME', 16+VAR_STACK_SPACE) + andl $UNROLL_MASK, %ecx + + movl %ecx, %esi + shll $4, %ecx + + movl %ebp, VAR_COUNTER_INIT + negl %esi + + C 15 code bytes per limb +ifdef(`PIC',` + call L(pic_calc) +L(unroll_here): +',` + leal L(unroll_entry) (%ecx,%esi,1), %ecx +') + + movl PARAM_XP, %ebx + movl %ebp, VAR_COUNTER + + movl PARAM_WP, %edi + movl %ecx, VAR_JMP + + movl (%ebx), %eax + leal 4(%edi,%esi,4), %edi C wp adjust for unrolling and mul1 + + leal (%ebx,%esi,4), %ebx C xp adjust for unrolling + + movl %eax, VAR_XP_LOW + + movl %ebx, PARAM_XP + movl PARAM_YP, %ebx + + leal (%edi,%edx,4), %ecx C wp adjust for ysize indexing + movl 4(%ebx), %ebp C multiplier (yp second limb) + + leal 4(%ebx,%edx,4), %ebx C yp adjust for ysize indexing + + movl %ecx, PARAM_WP + + leal 1(%esi), %ecx C adjust parity for decl %ecx above + + movl %ebx, PARAM_YP + negl %edx + + movl %edx, PARAM_YSIZE + jmp L(unroll_outer_entry) + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%ecx,%esi,1), %ecx + addl $L(unroll_entry)-L(unroll_here), %ecx + addl (%esp), %ecx + ret +') + + +C ----------------------------------------------------------------------------- + C Aligning here saves a couple of cycles per loop. Using 32 doesn't + C cost any extra space, since the inner unrolled loop below is + C aligned to 32. 
+ ALIGN(32) +L(unroll_outer_top): + C edx ysize + + movl PARAM_YP, %eax + movl %edx, PARAM_YSIZE C incremented ysize counter + + movl PARAM_WP, %edi + + movl VAR_COUNTER_INIT, %ebx + movl (%eax,%edx,4), %ebp C next multiplier + + movl PARAM_XSIZE, %ecx + leal (%edi,%edx,4), %edi C adjust wp for where we are in yp + + movl VAR_XP_LOW, %eax + movl %ebx, VAR_COUNTER + +L(unroll_outer_entry): + mull %ebp + + C using testb is a tiny bit faster than testl + testb $1, %cl + + movl %eax, %ecx C low carry + movl VAR_JMP, %eax + + movl %edx, %esi C high carry + movl PARAM_XP, %ebx + + jnz L(unroll_noswap) + movl %ecx, %esi C high,low carry other way around + + movl %edx, %ecx +L(unroll_noswap): + + jmp *%eax + + + +C ----------------------------------------------------------------------------- + ALIGN(32) +L(unroll_top): + C eax scratch + C ebx xp + C ecx carry low + C edx scratch + C esi carry high + C edi wp + C ebp multiplier + C VAR_COUNTER loop counter + C + C 15 code bytes each limb + + leal UNROLL_BYTES(%edi), %edi + +L(unroll_entry): +deflit(CHUNK_COUNT,2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*CHUNK_COUNT*4)) + deflit(`disp1', eval(disp0 + 4)) + deflit(`disp2', eval(disp1 + 4)) + + movl disp1(%ebx), %eax + mull %ebp +Zdisp( addl, %ecx, disp0,(%edi)) + adcl %eax, %esi + movl %edx, %ecx + jadcl0( %ecx) + + movl disp2(%ebx), %eax + mull %ebp + addl %esi, disp1(%edi) + adcl %eax, %ecx + movl %edx, %esi + jadcl0( %esi) +') + + decl VAR_COUNTER + leal UNROLL_BYTES(%ebx), %ebx + + jns L(unroll_top) + + + movl PARAM_YSIZE, %edx + addl %ecx, UNROLL_BYTES(%edi) + + adcl $0, %esi + + incl %edx + movl %esi, UNROLL_BYTES+4(%edi) + + jnz L(unroll_outer_top) + + + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + movl SAVE_EDI, %edi + movl SAVE_EBX, %ebx + + addl $FRAME, %esp + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k6/sqr_basecase.asm b/rts/gmp/mpn/x86/k6/sqr_basecase.asm new file mode 100644 index 0000000000..70d49b3e57 --- /dev/null +++ 
b/rts/gmp/mpn/x86/k6/sqr_basecase.asm @@ -0,0 +1,672 @@ +dnl AMD K6 mpn_sqr_basecase -- square an mpn number. +dnl +dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular +dnl product (measured on the speed difference between 17 and 33 limbs, +dnl which is roughly the Karatsuba recursing range). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl KARATSUBA_SQR_THRESHOLD_MAX is the maximum KARATSUBA_SQR_THRESHOLD this +dnl code supports. This value is used only by the tune program to know +dnl what it can go up to. (An attempt to compile with a bigger value will +dnl trigger some m4_assert()s in the code, making the build fail.) +dnl +dnl The value is determined by requiring the displacements in the unrolled +dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of +dnl 63, giving a maximum KARATSUBA_SQR_THRESHOLD of 66. + +deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66) + + +dnl Allow a value from the tune program to override config.m4. 
+ +ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', +`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') + + +dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The +dnl number required is determined by KARATSUBA_SQR_THRESHOLD, since +dnl mpn_sqr_basecase only needs to handle sizes < KARATSUBA_SQR_THRESHOLD. +dnl +dnl The first addmul is the biggest, and this takes the second least +dnl significant limb and multiplies it by the third least significant and +dnl up. Hence for a maximum operand size of KARATSUBA_SQR_THRESHOLD-1 +dnl limbs, UNROLL_COUNT needs to be KARATSUBA_SQR_THRESHOLD-3. + +m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') +deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) + + +C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C The algorithm is essentially the same as mpn/generic/sqr_basecase.c, but a +C lot of function call overheads are avoided, especially when the given size +C is small. +C +C The code size might look a bit excessive, but not all of it is executed +C and so won't fill up the code cache. The 1x1, 2x2 and 3x3 special cases +C clearly apply only to those sizes; mid sizes like 10x10 only need part of +C the unrolled addmul; and big sizes like 35x35 that do need all of it will +C at least be getting value for money, because 35x35 spends something like +C 5780 cycles here. +C +C Different values of UNROLL_COUNT give slightly different speeds, between +C 9.0 and 9.2 c/tri-prod measured on the difference between 17 and 33 limbs. +C This isn't a big difference, but it's presumably some alignment effect +C which if understood could give a simple speedup. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_sqr_basecase) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl PARAM_SRC, %eax + + cmpl $2, %ecx + je L(two_limbs) + + movl PARAM_DST, %edx + ja L(three_or_more) + + +C ----------------------------------------------------------------------------- +C one limb only + C eax src + C ebx + C ecx size + C edx dst + + movl (%eax), %eax + movl %edx, %ecx + + mull %eax + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(two_limbs): + C eax src + C ebx + C ecx size + C edx dst + + pushl %ebx + movl %eax, %ebx C src +deflit(`FRAME',4) + + movl (%ebx), %eax + movl PARAM_DST, %ecx + + mull %eax C src[0]^2 + + movl %eax, (%ecx) + movl 4(%ebx), %eax + + movl %edx, 4(%ecx) + + mull %eax C src[1]^2 + + movl %eax, 8(%ecx) + movl (%ebx), %eax + + movl %edx, 12(%ecx) + movl 4(%ebx), %edx + + mull %edx C src[0]*src[1] + + addl %eax, 4(%ecx) + + adcl %edx, 8(%ecx) + adcl $0, 12(%ecx) + + popl %ebx + addl %eax, 4(%ecx) + + adcl %edx, 8(%ecx) + adcl $0, 12(%ecx) + + ret + + +C ----------------------------------------------------------------------------- +L(three_or_more): +deflit(`FRAME',0) + cmpl $4, %ecx + jae L(four_or_more) + + +C ----------------------------------------------------------------------------- +C three limbs + C eax src + C ecx size + C edx dst + + pushl %ebx + movl %eax, %ebx C src + + movl (%ebx), %eax + movl %edx, %ecx C dst + + mull %eax C src[0] ^ 2 + + movl %eax, (%ecx) + movl 4(%ebx), %eax + + movl %edx, 4(%ecx) + pushl %esi + + mull %eax C src[1] ^ 2 + + movl %eax, 8(%ecx) + movl 8(%ebx), %eax + + movl %edx, 12(%ecx) + pushl %edi + + mull %eax C src[2] ^ 2 + + movl %eax, 16(%ecx) + movl (%ebx), %eax + + movl %edx, 20(%ecx) + movl 4(%ebx), %edx + + mull %edx C src[0] * src[1] + + movl %eax, %esi + movl (%ebx), %eax + + movl %edx, %edi + movl 8(%ebx), %edx + 
+ pushl %ebp + xorl %ebp, %ebp + + mull %edx C src[0] * src[2] + + addl %eax, %edi + movl 4(%ebx), %eax + + adcl %edx, %ebp + + movl 8(%ebx), %edx + + mull %edx C src[1] * src[2] + + addl %eax, %ebp + + adcl $0, %edx + + + C eax will be dst[5] + C ebx + C ecx dst + C edx dst[4] + C esi dst[1] + C edi dst[2] + C ebp dst[3] + + xorl %eax, %eax + addl %esi, %esi + adcl %edi, %edi + adcl %ebp, %ebp + adcl %edx, %edx + adcl $0, %eax + + addl %esi, 4(%ecx) + adcl %edi, 8(%ecx) + adcl %ebp, 12(%ecx) + + popl %ebp + popl %edi + + adcl %edx, 16(%ecx) + + popl %esi + popl %ebx + + adcl %eax, 20(%ecx) + ASSERT(nc) + + ret + + +C ----------------------------------------------------------------------------- + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) +defframe(VAR_COUNTER,-20) +defframe(VAR_JMP, -24) +deflit(STACK_SPACE, 24) + + ALIGN(16) +L(four_or_more): + + C eax src + C ebx + C ecx size + C edx dst + C esi + C edi + C ebp + +C First multiply src[0]*src[1..size-1] and store at dst[1..size]. +C +C A test was done calling mpn_mul_1 here to get the benefit of its unrolled +C loop, but this was only a tiny speedup; at 35 limbs it took 24 cycles off +C a 5780 cycle operation, which is not surprising since the loop here is 8 +C c/l and mpn_mul_1 is 6.25 c/l. 
+ + subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE) + + movl %edi, SAVE_EDI + leal 4(%edx), %edi + + movl %ebx, SAVE_EBX + leal 4(%eax), %ebx + + movl %esi, SAVE_ESI + xorl %esi, %esi + + movl %ebp, SAVE_EBP + + C eax + C ebx src+4 + C ecx size + C edx + C esi + C edi dst+4 + C ebp + + movl (%eax), %ebp C multiplier + leal -1(%ecx), %ecx C size-1, and pad to a 16 byte boundary + + + ALIGN(16) +L(mul_1): + C eax scratch + C ebx src ptr + C ecx counter + C edx scratch + C esi carry + C edi dst ptr + C ebp multiplier + + movl (%ebx), %eax + addl $4, %ebx + + mull %ebp + + addl %esi, %eax + movl $0, %esi + + adcl %edx, %esi + + movl %eax, (%edi) + addl $4, %edi + + loop L(mul_1) + + +C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2. +C +C The last two addmuls, which are the bottom right corner of the product +C triangle, are left to the end. These are src[size-3]*src[size-2,size-1] +C and src[size-2]*src[size-1]. If size is 4 then it's only these corner +C cases that need to be done. +C +C The unrolled code is the same as mpn_addmul_1(), see that routine for some +C comments. +C +C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive. +C +C VAR_JMP is the computed jump into the unrolled code, stepped by one code +C chunk each outer loop. +C +C K6 doesn't do any branch prediction on indirect jumps, which is good +C actually because it's a different target each time. The unrolled addmul +C is about 3 cycles/limb faster than a simple loop, so the 6 cycle cost of +C the indirect jump is quickly recovered. + + +dnl This value is also implicitly encoded in a shift and add. +dnl +deflit(CODE_BYTES_PER_LIMB, 15) + +dnl With the unmodified &src[size] and &dst[size] pointers, the +dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT +dnl values up to 31. Above that an offset must be added to them. 
+dnl +deflit(OFFSET, +ifelse(eval(UNROLL_COUNT>31),1, +eval((UNROLL_COUNT-31)*4), +0)) + + C eax + C ebx &src[size] + C ecx + C edx + C esi carry + C edi &dst[size] + C ebp + + movl PARAM_SIZE, %ecx + movl %esi, (%edi) + + subl $4, %ecx + jz L(corner) + + movl %ecx, %edx +ifelse(OFFSET,0,, +` subl $OFFSET, %ebx') + + shll $4, %ecx +ifelse(OFFSET,0,, +` subl $OFFSET, %edi') + + negl %ecx + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx +') + negl %edx + + + C The calculated jump mustn't be before the start of the available + C code. This is the limitation UNROLL_COUNT puts on the src operand + C size, but checked here using the jump address directly. + C + ASSERT(ae,` + movl_text_address( L(unroll_inner_start), %eax) + cmpl %eax, %ecx + ') + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll_outer_top): + C eax + C ebx &src[size], constant + C ecx VAR_JMP + C edx VAR_COUNTER, limbs, negative + C esi high limb to store + C edi dst ptr, high of last addmul + C ebp + + movl -12+OFFSET(%ebx,%edx,4), %ebp C multiplier + movl %edx, VAR_COUNTER + + movl -8+OFFSET(%ebx,%edx,4), %eax C first limb of multiplicand + + mull %ebp + + testb $1, %cl + + movl %edx, %esi C high carry + movl %ecx, %edx C jump + + movl %eax, %ecx C low carry + leal CODE_BYTES_PER_LIMB(%edx), %edx + + movl %edx, VAR_JMP + leal 4(%edi), %edi + + C A branch-free version of this using some xors was found to be a + C touch slower than just a conditional jump, despite the jump + C switching between taken and not taken on every loop. + +ifelse(eval(UNROLL_COUNT%2),0, + jz,jnz) L(unroll_noswap) + movl %esi, %eax C high,low carry other way around + + movl %ecx, %esi + movl %eax, %ecx +L(unroll_noswap): + + jmp *%edx + + + C Must be on an even address here so the low bit of the jump address + C will indicate which way around ecx/esi should start. 
+ C + C An attempt was made at padding here to get the end of the unrolled + C code to come out on a good alignment, to save padding before + C L(corner). This worked, but turned out to run slower than just an + C ALIGN(2). The reason for this is not clear, it might be related + C to the different speeds on different UNROLL_COUNTs noted above. + + ALIGN(2) + +L(unroll_inner_start): + C eax scratch + C ebx src + C ecx carry low + C edx scratch + C esi carry high + C edi dst + C ebp multiplier + C + C 15 code bytes each limb + C ecx/esi swapped on each chunk + +forloop(`i', UNROLL_COUNT, 1, ` + deflit(`disp_src', eval(-i*4 + OFFSET)) + deflit(`disp_dst', eval(disp_src - 4)) + + m4_assert(`disp_src>=-128 && disp_src<128') + m4_assert(`disp_dst>=-128 && disp_dst<128') + +ifelse(eval(i%2),0,` +Zdisp( movl, disp_src,(%ebx), %eax) + mull %ebp +Zdisp( addl, %esi, disp_dst,(%edi)) + adcl %eax, %ecx + movl %edx, %esi + jadcl0( %esi) +',` + dnl this one comes out last +Zdisp( movl, disp_src,(%ebx), %eax) + mull %ebp +Zdisp( addl, %ecx, disp_dst,(%edi)) + adcl %eax, %esi + movl %edx, %ecx + jadcl0( %ecx) +') +') +L(unroll_inner_end): + + addl %esi, -4+OFFSET(%edi) + + movl VAR_COUNTER, %edx + jadcl0( %ecx) + + movl %ecx, m4_empty_if_zero(OFFSET)(%edi) + movl VAR_JMP, %ecx + + incl %edx + jnz L(unroll_outer_top) + + +ifelse(OFFSET,0,,` + addl $OFFSET, %ebx + addl $OFFSET, %edi +') + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(corner): + C ebx &src[size] + C edi &dst[2*size-5] + + movl -12(%ebx), %ebp + + movl -8(%ebx), %eax + movl %eax, %ecx + + mull %ebp + + addl %eax, -4(%edi) + adcl $0, %edx + + movl -4(%ebx), %eax + movl %edx, %esi + movl %eax, %ebx + + mull %ebp + + addl %esi, %eax + adcl $0, %edx + + addl %eax, (%edi) + adcl $0, %edx + + movl %edx, %esi + movl %ebx, %eax + + mull %ecx + + addl %esi, %eax + movl %eax, 4(%edi) + + adcl $0, %edx + + movl %edx, 8(%edi) + + +C 
----------------------------------------------------------------------------- +C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1]. +C The loop measures about 6 cycles/iteration, though it looks like it should +C decode in 5. + +L(lshift_start): + movl PARAM_SIZE, %ecx + + movl PARAM_DST, %edi + subl $1, %ecx C size-1 and clear carry + + movl PARAM_SRC, %ebx + movl %ecx, %edx + + xorl %eax, %eax C ready for adcl + + + ALIGN(16) +L(lshift): + C eax + C ebx src (for later use) + C ecx counter, decrementing + C edx size-1 (for later use) + C esi + C edi dst, incrementing + C ebp + + rcll 4(%edi) + rcll 8(%edi) + leal 8(%edi), %edi + loop L(lshift) + + + adcl %eax, %eax + + movl %eax, 4(%edi) C dst most significant limb + movl (%ebx), %eax C src[0] + + leal 4(%ebx,%edx,4), %ebx C &src[size] + subl %edx, %ecx C -(size-1) + + +C ----------------------------------------------------------------------------- +C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ..., +C src[size-1]^2. dst[0] hasn't been set at all yet, and just gets the +C low limb of src[0]^2.
+ + + mull %eax + + movl %eax, (%edi,%ecx,8) C dst[0] + + + ALIGN(16) +L(diag): + C eax scratch + C ebx &src[size] + C ecx counter, negative + C edx carry + C esi scratch + C edi dst[2*size-2] + C ebp + + movl (%ebx,%ecx,4), %eax + movl %edx, %esi + + mull %eax + + addl %esi, 4(%edi,%ecx,8) + adcl %eax, 8(%edi,%ecx,8) + adcl $0, %edx + + incl %ecx + jnz L(diag) + + + movl SAVE_EBX, %ebx + movl SAVE_ESI, %esi + + addl %edx, 4(%edi) C dst most significant limb + + movl SAVE_EDI, %edi + movl SAVE_EBP, %ebp + addl $FRAME, %esp + ret + + + +C ----------------------------------------------------------------------------- +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + addl (%esp), %ecx + addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx + addl %edx, %ecx + ret +') + + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/README b/rts/gmp/mpn/x86/k7/README new file mode 100644 index 0000000000..c34315c401 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/README @@ -0,0 +1,145 @@ + + AMD K7 MPN SUBROUTINES + + +This directory contains code optimized for the AMD Athlon CPU. + +The mmx subdirectory has routines using MMX instructions. All Athlons have +MMX, the separate directory is just so that configure can omit it if the +assembler doesn't support MMX. + + + +STATUS + +Times for the loops, with all code and data in L1 cache. + + cycles/limb + mpn_add/sub_n 1.6 + + mpn_copyi 0.75 or 1.0 \ varying with data alignment + mpn_copyd 0.75 or 1.0 / + + mpn_divrem_1 17.0 integer part, 15.0 fractional part + mpn_mod_1 17.0 + mpn_divexact_by3 8.0 + + mpn_l/rshift 1.2 + + mpn_mul_1 3.4 + mpn_addmul/submul_1 3.9 + + mpn_mul_basecase 4.42 cycles/crossproduct (approx) + + mpn_popcount 5.0 + mpn_hamdist 6.0 + +Prefetching of sources hasn't yet been tried. + + + +NOTES + +cmov, MMX, 3DNow and some extensions to MMX and 3DNow are available. + +Write-allocate L1 data cache means prefetching of destinations is unnecessary. 
+ +Floating point multiplications can be done in parallel with integer +multiplications, but there doesn't seem to be any way to make use of this. + +Unsigned "mul"s can be issued every 3 cycles. This suggests 3 is a limit on +the speed of the multiplication routines. The documentation shows mul +executing in IEU0 (or maybe in IEU0 and IEU1 together), so it might be that, +to get near 3 cycles, code has to be arranged so that nothing else is issued +to IEU0. A busy IEU0 could explain why some code takes 4 cycles and other +apparently equivalent code takes 5. + + + +OPTIMIZATIONS + +Unrolled loops are used to reduce looping overhead. The unrolling is +configurable up to 32 limbs/loop for most routines and up to 64 for some. +The K7 has a 64k L1 code cache so quite big unrolling is allowable. + +Computed jumps into the unrolling are used to handle sizes that are not a multiple of +the unrolling. An attractive feature of this is that times increase +smoothly with operand size, but it may be that some routines should just +have simple loops to finish up, especially when PIC adds between 2 and 16 +cycles to get %eip. + +Position independent code is implemented using a call to get %eip for the +computed jumps and a ret is always done, rather than an addl $4,%esp or a +popl, so the CPU return address branch prediction stack stays synchronised +with the actual stack in memory. + +Branch prediction, in the absence of any history, will guess forward jumps are +not taken and backward jumps are taken. Where possible it's arranged that +the less likely or less important case is under a taken forward jump. + + + +CODING + +Instructions in general code have been shown grouped if they can execute +together, which means up to three direct-path instructions which have no +successive dependencies. K7 always decodes three and has out-of-order +execution, but the groupings show what slots might be available and what +dependency chains exist.
+ +When there's vector-path instructions an effort is made to get triplets of +direct-path instructions in between them, even if there's dependencies, +since this maximizes decoding throughput and might save a cycle or two if +decoding is the limiting factor. + + + +INSTRUCTIONS + +adcl direct +divl 39 cycles back-to-back +lodsl,etc vector +loop 1 cycle vector (decl/jnz opens up one decode slot) +movd reg vector +movd mem direct +mull issue every 3 cycles, latency 4 cycles low word, 6 cycles high word +popl vector (use movl for more than one pop) +pushl direct, will pair with a load +shrdl %cl vector, 3 cycles, seems to be 3 decode too +xorl r,r false read dependency recognised + + + +REFERENCES + +"AMD Athlon Processor X86 Code Optimization Guide", AMD publication number +22007, revision E, November 1999. Available on-line, + + http://www.amd.com/products/cpg/athlon/techdocs/pdf/22007.pdf + +"3DNow Technology Manual", AMD publication number 21928F/0-August 1999. +This describes the femms and prefetch instructions. Available on-line, + + http://www.amd.com/K6/k6docs/pdf/21928.pdf + +"AMD Extensions to the 3DNow and MMX Instruction Sets Manual", AMD +publication number 22466, revision B, August 1999. This describes +instructions added in the Athlon processor, such as pswapd and the extra +prefetch forms. Available on-line, + + http://www.amd.com/products/cpg/athlon/techdocs/pdf/22466.pdf + +"3DNow Instruction Porting Guide", AMD publication number 22621, revision B, +August 1999. This has some notes on general Athlon optimizations as well as +3DNow. Available on-line, + + http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf + + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/mpn/x86/k7/aors_n.asm b/rts/gmp/mpn/x86/k7/aors_n.asm new file mode 100644 index 0000000000..85fa9d3036 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/aors_n.asm @@ -0,0 +1,250 @@ +dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract. 
+dnl +dnl K7: 1.64 cycles/limb (at 16 limb/loop). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K7: UNROLL_COUNT cycles/limb +dnl 8 1.9 +dnl 16 1.64 +dnl 32 1.7 +dnl 64 2.0 +dnl Maximum possible with the current code is 64. + +deflit(UNROLL_COUNT, 16) + + +ifdef(`OPERATION_add_n', ` + define(M4_inst, adcl) + define(M4_function_n, mpn_add_n) + define(M4_function_nc, mpn_add_nc) + define(M4_description, add) +',`ifdef(`OPERATION_sub_n', ` + define(M4_inst, sbbl) + define(M4_function_n, mpn_sub_n) + define(M4_function_nc, mpn_sub_nc) + define(M4_description, subtract) +',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n +')')') + +MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) + + +C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); +C +C Calculate src1,size M4_description src2,size, and store the result in +C dst,size. The return value is the carry bit from the top of the result (1 +C or 0). 
+C +C The _nc version accepts 1 or 0 for an initial carry into the low limb of +C the calculation. Note values other than 1 or 0 here will lead to garbage +C results. +C +C This code runs at 1.64 cycles/limb, which is probably the best possible +C with plain integer operations. Each limb is 2 loads and 1 store, and in +C one cycle the K7 can do two loads, or a load and a store, leading to 1.5 +C c/l. + +dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1. +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 8) +',` +deflit(UNROLL_THRESHOLD, 8) +') + +defframe(PARAM_CARRY,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC2, 12) +defframe(PARAM_SRC1, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EBP, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EBX, -12) +defframe(SAVE_EDI, -16) +deflit(STACK_SPACE, 16) + + .text + ALIGN(32) +deflit(`FRAME',0) + +PROLOGUE(M4_function_nc) + movl PARAM_CARRY, %eax + jmp LF(M4_function_n,start) +EPILOGUE() + +PROLOGUE(M4_function_n) + + xorl %eax, %eax C carry +L(start): + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %edi, SAVE_EDI + movl %ebx, SAVE_EBX + cmpl $UNROLL_THRESHOLD, %ecx + + movl PARAM_SRC2, %edx + movl PARAM_SRC1, %ebx + jae L(unroll) + + movl PARAM_DST, %edi + leal (%ebx,%ecx,4), %ebx + leal (%edx,%ecx,4), %edx + + leal (%edi,%ecx,4), %edi + negl %ecx + shrl %eax C initial carry (bit 0 of eax) into the carry flag + + C This loop is in a single 16 byte code block already, so no + C alignment necessary. +L(simple): + C eax scratch + C ebx src1 + C ecx counter + C edx src2 + C esi + C edi dst + C ebp + + movl (%ebx,%ecx,4), %eax + M4_inst (%edx,%ecx,4), %eax + movl %eax, (%edi,%ecx,4) + incl %ecx C incl, not addl, so the carry flag survives to the next M4_inst + jnz L(simple) + + movl $0, %eax C movl, not xorl, so the carry flag survives for setc + movl SAVE_EDI, %edi + + movl SAVE_EBX, %ebx + setc %al + addl $STACK_SPACE, %esp + + ret + + +C ----------------------------------------------------------------------------- + C This is at 0x55, close enough to aligned.
+L(unroll): +deflit(`FRAME',STACK_SPACE) + movl %ebp, SAVE_EBP + andl $-2, %ecx C size low bit masked out + andl $1, PARAM_SIZE C size low bit kept + + movl %ecx, %edi + decl %ecx + movl PARAM_DST, %ebp + + shrl $UNROLL_LOG2, %ecx + negl %edi + movl %esi, SAVE_ESI + + andl $UNROLL_MASK, %edi + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(entry) (%edi,%edi,8), %esi C 9 bytes per +') + negl %edi + shrl %eax + + leal ifelse(UNROLL_BYTES,256,128) (%ebx,%edi,4), %ebx + leal ifelse(UNROLL_BYTES,256,128) (%edx,%edi,4), %edx + leal ifelse(UNROLL_BYTES,256,128) (%ebp,%edi,4), %edi + + jmp *%esi + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%edi,%edi,8), %esi + addl $L(entry)-L(here), %esi + addl (%esp), %esi + ret +') + + +C ----------------------------------------------------------------------------- + ALIGN(32) +L(top): + C eax zero + C ebx src1 + C ecx counter + C edx src2 + C esi scratch (was computed jump) + C edi dst + C ebp scratch + + leal UNROLL_BYTES(%edx), %edx + +L(entry): +deflit(CHUNK_COUNT, 2) +forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 + 4)) + +Zdisp( movl, disp0,(%ebx), %esi) + movl disp1(%ebx), %ebp +Zdisp( M4_inst,disp0,(%edx), %esi) +Zdisp( movl, %esi, disp0,(%edi)) + M4_inst disp1(%edx), %ebp + movl %ebp, disp1(%edi) +') + + decl %ecx + leal UNROLL_BYTES(%ebx), %ebx + leal UNROLL_BYTES(%edi), %edi + jns L(top) + + + mov PARAM_SIZE, %esi + movl SAVE_EBP, %ebp + movl $0, %eax + + decl %esi + js L(even) + + movl (%ebx), %ecx + M4_inst UNROLL_BYTES(%edx), %ecx + movl %ecx, (%edi) +L(even): + + movl SAVE_EDI, %edi + movl SAVE_EBX, %ebx + setc %al + + movl SAVE_ESI, %esi + addl $STACK_SPACE, %esp + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/aorsmul_1.asm b/rts/gmp/mpn/x86/k7/aorsmul_1.asm new file mode 100644 index 0000000000..9f9c3daaf4 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/aorsmul_1.asm @@ -0,0 
+1,364 @@ +dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. +dnl +dnl K7: 3.9 cycles/limb. +dnl +dnl Future: It should be possible to avoid the separate mul after the +dnl unrolled loop by moving the movl/adcl to the top. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K7: UNROLL_COUNT cycles/limb +dnl 4 4.42 +dnl 8 4.16 +dnl 16 3.9 +dnl 32 3.9 +dnl 64 3.87 +dnl Maximum possible with the current code is 64. 
+ +deflit(UNROLL_COUNT, 16) + + +ifdef(`OPERATION_addmul_1',` + define(M4_inst, addl) + define(M4_function_1, mpn_addmul_1) + define(M4_function_1c, mpn_addmul_1c) + define(M4_description, add it to) + define(M4_desc_retval, carry) +',`ifdef(`OPERATION_submul_1',` + define(M4_inst, subl) + define(M4_function_1, mpn_submul_1) + define(M4_function_1c, mpn_submul_1c) + define(M4_description, subtract it from) + define(M4_desc_retval, borrow) +',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 +')')') + +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) + + +C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +C Calculate src,size multiplied by mult and M4_description dst,size. +C Return the M4_desc_retval limb from the top of the result. + +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 9) +',` +deflit(UNROLL_THRESHOLD, 6) +') + +defframe(PARAM_CARRY, 20) +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) +deflit(SAVE_SIZE, 16) + + .text + ALIGN(32) +PROLOGUE(M4_function_1) + movl PARAM_SIZE, %edx + movl PARAM_SRC, %eax + xorl %ecx, %ecx + + decl %edx + jnz LF(M4_function_1c,start_1) + + movl (%eax), %eax + movl PARAM_DST, %ecx + + mull PARAM_MULTIPLIER + + M4_inst %eax, (%ecx) + adcl $0, %edx + movl %edx, %eax + + ret +EPILOGUE() + + ALIGN(16) +PROLOGUE(M4_function_1c) + movl PARAM_SIZE, %edx + movl PARAM_SRC, %eax + + decl %edx + jnz L(more_than_one_limb) + + movl (%eax), %eax + movl PARAM_DST, %ecx + + mull PARAM_MULTIPLIER + + addl PARAM_CARRY, %eax + + adcl $0, %edx + M4_inst %eax, (%ecx) + + adcl $0, %edx + movl %edx, %eax + + ret + + + C offset 0x44 so close enough to aligned +L(more_than_one_limb): + movl PARAM_CARRY, 
%ecx +L(start_1): + C eax src + C ecx initial carry + C edx size-1 + subl $SAVE_SIZE, %esp +deflit(`FRAME',16) + + movl %ebx, SAVE_EBX + movl %esi, SAVE_ESI + movl %edx, %ebx C size-1 + + movl PARAM_SRC, %esi + movl %ebp, SAVE_EBP + cmpl $UNROLL_THRESHOLD, %edx + + movl PARAM_MULTIPLIER, %ebp + movl %edi, SAVE_EDI + + movl (%esi), %eax C src low limb + movl PARAM_DST, %edi + ja L(unroll) + + + C simple loop + + leal 4(%esi,%ebx,4), %esi C point one limb past last + leal (%edi,%ebx,4), %edi C point at last limb + negl %ebx + + C The movl to load the next source limb is done well ahead of the + C mul. This is necessary for full speed, and leads to one limb + C handled separately at the end. + +L(simple): + C eax src limb + C ebx loop counter + C ecx carry limb + C edx scratch + C esi src + C edi dst + C ebp multiplier + + mull %ebp + + addl %eax, %ecx + adcl $0, %edx + + M4_inst %ecx, (%edi,%ebx,4) + movl (%esi,%ebx,4), %eax + adcl $0, %edx + + incl %ebx + movl %edx, %ecx + jnz L(simple) + + + mull %ebp + + movl SAVE_EBX, %ebx + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + + addl %eax, %ecx + adcl $0, %edx + + M4_inst %ecx, (%edi) + adcl $0, %edx + movl SAVE_EDI, %edi + + addl $SAVE_SIZE, %esp + movl %edx, %eax + ret + + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll): + C eax src low limb + C ebx size-1 + C ecx carry + C edx size-1 + C esi src + C edi dst + C ebp multiplier + +dnl overlapping with parameters no longer needed +define(VAR_COUNTER,`PARAM_SIZE') +define(VAR_JUMP, `PARAM_MULTIPLIER') + + subl $2, %ebx C (size-2)-1 + decl %edx C size-2 + + shrl $UNROLL_LOG2, %ebx + negl %edx + + movl %ebx, VAR_COUNTER + andl $UNROLL_MASK, %edx + + movl %edx, %ebx + shll $4, %edx + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(entry) (%edx,%ebx,1), %edx +') + negl %ebx + movl %edx, VAR_JUMP + + mull %ebp + + addl %eax, %ecx C initial carry, becomes low carry + adcl $0, %edx + testb $1, %bl + + movl 
4(%esi), %eax C src second limb + leal ifelse(UNROLL_BYTES,256,128+) 8(%esi,%ebx,4), %esi + leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebx,4), %edi + + movl %edx, %ebx C high carry + cmovnz( %ecx, %ebx) C high,low carry other way around + cmovnz( %edx, %ecx) + + jmp *VAR_JUMP + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%edx,%ebx,1), %edx + addl $L(entry)-L(here), %edx + addl (%esp), %edx + ret +') + + +C ----------------------------------------------------------------------------- +C This code uses a "two carry limbs" scheme. At the top of the loop the +C carries are ebx=lo, ecx=hi, then they swap for each limb processed. For +C the computed jump an odd size means they start one way around, an even +C size the other. Either way one limb is handled separately at the start of +C the loop. +C +C The positioning of the movl to load the next source limb is important. +C Moving it after the adcl with a view to avoiding a separate mul at the end +C of the loop slows the code down. 
+ + ALIGN(32) +L(top): + C eax src limb + C ebx carry high + C ecx carry low + C edx scratch + C esi src+8 + C edi dst + C ebp multiplier + C + C VAR_COUNTER loop counter + C + C 17 bytes each limb + +L(entry): +deflit(CHUNK_COUNT,2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 + 4)) + + mull %ebp + +Zdisp( M4_inst,%ecx, disp0,(%edi)) + movl $0, %ecx + + adcl %eax, %ebx + +Zdisp( movl, disp0,(%esi), %eax) + adcl %edx, %ecx + + + mull %ebp + + M4_inst %ebx, disp1(%edi) + movl $0, %ebx + + adcl %eax, %ecx + + movl disp1(%esi), %eax + adcl %edx, %ebx +') + + decl VAR_COUNTER + leal UNROLL_BYTES(%esi), %esi + leal UNROLL_BYTES(%edi), %edi + + jns L(top) + + + C eax src limb + C ebx carry high + C ecx carry low + C edx + C esi + C edi dst (points at second last limb) + C ebp multiplier +deflit(`disp0', ifelse(UNROLL_BYTES,256,-128)) +deflit(`disp1', eval(disp0-0 + 4)) + + mull %ebp + + M4_inst %ecx, disp0(%edi) + movl SAVE_EBP, %ebp + + adcl %ebx, %eax + movl SAVE_EBX, %ebx + movl SAVE_ESI, %esi + + adcl $0, %edx + M4_inst %eax, disp1(%edi) + movl SAVE_EDI, %edi + + adcl $0, %edx + addl $SAVE_SIZE, %esp + + movl %edx, %eax + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/diveby3.asm b/rts/gmp/mpn/x86/k7/diveby3.asm new file mode 100644 index 0000000000..57684958a5 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/diveby3.asm @@ -0,0 +1,131 @@ +dnl AMD K7 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. +dnl +dnl K7: 8.0 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t carry); + +defframe(PARAM_CARRY,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl multiplicative inverse of 3, modulo 2^32 +deflit(INVERSE_3, 0xAAAAAAAB) + +dnl ceil(b/3) and floor(b*2/3) where b=2^32 +deflit(ONE_THIRD_CEIL, 0x55555556) +deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA) + + .text + ALIGN(32) + +PROLOGUE(mpn_divexact_by3c) +deflit(`FRAME',0) + + movl PARAM_SRC, %ecx + pushl %ebx defframe_pushl(SAVE_EBX) + + movl PARAM_CARRY, %ebx + pushl %ebp defframe_pushl(SAVE_EBP) + + movl PARAM_SIZE, %ebp + pushl %edi defframe_pushl(SAVE_EDI) + + movl (%ecx), %eax C src low limb + pushl %esi defframe_pushl(SAVE_ESI) + + movl PARAM_DST, %edi + movl $TWO_THIRDS_FLOOR, %esi + leal -4(%ecx,%ebp,4), %ecx C &src[size-1] + + subl %ebx, %eax + + setc %bl + decl %ebp + jz L(last) + + leal (%edi,%ebp,4), %edi C &dst[size-1] + negl %ebp + + + ALIGN(16) +L(top): + C eax src limb, carry subtracted + C ebx carry limb (0 or 1) + C ecx &src[size-1] + C edx scratch + C esi TWO_THIRDS_FLOOR + C edi &dst[size-1] + C ebp counter, limbs, negative + + imull $INVERSE_3, %eax, %edx + + movl 4(%ecx,%ebp,4), %eax C next src limb + cmpl $ONE_THIRD_CEIL, %edx + + sbbl $-1, %ebx C +1 if result>=ceil(b/3) + cmpl %edx, %esi + + sbbl %ebx, %eax C and further 1 if result>=ceil(b*2/3) + movl %edx, (%edi,%ebp,4) + incl %ebp + + 
setc %bl C new carry + jnz L(top) + + + +L(last): + C eax src limb, carry subtracted + C ebx carry limb (0 or 1) + C ecx &src[size-1] + C edx scratch + C esi TWO_THIRDS_FLOOR + C edi &dst[size-1] + C ebp + + imull $INVERSE_3, %eax + + cmpl $ONE_THIRD_CEIL, %eax + movl %eax, (%edi) + movl SAVE_EBP, %ebp + + sbbl $-1, %ebx C +1 if eax>=ceil(b/3) + cmpl %eax, %esi + movl $0, %eax + + adcl %ebx, %eax C further +1 if eax>=ceil(b*2/3) + movl SAVE_EDI, %edi + movl SAVE_ESI, %esi + + movl SAVE_EBX, %ebx + addl $FRAME, %esp + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/gmp-mparam.h b/rts/gmp/mpn/x86/k7/gmp-mparam.h new file mode 100644 index 0000000000..c3bba0afc4 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/gmp-mparam.h @@ -0,0 +1,100 @@ +/* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +/* the low limb is ready after 4 cycles, but normally it's the high limb + which is of interest, and that comes out after 6 cycles */ +#ifndef UMUL_TIME +#define UMUL_TIME 6 /* cycles */ +#endif + +/* AMD doco says 40, but it measures 39 back-to-back */ +#ifndef UDIV_TIME +#define UDIV_TIME 39 /* cycles */ +#endif + +/* using bsf */ +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 7 /* cycles */ +#endif + + +/* Generated by tuneup.c, 2000-07-06. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 26 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 177 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 52 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 173 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 76 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 114 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 34 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 5 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 54 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 720, 1440, 2944, 7680, 18432, 57344, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 736 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 6912 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 784, 1696, 3200, 7680, 18432, 57344, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 800 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 8448 +#endif diff --git a/rts/gmp/mpn/x86/k7/mmx/copyd.asm b/rts/gmp/mpn/x86/k7/mmx/copyd.asm new file mode 100644 index 0000000000..33214daa1f --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/copyd.asm @@ -0,0 +1,136 @@ +dnl AMD K7 mpn_copyd -- copy limb vector, 
decrementing. +dnl +dnl alignment dst/src, A=0mod8 N=4mod8 +dnl A/A A/N N/A N/N +dnl K7 0.75 1.0 1.0 0.75 + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C The various comments in mpn/x86/k7/copyi.asm apply here too. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +dnl parameter space reused +define(SAVE_EBX,`PARAM_SIZE') +define(SAVE_ESI,`PARAM_SRC') + +dnl minimum 5 since the unrolled code can't handle less than 5 +deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(32) +PROLOGUE(mpn_copyd) + + movl PARAM_SIZE, %ecx + movl %ebx, SAVE_EBX + + movl PARAM_SRC, %eax + movl PARAM_DST, %edx + + cmpl $UNROLL_THRESHOLD, %ecx + jae L(unroll) + + orl %ecx, %ecx + jz L(simple_done) + +L(simple): + C eax src + C ebx scratch + C ecx counter + C edx dst + C + C this loop is 2 cycles/limb + + movl -4(%eax,%ecx,4), %ebx + movl %ebx, -4(%edx,%ecx,4) + decl %ecx + jnz L(simple) + +L(simple_done): + movl SAVE_EBX, %ebx + ret + + +L(unroll): + movl %esi, SAVE_ESI + leal (%eax,%ecx,4), %ebx + leal (%edx,%ecx,4), %esi + + andl %esi, %ebx + movl SAVE_ESI, %esi + subl $4, %ecx C size-4 + + testl $4, %ebx C testl to pad code closer to 16 bytes for L(top) + jz L(aligned) + + C both src and dst unaligned, process one limb to align them + movl 12(%eax,%ecx,4), %ebx + movl %ebx, 12(%edx,%ecx,4) + decl %ecx +L(aligned): + + + ALIGN(16) +L(top): + C eax src + C ebx + C ecx counter, limbs + C edx dst + + movq 8(%eax,%ecx,4), %mm0 + movq (%eax,%ecx,4), %mm1 + subl $4, %ecx + movq %mm0, 16+8(%edx,%ecx,4) + movq %mm1, 16(%edx,%ecx,4) + jns L(top) + + + C now %ecx is -4 to -1 representing respectively 0 to 3 limbs remaining + + testb $2, %cl + jz L(finish_not_two) + + movq 8(%eax,%ecx,4), %mm0 + movq %mm0, 8(%edx,%ecx,4) +L(finish_not_two): + + testb $1, %cl + jz L(done) + + movl (%eax), %ebx + movl %ebx, (%edx) + +L(done): + movl SAVE_EBX, %ebx + emms + ret + + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/copyi.asm b/rts/gmp/mpn/x86/k7/mmx/copyi.asm new file mode 100644 index 0000000000..b234a1628c --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/copyi.asm @@ -0,0 +1,147 @@ +dnl AMD K7 mpn_copyi -- copy limb vector, incrementing. 
+dnl +dnl alignment dst/src, A=0mod8 N=4mod8 +dnl A/A A/N N/A N/N +dnl K7 0.75 1.0 1.0 0.75 + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Copy src,size to dst,size. +C +C This code at 0.75 or 1.0 c/l is always faster than a plain rep movsl at +C 1.33 c/l. +C +C The K7 can do two loads, or two stores, or a load and a store, in one +C cycle, so if those are 64-bit operations then 0.5 c/l should be possible, +C however nothing under 0.7 c/l is known. +C +C If both source and destination are unaligned then one limb is processed at +C the start to make them aligned and so get 0.75 c/l, whereas if they'd been +C used unaligned it would be 1.5 c/l. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl parameter space reused +define(SAVE_EBX,`PARAM_SIZE') + +dnl minimum 5 since the unrolled code can't handle less than 5 +deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(32) +PROLOGUE(mpn_copyi) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl %ebx, SAVE_EBX + + movl PARAM_SRC, %eax + movl PARAM_DST, %edx + + cmpl $UNROLL_THRESHOLD, %ecx + jae L(unroll) + + orl %ecx, %ecx + jz L(simple_done) + +L(simple): + C eax src, incrementing + C ebx scratch + C ecx counter + C edx dst, incrementing + C + C this loop is 2 cycles/limb + + movl (%eax), %ebx + movl %ebx, (%edx) + decl %ecx + leal 4(%eax), %eax + leal 4(%edx), %edx + jnz L(simple) + +L(simple_done): + movl SAVE_EBX, %ebx + ret + + +L(unroll): + movl %eax, %ebx + leal -12(%eax,%ecx,4), %eax C src end - 12 + subl $3, %ecx C size-3 + + andl %edx, %ebx + leal (%edx,%ecx,4), %edx C dst end - 12 + negl %ecx + + testl $4, %ebx C testl to pad code closer to 16 bytes for L(top) + jz L(aligned) + + C both src and dst unaligned, process one limb to align them + movl (%eax,%ecx,4), %ebx + movl %ebx, (%edx,%ecx,4) + incl %ecx +L(aligned): + + + ALIGN(16) +L(top): + C eax src end - 12 + C ebx + C ecx counter, negative, limbs + C edx dst end - 12 + + movq (%eax,%ecx,4), %mm0 + movq 8(%eax,%ecx,4), %mm1 + addl $4, %ecx + movq %mm0, -16(%edx,%ecx,4) + movq %mm1, -16+8(%edx,%ecx,4) + ja L(top) C jump no carry and not zero + + + C now %ecx is 0 to 3 representing respectively 3 to 0 limbs remaining + + testb $2, %cl + jnz L(finish_not_two) + + movq (%eax,%ecx,4), %mm0 + movq %mm0, (%edx,%ecx,4) +L(finish_not_two): + + testb $1, %cl + jnz L(done) + + movl 8(%eax), %ebx + movl %ebx, 8(%edx) + +L(done): + movl SAVE_EBX, %ebx + emms + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm new file mode 100644 index 0000000000..483ad6a9a1 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm @@ -0,0 +1,718 
@@ +dnl AMD K7 mpn_divrem_1 -- mpn by limb division. +dnl +dnl K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, +C mp_limb_t divisor); +C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, +C mp_limb_t divisor, mp_limb_t carry); +C +C The method and nomenclature follow part 8 of "Division by Invariant +C Integers using Multiplication" by Granlund and Montgomery, reference in +C gmp.texi. +C +C The "and"s shown in the paper are done here with "cmov"s. "m" is written +C for m', and "d" for d_norm, which won't cause any confusion since it's +C only the normalized divisor that's of any use in the code. "b" is written +C for 2^N, the size of a limb, N being 32 here. +C +C mpn_divrem_1 avoids one division if the src high limb is less than the +C divisor. mpn_divrem_1c doesn't check for a zero carry, since in normal +C circumstances that will be a very rare event. 
+C +C There's a small bias towards expecting xsize==0, by having code for +C xsize==0 in a straight line and xsize!=0 under forward jumps. + + +dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by +dnl inverse method is used, rather than plain "divl"s. Minimum value 1. +dnl +dnl The inverse takes about 50 cycles to calculate, but after that the +dnl multiply is 17 c/l versus division at 42 c/l. +dnl +dnl At 3 limbs the mul is a touch faster than div on the integer part, and +dnl even more so on the fractional part. + +deflit(MUL_THRESHOLD, 3) + + +defframe(PARAM_CARRY, 24) +defframe(PARAM_DIVISOR,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC, 12) +defframe(PARAM_XSIZE, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) +defframe(VAR_SRC, -28) +defframe(VAR_DST, -32) +defframe(VAR_DST_STOP,-36) + +deflit(STACK_SPACE, 36) + + .text + ALIGN(32) + +PROLOGUE(mpn_divrem_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebx, SAVE_EBX + movl PARAM_XSIZE, %ebx + + movl %edi, SAVE_EDI + movl PARAM_DST, %edi + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + + leal -4(%edi,%ebx,4), %edi + jmp LF(mpn_divrem_1,start_1c) + +EPILOGUE() + + + C offset 0x31, close enough to aligned +PROLOGUE(mpn_divrem_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl $0, %edx C initial carry (if can't skip a div) + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %ebx, SAVE_EBX + movl PARAM_XSIZE, %ebx + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + orl %ecx, %ecx + + movl %edi, SAVE_EDI + movl PARAM_DST, %edi + leal -4(%edi,%ebx,4), %edi C &dst[xsize-1] + + jz L(no_skip_div) + movl -4(%esi,%ecx,4), %eax C src high limb + + cmpl %ebp, %eax C one 
less div if high<divisor + jnb L(no_skip_div) + + movl $0, (%edi,%ecx,4) C dst high limb + decl %ecx C size-1 + movl %eax, %edx C src high limb as initial carry +L(no_skip_div): + + +L(start_1c): + C eax + C ebx xsize + C ecx size + C edx carry + C esi src + C edi &dst[xsize-1] + C ebp divisor + + leal (%ebx,%ecx), %eax C size+xsize + cmpl $MUL_THRESHOLD, %eax + jae L(mul_by_inverse) + + +C With MUL_THRESHOLD set to 3, the simple loops here only do 0 to 2 limbs. +C It'd be possible to write them out without the looping, but no speedup +C would be expected. +C +C Using PARAM_DIVISOR instead of %ebp measures 1 cycle/loop faster on the +C integer part, but curiously not on the fractional part, where %ebp is a +C (fixed) couple of cycles faster. + + orl %ecx, %ecx + jz L(divide_no_integer) + +L(divide_integer): + C eax scratch (quotient) + C ebx xsize + C ecx counter + C edx scratch (remainder) + C esi src + C edi &dst[xsize-1] + C ebp divisor + + movl -4(%esi,%ecx,4), %eax + + divl PARAM_DIVISOR + + movl %eax, (%edi,%ecx,4) + decl %ecx + jnz L(divide_integer) + + +L(divide_no_integer): + movl PARAM_DST, %edi + orl %ebx, %ebx + jnz L(divide_fraction) + +L(divide_done): + movl SAVE_ESI, %esi + movl SAVE_EDI, %edi + movl %edx, %eax + + movl SAVE_EBX, %ebx + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + +L(divide_fraction): + C eax scratch (quotient) + C ebx counter + C ecx + C edx scratch (remainder) + C esi + C edi dst + C ebp divisor + + movl $0, %eax + + divl %ebp + + movl %eax, -4(%edi,%ebx,4) + decl %ebx + jnz L(divide_fraction) + + jmp L(divide_done) + + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx xsize + C ecx size + C edx carry + C esi src + C edi &dst[xsize-1] + C ebp divisor + + bsrl %ebp, %eax C 31-l + + leal 12(%edi), %ebx + leal 4(%edi,%ecx,4), %edi C &dst[xsize+size] + + movl %edi, VAR_DST + movl %ebx, VAR_DST_STOP + + movl %ecx, %ebx C size + movl $31, %ecx + + movl 
%edx, %edi C carry + movl $-1, %edx + + C + + xorl %eax, %ecx C l + incl %eax C 32-l + + shll %cl, %ebp C d normalized + movl %ecx, VAR_NORM + + movd %eax, %mm7 + + movl $-1, %eax + subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1) / d + + orl %ebx, %ebx C size + movl %eax, VAR_INVERSE + leal -12(%esi,%ebx,4), %eax C &src[size-3] + + jz L(start_zero) + movl %eax, VAR_SRC + cmpl $1, %ebx + + movl 8(%eax), %esi C src high limb + jz L(start_one) + +L(start_two_or_more): + movl 4(%eax), %edx C src second highest limb + + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shldl( %cl, %edx, %esi) C n10 = high,second << l + + cmpl $2, %ebx + je L(integer_two_left) + jmp L(integer_top) + + +L(start_one): + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shll %cl, %esi C n10 = high << l + movl %eax, VAR_SRC + jmp L(integer_one_left) + + +L(start_zero): + shll %cl, %edi C n2 = carry << l + movl $0, %esi C n10 = 0 + + C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then + C must have xsize!=0 + jmp L(fraction_some) + + + +C ----------------------------------------------------------------------------- +C +C The multiply by inverse loop is 17 cycles, and relies on some out-of-order +C execution. The instruction scheduling is important, with various +C apparently equivalent forms running 1 to 5 cycles slower. +C +C A lower bound for the time would seem to be 16 cycles, based on the +C following successive dependencies. +C +C cycles +C n2+n1 1 +C mul 6 +C q1+1 1 +C mul 6 +C sub 1 +C addback 1 +C --- +C 16 +C +C This chain is what the loop has already, but 16 cycles isn't achieved. +C K7 has enough decode, and probably enough execute (depending maybe on what +C a mul actually consumes), but nothing running under 17 has been found. +C +C In theory n2+n1 could be done in the sub and addback stages (by +C calculating both n2 and n2+n1 there), but lack of registers makes this an +C unlikely proposition. 
+C +C The jz in the loop keeps the q1+1 stage to 1 cycle. Handling an overflow +C from q1+1 with an "sbbl $0, %ebx" would add a cycle to the dependent +C chain, and nothing better than 18 cycles has been found when using it. +C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will +C be an extremely rare event. +C +C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but +C if some special data is coming out with this always, the q1_ff special +C case actually runs at 15 c/l. 0x2FFF...FFFD divided by 3 is a good way to +C induce the q1_ff case, for speed measurements or testing. Note that +C 0xFFF...FFF divided by 1 or 2 doesn't induce it. +C +C The instruction groupings and empty comments show the cycles for a naive +C in-order view of the code (conveniently ignoring the load latency on +C VAR_INVERSE). This shows some of where the time is going, but is nonsense +C to the extent that out-of-order execution rearranges it. In this case +C there's 19 cycles shown, but it executes at 17. 
+ + ALIGN(16) +L(integer_top): + C eax scratch + C ebx scratch (nadj, q1) + C ecx scratch (src, dst) + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src qword) + C mm7 rshift for normalization + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + movl VAR_SRC, %ecx + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movq (%ecx), %mm0 C next limb and the one below it + subl $4, %ecx + + movl %ecx, VAR_SRC + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + movl VAR_DST, %ecx + + mull %ebx C (q1+1)*d + + psrlq %mm7, %mm0 + + leal -4(%ecx), %ecx + + C + + subl %eax, %esi + movl VAR_DST_STOP, %eax + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + sbbl $0, %ebx C q + cmpl %eax, %ecx + + movl %ebx, (%ecx) + movl %ecx, VAR_DST + jne L(integer_top) + + +L(integer_loop_done): + + +C ----------------------------------------------------------------------------- +C +C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz +C q1_ff special case. This makes the code a bit smaller and simpler, and +C costs only 1 cycle (each). 
+ +L(integer_two_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx scratch (src, dst) + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + movl PARAM_SRC, %ecx + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movd (%ecx), %mm0 C src low limb + + movl VAR_DST_STOP, %ecx + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + C + + subl %eax, %esi + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + sbbl $0, %ebx C q + + movl %ebx, -4(%ecx) + + +C ----------------------------------------------------------------------------- +L(integer_one_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx dst + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + movl VAR_DST_STOP, %ecx + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + C + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx C q1 if q1+1 overflowed + + mull %ebx + + C + + C + + C + + subl %eax, %esi + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if 
underflow from using q1+1 + sbbl $0, %ebx C q + + movl %ebx, -8(%ecx) + subl $8, %ecx + + + +L(integer_none): + cmpl $0, PARAM_XSIZE + jne L(fraction_some) + + movl %edi, %eax +L(fraction_done): + movl VAR_NORM, %ecx + movl SAVE_EBP, %ebp + + movl SAVE_EDI, %edi + movl SAVE_ESI, %esi + + movl SAVE_EBX, %ebx + addl $STACK_SPACE, %esp + + shrl %cl, %eax + emms + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d + +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx + C edx + C esi n10 + C edi n2 + C ebp divisor + + movl VAR_DST, %ecx + movl VAR_DST_STOP, %edx + subl $4, %ecx + + psrlq %mm7, %mm0 + leal (%ebp,%esi), %edi C n-q*d remainder -> next n2 + movl %ecx, VAR_DST + + movd %mm0, %esi C next n10 + + movl $-1, (%ecx) + cmpl %ecx, %edx + jne L(integer_top) + + jmp L(integer_loop_done) + + + +C ----------------------------------------------------------------------------- +C +C Being the fractional part, the "source" limbs are all zero, meaning +C n10=0, n1=0, and hence nadj=0, leading to many instructions eliminated. +C +C The loop runs at 15 cycles. The dependent chain is the same as the +C general case above, but without the n2+n1 stage (due to n1==0), so 15 +C would seem to be the lower bound. +C +C A not entirely obvious simplification is that q1+1 never overflows a limb, +C and so there's no need for the sbbl $0 or jz q1_ff from the general case. +C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always. +C rnd() means rounding down to a multiple of d. 
+C +C m*n2 + b*n2 <= m*(d-1) + b*(d-1) +C = m*d + b*d - m - b +C = floor((b(b-d)-1)/d)*d + b*d - m - b +C = rnd(b(b-d)-1) + b*d - m - b +C = rnd(b(b-d)-1 + b*d) - m - b +C = rnd(b*b-1) - m - b +C <= (b-2)*b +C +C Unchanged from the general case is that the final quotient limb q can be +C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from +C equation 8.4 of the paper which simplifies as follows when n1==0 and +C n0==0. +C +C n-q1*d = (n2*k+q0*d)/b <= d + (d*d-2d)/b +C +C As before, the instruction groupings and empty comments show a naive +C in-order view of the code, which is made a nonsense by out of order +C execution. There's 17 cycles shown, but it executes at 15. +C +C Rotating the store q and remainder->n2 instructions up to the top of the +C loop gets the run time down from 16 to 15. + + ALIGN(16) +L(fraction_some): + C eax + C ebx + C ecx + C edx + C esi + C edi carry + C ebp divisor + + movl PARAM_DST, %esi + movl VAR_DST_STOP, %ecx + movl %edi, %eax + + subl $8, %ecx + + jmp L(fraction_entry) + + + ALIGN(16) +L(fraction_top): + C eax n2 carry, then scratch + C ebx scratch (nadj, q1) + C ecx dst, decrementing + C edx scratch + C esi dst stop point + C edi (will be n2) + C ebp divisor + + movl %ebx, (%ecx) C previous q + movl %eax, %edi C remainder->n2 + +L(fraction_entry): + mull VAR_INVERSE C m*n2 + + movl %ebp, %eax C d + subl $4, %ecx C dst + leal 1(%edi), %ebx + + C + + C + + C + + C + + addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1 + + mull %ebx C (q1+1)*d + + C + + C + + C + + negl %eax C low of n - (q1+1)*d + + C + + sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry + leal (%ebp,%eax), %edx + + cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1 + sbbl $0, %ebx C q + cmpl %esi, %ecx + + jne L(fraction_top) + + + movl %ebx, (%ecx) + jmp L(fraction_done) + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/lshift.asm b/rts/gmp/mpn/x86/k7/mmx/lshift.asm new file mode 100644 index 0000000000..4d17c881ec --- 
/dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/lshift.asm @@ -0,0 +1,472 @@ +dnl AMD K7 mpn_lshift -- mpn left shift. +dnl +dnl K7: 1.21 cycles/limb (at 16 limbs/loop). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K7: UNROLL_COUNT cycles/limb +dnl 4 1.51 +dnl 8 1.26 +dnl 16 1.21 +dnl 32 1.2 +dnl Maximum possible with the current code is 64. + +deflit(UNROLL_COUNT, 16) + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C Shift src,size left by shift many bits and store the result in dst,size. +C Zeros are shifted in at the right. The bits shifted out at the left are +C the return value. +C +C The comments in mpn_rshift apply here too. 
+ +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 10) +',` +deflit(UNROLL_THRESHOLD, 10) +') + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EDI, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EBX, -12) +deflit(SAVE_SIZE, 12) + + .text + ALIGN(32) + +PROLOGUE(mpn_lshift) +deflit(`FRAME',0) + + movl PARAM_SIZE, %eax + movl PARAM_SRC, %edx + subl $SAVE_SIZE, %esp +deflit(`FRAME',SAVE_SIZE) + + movl PARAM_SHIFT, %ecx + movl %edi, SAVE_EDI + + movl PARAM_DST, %edi + decl %eax + jnz L(more_than_one_limb) + + movl (%edx), %edx + + shldl( %cl, %edx, %eax) C eax was decremented to zero + + shll %cl, %edx + + movl %edx, (%edi) + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + + ret + + +C ----------------------------------------------------------------------------- +L(more_than_one_limb): + C eax size-1 + C ebx + C ecx shift + C edx src + C esi + C edi dst + C ebp + + movd PARAM_SHIFT, %mm6 + movd (%edx,%eax,4), %mm5 C src high limb + cmp $UNROLL_THRESHOLD-1, %eax + + jae L(unroll) + negl %ecx + movd (%edx), %mm4 C src low limb + + addl $32, %ecx + + movd %ecx, %mm7 + +L(simple_top): + C eax loop counter, limbs + C ebx + C ecx + C edx src + C esi + C edi dst + C ebp + C + C mm0 scratch + C mm4 src low limb + C mm5 src high limb + C mm6 shift + C mm7 32-shift + + movq -4(%edx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + movd %mm0, 4(%edi,%eax,4) + jnz L(simple_top) + + + psllq %mm6, %mm5 + psllq %mm6, %mm4 + + psrlq $32, %mm5 + movd %mm4, (%edi) C dst low limb + + movd %mm5, %eax C return value + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll): + C eax size-1 + C ebx (saved) + C ecx shift + C edx src + C esi + C edi dst + C ebp + C + C mm5 src high limb, for return value + C mm6 lshift + + movl %esi, SAVE_ESI + movl %ebx, SAVE_EBX + leal -4(%edx,%eax,4), %edx C &src[size-2] + + testb $4, %dl + 
movq (%edx), %mm1 C src high qword + + jz L(start_src_aligned) + + + C src isn't aligned, process high limb (marked xxx) separately to + C make it so + C + C source -4(edx,%eax,4) + C | + C +-------+-------+-------+-- + C | xxx | + C +-------+-------+-------+-- + C 0mod8 4mod8 0mod8 + C + C dest -4(edi,%eax,4) + C | + C +-------+-------+-- + C | xxx | | + C +-------+-------+-- + + psllq %mm6, %mm1 + subl $4, %edx + movl %eax, PARAM_SIZE C size-1 + + psrlq $32, %mm1 + decl %eax C size-2 is new size-1 + + movd %mm1, 4(%edi,%eax,4) + movq (%edx), %mm1 C new src high qword +L(start_src_aligned): + + + leal -4(%edi,%eax,4), %edi C &dst[size-2] + psllq %mm6, %mm5 + + testl $4, %edi + psrlq $32, %mm5 C return value + + jz L(start_dst_aligned) + + + C dst isn't aligned, subtract 4 bytes to make it so, and pretend the + C shift is 32 bits extra. High limb of dst (marked xxx) handled + C here separately. + C + C source %edx + C +-------+-------+-- + C | mm1 | + C +-------+-------+-- + C 0mod8 4mod8 + C + C dest %edi + C +-------+-------+-------+-- + C | xxx | + C +-------+-------+-------+-- + C 0mod8 4mod8 0mod8 + + movq %mm1, %mm0 + psllq %mm6, %mm1 + addl $32, %ecx C shift+32 + + psrlq $32, %mm1 + + movd %mm1, 4(%edi) + movq %mm0, %mm1 + subl $4, %edi + + movd %ecx, %mm6 C new lshift +L(start_dst_aligned): + + decl %eax C size-2, two last limbs handled at end + movq %mm1, %mm2 C copy of src high qword + negl %ecx + + andl $-2, %eax C round size down to even + addl $64, %ecx + + movl %eax, %ebx + negl %eax + + andl $UNROLL_MASK, %eax + decl %ebx + + shll %eax + + movd %ecx, %mm7 C rshift = 64-lshift + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(entry) (%eax,%eax,4), %esi +') + shrl $UNROLL_LOG2, %ebx C loop counter + + leal ifelse(UNROLL_BYTES,256,128) -8(%edx,%eax,2), %edx + leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi + movl PARAM_SIZE, %eax C for use at end + jmp *%esi + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal 
(%eax,%eax,4), %esi + addl $L(entry)-L(here), %esi + addl (%esp), %esi + + ret +') + + +C ----------------------------------------------------------------------------- + ALIGN(32) +L(top): + C eax size (for use at end) + C ebx loop counter + C ecx rshift + C edx src + C esi computed jump + C edi dst + C ebp + C + C mm0 scratch + C mm1 \ carry (alternating, mm2 first) + C mm2 / + C mm6 lshift + C mm7 rshift + C + C 10 code bytes/limb + C + C The two chunks differ in whether mm1 or mm2 hold the carry. + C The computed jump puts the initial carry in both mm1 and mm2. + +L(entry): +deflit(CHUNK_COUNT, 4) +forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 - 8)) + + movq disp0(%edx), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm2, %mm0 + movq %mm0, disp0(%edi) + + + movq disp1(%edx), %mm0 + psllq %mm6, %mm1 + + movq %mm0, %mm2 + psrlq %mm7, %mm0 + + por %mm1, %mm0 + movq %mm0, disp1(%edi) +') + + subl $UNROLL_BYTES, %edx + subl $UNROLL_BYTES, %edi + decl %ebx + + jns L(top) + + + +define(`disp', `m4_empty_if_zero(eval($1 ifelse(UNROLL_BYTES,256,-128)))') + +L(end): + testb $1, %al + movl SAVE_EBX, %ebx + psllq %mm6, %mm2 C wanted left shifted in all cases below + + movd %mm5, %eax + + movl SAVE_ESI, %esi + jz L(end_even) + + +L(end_odd): + + C Size odd, destination was aligned. + C + C source edx+8 edx+4 + C --+---------------+-------+ + C | mm2 | | + C --+---------------+-------+ + C + C dest edi + C --+---------------+---------------+-------+ + C | written | | | + C --+---------------+---------------+-------+ + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C Size odd, destination was unaligned. 
+ C + C source edx+8 edx+4 + C --+---------------+-------+ + C | mm2 | | + C --+---------------+-------+ + C + C dest edi + C --+---------------+---------------+ + C | written | | + C --+---------------+---------------+ + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C In both cases there's one extra limb of src to fetch and combine + C with mm2 to make a qword at (%edi), and in the aligned case + C there's an extra limb of dst to be formed from that extra src limb + C left shifted. + + movd disp(4) (%edx), %mm0 + testb $32, %cl + + movq %mm0, %mm1 + psllq $32, %mm0 + + psrlq %mm7, %mm0 + psllq %mm6, %mm1 + + por %mm2, %mm0 + + movq %mm0, disp(0) (%edi) + jz L(end_odd_unaligned) + movd %mm1, disp(-4) (%edi) +L(end_odd_unaligned): + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + + +L(end_even): + + C Size even, destination was aligned. + C + C source edx+8 + C --+---------------+ + C | mm2 | + C --+---------------+ + C + C dest edi + C --+---------------+---------------+ + C | written | | + C --+---------------+---------------+ + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C Size even, destination was unaligned. + C + C source edx+8 + C --+---------------+ + C | mm2 | + C --+---------------+ + C + C dest edi+4 + C --+---------------+-------+ + C | written | | + C --+---------------+-------+ + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C The movq for the aligned case overwrites the movd for the + C unaligned case. + + movq %mm2, %mm0 + psrlq $32, %mm2 + + testb $32, %cl + movd %mm2, disp(4) (%edi) + + jz L(end_even_unaligned) + movq %mm0, disp(0) (%edi) +L(end_even_unaligned): + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/mod_1.asm b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm new file mode 100644 index 0000000000..545ca56ddf --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm @@ -0,0 +1,457 @@ +dnl AMD K7 mpn_mod_1 -- mpn by limb remainder. 
+dnl +dnl K7: 17.0 cycles/limb. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C +C The code here is the same as mpn_divrem_1, but with the quotient +C discarded. See mpn/x86/k7/mmx/divrem_1.asm for some comments. + + +dnl MUL_THRESHOLD is the size at which the multiply by inverse method is +dnl used, rather than plain "divl"s. Minimum value 2. +dnl +dnl The inverse takes about 50 cycles to calculate, but after that the +dnl multiply is 17 c/l versus division at 41 c/l. +dnl +dnl Using mul or div is about the same speed at 3 limbs, so the threshold +dnl is set to 4 to get the smaller div code used at 3. 
+ +deflit(MUL_THRESHOLD, 4) + + +defframe(PARAM_CARRY, 16) +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) +defframe(VAR_SRC_STOP,-28) + +deflit(STACK_SPACE, 28) + + .text + ALIGN(32) + +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + jmp LF(mpn_mod_1,start_1c) + +EPILOGUE() + + + ALIGN(32) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl $0, %edx C initial carry (if can't skip a div) + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + orl %ecx, %ecx + jz L(divide_done) + + movl -4(%esi,%ecx,4), %eax C src high limb + + cmpl %ebp, %eax C carry flag if high<divisor + + cmovc( %eax, %edx) C src high limb as initial carry + sbbl $0, %ecx C size-1 to skip one div + jz L(divide_done) + + + ALIGN(16) +L(start_1c): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + cmpl $MUL_THRESHOLD, %ecx + jae L(mul_by_inverse) + + + +C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations, +C but it's already fast and compact, and there's nothing to gain by +C expanding it out. +C +C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp. 
+ + orl %ecx, %ecx + jz L(divide_done) + + +L(divide_top): + C eax scratch (quotient) + C ebx + C ecx counter, limbs, decrementing + C edx scratch (remainder) + C esi src + C edi + C ebp + + movl -4(%esi,%ecx,4), %eax + + divl PARAM_DIVISOR + + decl %ecx + jnz L(divide_top) + + +L(divide_done): + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + movl %edx, %eax + + ret + + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + bsrl %ebp, %eax C 31-l + + movl %ebx, SAVE_EBX + leal -4(%esi), %ebx + + movl %ebx, VAR_SRC_STOP + movl %edi, SAVE_EDI + + movl %ecx, %ebx C size + movl $31, %ecx + + movl %edx, %edi C carry + movl $-1, %edx + + C + + xorl %eax, %ecx C l + incl %eax C 32-l + + shll %cl, %ebp C d normalized + movl %ecx, VAR_NORM + + movd %eax, %mm7 + + movl $-1, %eax + subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1) / d + + C + + movl %eax, VAR_INVERSE + leal -12(%esi,%ebx,4), %eax C &src[size-3] + + movl 8(%eax), %esi C src high limb + movl 4(%eax), %edx C src second highest limb + + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shldl( %cl, %edx, %esi) C n10 = high,second << l + + movl %eax, %ecx C &src[size-3] + + +ifelse(MUL_THRESHOLD,2,` + cmpl $2, %ebx + je L(inverse_two_left) +') + + +C The dependent chain here is the same as in mpn_divrem_1, but a few +C instructions are saved by not needing to store the quotient limbs. +C Unfortunately this doesn't get the code down to the theoretical 16 c/l. +C +C There's four dummy instructions in the loop, all of which are necessary +C for the claimed 17 c/l. It's a 1 to 3 cycle slowdown if any are removed, +C or changed from load to store or vice versa. They're not completely +C random, since they correspond to what mpn_divrem_1 has, but there's no +C obvious reason why they're necessary. 
Presumably they induce something +C good in the out of order execution, perhaps through some load/store +C ordering and/or decoding effects. +C +C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1. On +C special data that comes out as q1==0xFFFFFFFF always, the loop runs at +C about 13.5 c/l. + + ALIGN(32) +L(inverse_top): + C eax scratch + C ebx scratch (nadj, q1) + C ecx src pointer, decrementing + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src qword) + C mm7 rshift for normalization + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + movl PARAM_SIZE, %ebx C dummy + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movq (%ecx), %mm0 C next src limb and the one below it + subl $4, %ecx + + movl %ecx, PARAM_SIZE C dummy + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + nop C dummy + + mull %ebx C (q1+1)*d + + psrlq %mm7, %mm0 + leal 0(%ecx), %ecx C dummy + + C + + C + + subl %eax, %esi + movl VAR_SRC_STOP, %eax + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + cmpl %eax, %ecx + jne L(inverse_top) + + +L(inverse_loop_done): + + +C ----------------------------------------------------------------------------- + +L(inverse_two_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx &src[-1] + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src dword) + C mm7 rshift + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C 
m*(n2+n1) + + movd 4(%ecx), %mm0 C src low limb + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + C + + subl %eax, %esi + + C + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + movd %mm0, %esi + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + + +C One limb left + + C eax scratch + C ebx scratch (nadj, q1) + C ecx + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + cmpl $0x80000000, %esi C n1 as 0=c, 1=nc + movl %edi, %eax C n2 + + leal (%ebp,%esi), %ebx + cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow + sbbl $-1, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movl VAR_NORM, %ecx C for final denorm + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + movl SAVE_EBX, %ebx + + C + + C + + subl %eax, %esi + + movl %esi, %eax C remainder + movl SAVE_ESI, %esi + + sbbl %edx, %edi C n - (q1+1)*d + leal (%ebp,%eax), %edx + movl SAVE_EBP, %ebp + + cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1 + movl SAVE_EDI, %edi + + shrl %cl, %eax C denorm remainder + addl $STACK_SPACE, %esp + emms + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d + +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx src pointer + C edx + C esi n10 + C edi (n2) + C ebp divisor + + movl VAR_SRC_STOP, %edx + leal (%ebp,%esi), %edi C n-q*d 
remainder -> next n2 + psrlq %mm7, %mm0 + + movd %mm0, %esi C next n10 + + cmpl %ecx, %edx + jne L(inverse_top) + jmp L(inverse_loop_done) + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/popham.asm b/rts/gmp/mpn/x86/k7/mmx/popham.asm new file mode 100644 index 0000000000..fa7c8c04a5 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/popham.asm @@ -0,0 +1,239 @@ +dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming +dnl distance. +dnl +dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on +dnl FreeBSD 3.3 and 3.4 doesn't recognise it. 
+ +define(psadbw_mm4_mm0, +`ifelse(m4_ifdef_anyof_p(`HAVE_TARGET_CPU_athlon', + `HAVE_TARGET_CPU_pentium3'),1, + `.byte 0x0f,0xf6,0xc4 C psadbw %mm4, %mm0', + +`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only +') C this works enough for the sum of bytes done below, making it + C possible to test on an older cpu + leal -8(%esp), %esp C leal leaves eflags alone, the caller loop tests them later + movq %mm4, (%esp) C save mm4 + movq %mm0, %mm4 +forloop(i,1,7, +` psrlq $ 8, %mm4 + paddb %mm4, %mm0 +') + pushl $ 0 + pushl $ 0xFF C qword at esp is now the mask 0xFF + pand (%esp), %mm0 C keep only the low byte, the sum of bytes + movq 8(%esp), %mm4 C restore mm4 + leal 16(%esp), %esp +')') + + +C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); +C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); +C +C The code here is almost certainly not optimal, but is already a 3x speedup +C over the generic C code. The main improvement would be to interleave +C processing of two qwords in the loop so as to fully exploit the available +C execution units, possibly leading to 3.25 c/l (13 cycles for 4 limbs). +C +C The loop is based on the example "Efficient 64-bit population count using +C MMX instructions" in the Athlon Optimization Guide, AMD document 22007, +C page 158 of rev E (reference in mpn/x86/k7/README).
+ +ifdef(`OPERATION_popcount',, +`ifdef(`OPERATION_hamdist',, +`m4_error(`Need OPERATION_popcount or OPERATION_hamdist defined +')')') + +define(HAM, +m4_assert_numargs(1) +`ifdef(`OPERATION_hamdist',`$1')') + +define(POP, +m4_assert_numargs(1) +`ifdef(`OPERATION_popcount',`$1')') + +HAM(` +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC2, 8) +defframe(PARAM_SRC, 4) +define(M4_function,mpn_hamdist) +') +POP(` +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) +define(M4_function,mpn_popcount) +') + +MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) + + +ifdef(`PIC',,` + dnl non-PIC + + DATA + ALIGN(8) + +define(LS, +m4_assert_numargs(1) +`LF(M4_function,`$1')') + +LS(rodata_AAAAAAAAAAAAAAAA): + .long 0xAAAAAAAA + .long 0xAAAAAAAA + +LS(rodata_3333333333333333): + .long 0x33333333 + .long 0x33333333 + +LS(rodata_0F0F0F0F0F0F0F0F): + .long 0x0F0F0F0F + .long 0x0F0F0F0F +') + + .text + ALIGN(32) + +PROLOGUE(M4_function) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx C size in limbs + orl %ecx, %ecx + jz L(zero) C size 0 returns 0 + +ifdef(`PIC',` + movl $0xAAAAAAAA, %eax + movl $0x33333333, %edx + + movd %eax, %mm7 + movd %edx, %mm6 + + movl $0x0F0F0F0F, %eax + + punpckldq %mm7, %mm7 + punpckldq %mm6, %mm6 + + movd %eax, %mm5 + movd %edx, %mm4 + + punpckldq %mm5, %mm5 + +',` + movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7 + movq LS(rodata_3333333333333333), %mm6 + movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5 +') + pxor %mm4, %mm4 C all zeros, the second operand for psadbw + +define(REG_AAAAAAAAAAAAAAAA,%mm7) +define(REG_3333333333333333,%mm6) +define(REG_0F0F0F0F0F0F0F0F,%mm5) +define(REG_0000000000000000,%mm4) + + + movl PARAM_SRC, %eax +HAM(` movl PARAM_SRC2, %edx') + + pxor %mm2, %mm2 C total + + shrl %ecx C limbs to qwords, carry set if size is odd + jnc L(top) C even size, whole qwords only + + movd (%eax,%ecx,8), %mm1 C odd size, high limb src[size-1] goes first + +HAM(` movd 0(%edx,%ecx,8), %mm0 + pxor %mm0, %mm1 +') + orl %ecx, %ecx C set flags for the jnz at the loop end + jmp L(loaded) + + + ALIGN(16) +L(top): + C eax src + C ebx + C ecx counter, qwords, decrementing + C edx [hamdist] src2 + C + C mm0 (scratch) + C mm1 (scratch) + C mm2 total (low dword) + C mm3 + C mm4 \ + C mm5 | special constants + C mm6 | + C mm7
/ + + movq -8(%eax,%ecx,8), %mm1 + +HAM(` pxor -8(%edx,%ecx,8), %mm1') + decl %ecx C flags kept for the jnz below, nothing in between alters eflags + +L(loaded): + movq %mm1, %mm0 + pand REG_AAAAAAAAAAAAAAAA, %mm1 + + psrlq $1, %mm1 + + psubd %mm1, %mm0 C bit pairs + + + movq %mm0, %mm1 + psrlq $2, %mm0 + + pand REG_3333333333333333, %mm0 + pand REG_3333333333333333, %mm1 + + paddd %mm1, %mm0 C nibbles + + + movq %mm0, %mm1 + psrlq $4, %mm0 + + pand REG_0F0F0F0F0F0F0F0F, %mm0 + pand REG_0F0F0F0F0F0F0F0F, %mm1 + + paddd %mm1, %mm0 C bytes + + + psadbw_mm4_mm0 + + paddd %mm0, %mm2 C add to total + jnz L(top) + + + movd %mm2, %eax C return value + emms + ret + + +L(zero): + movl $0, %eax C size was zero, count is 0 + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mmx/rshift.asm b/rts/gmp/mpn/x86/k7/mmx/rshift.asm new file mode 100644 index 0000000000..abb546cd5b --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mmx/rshift.asm @@ -0,0 +1,471 @@ +dnl AMD K7 mpn_rshift -- mpn right shift. +dnl +dnl K7: 1.21 cycles/limb (at 16 limbs/loop). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA.
+ + +include(`../config.m4') + + +dnl K7: UNROLL_COUNT cycles/limb +dnl 4 1.51 +dnl 8 1.26 +dnl 16 1.21 +dnl 32 1.2 +dnl Maximum possible with the current code is 64. + +deflit(UNROLL_COUNT, 16) + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C Shift src,size right by shift many bits and store the result in dst,size. +C Zeros are shifted in at the left. The bits shifted out at the right are +C the return value. +C +C This code uses 64-bit MMX operations, which makes it possible to handle +C two limbs at a time, for a theoretical 1.0 cycles/limb. Plain integer +C code, on the other hand, suffers from shrd being a vector path decode and +C running at 3 cycles back-to-back. +C +C Full speed depends on source and destination being aligned, and some hairy +C setups and finish-ups are done to arrange this for the loop. + +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 10) +',` +deflit(UNROLL_THRESHOLD, 10) +') + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EDI, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EBX, -12) +deflit(SAVE_SIZE, 12) + + .text + ALIGN(32) + +PROLOGUE(mpn_rshift) +deflit(`FRAME',0) + + movl PARAM_SIZE, %eax + movl PARAM_SRC, %edx + subl $SAVE_SIZE, %esp +deflit(`FRAME',SAVE_SIZE) + + movl PARAM_SHIFT, %ecx + movl %edi, SAVE_EDI + + movl PARAM_DST, %edi + decl %eax C size-1 + jnz L(more_than_one_limb) + + movl (%edx), %edx C src limb + + shrdl( %cl, %edx, %eax) C eax was decremented to zero, it gets the bits shifted out + + shrl %cl, %edx + + movl %edx, (%edi) C dst limb + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + + ret + + +C ----------------------------------------------------------------------------- +L(more_than_one_limb): + C eax size-1 + C ebx + C ecx shift + C edx src + C esi + C edi dst + C ebp + + movd PARAM_SHIFT, %mm6 C rshift + movd (%edx), %mm5 C src low limb + cmp $UNROLL_THRESHOLD-1, %eax + + jae L(unroll) + leal (%edx,%eax,4), %edx C &src[size-1] + leal
-4(%edi,%eax,4), %edi C &dst[size-2] + + movd (%edx), %mm4 C src high limb + negl %eax + + +L(simple_top): + C eax loop counter, limbs, negative + C ebx + C ecx shift + C edx &src[size-1] + C esi + C edi &dst[size-2] + C ebp + C + C mm0 scratch + C mm4 src high limb + C mm5 src low limb + C mm6 shift + + movq (%edx,%eax,4), %mm0 + incl %eax C flags kept for the jnz below, MMX ops leave eflags alone + + psrlq %mm6, %mm0 + + movd %mm0, (%edi,%eax,4) + jnz L(simple_top) + + + psllq $32, %mm5 + psrlq %mm6, %mm4 + + psrlq %mm6, %mm5 + movd %mm4, 4(%edi) C dst high limb + + movd %mm5, %eax C return value + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll): + C eax size-1 + C ebx + C ecx shift + C edx src + C esi + C edi dst + C ebp + C + C mm5 src low limb + C mm6 rshift + + testb $4, %dl C is src 4mod8 + movl %esi, SAVE_ESI + movl %ebx, SAVE_EBX + + psllq $32, %mm5 C low limb to the high dword, for forming the return value + jz L(start_src_aligned) + + + C src isn't aligned, process low limb separately (marked xxx) and + C step src and dst by one limb, making src aligned. + C + C source edx + C --+-------+-------+-------+ + C | xxx | + C --+-------+-------+-------+ + C 4mod8 0mod8 4mod8 + C + C dest edi + C --+-------+-------+ + C | | xxx | + C --+-------+-------+ + + movq (%edx), %mm0 C src low two limbs + addl $4, %edx + movl %eax, PARAM_SIZE C size-1 + + addl $4, %edi + decl %eax C size-2 is new size-1 + + psrlq %mm6, %mm0 + movl %edi, PARAM_DST C new dst + + movd %mm0, -4(%edi) +L(start_src_aligned): + + + movq (%edx), %mm1 C src low two limbs + decl %eax C size-2, two last limbs handled at end + testl $4, %edi + + psrlq %mm6, %mm5 + jz L(start_dst_aligned) + + + C dst isn't aligned, add 4 to make it so, and pretend the shift is + C 32 bits extra. Low limb of dst (marked xxx) handled here separately.
+ C + C source edx + C --+-------+-------+ + C | mm1 | + C --+-------+-------+ + C 4mod8 0mod8 + C + C dest edi + C --+-------+-------+-------+ + C | xxx | + C --+-------+-------+-------+ + C 4mod8 0mod8 4mod8 + + movq %mm1, %mm0 + psrlq %mm6, %mm1 + addl $32, %ecx C shift+32 + + movd %mm1, (%edi) + movq %mm0, %mm1 + addl $4, %edi C new dst + + movd %ecx, %mm6 C rshift is now shift+32 +L(start_dst_aligned): + + + movq %mm1, %mm2 C copy of src low two limbs + negl %ecx + andl $-2, %eax C round size down to even + + movl %eax, %ebx + negl %eax + addl $64, %ecx + + andl $UNROLL_MASK, %eax + decl %ebx + + shll %eax C doubled here, times 5 in the leal gives 10 code bytes per limb + + movd %ecx, %mm7 C lshift = 64-rshift + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(entry) (%eax,%eax,4), %esi + negl %eax +') + shrl $UNROLL_LOG2, %ebx C loop counter + + leal ifelse(UNROLL_BYTES,256,128+) 8(%edx,%eax,2), %edx + leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi + movl PARAM_SIZE, %eax C for use at end + + jmp *%esi + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%eax,%eax,4), %esi + addl $L(entry)-L(here), %esi + addl (%esp), %esi C add the return address, which is L(here) + negl %eax + + ret +') + + +C ----------------------------------------------------------------------------- + ALIGN(64) +L(top): + C eax size, for use at end + C ebx loop counter + C ecx lshift + C edx src + C esi was computed jump + C edi dst + C ebp + C + C mm0 scratch + C mm1 \ carry (alternating) + C mm2 / + C mm6 rshift + C mm7 lshift + C + C 10 code bytes/limb + C + C The two chunks differ in whether mm1 or mm2 hold the carry. + C The computed jump puts the initial carry in both mm1 and mm2.
+ +L(entry): +deflit(CHUNK_COUNT, 4) +forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 + 8)) + + movq disp0(%edx), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + por %mm2, %mm0 + movq %mm0, disp0(%edi) + + + movq disp1(%edx), %mm0 + psrlq %mm6, %mm1 + + movq %mm0, %mm2 + psllq %mm7, %mm0 + + por %mm1, %mm0 + movq %mm0, disp1(%edi) +') + + addl $UNROLL_BYTES, %edx + addl $UNROLL_BYTES, %edi + decl %ebx + + jns L(top) + + +deflit(`disp0', ifelse(UNROLL_BYTES,256,-128)) +deflit(`disp1', eval(disp0-0 + 8)) + + testb $1, %al C odd or even, from PARAM_SIZE as possibly adjusted above + psrlq %mm6, %mm2 C wanted rshifted in all cases below + movl SAVE_ESI, %esi + + movd %mm5, %eax C return value + + movl SAVE_EBX, %ebx + jz L(end_even) + + + C Size odd, destination was aligned. + C + C source + C edx + C +-------+---------------+-- + C | | mm2 | + C +-------+---------------+-- + C + C dest edi + C +-------+---------------+---------------+-- + C | | | written | + C +-------+---------------+---------------+-- + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C Size odd, destination was unaligned. + C + C source + C edx + C +-------+---------------+-- + C | | mm2 | + C +-------+---------------+-- + C + C dest edi + C +---------------+---------------+-- + C | | written | + C +---------------+---------------+-- + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C In both cases there's one extra limb of src to fetch and combine + C with mm2 to make a qword to store, and in the aligned case there's + C a further extra limb of dst to be formed. + + + movd disp0(%edx), %mm0 + movq %mm0, %mm1 + + psllq %mm7, %mm0 + testb $32, %cl C for shift 1 to 31, bit 5 of lshift is clear only if 32 was added to the shift + + por %mm2, %mm0 + psrlq %mm6, %mm1 + + movq %mm0, disp0(%edi) + jz L(finish_odd_unaligned) + + movd %mm1, disp1(%edi) +L(finish_odd_unaligned): + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + + +L(end_even): + + C Size even, destination was aligned.
+ C + C source + C +---------------+-- + C | mm2 | + C +---------------+-- + C + C dest edi + C +---------------+---------------+-- + C | | mm2 | + C +---------------+---------------+-- + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C Size even, destination was unaligned. + C + C source + C +---------------+-- + C | mm2 | + C +---------------+-- + C + C dest edi + C +-------+---------------+-- + C | | mm2 | + C +-------+---------------+-- + C + C mm6 = shift+32 + C mm7 = 64-(shift+32) + + + C The movd for the unaligned case is the same data as the movq for + C the aligned case, it's just a choice between whether one or two + C limbs should be written. + + + testb $32, %cl + movd %mm2, disp0(%edi) + + jz L(end_even_unaligned) + + movq %mm2, disp0(%edi) +L(end_even_unaligned): + + movl SAVE_EDI, %edi + addl $SAVE_SIZE, %esp + emms + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/mul_1.asm b/rts/gmp/mpn/x86/k7/mul_1.asm new file mode 100644 index 0000000000..07f7085b10 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mul_1.asm @@ -0,0 +1,265 @@ +dnl AMD K7 mpn_mul_1 -- mpn by limb multiply. +dnl +dnl K7: 3.4 cycles/limb (at 16 limbs/loop). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB.
If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K7: UNROLL_COUNT cycles/limb +dnl 8 3.9 +dnl 16 3.4 +dnl 32 3.4 +dnl 64 3.35 +dnl Maximum possible with the current code is 64. + +deflit(UNROLL_COUNT, 16) + + +C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier); +C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier, mp_limb_t carry); +C +C Multiply src,size by multiplier and store the result in dst,size. +C Return the carry limb from the top of the result. +C +C mpn_mul_1c() accepts an initial carry for the calculation, it's added into +C the low limb of the destination. +C +C Variations on the unrolled loop have been tried, with the current +C registers or with the counter on the stack to free up ecx. The current +C code is the fastest found. +C +C An interesting effect is that removing the stores "movl %ebx, disp0(%edi)" +C from the unrolled loop actually slows it down to 5.0 cycles/limb. Code +C with this change can be tested on sizes of the form UNROLL_COUNT*n+1 +C without having to change the computed jump. There's obviously something +C fishy going on, perhaps with what execution units the mul needs. + +defframe(PARAM_CARRY, 20) +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EBP, -4) +defframe(SAVE_EDI, -8) +defframe(SAVE_ESI, -12) +defframe(SAVE_EBX, -16) +deflit(STACK_SPACE, 16) + +dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 7) +',` +deflit(UNROLL_THRESHOLD, 5) +') + + .text + ALIGN(32) +PROLOGUE(mpn_mul_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx C initial carry from the caller + jmp LF(mpn_mul_1,start_nc) +EPILOGUE() + + +PROLOGUE(mpn_mul_1) +deflit(`FRAME',0) + xorl %edx, %edx C initial carry +L(start_nc): + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME', STACK_SPACE) + + movl %edi, SAVE_EDI + movl %ebx, SAVE_EBX + movl %edx, %ebx C carry + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + cmpl $UNROLL_THRESHOLD, %ecx + + movl PARAM_DST, %edi + movl %ebp, SAVE_EBP + jae L(unroll) + + leal (%esi,%ecx,4), %esi C src end + leal (%edi,%ecx,4), %edi C dst end + negl %ecx C index runs from -size up to zero + + movl PARAM_MULTIPLIER, %ebp + +L(simple): + C eax scratch + C ebx carry + C ecx counter (negative) + C edx scratch + C esi src + C edi dst + C ebp multiplier + + movl (%esi,%ecx,4), %eax + + mull %ebp + + addl %ebx, %eax + movl %eax, (%edi,%ecx,4) + movl $0, %ebx C mov rather than xor, the carry flag from addl is still needed + + adcl %edx, %ebx + incl %ecx + jnz L(simple) + + movl %ebx, %eax C return the carry limb + movl SAVE_EBX, %ebx + movl SAVE_ESI, %esi + + movl SAVE_EDI, %edi + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + +C ----------------------------------------------------------------------------- +C The mov to load the next source limb is done well ahead of the mul, this +C is necessary for full speed. It leads to one limb handled separately +C after the loop. +C +C When unrolling to 32 or more, an offset of +4 is used on the src pointer, +C to avoid having an 0x80 displacement in the code for the last limb in the +C unrolled loop. This is for a fair comparison between 16 and 32 unrolling.
+ +ifelse(eval(UNROLL_COUNT >= 32),1,` +deflit(SRC_OFFSET,4) +',` +deflit(SRC_OFFSET,) +') + + C this is offset 0x62, so close enough to aligned +L(unroll): + C eax + C ebx initial carry + C ecx size + C edx + C esi src + C edi dst + C ebp +deflit(`FRAME', STACK_SPACE) + + leal -1(%ecx), %edx C one limb handled at end + leal -2(%ecx), %ecx C and ecx is one less than edx + movl %ebp, SAVE_EBP + + negl %edx + shrl $UNROLL_LOG2, %ecx C unrolled loop counter + movl (%esi), %eax C src low limb + + andl $UNROLL_MASK, %edx + movl PARAM_DST, %edi + + movl %edx, %ebp + shll $4, %edx C 16*n here, plus n from ebp in the leal gives 17*n + + C 17 code bytes per limb +ifdef(`PIC',` + call L(add_eip_to_edx) +L(here): +',` + leal L(entry) (%edx,%ebp), %edx +') + negl %ebp + + leal ifelse(UNROLL_BYTES,256,128+) SRC_OFFSET(%esi,%ebp,4), %esi + leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebp,4), %edi + movl PARAM_MULTIPLIER, %ebp + + jmp *%edx + + +ifdef(`PIC',` +L(add_eip_to_edx): + C See README.family about old gas bugs + leal (%edx,%ebp), %edx + addl $L(entry)-L(here), %edx + addl (%esp), %edx C add the return address, which is L(here) + ret +') + + +C ---------------------------------------------------------------------------- + ALIGN(32) +L(top): + C eax next src limb + C ebx carry + C ecx counter + C edx scratch + C esi src+4 + C edi dst + C ebp multiplier + C + C 17 code bytes per limb processed + +L(entry): +forloop(i, 0, UNROLL_COUNT-1, ` + deflit(`disp_dst', eval(i*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp_src', eval(disp_dst + 4-(SRC_OFFSET-0))) + + mull %ebp + + addl %eax, %ebx +Zdisp( movl, disp_src,(%esi), %eax) +Zdisp( movl, %ebx, disp_dst,(%edi)) + + movl $0, %ebx + adcl %edx, %ebx +') + + decl %ecx C flags kept across the leals for the jns + + leal UNROLL_BYTES(%esi), %esi + leal UNROLL_BYTES(%edi), %edi + jns L(top) + + +deflit(`disp0', ifelse(UNROLL_BYTES,256,-128)) + + mull %ebp C the one limb left over, already loaded into eax + + addl %eax, %ebx + movl $0, %eax + movl SAVE_ESI, %esi + + movl %ebx, disp0(%edi) + movl SAVE_EBX, %ebx + movl SAVE_EDI, %edi + + adcl %edx, %eax C return the carry limb + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + +EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mul_basecase.asm b/rts/gmp/mpn/x86/k7/mul_basecase.asm new file mode 100644 index 0000000000..c4be62e633 --- /dev/null +++ b/rts/gmp/mpn/x86/k7/mul_basecase.asm @@ -0,0 +1,593 @@ +dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers. +dnl +dnl K7: approx 4.42 cycles per cross product at around 20x20 limbs (16 +dnl limbs/loop unrolling). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl K7 UNROLL_COUNT cycles/product (at around 20x20) +dnl 8 4.67 +dnl 16 4.59 +dnl 32 4.42 +dnl Maximum possible with the current code is 32. +dnl +dnl At 32 the typical 13-26 limb sizes from the karatsuba code will get +dnl done with a straight run through a block of code, no inner loop. Using +dnl 32 gives 1k of code, but the k7 has a 64k L1 code cache. + +deflit(UNROLL_COUNT, 32) + + +C void mpn_mul_basecase (mp_ptr wp, +C mp_srcptr xp, mp_size_t xsize, +C mp_srcptr yp, mp_size_t ysize); +C +C Calculate xp,xsize multiplied by yp,ysize, storing the result in +C wp,xsize+ysize. 
+C +C This routine is essentially the same as mpn/generic/mul_basecase.c, but +C it's faster because it does most of the mpn_addmul_1() startup +C calculations only once. The saving is 15-25% on typical sizes coming from +C the Karatsuba multiply code. + +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 5) +',` +deflit(UNROLL_THRESHOLD, 5) +') + +defframe(PARAM_YSIZE,20) +defframe(PARAM_YP, 16) +defframe(PARAM_XSIZE,12) +defframe(PARAM_XP, 8) +defframe(PARAM_WP, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_mul_basecase) +deflit(`FRAME',0) + + movl PARAM_XSIZE, %ecx + movl PARAM_YP, %eax + + movl PARAM_XP, %edx + movl (%eax), %eax C yp low limb + + cmpl $2, %ecx + ja L(xsize_more_than_two) + je L(two_by_something) + + + C one limb by one limb + + mull (%edx) + + movl PARAM_WP, %ecx + movl %eax, (%ecx) + movl %edx, 4(%ecx) + ret + + +C ----------------------------------------------------------------------------- +L(two_by_something): +deflit(`FRAME',0) + decl PARAM_YSIZE + pushl %ebx defframe_pushl(`SAVE_EBX') + movl %eax, %ecx C yp low limb + + movl PARAM_WP, %ebx + pushl %esi defframe_pushl(`SAVE_ESI') + movl %edx, %esi C xp + + movl (%edx), %eax C xp low limb + jnz L(two_by_two) + + + C two limbs by one limb + + mull %ecx + + movl %eax, (%ebx) + movl 4(%esi), %eax + movl %edx, %esi C carry + + mull %ecx + + addl %eax, %esi + + movl %esi, 4(%ebx) + movl SAVE_ESI, %esi + + adcl $0, %edx + + movl %edx, 8(%ebx) + movl SAVE_EBX, %ebx + addl $FRAME, %esp + + ret + + + +C ----------------------------------------------------------------------------- +C Could load yp earlier into another register. 
+ + ALIGN(16) +L(two_by_two): + C eax xp low limb + C ebx wp + C ecx yp low limb + C edx + C esi xp + C edi + C ebp + +dnl FRAME carries on from previous + + mull %ecx C xp[0] * yp[0] + + push %edi defframe_pushl(`SAVE_EDI') + movl %edx, %edi C carry, for wp[1] + + movl %eax, (%ebx) + movl 4(%esi), %eax + + mull %ecx C xp[1] * yp[0] + + addl %eax, %edi + movl PARAM_YP, %ecx + + adcl $0, %edx + movl 4(%ecx), %ecx C yp[1] + movl %edi, 4(%ebx) + + movl 4(%esi), %eax C xp[1] + movl %edx, %edi C carry, for wp[2] + + mull %ecx C xp[1] * yp[1] + + addl %eax, %edi + + adcl $0, %edx + movl (%esi), %eax C xp[0] + + movl %edx, %esi C carry, for wp[3] + + mull %ecx C xp[0] * yp[1] + + addl %eax, 4(%ebx) + adcl %edx, %edi + movl %edi, 8(%ebx) + + adcl $0, %esi + movl SAVE_EDI, %edi + movl %esi, 12(%ebx) + + movl SAVE_ESI, %esi + movl SAVE_EBX, %ebx + addl $FRAME, %esp + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(xsize_more_than_two): + +C The first limb of yp is processed with a simple mpn_mul_1 style loop +C inline. Unrolling this doesn't seem worthwhile since it's only run once +C (whereas the addmul below is run ysize-1 many times). A call to the +C actual mpn_mul_1 will be slowed down by the call and parameter pushing and +C popping, and doesn't seem likely to be worthwhile on the typical 13-26 +C limb operations the Karatsuba code calls here with. 
+ + C eax yp[0] + C ebx + C ecx xsize + C edx xp + C esi + C edi + C ebp + +dnl FRAME doesn't carry on from previous, no pushes yet here +defframe(`SAVE_EBX',-4) +defframe(`SAVE_ESI',-8) +defframe(`SAVE_EDI',-12) +defframe(`SAVE_EBP',-16) +deflit(`FRAME',0) + + subl $16, %esp +deflit(`FRAME',16) + + movl %edi, SAVE_EDI + movl PARAM_WP, %edi + + movl %ebx, SAVE_EBX + movl %ebp, SAVE_EBP + movl %eax, %ebp + + movl %esi, SAVE_ESI + xorl %ebx, %ebx + leal (%edx,%ecx,4), %esi C xp end + + leal (%edi,%ecx,4), %edi C wp end of mul1 + negl %ecx + + +L(mul1): + C eax scratch + C ebx carry + C ecx counter, negative + C edx scratch + C esi xp end + C edi wp end of mul1 + C ebp multiplier + + movl (%esi,%ecx,4), %eax + + mull %ebp + + addl %ebx, %eax + movl %eax, (%edi,%ecx,4) + movl $0, %ebx + + adcl %edx, %ebx + incl %ecx + jnz L(mul1) + + + movl PARAM_YSIZE, %edx + movl PARAM_XSIZE, %ecx + + movl %ebx, (%edi) C final carry + decl %edx + + jnz L(ysize_more_than_one) + + + movl SAVE_EDI, %edi + movl SAVE_EBX, %ebx + + movl SAVE_EBP, %ebp + movl SAVE_ESI, %esi + addl $FRAME, %esp + + ret + + +L(ysize_more_than_one): + cmpl $UNROLL_THRESHOLD, %ecx + movl PARAM_YP, %eax + + jae L(unroll) + + +C ----------------------------------------------------------------------------- + C simple addmul looping + C + C eax yp + C ebx + C ecx xsize + C edx ysize-1 + C esi xp end + C edi wp end of mul1 + C ebp + + leal 4(%eax,%edx,4), %ebp C yp end + negl %ecx + negl %edx + + movl (%esi,%ecx,4), %eax C xp low limb + movl %edx, PARAM_YSIZE C -(ysize-1) + incl %ecx + + xorl %ebx, %ebx C initial carry + movl %ecx, PARAM_XSIZE C -(xsize-1) + movl %ebp, PARAM_YP + + movl (%ebp,%edx,4), %ebp C yp second lowest limb - multiplier + jmp L(simple_outer_entry) + + + C this is offset 0x121 so close enough to aligned +L(simple_outer_top): + C ebp ysize counter, negative + + movl PARAM_YP, %edx + movl PARAM_XSIZE, %ecx C -(xsize-1) + xorl %ebx, %ebx C carry + + movl %ebp, PARAM_YSIZE + addl $4, %edi C next 
position in wp + + movl (%edx,%ebp,4), %ebp C yp limb - multiplier + movl -4(%esi,%ecx,4), %eax C xp low limb + + +L(simple_outer_entry): + +L(simple_inner): + C eax xp limb + C ebx carry limb + C ecx loop counter (negative) + C edx scratch + C esi xp end + C edi wp end + C ebp multiplier + + mull %ebp + + addl %eax, %ebx + adcl $0, %edx + + addl %ebx, (%edi,%ecx,4) + movl (%esi,%ecx,4), %eax + adcl $0, %edx + + incl %ecx + movl %edx, %ebx + jnz L(simple_inner) + + + mull %ebp + + movl PARAM_YSIZE, %ebp + addl %eax, %ebx + + adcl $0, %edx + addl %ebx, (%edi) + + adcl $0, %edx + incl %ebp + + movl %edx, 4(%edi) + jnz L(simple_outer_top) + + + movl SAVE_EBX, %ebx + movl SAVE_ESI, %esi + + movl SAVE_EDI, %edi + movl SAVE_EBP, %ebp + addl $FRAME, %esp + + ret + + + +C ----------------------------------------------------------------------------- +C +C The unrolled loop is the same as in mpn_addmul_1(), see that code for some +C comments. +C +C VAR_ADJUST is the negative of how many limbs the leals in the inner loop +C increment xp and wp. This is used to adjust back xp and wp, and rshifted +C to give an initial VAR_COUNTER at the top of the outer loop. +C +C VAR_COUNTER is for the unrolled loop, running from VAR_ADJUST/UNROLL_COUNT +C up to -1, inclusive. +C +C VAR_JMP is the computed jump into the unrolled loop. +C +C VAR_XP_LOW is the least significant limb of xp, which is needed at the +C start of the unrolled loop. +C +C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1, +C inclusive. +C +C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be +C added to give the location of the next limb of yp, which is the multiplier +C in the unrolled loop. +C +C The trick with VAR_ADJUST means it's only necessary to do one fetch in the +C outer loop to take care of xp, wp and the inner loop counter.
+ +defframe(VAR_COUNTER, -20) +defframe(VAR_ADJUST, -24) +defframe(VAR_JMP, -28) +defframe(VAR_XP_LOW, -32) +deflit(VAR_EXTRA_SPACE, 16) + + +L(unroll): + C eax yp + C ebx + C ecx xsize + C edx ysize-1 + C esi xp end + C edi wp end of mul1 + C ebp + + movl PARAM_XP, %esi + movl 4(%eax), %ebp C multiplier (yp second limb) + leal 4(%eax,%edx,4), %eax C yp adjust for ysize indexing + + movl PARAM_WP, %edi + movl %eax, PARAM_YP + negl %edx + + movl %edx, PARAM_YSIZE + leal UNROLL_COUNT-2(%ecx), %ebx C (xsize-1)+UNROLL_COUNT-1 + decl %ecx C xsize-1 + + movl (%esi), %eax C xp low limb + andl $-UNROLL_MASK-1, %ebx + negl %ecx + + subl $VAR_EXTRA_SPACE, %esp +deflit(`FRAME',16+VAR_EXTRA_SPACE) + negl %ebx + andl $UNROLL_MASK, %ecx + + movl %ebx, VAR_ADJUST + movl %ecx, %edx + shll $4, %ecx + + sarl $UNROLL_LOG2, %ebx + + C 17 code bytes per limb +ifdef(`PIC',` + call L(pic_calc) +L(unroll_here): +',` + leal L(unroll_entry) (%ecx,%edx,1), %ecx +') + negl %edx + + movl %eax, VAR_XP_LOW + movl %ecx, VAR_JMP + leal 4(%edi,%edx,4), %edi C wp and xp, adjust for unrolling, + leal 4(%esi,%edx,4), %esi C and start at second limb + jmp L(unroll_outer_entry) + + +ifdef(`PIC',` +L(pic_calc): + C See README.family about old gas bugs + leal (%ecx,%edx,1), %ecx + addl $L(unroll_entry)-L(unroll_here), %ecx + addl (%esp), %ecx + ret +') + + +C -------------------------------------------------------------------------- + ALIGN(32) +L(unroll_outer_top): + C ebp ysize counter, negative + + movl VAR_ADJUST, %ebx + movl PARAM_YP, %edx + + movl VAR_XP_LOW, %eax + movl %ebp, PARAM_YSIZE C store incremented ysize counter + + leal 4(%edi,%ebx,4), %edi + leal (%esi,%ebx,4), %esi + sarl $UNROLL_LOG2, %ebx + + movl (%edx,%ebp,4), %ebp C yp next multiplier + movl VAR_JMP, %ecx + +L(unroll_outer_entry): + mull %ebp + + testb $1, %cl C and clear carry bit + movl %ebx, VAR_COUNTER + movl $0, %ebx + + movl $0, %ecx + cmovz( %eax, %ecx) C eax into low carry, zero into high carry limb + cmovnz( %eax, %ebx) + 
+ C Extra fetch of VAR_JMP is bad, but registers are tight + jmp *VAR_JMP + + +C ----------------------------------------------------------------------------- + ALIGN(32) +L(unroll_top): + C eax xp limb + C ebx carry high + C ecx carry low + C edx scratch + C esi xp+8 + C edi wp + C ebp yp multiplier limb + C + C VAR_COUNTER loop counter, negative + C + C 17 bytes each limb + +L(unroll_entry): + +deflit(CHUNK_COUNT,2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 + 4)) + +Zdisp( movl, disp0,(%esi), %eax) + adcl %edx, %ebx + + mull %ebp + +Zdisp( addl, %ecx, disp0,(%edi)) + movl $0, %ecx + + adcl %eax, %ebx + + + movl disp1(%esi), %eax + adcl %edx, %ecx + + mull %ebp + + addl %ebx, disp1(%edi) + movl $0, %ebx + + adcl %eax, %ecx +') + + + incl VAR_COUNTER + leal UNROLL_BYTES(%esi), %esi + leal UNROLL_BYTES(%edi), %edi + + jnz L(unroll_top) + + + C eax + C ebx zero + C ecx low + C edx high + C esi + C edi wp, pointing at second last limb) + C ebp + C + C carry flag to be added to high + +deflit(`disp0', ifelse(UNROLL_BYTES,256,-128)) +deflit(`disp1', eval(disp0-0 + 4)) + + movl PARAM_YSIZE, %ebp + adcl $0, %edx + addl %ecx, disp0(%edi) + + adcl $0, %edx + incl %ebp + + movl %edx, disp1(%edi) + jnz L(unroll_outer_top) + + + movl SAVE_ESI, %esi + movl SAVE_EBP, %ebp + + movl SAVE_EDI, %edi + movl SAVE_EBX, %ebx + addl $FRAME, %esp + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/k7/sqr_basecase.asm b/rts/gmp/mpn/x86/k7/sqr_basecase.asm new file mode 100644 index 0000000000..84861ea66b --- /dev/null +++ b/rts/gmp/mpn/x86/k7/sqr_basecase.asm @@ -0,0 +1,627 @@ +dnl AMD K7 mpn_sqr_basecase -- square an mpn number. +dnl +dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product +dnl (measured on the speed difference between 25 and 50 limbs, which is +dnl roughly the Karatsuba recursing range). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. 
+dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for +dnl some comments. + +deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66) + +ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', +`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') + +m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') +deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) + + +C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C With a KARATSUBA_SQR_THRESHOLD around 50 this code is about 1500 bytes, +C which is quite a bit, but is considered good value since squares big +C enough to use most of the code will be spending quite a few cycles in it. 
+ + +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_sqr_basecase) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl PARAM_SRC, %eax + cmpl $2, %ecx + + movl PARAM_DST, %edx + je L(two_limbs) + ja L(three_or_more) + + +C------------------------------------------------------------------------------ +C one limb only + C eax src + C ecx size + C edx dst + + movl (%eax), %eax + movl %edx, %ecx + + mull %eax + + movl %edx, 4(%ecx) + movl %eax, (%ecx) + ret + + +C------------------------------------------------------------------------------ +C +C Using the read/modify/write "add"s seems to be faster than saving and +C restoring registers. Perhaps the loads for the first set hide under the +C mul latency and the second gets store to load forwarding. + + ALIGN(16) +L(two_limbs): + C eax src + C ebx + C ecx size + C edx dst +deflit(`FRAME',0) + + pushl %ebx FRAME_pushl() + movl %eax, %ebx C src + movl (%eax), %eax + + movl %edx, %ecx C dst + + mull %eax C src[0]^2 + + movl %eax, (%ecx) C dst[0] + movl 4(%ebx), %eax + + movl %edx, 4(%ecx) C dst[1] + + mull %eax C src[1]^2 + + movl %eax, 8(%ecx) C dst[2] + movl (%ebx), %eax + + movl %edx, 12(%ecx) C dst[3] + + mull 4(%ebx) C src[0]*src[1] + + popl %ebx + + addl %eax, 4(%ecx) + adcl %edx, 8(%ecx) + adcl $0, 12(%ecx) + ASSERT(nc) + + addl %eax, 4(%ecx) + adcl %edx, 8(%ecx) + adcl $0, 12(%ecx) + ASSERT(nc) + + ret + + +C------------------------------------------------------------------------------ +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) +deflit(STACK_SPACE, 16) + +L(three_or_more): + subl $STACK_SPACE, %esp + cmpl $4, %ecx + jae L(four_or_more) +deflit(`FRAME',STACK_SPACE) + + +C------------------------------------------------------------------------------ +C Three limbs +C +C Writing out the loads and stores separately at the end of this code comes +C out about 10 cycles faster than using adcls to memory. 
+ + C eax src + C ecx size + C edx dst + + movl %ebx, SAVE_EBX + movl %eax, %ebx C src + movl (%eax), %eax + + movl %edx, %ecx C dst + movl %esi, SAVE_ESI + movl %edi, SAVE_EDI + + mull %eax C src[0] ^ 2 + + movl %eax, (%ecx) + movl 4(%ebx), %eax + movl %edx, 4(%ecx) + + mull %eax C src[1] ^ 2 + + movl %eax, 8(%ecx) + movl 8(%ebx), %eax + movl %edx, 12(%ecx) + + mull %eax C src[2] ^ 2 + + movl %eax, 16(%ecx) + movl (%ebx), %eax + movl %edx, 20(%ecx) + + mull 4(%ebx) C src[0] * src[1] + + movl %eax, %esi + movl (%ebx), %eax + movl %edx, %edi + + mull 8(%ebx) C src[0] * src[2] + + addl %eax, %edi + movl %ebp, SAVE_EBP + movl $0, %ebp + + movl 4(%ebx), %eax + adcl %edx, %ebp + + mull 8(%ebx) C src[1] * src[2] + + xorl %ebx, %ebx + addl %eax, %ebp + + adcl $0, %edx + + C eax + C ebx zero, will be dst[5] + C ecx dst + C edx dst[4] + C esi dst[1] + C edi dst[2] + C ebp dst[3] + + adcl $0, %edx + addl %esi, %esi + + adcl %edi, %edi + movl 4(%ecx), %eax + + adcl %ebp, %ebp + + adcl %edx, %edx + + adcl $0, %ebx + addl %eax, %esi + movl 8(%ecx), %eax + + adcl %eax, %edi + movl 12(%ecx), %eax + movl %esi, 4(%ecx) + + adcl %eax, %ebp + movl 16(%ecx), %eax + movl %edi, 8(%ecx) + + movl SAVE_ESI, %esi + movl SAVE_EDI, %edi + + adcl %eax, %edx + movl 20(%ecx), %eax + movl %ebp, 12(%ecx) + + adcl %ebx, %eax + ASSERT(nc) + movl SAVE_EBX, %ebx + movl SAVE_EBP, %ebp + + movl %edx, 16(%ecx) + movl %eax, 20(%ecx) + addl $FRAME, %esp + + ret + + +C------------------------------------------------------------------------------ +L(four_or_more): + +C First multiply src[0]*src[1..size-1] and store at dst[1..size]. +C Further products are added in rather than stored. 
+ + C eax src + C ebx + C ecx size + C edx dst + C esi + C edi + C ebp + +defframe(`VAR_COUNTER',-20) +defframe(`VAR_JMP', -24) +deflit(EXTRA_STACK_SPACE, 8) + + movl %ebx, SAVE_EBX + movl %edi, SAVE_EDI + leal (%edx,%ecx,4), %edi C &dst[size] + + movl %esi, SAVE_ESI + movl %ebp, SAVE_EBP + leal (%eax,%ecx,4), %esi C &src[size] + + movl (%eax), %ebp C multiplier + movl $0, %ebx + decl %ecx + + negl %ecx + subl $EXTRA_STACK_SPACE, %esp +FRAME_subl_esp(EXTRA_STACK_SPACE) + +L(mul_1): + C eax scratch + C ebx carry + C ecx counter + C edx scratch + C esi &src[size] + C edi &dst[size] + C ebp multiplier + + movl (%esi,%ecx,4), %eax + + mull %ebp + + addl %ebx, %eax + movl %eax, (%edi,%ecx,4) + movl $0, %ebx + + adcl %edx, %ebx + incl %ecx + jnz L(mul_1) + + +C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2. +C +C The last two products, which are the bottom right corner of the product +C triangle, are left to the end. These are src[size-3]*src[size-2,size-1] +C and src[size-2]*src[size-1]. If size is 4 then it's only these corner +C cases that need to be done. +C +C The unrolled code is the same as in mpn_addmul_1, see that routine for +C some comments. +C +C VAR_COUNTER is the outer loop, running from -size+4 to -1, inclusive. +C +C VAR_JMP is the computed jump into the unrolled code, stepped by one code +C chunk each outer loop. +C +C K7 does branch prediction on indirect jumps, which is bad since it's a +C different target each time. There seems no way to avoid this. + +dnl This value also hard coded in some shifts and adds +deflit(CODE_BYTES_PER_LIMB, 17) + +dnl With the unmodified &src[size] and &dst[size] pointers, the +dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT +dnl values up to 31, but above that an offset must be added to them. 
+ +deflit(OFFSET, +ifelse(eval(UNROLL_COUNT>31),1, +eval((UNROLL_COUNT-31)*4), +0)) + +dnl Because the last chunk of code is generated differently, a label placed +dnl at the end doesn't work. Instead calculate the implied end using the +dnl start and how many chunks of code there are. + +deflit(UNROLL_INNER_END, +`L(unroll_inner_start)+eval(UNROLL_COUNT*CODE_BYTES_PER_LIMB)') + + C eax + C ebx carry + C ecx + C edx + C esi &src[size] + C edi &dst[size] + C ebp + + movl PARAM_SIZE, %ecx + movl %ebx, (%edi) + + subl $4, %ecx + jz L(corner) + + negl %ecx +ifelse(OFFSET,0,,`subl $OFFSET, %edi') +ifelse(OFFSET,0,,`subl $OFFSET, %esi') + + movl %ecx, %edx + shll $4, %ecx + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx +') + + + C The calculated jump mustn't come out to before the start of the + C code available. This is the limit UNROLL_COUNT puts on the src + C operand size, but checked here directly using the jump address. 
+ ASSERT(ae, + `movl_text_address(L(unroll_inner_start), %eax) + cmpl %eax, %ecx') + + +C------------------------------------------------------------------------------ + ALIGN(16) +L(unroll_outer_top): + C eax + C ebx high limb to store + C ecx VAR_JMP + C edx VAR_COUNTER, limbs, negative + C esi &src[size], constant + C edi dst ptr, high of last addmul + C ebp + + movl -12+OFFSET(%esi,%edx,4), %ebp C next multiplier + movl -8+OFFSET(%esi,%edx,4), %eax C first of multiplicand + + movl %edx, VAR_COUNTER + + mull %ebp + +define(cmovX,`ifelse(eval(UNROLL_COUNT%2),0,`cmovz($@)',`cmovnz($@)')') + + testb $1, %cl + movl %edx, %ebx C high carry + movl %ecx, %edx C jump + + movl %eax, %ecx C low carry + cmovX( %ebx, %ecx) C high carry reverse + cmovX( %eax, %ebx) C low carry reverse + + leal CODE_BYTES_PER_LIMB(%edx), %eax + xorl %edx, %edx + leal 4(%edi), %edi + + movl %eax, VAR_JMP + + jmp *%eax + + +ifdef(`PIC',` +L(pic_calc): + addl (%esp), %ecx + addl $UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)-L(here), %ecx + addl %edx, %ecx + ret +') + + + C Must be an even address to preserve the significance of the low + C bit of the jump address indicating which way around ecx/ebx should + C start. 
+ ALIGN(2) + +L(unroll_inner_start): + C eax next limb + C ebx carry high + C ecx carry low + C edx scratch + C esi src + C edi dst + C ebp multiplier + +forloop(`i', UNROLL_COUNT, 1, ` + deflit(`disp_src', eval(-i*4 + OFFSET)) + deflit(`disp_dst', eval(disp_src - 4)) + + m4_assert(`disp_src>=-128 && disp_src<128') + m4_assert(`disp_dst>=-128 && disp_dst<128') + +ifelse(eval(i%2),0,` +Zdisp( movl, disp_src,(%esi), %eax) + adcl %edx, %ebx + + mull %ebp + +Zdisp( addl, %ecx, disp_dst,(%edi)) + movl $0, %ecx + + adcl %eax, %ebx + +',` + dnl this bit comes out last +Zdisp( movl, disp_src,(%esi), %eax) + adcl %edx, %ecx + + mull %ebp + +dnl Zdisp( addl %ebx, disp_src,(%edi)) + addl %ebx, disp_dst(%edi) +ifelse(forloop_last,0, +` movl $0, %ebx') + + adcl %eax, %ecx +') +') + + C eax next limb + C ebx carry high + C ecx carry low + C edx scratch + C esi src + C edi dst + C ebp multiplier + + adcl $0, %edx + addl %ecx, -4+OFFSET(%edi) + movl VAR_JMP, %ecx + + adcl $0, %edx + + movl %edx, m4_empty_if_zero(OFFSET) (%edi) + movl VAR_COUNTER, %edx + + incl %edx + jnz L(unroll_outer_top) + + +ifelse(OFFSET,0,,` + addl $OFFSET, %esi + addl $OFFSET, %edi +') + + +C------------------------------------------------------------------------------ +L(corner): + C esi &src[size] + C edi &dst[2*size-5] + + movl -12(%esi), %ebp + movl -8(%esi), %eax + movl %eax, %ecx + + mull %ebp + + addl %eax, -4(%edi) + movl -4(%esi), %eax + + adcl $0, %edx + movl %edx, %ebx + movl %eax, %esi + + mull %ebp + + addl %ebx, %eax + + adcl $0, %edx + addl %eax, (%edi) + movl %esi, %eax + + adcl $0, %edx + movl %edx, %ebx + + mull %ecx + + addl %ebx, %eax + movl %eax, 4(%edi) + + adcl $0, %edx + movl %edx, 8(%edi) + + + +C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1]. 
+ +L(lshift_start): + movl PARAM_SIZE, %eax + movl PARAM_DST, %edi + xorl %ecx, %ecx C clear carry + + leal (%edi,%eax,8), %edi + notl %eax C -size-1, preserve carry + + leal 2(%eax), %eax C -(size-1) + +L(lshift): + C eax counter, negative + C ebx + C ecx + C edx + C esi + C edi dst, pointing just after last limb + C ebp + + rcll -4(%edi,%eax,8) + rcll (%edi,%eax,8) + incl %eax + jnz L(lshift) + + setc %al + + movl PARAM_SRC, %esi + movl %eax, -4(%edi) C dst most significant limb + + movl PARAM_SIZE, %ecx + + +C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ..., +C src[size-1]^2. dst[0] hasn't been set at all yet, and just gets the +C low limb of src[0]^2. + + movl (%esi), %eax C src[0] + + mull %eax + + leal (%esi,%ecx,4), %esi C src point just after last limb + negl %ecx + + movl %eax, (%edi,%ecx,8) C dst[0] + incl %ecx + +L(diag): + C eax scratch + C ebx scratch + C ecx counter, negative + C edx carry + C esi src just after last limb + C edi dst just after last limb + C ebp + + movl (%esi,%ecx,4), %eax + movl %edx, %ebx + + mull %eax + + addl %ebx, -4(%edi,%ecx,8) + adcl %eax, (%edi,%ecx,8) + adcl $0, %edx + + incl %ecx + jnz L(diag) + + + movl SAVE_ESI, %esi + movl SAVE_EBX, %ebx + + addl %edx, -4(%edi) C dst most significant limb + movl SAVE_EDI, %edi + + movl SAVE_EBP, %ebp + addl $FRAME, %esp + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/lshift.asm b/rts/gmp/mpn/x86/lshift.asm new file mode 100644 index 0000000000..4735335cbe --- /dev/null +++ b/rts/gmp/mpn/x86/lshift.asm @@ -0,0 +1,90 @@ +dnl x86 mpn_lshift -- mpn left shift. + +dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library.
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_lshift) + + pushl %edi + pushl %esi + pushl %ebx +deflit(`FRAME',12) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%edx + movl PARAM_SHIFT,%ecx + + subl $4,%esi C adjust src + + movl (%esi,%edx,4),%ebx C read most significant limb + xorl %eax,%eax + shldl( %cl, %ebx, %eax) C compute carry limb + decl %edx + jz L(end) + pushl %eax C push carry limb onto stack + testb $1,%dl + jnz L(1) C enter loop in the middle + movl %ebx,%eax + + ALIGN(8) +L(oop): movl (%esi,%edx,4),%ebx C load next lower limb + shldl( %cl, %ebx, %eax) C compute result limb + movl %eax,(%edi,%edx,4) C store it + decl %edx +L(1): movl (%esi,%edx,4),%eax + shldl( %cl, %eax, %ebx) + movl %ebx,(%edi,%edx,4) + decl %edx + jnz L(oop) + + shll %cl,%eax C compute least significant limb + movl %eax,(%edi) C store it + + popl %eax C pop carry limb + + popl %ebx + popl %esi + popl %edi + ret + +L(end): shll %cl,%ebx C compute least 
significant limb + movl %ebx,(%edi) C store it + + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/mod_1.asm b/rts/gmp/mpn/x86/mod_1.asm new file mode 100644 index 0000000000..3908161b3e --- /dev/null +++ b/rts/gmp/mpn/x86/mod_1.asm @@ -0,0 +1,141 @@ +dnl x86 mpn_mod_1 -- mpn by limb remainder. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl cycles/limb +dnl K6 20 +dnl P5 44 +dnl P6 39 +dnl 486 approx 42 maybe +dnl +dnl The following have their own optimized mod_1 implementations, but for +dnl reference the code here runs as follows. +dnl +dnl P6MMX 39 +dnl K7 41 + + +include(`../config.m4') + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C +C Divide src,size by divisor and return the remainder. The quotient is +C discarded. +C +C See mpn/x86/divrem_1.asm for some comments. 
+ +defframe(PARAM_CARRY, 16) +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + + .text + ALIGN(16) + +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + pushl %esi FRAME_pushl() + + movl PARAM_DIVISOR, %esi + orl %ecx, %ecx + + movl PARAM_CARRY, %edx + jnz LF(mpn_mod_1,top) + + popl %esi + movl %edx, %eax + + popl %ebx + + ret + +EPILOGUE() + + +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + pushl %ebx FRAME_pushl() + + movl PARAM_SRC, %ebx + pushl %esi FRAME_pushl() + + orl %ecx, %ecx + jz L(done_zero) + + movl PARAM_DIVISOR, %esi + movl -4(%ebx,%ecx,4), %eax C src high limb + + cmpl %esi, %eax + + sbbl %edx, %edx C -1 if high<divisor + + addl %edx, %ecx C skip one division if high<divisor + jz L(done_eax) + + andl %eax, %edx C carry if high<divisor + + +L(top): + C eax scratch (quotient) + C ebx src + C ecx counter + C edx carry (remainder) + C esi divisor + C edi + C ebp + + movl -4(%ebx,%ecx,4), %eax + + divl %esi + + loop_or_decljnz L(top) + + + movl %edx, %eax +L(done_eax): + popl %esi + + popl %ebx + + ret + + +L(done_zero): + popl %esi + xorl %eax, %eax + + popl %ebx + + ret + + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/mul_1.asm b/rts/gmp/mpn/x86/mul_1.asm new file mode 100644 index 0000000000..8817f291bc --- /dev/null +++ b/rts/gmp/mpn/x86/mul_1.asm @@ -0,0 +1,130 @@ +dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector +dnl with a limb and store the result in a second limb vector. +dnl +dnl cycles/limb +dnl P6: 5.5 +dnl +dnl The following CPUs have their own optimized code, but for reference the +dnl code here runs as follows. +dnl +dnl cycles/limb +dnl P5: 12.5 +dnl K6: 10.5 +dnl K7: 4.5 + + +dnl Copyright (C) 1992, 1994, 1997, 1998, 1999, 2000 Free Software +dnl Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier); + +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + TEXT + ALIGN(8) +PROLOGUE(mpn_mul_1) +deflit(`FRAME',0) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%ecx + + xorl %ebx,%ebx + andl $3,%ecx + jz L(end0) + +L(oop0): + movl (%esi),%eax + mull PARAM_MULTIPLIER + leal 4(%esi),%esi + addl %ebx,%eax + movl $0,%ebx + adcl %ebx,%edx + movl %eax,(%edi) + movl %edx,%ebx C propagate carry into cylimb + + leal 4(%edi),%edi + decl %ecx + jnz L(oop0) + +L(end0): + movl PARAM_SIZE,%ecx + shrl $2,%ecx + jz L(end) + + + ALIGN(8) +L(oop): movl (%esi),%eax + mull PARAM_MULTIPLIER + addl %eax,%ebx + movl $0,%ebp + adcl %edx,%ebp + + movl 4(%esi),%eax + mull PARAM_MULTIPLIER + movl %ebx,(%edi) + addl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + movl 8(%esi),%eax + mull PARAM_MULTIPLIER + movl %ebp,4(%edi) + addl %eax,%ebx C new lo + cylimb + movl $0,%ebp + adcl %edx,%ebp + + movl 
12(%esi),%eax + mull PARAM_MULTIPLIER + movl %ebx,8(%edi) + addl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + movl %ebp,12(%edi) + + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ecx + jnz L(oop) + +L(end): movl %ebx,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/mul_basecase.asm b/rts/gmp/mpn/x86/mul_basecase.asm new file mode 100644 index 0000000000..3a9b73895b --- /dev/null +++ b/rts/gmp/mpn/x86/mul_basecase.asm @@ -0,0 +1,209 @@ +dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result +dnl in a third limb vector. + + +dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_mul_basecase (mp_ptr wp, +C mp_srcptr xp, mp_size_t xsize, +C mp_srcptr yp, mp_size_t ysize); +C +C This was written in a haste since the Pentium optimized code that was used +C for all x86 machines was slow for the Pentium II. This code would benefit +C from some cleanup. 
+C +C To shave off some percentage of the run-time, one should make 4 variants +C of the Louter loop, for the four different outcomes of un mod 4. That +C would avoid Loop0 altogether. Code expansion would be > 4-fold for that +C part of the function, but since it is not very large, that would be +C acceptable. +C +C The mul loop (at L(oopM)) might need some tweaking. Its current speed is +C unknown. + +defframe(PARAM_YSIZE,20) +defframe(PARAM_YP, 16) +defframe(PARAM_XSIZE,12) +defframe(PARAM_XP, 8) +defframe(PARAM_WP, 4) + +defframe(VAR_MULTIPLIER, -4) +defframe(VAR_COUNTER, -8) +deflit(VAR_STACK_SPACE, 8) + + .text + ALIGN(8) + +PROLOGUE(mpn_mul_basecase) +deflit(`FRAME',0) + + subl $VAR_STACK_SPACE,%esp + pushl %esi + pushl %ebp + pushl %edi +deflit(`FRAME',eval(VAR_STACK_SPACE+12)) + + movl PARAM_XP,%esi + movl PARAM_WP,%edi + movl PARAM_YP,%ebp + + movl (%esi),%eax C load xp[0] + mull (%ebp) C multiply by yp[0] + movl %eax,(%edi) C store to wp[0] + movl PARAM_XSIZE,%ecx C xsize + decl %ecx C If xsize = 1, ysize = 1 too + jz L(done) + + pushl %ebx +FRAME_pushl() + movl %edx,%ebx + + leal 4(%esi),%esi + leal 4(%edi),%edi + +L(oopM): + movl (%esi),%eax C load next limb at xp[j] + leal 4(%esi),%esi + mull (%ebp) + addl %ebx,%eax + movl %edx,%ebx + adcl $0,%ebx + movl %eax,(%edi) + leal 4(%edi),%edi + decl %ecx + jnz L(oopM) + + movl %ebx,(%edi) C most significant limb of product + addl $4,%edi C increment wp + movl PARAM_XSIZE,%eax + shll $2,%eax + subl %eax,%edi + subl %eax,%esi + + movl PARAM_YSIZE,%eax C ysize + decl %eax + jz L(skip) + movl %eax,VAR_COUNTER C set index i to ysize-1 + +L(outer): + movl PARAM_YP,%ebp C yp + addl $4,%ebp C make ebp point to next v limb + movl %ebp,PARAM_YP + movl (%ebp),%eax C copy y limb ... + movl %eax,VAR_MULTIPLIER C ...
to stack slot + movl PARAM_XSIZE,%ecx + + xorl %ebx,%ebx + andl $3,%ecx + jz L(end0) + +L(oop0): + movl (%esi),%eax + mull VAR_MULTIPLIER + leal 4(%esi),%esi + addl %ebx,%eax + movl $0,%ebx + adcl %ebx,%edx + addl %eax,(%edi) + adcl %edx,%ebx C propagate carry into cylimb + + leal 4(%edi),%edi + decl %ecx + jnz L(oop0) + +L(end0): + movl PARAM_XSIZE,%ecx + shrl $2,%ecx + jz L(endX) + + ALIGN(8) +L(oopX): + movl (%esi),%eax + mull VAR_MULTIPLIER + addl %eax,%ebx + movl $0,%ebp + adcl %edx,%ebp + + movl 4(%esi),%eax + mull VAR_MULTIPLIER + addl %ebx,(%edi) + adcl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + movl 8(%esi),%eax + mull VAR_MULTIPLIER + addl %ebp,4(%edi) + adcl %eax,%ebx C new lo + cylimb + movl $0,%ebp + adcl %edx,%ebp + + movl 12(%esi),%eax + mull VAR_MULTIPLIER + addl %ebx,8(%edi) + adcl %eax,%ebp C new lo + cylimb + movl $0,%ebx + adcl %edx,%ebx + + addl %ebp,12(%edi) + adcl $0,%ebx C propagate carry into cylimb + + leal 16(%esi),%esi + leal 16(%edi),%edi + decl %ecx + jnz L(oopX) + +L(endX): + movl %ebx,(%edi) + addl $4,%edi + + C we incremented wp and xp in the loop above; compensate + movl PARAM_XSIZE,%eax + shll $2,%eax + subl %eax,%edi + subl %eax,%esi + + movl VAR_COUNTER,%eax + decl %eax + movl %eax,VAR_COUNTER + jnz L(outer) + +L(skip): + popl %ebx + popl %edi + popl %ebp + popl %esi + addl $8,%esp + ret + +L(done): + movl %edx,4(%edi) C store to wp[1] + popl %edi + popl %ebp + popl %esi + addl $8,%esp + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/p6/README b/rts/gmp/mpn/x86/p6/README new file mode 100644 index 0000000000..7dbc905a0d --- /dev/null +++ b/rts/gmp/mpn/x86/p6/README @@ -0,0 +1,95 @@ + + INTEL P6 MPN SUBROUTINES + + + +This directory contains code optimized for Intel P6 class CPUs, meaning +PentiumPro, Pentium II and Pentium III. The mmx and p3mmx subdirectories +have routines using MMX instructions. + + + +STATUS + +Times for the loops, with all code and data in L1 cache, are as follows. 
+Some of these might be able to be improved. + + cycles/limb + + mpn_add_n/sub_n 3.7 + + mpn_copyi 0.75 + mpn_copyd 2.4 + + mpn_divrem_1 39.0 + mpn_mod_1 39.0 + mpn_divexact_by3 8.5 + + mpn_mul_1 5.5 + mpn_addmul/submul_1 6.35 + + mpn_l/rshift 2.5 + + mpn_mul_basecase 8.2 cycles/crossproduct (approx) + mpn_sqr_basecase 4.0 cycles/crossproduct (approx) + or 7.75 cycles/triangleproduct (approx) + +Pentium II and III have MMX and get the following improvements. + + mpn_divrem_1 25.0 integer part, 17.5 fractional part + mpn_mod_1 24.0 + + mpn_l/rshift 1.75 + + + + +NOTES + +Write-allocate L1 data cache means prefetching of destinations is unnecessary. + +Mispredicted branches have a penalty of between 9 and 15 cycles, and even up +to 26 cycles depending how far speculative execution has gone. The 9 cycle +minimum penalty comes from the issue pipeline being 9 stages. + +A copy with rep movs seems to copy 16 bytes at a time, since speeds for 4, +5, 6 or 7 limb operations are all the same. The 0.75 cycles/limb would be 3 +cycles per 16 byte block. + + + + +CODING + +Instructions in general code have been shown grouped if they can execute +together, which means up to three instructions with no successive +dependencies, and with only the first being a multiple micro-op. + +P6 has out-of-order execution, so the groupings are really only showing +dependent paths where some shuffling might allow some latencies to be +hidden. + + + + +REFERENCES + +"Intel Architecture Optimization Reference Manual", 1999, revision 001 dated +02/99, order number 245127 (order number 730795-001 is in the document too). +Available on-line: + + http://download.intel.com/design/PentiumII/manuals/245127.htm + +"Intel Architecture Optimization Manual", 1997, order number 242816. This +is an older document mostly about P5 and not as good as the above. 
+Available on-line: + + http://download.intel.com/design/PentiumII/manuals/242816.htm + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/mpn/x86/p6/aorsmul_1.asm b/rts/gmp/mpn/x86/p6/aorsmul_1.asm new file mode 100644 index 0000000000..feb364ec0b --- /dev/null +++ b/rts/gmp/mpn/x86/p6/aorsmul_1.asm @@ -0,0 +1,300 @@ +dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple. +dnl +dnl P6: 6.35 cycles/limb (at 16 limbs/loop). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl P6 UNROLL_COUNT cycles/limb +dnl 8 6.7 +dnl 16 6.35 +dnl 32 6.3 +dnl 64 6.3 +dnl Maximum possible with the current code is 64. 
+ +deflit(UNROLL_COUNT, 16) + + +ifdef(`OPERATION_addmul_1', ` + define(M4_inst, addl) + define(M4_function_1, mpn_addmul_1) + define(M4_function_1c, mpn_addmul_1c) + define(M4_description, add it to) + define(M4_desc_retval, carry) +',`ifdef(`OPERATION_submul_1', ` + define(M4_inst, subl) + define(M4_function_1, mpn_submul_1) + define(M4_function_1c, mpn_submul_1c) + define(M4_description, subtract it from) + define(M4_desc_retval, borrow) +',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 +')')') + +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c) + + +C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); +C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult, mp_limb_t carry); +C +C Calculate src,size multiplied by mult and M4_description dst,size. +C Return the M4_desc_retval limb from the top of the result. +C +C This code is pretty much the same as the K6 code. The unrolled loop is +C the same, but there's just a few scheduling tweaks in the setups and the +C simple loop. +C +C A number of variations have been tried for the unrolled loop, with one or +C two carries, and with loads scheduled earlier, but nothing faster than 6 +C cycles/limb has been found. 
+ +ifdef(`PIC',` +deflit(UNROLL_THRESHOLD, 5) +',` +deflit(UNROLL_THRESHOLD, 5) +') + +defframe(PARAM_CARRY, 20) +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) + +PROLOGUE(M4_function_1c) + pushl %ebx +deflit(`FRAME',4) + movl PARAM_CARRY, %ebx + jmp LF(M4_function_1,start_nc) +EPILOGUE() + +PROLOGUE(M4_function_1) + push %ebx +deflit(`FRAME',4) + xorl %ebx, %ebx C initial carry + +L(start_nc): + movl PARAM_SIZE, %ecx + pushl %esi +deflit(`FRAME',8) + + movl PARAM_SRC, %esi + pushl %edi +deflit(`FRAME',12) + + movl PARAM_DST, %edi + pushl %ebp +deflit(`FRAME',16) + cmpl $UNROLL_THRESHOLD, %ecx + + movl PARAM_MULTIPLIER, %ebp + jae L(unroll) + + + C simple loop + C this is offset 0x22, so close enough to aligned +L(simple): + C eax scratch + C ebx carry + C ecx counter + C edx scratch + C esi src + C edi dst + C ebp multiplier + + movl (%esi), %eax + addl $4, %edi + + mull %ebp + + addl %ebx, %eax + adcl $0, %edx + + M4_inst %eax, -4(%edi) + movl %edx, %ebx + + adcl $0, %ebx + decl %ecx + + leal 4(%esi), %esi + jnz L(simple) + + + popl %ebp + popl %edi + + popl %esi + movl %ebx, %eax + + popl %ebx + ret + + + +C------------------------------------------------------------------------------ +C VAR_JUMP holds the computed jump temporarily because there's not enough +C registers when doing the mul for the initial two carry limbs. +C +C The add/adc for the initial carry in %ebx is necessary only for the +C mpn_add/submul_1c entry points. Duplicating the startup code to +C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good +C idea.
+ +dnl overlapping with parameters already fetched +define(VAR_COUNTER,`PARAM_SIZE') +define(VAR_JUMP, `PARAM_DST') + + C this is offset 0x43, so close enough to aligned +L(unroll): + C eax + C ebx initial carry + C ecx size + C edx + C esi src + C edi dst + C ebp + + movl %ecx, %edx + decl %ecx + + subl $2, %edx + negl %ecx + + shrl $UNROLL_LOG2, %edx + andl $UNROLL_MASK, %ecx + + movl %edx, VAR_COUNTER + movl %ecx, %edx + + C 15 code bytes per limb +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + shll $4, %edx + negl %ecx + + leal L(entry) (%edx,%ecx,1), %edx +') + movl (%esi), %eax C src low limb + + movl %edx, VAR_JUMP + leal ifelse(UNROLL_BYTES,256,128+) 4(%esi,%ecx,4), %esi + + mull %ebp + + addl %ebx, %eax C initial carry (from _1c) + adcl $0, %edx + + movl %edx, %ebx C high carry + leal ifelse(UNROLL_BYTES,256,128) (%edi,%ecx,4), %edi + + movl VAR_JUMP, %edx + testl $1, %ecx + movl %eax, %ecx C low carry + + cmovnz( %ebx, %ecx) C high,low carry other way around + cmovnz( %eax, %ebx) + + jmp *%edx + + +ifdef(`PIC',` +L(pic_calc): + shll $4, %edx + negl %ecx + + C See README.family about old gas bugs + leal (%edx,%ecx,1), %edx + addl $L(entry)-L(here), %edx + + addl (%esp), %edx + + ret +') + + +C ----------------------------------------------------------- + ALIGN(32) +L(top): +deflit(`FRAME',16) + C eax scratch + C ebx carry hi + C ecx carry lo + C edx scratch + C esi src + C edi dst + C ebp multiplier + C + C VAR_COUNTER loop counter + C + C 15 code bytes per limb + + addl $UNROLL_BYTES, %edi + +L(entry): +deflit(CHUNK_COUNT,2) +forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, ` + deflit(`disp0', eval(i*4*CHUNK_COUNT ifelse(UNROLL_BYTES,256,-128))) + deflit(`disp1', eval(disp0 + 4)) + +Zdisp( movl, disp0,(%esi), %eax) + mull %ebp +Zdisp( M4_inst,%ecx, disp0,(%edi)) + adcl %eax, %ebx + movl %edx, %ecx + adcl $0, %ecx + + movl disp1(%esi), %eax + mull %ebp + M4_inst %ebx, disp1(%edi) + adcl %eax, %ecx + movl %edx, %ebx + adcl $0, %ebx +') + + decl VAR_COUNTER + 
leal UNROLL_BYTES(%esi), %esi + + jns L(top) + + +deflit(`disp0', eval(UNROLL_BYTES ifelse(UNROLL_BYTES,256,-128))) + + M4_inst %ecx, disp0(%edi) + movl %ebx, %eax + + popl %ebp + popl %edi + + popl %esi + popl %ebx + adcl $0, %eax + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/p6/diveby3.asm b/rts/gmp/mpn/x86/p6/diveby3.asm new file mode 100644 index 0000000000..a77703ea89 --- /dev/null +++ b/rts/gmp/mpn/x86/p6/diveby3.asm @@ -0,0 +1,37 @@ +dnl Intel P6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. +dnl +dnl P6: 8.5 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl The P5 code runs well on P6, in fact better than anything else found so +dnl far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the +dnl dependent path are taking 4.5 cycles. +dnl +dnl The destination cache line prefetching is unnecessary on P6, but +dnl removing it is a 2 cycle slowdown (approx), so it must be inducing +dnl something good in the out of order execution. 
+ +include(`../config.m4') + +MULFUNC_PROLOGUE(mpn_divexact_by3c) +include_mpn(`x86/pentium/diveby3.asm') diff --git a/rts/gmp/mpn/x86/p6/gmp-mparam.h b/rts/gmp/mpn/x86/p6/gmp-mparam.h new file mode 100644 index 0000000000..d7bfb6d60c --- /dev/null +++ b/rts/gmp/mpn/x86/p6/gmp-mparam.h @@ -0,0 +1,96 @@ +/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +#ifndef UMUL_TIME +#define UMUL_TIME 5 /* cycles */ +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 39 /* cycles */ +#endif + +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 2 /* cycles */ +#endif + + +/* Generated by tuneup.c, 2000-07-06. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 23 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 139 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 52 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 166 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 116 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 66 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 20 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 54 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 592, 1440, 2688, 5632, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 608 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 5888 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 656, 1504, 2944, 6656, 18432, 57344, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 672 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 5888 +#endif diff --git a/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm new file mode 100644 index 0000000000..f1b011b623 --- /dev/null +++ b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm @@ -0,0 +1,677 @@ +dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division. +dnl +dnl P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, +C mp_limb_t divisor); +C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize, +C mp_srcptr src, mp_size_t size, +C mp_limb_t divisor, mp_limb_t carry); +C +C This code is a lightly reworked version of mpn/x86/k7/mmx/divrem_1.asm, +C see that file for some comments. It's likely what's here can be improved. + + +dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by +dnl inverse method is used, rather than plain "divl"s. Minimum value 1. +dnl +dnl The different speeds of the integer and fraction parts mean that using +dnl xsize+size isn't quite right. The threshold wants to be a bit higher +dnl for the integer part and a bit lower for the fraction part. (Or what's +dnl really wanted is to speed up the integer part!) +dnl +dnl The threshold is set to make the integer part right. At 4 limbs the +dnl div and mul are about the same there, but on the fractional part the +dnl mul is much faster.
+ +deflit(MUL_THRESHOLD, 4) + + +defframe(PARAM_CARRY, 24) +defframe(PARAM_DIVISOR,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC, 12) +defframe(PARAM_XSIZE, 8) +defframe(PARAM_DST, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) +defframe(VAR_SRC, -28) +defframe(VAR_DST, -32) +defframe(VAR_DST_STOP,-36) + +deflit(STACK_SPACE, 36) + + .text + ALIGN(16) + +PROLOGUE(mpn_divrem_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx + + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebx, SAVE_EBX + movl PARAM_XSIZE, %ebx + + movl %edi, SAVE_EDI + movl PARAM_DST, %edi + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + + leal -4(%edi,%ebx,4), %edi + jmp LF(mpn_divrem_1,start_1c) + +EPILOGUE() + + + C offset 0x31, close enough to aligned +PROLOGUE(mpn_divrem_1) +deflit(`FRAME',0) + + movl PARAM_SIZE, %ecx + movl $0, %edx C initial carry (if can't skip a div) + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %ebx, SAVE_EBX + movl PARAM_XSIZE, %ebx + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + orl %ecx, %ecx + + movl %edi, SAVE_EDI + movl PARAM_DST, %edi + + leal -4(%edi,%ebx,4), %edi C &dst[xsize-1] + jz L(no_skip_div) + + movl -4(%esi,%ecx,4), %eax C src high limb + cmpl %ebp, %eax C one less div if high<divisor + jnb L(no_skip_div) + + movl $0, (%edi,%ecx,4) C dst high limb + decl %ecx C size-1 + movl %eax, %edx C src high limb as initial carry +L(no_skip_div): + + +L(start_1c): + C eax + C ebx xsize + C ecx size + C edx carry + C esi src + C edi &dst[xsize-1] + C ebp divisor + + leal (%ebx,%ecx), %eax C size+xsize + cmpl $MUL_THRESHOLD, %eax + jae L(mul_by_inverse) + + orl %ecx, %ecx + jz L(divide_no_integer) + +L(divide_integer): + C eax scratch (quotient) + C ebx xsize + C ecx counter + C edx scratch 
(remainder) + C esi src + C edi &dst[xsize-1] + C ebp divisor + + movl -4(%esi,%ecx,4), %eax + + divl %ebp + + movl %eax, (%edi,%ecx,4) + decl %ecx + jnz L(divide_integer) + + +L(divide_no_integer): + movl PARAM_DST, %edi + orl %ebx, %ebx + jnz L(divide_fraction) + +L(divide_done): + movl SAVE_ESI, %esi + + movl SAVE_EDI, %edi + + movl SAVE_EBX, %ebx + movl %edx, %eax + + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + +L(divide_fraction): + C eax scratch (quotient) + C ebx counter + C ecx + C edx scratch (remainder) + C esi + C edi dst + C ebp divisor + + movl $0, %eax + + divl %ebp + + movl %eax, -4(%edi,%ebx,4) + decl %ebx + jnz L(divide_fraction) + + jmp L(divide_done) + + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx xsize + C ecx size + C edx carry + C esi src + C edi &dst[xsize-1] + C ebp divisor + + leal 12(%edi), %ebx + + movl %ebx, VAR_DST_STOP + leal 4(%edi,%ecx,4), %edi C &dst[xsize+size] + + movl %edi, VAR_DST + movl %ecx, %ebx C size + + bsrl %ebp, %ecx C 31-l + movl %edx, %edi C carry + + leal 1(%ecx), %eax C 32-l + xorl $31, %ecx C l + + movl %ecx, VAR_NORM + movl $-1, %edx + + shll %cl, %ebp C d normalized + movd %eax, %mm7 + + movl $-1, %eax + subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1) / d + + movl %eax, VAR_INVERSE + orl %ebx, %ebx C size + leal -12(%esi,%ebx,4), %eax C &src[size-3] + + movl %eax, VAR_SRC + jz L(start_zero) + + movl 8(%eax), %esi C src high limb + cmpl $1, %ebx + jz L(start_one) + +L(start_two_or_more): + movl 4(%eax), %edx C src second highest limb + + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shldl( %cl, %edx, %esi) C n10 = high,second << l + + cmpl $2, %ebx + je L(integer_two_left) + jmp L(integer_top) + + +L(start_one): + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shll %cl, %esi C n10 = high << l + jmp L(integer_one_left) + + +L(start_zero): + shll %cl, %edi C n2 = carry << l + movl 
$0, %esi C n10 = 0 + + C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then + C must have xsize!=0 + jmp L(fraction_some) + + + +C ----------------------------------------------------------------------------- +C +C This loop runs at about 25 cycles, which is probably sub-optimal, and +C certainly more than the dependent chain would suggest. A better loop, or +C a better rough analysis of what's possible, would be welcomed. +C +C In the current implementation, the following successively dependent +C micro-ops seem to exist. +C +C uops +C n2+n1 1 (addl) +C mul 5 +C q1+1 3 (addl/adcl) +C mul 5 +C sub 3 (subl/sbbl) +C addback 2 (cmov) +C --- +C 19 +C +C Lack of registers hinders explicit scheduling and it might be that the +C normal out of order execution isn't able to hide enough under the mul +C latencies. +C +C Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than +C cmov (and takes one uop off the dependent chain). A sarl/andl/addl +C combination was tried for the addback (despite the fact it would lengthen +C the dependent chain) but found to be no faster. 
+ + + ALIGN(16) +L(integer_top): + C eax scratch + C ebx scratch (nadj, q1) + C ecx scratch (src, dst) + C edx scratch + C esi n10 + C edi n2 + C ebp d + C + C mm0 scratch (src qword) + C mm7 rshift for normalization + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + movl VAR_SRC, %ecx + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + movq (%ecx), %mm0 C next src limb and the one below it + + mull VAR_INVERSE C m*(n2+n1) + + subl $4, %ecx + + movl %ecx, VAR_SRC + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + movl %ebp, %eax C d + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + + mull %ebx C (q1+1)*d + + movl VAR_DST, %ecx + psrlq %mm7, %mm0 + + C + + C + + C + + subl %eax, %esi + movl VAR_DST_STOP, %eax + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + movd %mm0, %esi + + sbbl $0, %ebx C q + subl $4, %ecx + + movl %ebx, (%ecx) + cmpl %eax, %ecx + + movl %ecx, VAR_DST + jne L(integer_top) + + +L(integer_loop_done): + + +C ----------------------------------------------------------------------------- +C +C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz +C q1_ff special case. This makes the code a bit smaller and simpler, and +C costs only 2 cycles (each).
+ +L(integer_two_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx scratch (src, dst) + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + movl PARAM_SRC, %ecx + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movd (%ecx), %mm0 C src low limb + + movl VAR_DST_STOP, %ecx + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + + mull %ebx C (q1+1)*d + + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + C + + C + + subl %eax, %esi + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + movd %mm0, %esi + + sbbl $0, %ebx C q + + movl %ebx, -4(%ecx) + + +C ----------------------------------------------------------------------------- +L(integer_one_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx scratch (dst) + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + movl VAR_DST_STOP, %ecx + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + C + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + C + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx C q1 if q1+1 overflowed + + mull %ebx + + C + + C + + C + + C + + subl %eax, %esi + movl PARAM_XSIZE, %eax + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder 
-> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + + sbbl $0, %ebx C q + + movl %ebx, -8(%ecx) + subl $8, %ecx + + + + orl %eax, %eax C xsize + jnz L(fraction_some) + + movl %edi, %eax +L(fraction_done): + movl VAR_NORM, %ecx + movl SAVE_EBP, %ebp + + movl SAVE_EDI, %edi + + movl SAVE_ESI, %esi + + movl SAVE_EBX, %ebx + addl $STACK_SPACE, %esp + + shrl %cl, %eax + emms + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d + +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx + C edx + C esi n10 + C edi n2 + C ebp divisor + + movl VAR_DST, %ecx + movl VAR_DST_STOP, %edx + subl $4, %ecx + + movl %ecx, VAR_DST + psrlq %mm7, %mm0 + leal (%ebp,%esi), %edi C n-q*d remainder -> next n2 + + movl $-1, (%ecx) + movd %mm0, %esi C next n10 + + cmpl %ecx, %edx + jne L(integer_top) + + jmp L(integer_loop_done) + + + +C ----------------------------------------------------------------------------- +C +C In the current implementation, the following successively dependent +C micro-ops seem to exist. +C +C uops +C mul 5 +C q1+1 1 (addl) +C mul 5 +C sub 3 (negl/sbbl) +C addback 2 (cmov) +C --- +C 16 +C +C The loop in fact runs at about 17.5 cycles. Using a sarl/andl/addl for +C the addback was found to be a touch slower. 
+ + + ALIGN(16) +L(fraction_some): + C eax + C ebx + C ecx + C edx + C esi + C edi carry + C ebp divisor + + movl PARAM_DST, %esi + movl VAR_DST_STOP, %ecx + movl %edi, %eax + + subl $8, %ecx + + + ALIGN(16) +L(fraction_top): + C eax n2, then scratch + C ebx scratch (nadj, q1) + C ecx dst, decrementing + C edx scratch + C esi dst stop point + C edi n2 + C ebp divisor + + mull VAR_INVERSE C m*n2 + + movl %ebp, %eax C d + subl $4, %ecx C dst + leal 1(%edi), %ebx + + C + + C + + C + + addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1 + + mull %ebx C (q1+1)*d + + C + + C + + C + + C + + negl %eax C low of n - (q1+1)*d + + sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry + leal (%ebp,%eax), %edx + + cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1 + + sbbl $0, %ebx C q + movl %eax, %edi C remainder->n2 + cmpl %esi, %ecx + + movl %ebx, (%ecx) C previous q + jne L(fraction_top) + + + jmp L(fraction_done) + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/p6/mmx/mod_1.asm b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm new file mode 100644 index 0000000000..e7d8d94d33 --- /dev/null +++ b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm @@ -0,0 +1,444 @@ +dnl Intel Pentium-II mpn_mod_1 -- mpn by limb remainder. +dnl +dnl P6MMX: 24.0 cycles/limb. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); +C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, +C mp_limb_t carry); +C +C The code here is very similar to mpn_divrem_1, but with the quotient +C discarded. What's here probably isn't optimal. +C +C See mpn/x86/p6/mmx/divrem_1.asm and mpn/x86/k7/mmx/mod_1.asm for some +C comments. + + +dnl MUL_THRESHOLD is the size at which the multiply by inverse method is +dnl used, rather than plain "divl"s. Minimum value 2. + +deflit(MUL_THRESHOLD, 4) + + +defframe(PARAM_CARRY, 16) +defframe(PARAM_DIVISOR,12) +defframe(PARAM_SIZE, 8) +defframe(PARAM_SRC, 4) + +defframe(SAVE_EBX, -4) +defframe(SAVE_ESI, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) + +defframe(VAR_NORM, -20) +defframe(VAR_INVERSE, -24) +defframe(VAR_SRC_STOP,-28) + +deflit(STACK_SPACE, 28) + + .text + ALIGN(16) + +PROLOGUE(mpn_mod_1c) +deflit(`FRAME',0) + movl PARAM_CARRY, %edx + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + jmp LF(mpn_mod_1,start_1c) + +EPILOGUE() + + + ALIGN(16) +PROLOGUE(mpn_mod_1) +deflit(`FRAME',0) + + movl $0, %edx C initial carry (if can't skip a div) + movl PARAM_SIZE, %ecx + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %esi, SAVE_ESI + movl PARAM_SRC, %esi + + movl %ebp, SAVE_EBP + movl PARAM_DIVISOR, %ebp + + orl %ecx, %ecx + jz L(divide_done) + + movl -4(%esi,%ecx,4), %eax C src high limb + + cmpl %ebp, %eax C carry flag if high<divisor + + cmovc( %eax, %edx) C src high limb as initial carry + sbbl $0, %ecx C size-1 to skip one div + jz 
L(divide_done) + + + ALIGN(16) +L(start_1c): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + cmpl $MUL_THRESHOLD, %ecx + jae L(mul_by_inverse) + + + orl %ecx, %ecx + jz L(divide_done) + + +L(divide_top): + C eax scratch (quotient) + C ebx + C ecx counter, limbs, decrementing + C edx scratch (remainder) + C esi src + C edi + C ebp + + movl -4(%esi,%ecx,4), %eax + + divl %ebp + + decl %ecx + jnz L(divide_top) + + +L(divide_done): + movl SAVE_ESI, %esi + movl %edx, %eax + + movl SAVE_EBP, %ebp + addl $STACK_SPACE, %esp + + ret + + + +C ----------------------------------------------------------------------------- + +L(mul_by_inverse): + C eax + C ebx + C ecx size + C edx carry + C esi src + C edi + C ebp divisor + + movl %ebx, SAVE_EBX + leal -4(%esi), %ebx + + movl %ebx, VAR_SRC_STOP + movl %ecx, %ebx C size + + movl %edi, SAVE_EDI + movl %edx, %edi C carry + + bsrl %ebp, %ecx C 31-l + movl $-1, %edx + + leal 1(%ecx), %eax C 32-l + xorl $31, %ecx C l + + movl %ecx, VAR_NORM + shll %cl, %ebp C d normalized + + movd %eax, %mm7 + movl $-1, %eax + subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1 + + divl %ebp C floor (b*(b-d)-1) / d + + C + + movl %eax, VAR_INVERSE + leal -12(%esi,%ebx,4), %eax C &src[size-3] + + movl 8(%eax), %esi C src high limb + movl 4(%eax), %edx C src second highest limb + + shldl( %cl, %esi, %edi) C n2 = carry,high << l + + shldl( %cl, %edx, %esi) C n10 = high,second << l + + movl %eax, %ecx C &src[size-3] + + +ifelse(MUL_THRESHOLD,2,` + cmpl $2, %ebx + je L(inverse_two_left) +') + + +C The dependent chain here is the same as in mpn_divrem_1, but a few +C instructions are saved by not needing to store the quotient limbs. This +C gets it down to 24 c/l, which is still a bit away from a theoretical 19 +C c/l. 
+ + ALIGN(16) +L(inverse_top): + C eax scratch + C ebx scratch (nadj, q1) + C ecx src pointer, decrementing + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src qword) + C mm7 rshift for normalization + + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movq (%ecx), %mm0 C next src limb and the one below it + subl $4, %ecx + + C + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + movl %ebp, %eax C d + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + jz L(q1_ff) + + mull %ebx C (q1+1)*d + + psrlq %mm7, %mm0 + movl VAR_SRC_STOP, %ebx + + C + + C + + C + + subl %eax, %esi + + sbbl %edx, %edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + movd %mm0, %esi + cmpl %ebx, %ecx + + jne L(inverse_top) + + +L(inverse_loop_done): + + +C ----------------------------------------------------------------------------- + +L(inverse_two_left): + C eax scratch + C ebx scratch (nadj, q1) + C ecx &src[-1] + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 scratch (src dword) + C mm7 rshift + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movd 4(%ecx), %mm0 C src low limb + + C + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + movl %ebp, %eax C d + + mull %ebx C (q1+1)*d + + psllq $32, %mm0 + + psrlq %mm7, %mm0 + + C + + C + + subl %eax, %esi + + sbbl %edx, 
%edi C n - (q1+1)*d + movl %esi, %edi C remainder -> n2 + leal (%ebp,%esi), %edx + + cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1 + movd %mm0, %esi + + +C One limb left + + C eax scratch + C ebx scratch (nadj, q1) + C ecx + C edx scratch + C esi n10 + C edi n2 + C ebp divisor + C + C mm0 src limb, shifted + C mm7 rshift + + movl %esi, %eax + movl %ebp, %ebx + + sarl $31, %eax C -n1 + + andl %eax, %ebx C -n1 & d + negl %eax C n1 + + addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow + addl %edi, %eax C n2+n1 + + mull VAR_INVERSE C m*(n2+n1) + + movl VAR_NORM, %ecx C for final denorm + + C + + C + + C + + addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag + leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)) + + adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1 + + sbbl $0, %ebx + movl %ebp, %eax C d + + mull %ebx C (q1+1)*d + + movl SAVE_EBX, %ebx + + C + + C + + C + + subl %eax, %esi + + sbbl %edx, %edi C n - (q1+1)*d + leal (%ebp,%esi), %edx + movl SAVE_EBP, %ebp + + movl %esi, %eax C remainder + movl SAVE_ESI, %esi + + cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1 + movl SAVE_EDI, %edi + + shrl %cl, %eax C denorm remainder + addl $STACK_SPACE, %esp + emms + + ret + + +C ----------------------------------------------------------------------------- +C +C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword +C of q*d is simply -d and the remainder n-q*d = n10+d + +L(q1_ff): + C eax (divisor) + C ebx (q1+1 == 0) + C ecx src pointer + C edx + C esi n10 + C edi (n2) + C ebp divisor + + leal (%ebp,%esi), %edi C n-q*d remainder -> next n2 + movl VAR_SRC_STOP, %edx + psrlq %mm7, %mm0 + + movd %mm0, %esi C next n10 + cmpl %ecx, %edx + jne L(inverse_top) + + jmp L(inverse_loop_done) + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/p6/mmx/popham.asm b/rts/gmp/mpn/x86/p6/mmx/popham.asm new file mode 100644 index 0000000000..50f9a11218 --- /dev/null +++ b/rts/gmp/mpn/x86/p6/mmx/popham.asm @@ -0,0 +1,31 @@ +dnl Intel 
Pentium-II mpn_popcount, mpn_hamdist -- population count and +dnl hamming distance. +dnl +dnl P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb +dnl (approx) + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + +MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) +include_mpn(`x86/k6/mmx/popham.asm') diff --git a/rts/gmp/mpn/x86/p6/p3mmx/popham.asm b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm new file mode 100644 index 0000000000..e63fbf334b --- /dev/null +++ b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm @@ -0,0 +1,30 @@ +dnl Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and +dnl hamming distance. + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl Haven't actually measured it, but the K7 code with the psadbw should be +dnl good on P-III. + +include(`../config.m4') + +MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) +include_mpn(`x86/k7/mmx/popham.asm') diff --git a/rts/gmp/mpn/x86/p6/sqr_basecase.asm b/rts/gmp/mpn/x86/p6/sqr_basecase.asm new file mode 100644 index 0000000000..174c78406a --- /dev/null +++ b/rts/gmp/mpn/x86/p6/sqr_basecase.asm @@ -0,0 +1,641 @@ +dnl Intel P6 mpn_sqr_basecase -- square an mpn number. +dnl +dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular +dnl product (measured on the speed difference between 20 and 40 limbs, +dnl which is the Karatsuba recursing range). + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for +dnl a description. The only difference here is that UNROLL_COUNT can go up +dnl to 64 (not 63) making KARATSUBA_SQR_THRESHOLD_MAX 67. + +deflit(KARATSUBA_SQR_THRESHOLD_MAX, 67) + +ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE', +`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)') + +m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD') +deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3)) + + +C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a +C lot of function call overheads are avoided, especially when the given size +C is small. +C +C The code size might look a bit excessive, but not all of it is executed so +C it won't all get into the code cache. The 1x1, 2x2 and 3x3 special cases +C clearly apply only to those sizes; mid sizes like 10x10 only need part of +C the unrolled addmul; and big sizes like 40x40 that do use the full +C unrolling will at least be making good use of it, because 40x40 will take +C something like 7000 cycles. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(32) +PROLOGUE(mpn_sqr_basecase) +deflit(`FRAME',0) + + movl PARAM_SIZE, %edx + + movl PARAM_SRC, %eax + + cmpl $2, %edx + movl PARAM_DST, %ecx + je L(two_limbs) + + movl (%eax), %eax + ja L(three_or_more) + + +C ----------------------------------------------------------------------------- +C one limb only + C eax src limb + C ebx + C ecx dst + C edx + + mull %eax + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + + ret + + +C ----------------------------------------------------------------------------- +L(two_limbs): + C eax src + C ebx + C ecx dst + C edx + +defframe(SAVE_ESI, -4) +defframe(SAVE_EBX, -8) +defframe(SAVE_EDI, -12) +defframe(SAVE_EBP, -16) +deflit(`STACK_SPACE',16) + + subl $STACK_SPACE, %esp +deflit(`FRAME',STACK_SPACE) + + movl %esi, SAVE_ESI + movl %eax, %esi + movl (%eax), %eax + + mull %eax C src[0]^2 + + movl %eax, (%ecx) C dst[0] + movl 4(%esi), %eax + + movl %ebx, SAVE_EBX + movl %edx, %ebx C dst[1] + + mull %eax C src[1]^2 + + movl %edi, SAVE_EDI + movl %eax, %edi C dst[2] + movl (%esi), %eax + + movl %ebp, SAVE_EBP + movl %edx, %ebp C dst[3] + + mull 4(%esi) C src[0]*src[1] + + addl %eax, %ebx + movl SAVE_ESI, %esi + + adcl %edx, %edi + + adcl $0, %ebp + addl %ebx, %eax + movl SAVE_EBX, %ebx + + adcl %edi, %edx + movl SAVE_EDI, %edi + + adcl $0, %ebp + + movl %eax, 4(%ecx) + + movl %ebp, 12(%ecx) + movl SAVE_EBP, %ebp + + movl %edx, 8(%ecx) + addl $FRAME, %esp + + ret + + +C ----------------------------------------------------------------------------- +L(three_or_more): + C eax src low limb + C ebx + C ecx dst + C edx size +deflit(`FRAME',0) + + pushl %esi defframe_pushl(`SAVE_ESI') + cmpl $4, %edx + + movl PARAM_SRC, %esi + jae L(four_or_more) + + +C ----------------------------------------------------------------------------- +C three limbs + + C eax src low limb + C ebx + C ecx dst + C edx + C esi src + C edi + C ebp + + pushl %ebp 
defframe_pushl(`SAVE_EBP') + pushl %edi defframe_pushl(`SAVE_EDI') + + mull %eax C src[0] ^ 2 + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + + movl 4(%esi), %eax + xorl %ebp, %ebp + + mull %eax C src[1] ^ 2 + + movl %eax, 8(%ecx) + movl %edx, 12(%ecx) + movl 8(%esi), %eax + + pushl %ebx defframe_pushl(`SAVE_EBX') + + mull %eax C src[2] ^ 2 + + movl %eax, 16(%ecx) + movl %edx, 20(%ecx) + + movl (%esi), %eax + + mull 4(%esi) C src[0] * src[1] + + movl %eax, %ebx + movl %edx, %edi + + movl (%esi), %eax + + mull 8(%esi) C src[0] * src[2] + + addl %eax, %edi + movl %edx, %ebp + + adcl $0, %ebp + movl 4(%esi), %eax + + mull 8(%esi) C src[1] * src[2] + + xorl %esi, %esi + addl %eax, %ebp + + C eax + C ebx dst[1] + C ecx dst + C edx dst[4] + C esi zero, will be dst[5] + C edi dst[2] + C ebp dst[3] + + adcl $0, %edx + addl %ebx, %ebx + + adcl %edi, %edi + + adcl %ebp, %ebp + + adcl %edx, %edx + movl 4(%ecx), %eax + + adcl $0, %esi + addl %ebx, %eax + + movl %eax, 4(%ecx) + movl 8(%ecx), %eax + + adcl %edi, %eax + movl 12(%ecx), %ebx + + adcl %ebp, %ebx + movl 16(%ecx), %edi + + movl %eax, 8(%ecx) + movl SAVE_EBP, %ebp + + movl %ebx, 12(%ecx) + movl SAVE_EBX, %ebx + + adcl %edx, %edi + movl 20(%ecx), %eax + + movl %edi, 16(%ecx) + movl SAVE_EDI, %edi + + adcl %esi, %eax C no carry out of this + movl SAVE_ESI, %esi + + movl %eax, 20(%ecx) + addl $FRAME, %esp + + ret + + + +C ----------------------------------------------------------------------------- +defframe(VAR_COUNTER,-20) +defframe(VAR_JMP, -24) +deflit(`STACK_SPACE',24) + +L(four_or_more): + C eax src low limb + C ebx + C ecx + C edx size + C esi src + C edi + C ebp +deflit(`FRAME',4) dnl %esi already pushed + +C First multiply src[0]*src[1..size-1] and store at dst[1..size]. 
+ + subl $STACK_SPACE-FRAME, %esp +deflit(`FRAME',STACK_SPACE) + movl $1, %ecx + + movl %edi, SAVE_EDI + movl PARAM_DST, %edi + + movl %ebx, SAVE_EBX + subl %edx, %ecx C -(size-1) + + movl %ebp, SAVE_EBP + movl $0, %ebx C initial carry + + leal (%esi,%edx,4), %esi C &src[size] + movl %eax, %ebp C multiplier + + leal -4(%edi,%edx,4), %edi C &dst[size-1] + + +C This loop runs at just over 6 c/l. + +L(mul_1): + C eax scratch + C ebx carry + C ecx counter, limbs, negative, -(size-1) to -1 + C edx scratch + C esi &src[size] + C edi &dst[size-1] + C ebp multiplier + + movl %ebp, %eax + + mull (%esi,%ecx,4) + + addl %ebx, %eax + movl $0, %ebx + + adcl %edx, %ebx + movl %eax, 4(%edi,%ecx,4) + + incl %ecx + jnz L(mul_1) + + + movl %ebx, 4(%edi) + + +C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2. +C +C The last two addmuls, which are the bottom right corner of the product +C triangle, are left to the end. These are src[size-3]*src[size-2,size-1] +C and src[size-2]*src[size-1]. If size is 4 then it's only these corner +C cases that need to be done. +C +C The unrolled code is the same as mpn_addmul_1(), see that routine for some +C comments. +C +C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive. +C +C VAR_JMP is the computed jump into the unrolled code, stepped by one code +C chunk each outer loop. + +dnl This is also hard-coded in the address calculation below. +deflit(CODE_BYTES_PER_LIMB, 15) + +dnl With &src[size] and &dst[size-1] pointers, the displacements in the +dnl unrolled code fit in a byte for UNROLL_COUNT values up to 32, but above +dnl that an offset must be added to them. 
+deflit(OFFSET, +ifelse(eval(UNROLL_COUNT>32),1, +eval((UNROLL_COUNT-32)*4), +0)) + + C eax + C ebx carry + C ecx + C edx + C esi &src[size] + C edi &dst[size-1] + C ebp + + movl PARAM_SIZE, %ecx + + subl $4, %ecx + jz L(corner) + + movl %ecx, %edx + negl %ecx + + shll $4, %ecx +ifelse(OFFSET,0,,`subl $OFFSET, %esi') + +ifdef(`PIC',` + call L(pic_calc) +L(here): +',` + leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx +') + negl %edx + +ifelse(OFFSET,0,,`subl $OFFSET, %edi') + + C The calculated jump mustn't be before the start of the available + C code. This is the limit that UNROLL_COUNT puts on the src operand + C size, but checked here using the jump address directly. + + ASSERT(ae, + `movl_text_address( L(unroll_inner_start), %eax) + cmpl %eax, %ecx') + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(unroll_outer_top): + C eax + C ebx high limb to store + C ecx VAR_JMP + C edx VAR_COUNTER, limbs, negative + C esi &src[size], constant + C edi dst ptr, second highest limb of last addmul + C ebp + + movl -12+OFFSET(%esi,%edx,4), %ebp C multiplier + movl %edx, VAR_COUNTER + + movl -8+OFFSET(%esi,%edx,4), %eax C first limb of multiplicand + + mull %ebp + +define(cmovX,`ifelse(eval(UNROLL_COUNT%2),1,`cmovz($@)',`cmovnz($@)')') + + testb $1, %cl + + movl %edx, %ebx C high carry + leal 4(%edi), %edi + + movl %ecx, %edx C jump + + movl %eax, %ecx C low carry + leal CODE_BYTES_PER_LIMB(%edx), %edx + + cmovX( %ebx, %ecx) C high carry reverse + cmovX( %eax, %ebx) C low carry reverse + movl %edx, VAR_JMP + jmp *%edx + + + C Must be on an even address here so the low bit of the jump address + C will indicate which way around ecx/ebx should start. 
+ + ALIGN(2) + +L(unroll_inner_start): + C eax scratch + C ebx carry high + C ecx carry low + C edx scratch + C esi src pointer + C edi dst pointer + C ebp multiplier + C + C 15 code bytes each limb + C ecx/ebx reversed on each chunk + +forloop(`i', UNROLL_COUNT, 1, ` + deflit(`disp_src', eval(-i*4 + OFFSET)) + deflit(`disp_dst', eval(disp_src)) + + m4_assert(`disp_src>=-128 && disp_src<128') + m4_assert(`disp_dst>=-128 && disp_dst<128') + +ifelse(eval(i%2),0,` +Zdisp( movl, disp_src,(%esi), %eax) + mull %ebp +Zdisp( addl, %ebx, disp_dst,(%edi)) + adcl %eax, %ecx + movl %edx, %ebx + adcl $0, %ebx +',` + dnl this one comes out last +Zdisp( movl, disp_src,(%esi), %eax) + mull %ebp +Zdisp( addl, %ecx, disp_dst,(%edi)) + adcl %eax, %ebx + movl %edx, %ecx + adcl $0, %ecx +') +') +L(unroll_inner_end): + + addl %ebx, m4_empty_if_zero(OFFSET)(%edi) + + movl VAR_COUNTER, %edx + adcl $0, %ecx + + movl %ecx, m4_empty_if_zero(OFFSET+4)(%edi) + movl VAR_JMP, %ecx + + incl %edx + jnz L(unroll_outer_top) + + +ifelse(OFFSET,0,,` + addl $OFFSET, %esi + addl $OFFSET, %edi +') + + +C ----------------------------------------------------------------------------- + ALIGN(16) +L(corner): + C eax + C ebx + C ecx + C edx + C esi &src[size] + C edi &dst[2*size-5] + C ebp + + movl -12(%esi), %eax + + mull -8(%esi) + + addl %eax, (%edi) + movl -12(%esi), %eax + movl $0, %ebx + + adcl %edx, %ebx + + mull -4(%esi) + + addl %eax, %ebx + movl -8(%esi), %eax + + adcl $0, %edx + + addl %ebx, 4(%edi) + movl $0, %ebx + + adcl %edx, %ebx + + mull -4(%esi) + + movl PARAM_SIZE, %ecx + addl %ebx, %eax + + adcl $0, %edx + + movl %eax, 8(%edi) + + movl %edx, 12(%edi) + movl PARAM_DST, %edi + + +C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1]. 
+ + subl $1, %ecx C size-1 + xorl %eax, %eax C ready for final adcl, and clear carry + + movl %ecx, %edx + movl PARAM_SRC, %esi + + +L(lshift): + C eax + C ebx + C ecx counter, size-1 to 1 + C edx size-1 (for later use) + C esi src (for later use) + C edi dst, incrementing + C ebp + + rcll 4(%edi) + rcll 8(%edi) + + leal 8(%edi), %edi + decl %ecx + jnz L(lshift) + + + adcl %eax, %eax + + movl %eax, 4(%edi) C dst most significant limb + movl (%esi), %eax C src[0] + + leal 4(%esi,%edx,4), %esi C &src[size] + subl %edx, %ecx C -(size-1) + + +C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ..., +C src[size-1]^2. dst[0] hasn't been set at all yet, and just gets the +C low limb of src[0]^2. + + + mull %eax + + movl %eax, (%edi,%ecx,8) C dst[0] + + +L(diag): + C eax scratch + C ebx scratch + C ecx counter, negative + C edx carry + C esi &src[size] + C edi dst[2*size-2] + C ebp + + movl (%esi,%ecx,4), %eax + movl %edx, %ebx + + mull %eax + + addl %ebx, 4(%edi,%ecx,8) + adcl %eax, 8(%edi,%ecx,8) + adcl $0, %edx + + incl %ecx + jnz L(diag) + + + movl SAVE_ESI, %esi + movl SAVE_EBX, %ebx + + addl %edx, 4(%edi) C dst most significant limb + + movl SAVE_EDI, %edi + movl SAVE_EBP, %ebp + addl $FRAME, %esp + ret + + + +C ----------------------------------------------------------------------------- +ifdef(`PIC',` +L(pic_calc): + addl (%esp), %ecx + addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx + addl %edx, %ecx + ret +') + + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/README b/rts/gmp/mpn/x86/pentium/README new file mode 100644 index 0000000000..3b9ec8ac6f --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/README @@ -0,0 +1,77 @@ + + INTEL PENTIUM P5 MPN SUBROUTINES + + +This directory contains mpn functions optimized for Intel Pentium (P5,P54) +processors. The mmx subdirectory has code for Pentium with MMX (P55). 
+ + +STATUS + + cycles/limb + + mpn_add_n/sub_n 2.375 + + mpn_copyi/copyd 1.0 + + mpn_divrem_1 44.0 + mpn_mod_1 44.0 + mpn_divexact_by3 15.0 + + mpn_l/rshift 5.375 normal (6.0 on P54) + 1.875 special shift by 1 bit + + mpn_mul_1 13.0 + mpn_add/submul_1 14.0 + + mpn_mul_basecase 14.2 cycles/crossproduct (approx) + + mpn_sqr_basecase 8 cycles/crossproduct (approx) + or 15.5 cycles/triangleproduct (approx) + +Pentium MMX gets the following improvements + + mpn_l/rshift 1.75 + + +1. mpn_lshift and mpn_rshift run at about 6 cycles/limb on P5 and P54, but the +documentation indicates that they should take only 43/8 = 5.375 cycles/limb, +or 5 cycles/limb asymptotically. The P55 runs them at the expected speed. + +2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop +overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. + +3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they +should. Intel documentation says a mul instruction is 10 cycles, but it +measures 9 and the routines using it run with it as 9. + + + +RELEVANT OPTIMIZATION ISSUES + +1. Pentium doesn't allocate cache lines on writes, unlike most other modern +processors. Since the functions in the mpn class do array writes, we have to +handle allocating the destination cache lines by reading a word from it in the +loops, to achieve the best performance. + +2. Pairing of memory operations requires that the two issued operations refer +to different cache banks. The simplest way to ensure this is to read/write +two words from the same object. If we make operations on different objects, +they might or might not be to the same cache bank. + + + +REFERENCES + +"Intel Architecture Optimization Manual", 1997, order number 242816. This +is mostly about P5, the parts about P6 aren't relevant. 
Available on-line: + + http://download.intel.com/design/PentiumII/manuals/242816.htm + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/rts/gmp/mpn/x86/pentium/aors_n.asm b/rts/gmp/mpn/x86/pentium/aors_n.asm new file mode 100644 index 0000000000..a61082a456 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/aors_n.asm @@ -0,0 +1,196 @@ +dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction. +dnl +dnl P5: 2.375 cycles/limb + + +dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. 
+ + +include(`../config.m4') + + +ifdef(`OPERATION_add_n',` + define(M4_inst, adcl) + define(M4_function_n, mpn_add_n) + define(M4_function_nc, mpn_add_nc) + +',`ifdef(`OPERATION_sub_n',` + define(M4_inst, sbbl) + define(M4_function_n, mpn_sub_n) + define(M4_function_nc, mpn_sub_nc) + +',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n +')')') + +MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc) + + +C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size, mp_limb_t carry); + +defframe(PARAM_CARRY,20) +defframe(PARAM_SIZE, 16) +defframe(PARAM_SRC2, 12) +defframe(PARAM_SRC1, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(M4_function_nc) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC1,%esi + movl PARAM_SRC2,%ebp + movl PARAM_SIZE,%ecx + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx C zero carry flag + jz L(endgo) + + pushl %edx +FRAME_pushl() + movl PARAM_CARRY,%eax + shrl $1,%eax C shift bit 0 into carry + jmp LF(M4_function_n,oop) + +L(endgo): +deflit(`FRAME',16) + movl PARAM_CARRY,%eax + shrl $1,%eax C shift bit 0 into carry + jmp LF(M4_function_n,end) + +EPILOGUE() + + + ALIGN(8) +PROLOGUE(M4_function_n) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC1,%esi + movl PARAM_SRC2,%ebp + movl PARAM_SIZE,%ecx + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx C zero carry flag + jz L(end) + pushl %edx +FRAME_pushl() + + ALIGN(8) +L(oop): movl 28(%edi),%eax C fetch destination cache line + leal 32(%edi),%edi + +L(1): movl (%esi),%eax + movl 4(%esi),%edx + M4_inst %ebx,%eax + movl 4(%ebp),%ebx + M4_inst %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L(2): movl 
8(%esi),%eax + movl 12(%esi),%edx + M4_inst %ebx,%eax + movl 12(%ebp),%ebx + M4_inst %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L(3): movl 16(%esi),%eax + movl 20(%esi),%edx + M4_inst %ebx,%eax + movl 20(%ebp),%ebx + M4_inst %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L(4): movl 24(%esi),%eax + movl 28(%esi),%edx + M4_inst %ebx,%eax + movl 28(%ebp),%ebx + M4_inst %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz L(oop) + + popl %edx +FRAME_popl() +L(end): + decl %edx C test %edx w/o clobbering carry + js L(end2) + incl %edx +L(oop2): + leal 4(%edi),%edi + movl (%esi),%eax + M4_inst %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz L(oop2) +L(end2): + movl (%esi),%eax + M4_inst %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/aorsmul_1.asm b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm new file mode 100644 index 0000000000..147b55610f --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm @@ -0,0 +1,99 @@ +dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication. +dnl +dnl P5: 14.0 cycles/limb + + +dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +ifdef(`OPERATION_addmul_1', ` + define(M4_inst, addl) + define(M4_function_1, mpn_addmul_1) + +',`ifdef(`OPERATION_submul_1', ` + define(M4_inst, subl) + define(M4_function_1, mpn_submul_1) + +',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 +')')') + +MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) + + +C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t mult); + +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) + +PROLOGUE(M4_function_1) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST, %edi + movl PARAM_SRC, %esi + movl PARAM_SIZE, %ecx + movl PARAM_MULTIPLIER, %ebp + + leal (%edi,%ecx,4), %edi + leal (%esi,%ecx,4), %esi + negl %ecx + xorl %ebx, %ebx + ALIGN(8) + +L(oop): adcl $0, %ebx + movl (%esi,%ecx,4), %eax + + mull %ebp + + addl %ebx, %eax + movl (%edi,%ecx,4), %ebx + + adcl $0, %edx + M4_inst %eax, %ebx + + movl %ebx, (%edi,%ecx,4) + incl %ecx + + movl %edx, %ebx + jnz L(oop) + + adcl $0, %ebx + movl %ebx, %eax + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/diveby3.asm b/rts/gmp/mpn/x86/pentium/diveby3.asm new file mode 100644 index 0000000000..dbac81642f --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/diveby3.asm @@ -0,0 +1,183 @@ +dnl Intel P5 mpn_divexact_by3 -- mpn division by 3, expecting no remainder. +dnl +dnl P5: 15.0 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t carry); + +defframe(PARAM_CARRY,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + +dnl multiplicative inverse of 3, modulo 2^32 +deflit(INVERSE_3, 0xAAAAAAAB) + +dnl ceil(b/3), ceil(b*2/3) and floor(b*2/3) where b=2^32 +deflit(ONE_THIRD_CEIL, 0x55555556) +deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB) +deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA) + + .text + ALIGN(8) + +PROLOGUE(mpn_divexact_by3c) +deflit(`FRAME',0) + + movl PARAM_SRC, %ecx + movl PARAM_SIZE, %edx + + decl %edx + jnz L(two_or_more) + + movl (%ecx), %edx + movl PARAM_CARRY, %eax C risk of cache bank clash here + + movl PARAM_DST, %ecx + subl %eax, %edx + + sbbl %eax, %eax C 0 or -1 + + imull $INVERSE_3, %edx, %edx + + negl %eax C 0 or 1 + cmpl $ONE_THIRD_CEIL, %edx + + sbbl $-1, %eax C +1 if edx>=ceil(b/3) + cmpl $TWO_THIRDS_CEIL, %edx + + sbbl $-1, %eax C +1 if edx>=ceil(b*2/3) + movl %edx, (%ecx) + + ret + + +L(two_or_more): + C eax + C ebx + C ecx src + C edx size-1 + C esi + C edi + C ebp + + pushl %ebx FRAME_pushl() + pushl %esi FRAME_pushl() + + pushl %edi 
FRAME_pushl() + pushl %ebp FRAME_pushl() + + movl PARAM_DST, %edi + movl PARAM_CARRY, %esi + + movl (%ecx), %eax C src low limb + xorl %ebx, %ebx + + sub %esi, %eax + movl $TWO_THIRDS_FLOOR, %esi + + leal (%ecx,%edx,4), %ecx C &src[size-1] + leal (%edi,%edx,4), %edi C &dst[size-1] + + adcl $0, %ebx C carry, 0 or 1 + negl %edx C -(size-1) + + +C The loop needs a source limb ready at the top, which leads to one limb +C handled separately at the end, and the special case above for size==1. +C There doesn't seem to be any scheduling that would keep the speed but move +C the source load and carry subtract up to the top. +C +C The destination cache line prefetching adds 1 cycle to the loop but is +C considered worthwhile. The slowdown is a factor of 1.07, but will prevent +C repeated write-throughs if the destination isn't in L1. A version using +C an outer loop to prefetch only every 8 limbs (a cache line) proved to be +C no faster, due to unavoidable branch mispredictions in the inner loop. +C +C setc is 2 cycles on P54, so an adcl is used instead. If the movl $0,%ebx +C could be avoided then the src limb fetch could pair up and save a cycle. +C This would probably mean going to a two limb loop with the carry limb +C alternately positive or negative, since an sbbl %ebx,%ebx will leave a +C value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax. +C +C A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as +C "cmpl %edx, $n" with the immediate as the second operand. +C +C The "4" source displacement is in the loop rather than the setup because +C this gets L(top) aligned to 8 bytes at no cost. 
+ + ALIGN(8) +L(top): + C eax source limb, carry subtracted + C ebx carry (0 or 1) + C ecx &src[size-1] + C edx counter, limbs, negative + C esi TWO_THIRDS_FLOOR + C edi &dst[size-1] + C ebp scratch (result limb) + + imull $INVERSE_3, %eax, %ebp + + cmpl $ONE_THIRD_CEIL, %ebp + movl (%edi,%edx,4), %eax C dst cache line prefetch + + sbbl $-1, %ebx C +1 if ebp>=ceil(b/3) + cmpl %ebp, %esi + + movl 4(%ecx,%edx,4), %eax C next src limb + + sbbl %ebx, %eax C and further -1 if ebp>=ceil(b*2/3) + movl $0, %ebx + + adcl $0, %ebx C new carry + movl %ebp, (%edi,%edx,4) + + incl %edx + jnz L(top) + + + + imull $INVERSE_3, %eax, %edx + + cmpl $ONE_THIRD_CEIL, %edx + movl %edx, (%edi) + + sbbl $-1, %ebx C +1 if edx>=ceil(b/3) + cmpl $TWO_THIRDS_CEIL, %edx + + sbbl $-1, %ebx C +1 if edx>=ceil(b*2/3) + popl %ebp + + movl %ebx, %eax + popl %edi + + popl %esi + popl %ebx + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/gmp-mparam.h new file mode 100644 index 0000000000..d3ed3d73ce --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/gmp-mparam.h @@ -0,0 +1,97 @@ +/* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +#ifndef UMUL_TIME +#define UMUL_TIME 9 /* cycles */ +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 41 /* cycles */ +#endif + +/* bsf takes 18-42 cycles, put an average for uniform random numbers */ +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */ +#endif + + +/* Generated by tuneup.c, 2000-07-06. */ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 14 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 179 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 22 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 153 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 46 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 110 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 13 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 4 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 25 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 496, 928, 1920, 4608, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 512 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 3840 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 496, 1184, 1920, 5632, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 512 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 3840 +#endif diff --git a/rts/gmp/mpn/x86/pentium/lshift.asm b/rts/gmp/mpn/x86/pentium/lshift.asm new file mode 100644 index 0000000000..e1e35d4c57 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/lshift.asm @@ -0,0 +1,236 @@ +dnl Intel Pentium mpn_lshift -- mpn left shift. 
+dnl +dnl cycles/limb +dnl P5,P54: 6.0 +dnl P55: 5.375 + + +dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does, +C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere. + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_lshift) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%ebp + movl PARAM_SHIFT,%ecx + +C We can use faster code for shift-by-1 under certain conditions. 
+ cmp $1,%ecx + jne L(normal) + leal 4(%esi),%eax + cmpl %edi,%eax + jnc L(special) C jump if s_ptr + 1 >= res_ptr + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc L(special) C jump if res_ptr >= s_ptr + size + +L(normal): + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl( %cl, %edx, %eax) C compute carry limb + pushl %eax C push carry limb onto stack + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz L(end) + + movl (%edi),%eax C fetch destination cache line + + ALIGN(4) +L(oop): movl -28(%edi),%eax C fetch destination cache line + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl( %cl, %eax, %ebx) + shldl( %cl, %edx, %eax) + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl( %cl, %ebx, %edx) + shldl( %cl, %eax, %ebx) + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl( %cl, %edx, %eax) + shldl( %cl, %ebx, %edx) + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl( %cl, %eax, %ebx) + shldl( %cl, %edx, %eax) + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz L(oop) + +L(end): popl %ebp + andl $7,%ebp + jz L(end2) +L(oop2): + movl (%esi),%eax + shldl( %cl,%eax,%edx) + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz L(oop2) + +L(end2): + shll %cl,%edx C compute least significant limb + movl %edx,(%edi) C store it + + popl %eax C pop carry limb + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + +C We loop from least significant end of the arrays, which is only +C permissible if the source and destination don't overlap, since the +C function is documented to work for overlapping source and destination.
+ +L(special): + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + addl %edx,%edx + incl %ebp + decl %ebp + jz L(Lend) + + movl (%edi),%eax C fetch destination cache line + + ALIGN(4) +L(Loop): + movl 28(%edi),%eax C fetch destination cache line + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi C use leal not to clobber carry + leal 32(%edi),%edi + decl %ebp + jnz L(Loop) + +L(Lend): + popl %ebp + sbbl %eax,%eax C save carry in %eax + andl $7,%ebp + jz L(Lend2) + addl %eax,%eax C restore carry from eax +L(Loop2): + movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi C use leal not to clobber carry + leal 4(%edi),%edi + decl %ebp + jnz L(Loop2) + + jmp L(L1) +L(Lend2): + addl %eax,%eax C restore carry from eax +L(L1): movl %edx,(%edi) C store last limb + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h new file mode 100644 index 0000000000..2379077d0c --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h @@ -0,0 +1,97 @@ +/* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + + +#define BITS_PER_MP_LIMB 32 +#define BYTES_PER_MP_LIMB 4 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 32 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 + + +#ifndef UMUL_TIME +#define UMUL_TIME 9 /* cycles */ +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 41 /* cycles */ +#endif + +/* bsf takes 18-42 cycles, put an average for uniform random numbers */ +#ifndef COUNT_TRAILING_ZEROS_TIME +#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */ +#endif + + +/* Generated by tuneup.c, 2000-07-06. 
*/ + +#ifndef KARATSUBA_MUL_THRESHOLD +#define KARATSUBA_MUL_THRESHOLD 14 +#endif +#ifndef TOOM3_MUL_THRESHOLD +#define TOOM3_MUL_THRESHOLD 99 +#endif + +#ifndef KARATSUBA_SQR_THRESHOLD +#define KARATSUBA_SQR_THRESHOLD 22 +#endif +#ifndef TOOM3_SQR_THRESHOLD +#define TOOM3_SQR_THRESHOLD 89 +#endif + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD 40 +#endif + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 98 +#endif + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD 13 +#endif + +#ifndef GCD_ACCEL_THRESHOLD +#define GCD_ACCEL_THRESHOLD 5 +#endif +#ifndef GCDEXT_THRESHOLD +#define GCDEXT_THRESHOLD 25 +#endif + +#ifndef FFT_MUL_TABLE +#define FFT_MUL_TABLE { 496, 1056, 1920, 4608, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_MUL_THRESHOLD +#define FFT_MODF_MUL_THRESHOLD 512 +#endif +#ifndef FFT_MUL_THRESHOLD +#define FFT_MUL_THRESHOLD 3840 +#endif + +#ifndef FFT_SQR_TABLE +#define FFT_SQR_TABLE { 496, 1184, 2176, 5632, 14336, 40960, 0 } +#endif +#ifndef FFT_MODF_SQR_THRESHOLD +#define FFT_MODF_SQR_THRESHOLD 512 +#endif +#ifndef FFT_SQR_THRESHOLD +#define FFT_SQR_THRESHOLD 4352 +#endif diff --git a/rts/gmp/mpn/x86/pentium/mmx/lshift.asm b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm new file mode 100644 index 0000000000..2225438658 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm @@ -0,0 +1,455 @@ +dnl Intel P5 mpn_lshift -- mpn left shift. +dnl +dnl P5: 1.75 cycles/limb. + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C Shift src,size left by shift many bits and store the result in dst,size. +C Zeros are shifted in at the right. Return the bits shifted out at the +C left. +C +C The comments in mpn_rshift apply here too. + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +dnl minimum 5, because the unrolled loop can't handle less +deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(8) + +PROLOGUE(mpn_lshift) + + pushl %ebx + pushl %edi +deflit(`FRAME',8) + + movl PARAM_SIZE, %eax + movl PARAM_DST, %edx + + movl PARAM_SRC, %ebx + movl PARAM_SHIFT, %ecx + + cmp $UNROLL_THRESHOLD, %eax + jae L(unroll) + + movl -4(%ebx,%eax,4), %edi C src high limb + decl %eax + + jnz L(simple) + + shldl( %cl, %edi, %eax) C eax was decremented to zero + + shll %cl, %edi + + movl %edi, (%edx) C dst low limb + popl %edi C risk of data cache bank clash + + popl %ebx + + ret + + +C ----------------------------------------------------------------------------- +L(simple): + C eax size-1 + C ebx src + C ecx shift + C edx dst + C esi + C edi + C ebp +deflit(`FRAME',8) + + movd (%ebx,%eax,4), %mm5 C src high limb + + movd %ecx, %mm6 C lshift + negl %ecx + + psllq %mm6, %mm5 + addl $32, %ecx + + movd %ecx, %mm7 + psrlq $32, %mm5 C retval + + +L(simple_top): + C eax counter, limbs, positive, counting down from size-1 + C ebx src + C ecx + C edx dst + C esi + C edi + C + C mm0 scratch + C mm5 return value + C mm6 shift + C mm7 32-shift + + movq -4(%ebx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + C + + movd %mm0,
4(%edx,%eax,4) + jnz L(simple_top) + + + movd (%ebx), %mm0 + + movd %mm5, %eax + psllq %mm6, %mm0 + + popl %edi + popl %ebx + + movd %mm0, (%edx) + + emms + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(unroll): + C eax size + C ebx src + C ecx shift + C edx dst + C esi + C edi + C ebp +deflit(`FRAME',8) + + movd -4(%ebx,%eax,4), %mm5 C src high limb + leal (%ebx,%eax,4), %edi + + movd %ecx, %mm6 C lshift + andl $4, %edi + + psllq %mm6, %mm5 + jz L(start_src_aligned) + + + C src isn't aligned, process high limb separately (marked xxx) to + C make it so. + C + C source -8(ebx,%eax,4) + C | + C +-------+-------+-------+-- + C | | + C +-------+-------+-------+-- + C 0mod8 4mod8 0mod8 + C + C dest + C -4(edx,%eax,4) + C | + C +-------+-------+-- + C | xxx | | + C +-------+-------+-- + + movq -8(%ebx,%eax,4), %mm0 C unaligned load + + psllq %mm6, %mm0 + decl %eax + + psrlq $32, %mm0 + + C + + movd %mm0, (%edx,%eax,4) +L(start_src_aligned): + + movq -8(%ebx,%eax,4), %mm1 C src high qword + leal (%edx,%eax,4), %edi + + andl $4, %edi + psrlq $32, %mm5 C return value + + movq -16(%ebx,%eax,4), %mm3 C src second highest qword + jz L(start_dst_aligned) + + C dst isn't aligned, subtract 4 to make it so, and pretend the shift + C is 32 bits extra. High limb of dst (marked xxx) handled here + C separately. 
+ C + C source -8(ebx,%eax,4) + C | + C +-------+-------+-- + C | mm1 | + C +-------+-------+-- + C 0mod8 4mod8 + C + C dest + C -4(edx,%eax,4) + C | + C +-------+-------+-------+-- + C | xxx | | + C +-------+-------+-------+-- + C 0mod8 4mod8 0mod8 + + movq %mm1, %mm0 + addl $32, %ecx C new shift + + psllq %mm6, %mm0 + + movd %ecx, %mm6 + psrlq $32, %mm0 + + C wasted cycle here waiting for %mm0 + + movd %mm0, -4(%edx,%eax,4) + subl $4, %edx +L(start_dst_aligned): + + + psllq %mm6, %mm1 + negl %ecx C -shift + + addl $64, %ecx C 64-shift + movq %mm3, %mm2 + + movd %ecx, %mm7 + subl $8, %eax C size-8 + + psrlq %mm7, %mm3 + + por %mm1, %mm3 C mm3 ready to store + jc L(finish) + + + C The comments in mpn_rshift apply here too. + + ALIGN(8) +L(unroll_loop): + C eax counter, limbs + C ebx src + C ecx + C edx dst + C esi + C edi + C + C mm0 + C mm1 + C mm2 src qword from 48(%ebx,%eax,4) + C mm3 dst qword ready to store to 56(%edx,%eax,4) + C + C mm5 return value + C mm6 lshift + C mm7 rshift + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) C prev + por %mm2, %mm0 + + movq (%ebx,%eax,4), %mm3 C + psllq %mm6, %mm1 C + + movq %mm0, 16(%edx,%eax,4) + movq %mm3, %mm2 C + + psrlq %mm7, %mm3 C + subl $4, %eax + + por %mm1, %mm3 C + jnc L(unroll_loop) + + + +L(finish): + C eax -4 to -1 representing respectively 0 to 3 limbs remaining + + testb $2, %al + + jz L(finish_no_two) + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) C prev + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + subl $2, %eax +L(finish_no_two): + + + C eax -4 or -3 representing respectively 0 or 1 limbs remaining + C + C mm2 src prev qword, from 48(%ebx,%eax,4) + C mm3 dst qword, for 56(%edx,%eax,4) + + testb $1, %al + movd %mm5, %eax C retval + + popl %edi + jz L(finish_zero) + + + C One extra src limb, destination was aligned. 
+ C + C source ebx + C --+---------------+-------+ + C | mm2 | | + C --+---------------+-------+ + C + C dest edx+12 edx+4 edx + C --+---------------+---------------+-------+ + C | mm3 | | | + C --+---------------+---------------+-------+ + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C One extra src limb, destination was unaligned. + C + C source ebx + C --+---------------+-------+ + C | mm2 | | + C --+---------------+-------+ + C + C dest edx+12 edx+4 + C --+---------------+---------------+ + C | mm3 | | + C --+---------------+---------------+ + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C In both cases there's one extra limb of src to fetch and combine + C with mm2 to make a qword at 4(%edx), and in the aligned case + C there's an extra limb of dst to be formed from that extra src limb + C left shifted. + + + movd (%ebx), %mm0 + psllq %mm6, %mm2 + + movq %mm3, 12(%edx) + psllq $32, %mm0 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm2, %mm0 + psllq %mm6, %mm1 + + movq %mm0, 4(%edx) + psrlq $32, %mm1 + + andl $32, %ecx + popl %ebx + + jz L(finish_one_unaligned) + + movd %mm1, (%edx) +L(finish_one_unaligned): + + emms + + ret + + +L(finish_zero): + + C No extra src limbs, destination was aligned. + C + C source ebx + C --+---------------+ + C | mm2 | + C --+---------------+ + C + C dest edx+8 edx + C --+---------------+---------------+ + C | mm3 | | + C --+---------------+---------------+ + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C No extra src limbs, destination was unaligned. + C + C source ebx + C --+---------------+ + C | mm2 | + C --+---------------+ + C + C dest edx+8 edx+4 + C --+---------------+-------+ + C | mm3 | | + C --+---------------+-------+ + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C The movd for the unaligned case writes the same data to 4(%edx) + C that the movq does for the aligned case. 
+ + + movq %mm3, 8(%edx) + andl $32, %ecx + + psllq %mm6, %mm2 + jz L(finish_zero_unaligned) + + movq %mm2, (%edx) +L(finish_zero_unaligned): + + psrlq $32, %mm2 + popl %ebx + + movd %mm5, %eax C retval + + movd %mm2, 4(%edx) + + emms + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/mmx/popham.asm b/rts/gmp/mpn/x86/pentium/mmx/popham.asm new file mode 100644 index 0000000000..587a07ab3d --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mmx/popham.asm @@ -0,0 +1,30 @@ +dnl Intel P55 mpn_popcount, mpn_hamdist -- population count and hamming +dnl distance. +dnl +dnl P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb + + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + +MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) +include_mpn(`x86/k6/mmx/popham.asm') diff --git a/rts/gmp/mpn/x86/pentium/mmx/rshift.asm b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm new file mode 100644 index 0000000000..7672630d57 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm @@ -0,0 +1,460 @@ +dnl Intel P5 mpn_rshift -- mpn right shift. +dnl +dnl P5: 1.75 cycles/limb. 
+ + +dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C Shift src,size right by shift many bits and store the result in dst,size. +C Zeros are shifted in at the left. Return the bits shifted out at the +C right. +C +C It takes 6 mmx instructions to process 2 limbs, making 1.5 cycles/limb, +C and with a 4 limb loop and 1 cycle of loop overhead the total is 1.75 c/l. +C +C Full speed depends on source and destination being aligned. Unaligned mmx +C loads and stores on P5 don't pair and have a 2 cycle penalty. Some hairy +C setups and finish-ups are done to ensure alignment for the loop. +C +C MMX shifts work out a bit faster even for the simple loop. + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) +deflit(`FRAME',0) + +dnl Minimum 5, because the unrolled loop can't handle less. 
+deflit(UNROLL_THRESHOLD, 5) + + .text + ALIGN(8) + +PROLOGUE(mpn_rshift) + + pushl %ebx + pushl %edi +deflit(`FRAME',8) + + movl PARAM_SIZE, %eax + movl PARAM_DST, %edx + + movl PARAM_SRC, %ebx + movl PARAM_SHIFT, %ecx + + cmp $UNROLL_THRESHOLD, %eax + jae L(unroll) + + decl %eax + movl (%ebx), %edi C src low limb + + jnz L(simple) + + shrdl( %cl, %edi, %eax) C eax was decremented to zero + + shrl %cl, %edi + + movl %edi, (%edx) C dst low limb + popl %edi C risk of data cache bank clash + + popl %ebx + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(simple): + C eax size-1 + C ebx src + C ecx shift + C edx dst + C esi + C edi + C ebp +deflit(`FRAME',8) + + movd (%ebx), %mm5 C src[0] + leal (%ebx,%eax,4), %ebx C &src[size-1] + + movd %ecx, %mm6 C rshift + leal -4(%edx,%eax,4), %edx C &dst[size-2] + + psllq $32, %mm5 + negl %eax + + +C This loop is 5 or 8 cycles, with every second load unaligned and a wasted +C cycle waiting for the mm0 result to be ready. For comparison a shrdl is 4 +C cycles and would be 8 in a simple loop. Using mmx helps the return value +C and last limb calculations too. 
+ +L(simple_top): + C eax counter, limbs, negative + C ebx &src[size-1] + C ecx return value + C edx &dst[size-2] + C + C mm0 scratch + C mm5 return value + C mm6 shift + + movq (%ebx,%eax,4), %mm0 + incl %eax + + psrlq %mm6, %mm0 + + movd %mm0, (%edx,%eax,4) + jnz L(simple_top) + + + movd (%ebx), %mm0 + psrlq %mm6, %mm5 C return value + + psrlq %mm6, %mm0 + popl %edi + + movd %mm5, %eax + popl %ebx + + movd %mm0, 4(%edx) + + emms + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(unroll): + C eax size + C ebx src + C ecx shift + C edx dst + C esi + C edi + C ebp +deflit(`FRAME',8) + + movd (%ebx), %mm5 C src[0] + movl $4, %edi + + movd %ecx, %mm6 C rshift + testl %edi, %ebx + + psllq $32, %mm5 + jz L(start_src_aligned) + + + C src isn't aligned, process low limb separately (marked xxx) and + C step src and dst by one limb, making src aligned. + C + C source ebx + C --+-------+-------+-------+ + C | xxx | + C --+-------+-------+-------+ + C 4mod8 0mod8 4mod8 + C + C dest edx + C --+-------+-------+ + C | | xxx | + C --+-------+-------+ + + movq (%ebx), %mm0 C unaligned load + + psrlq %mm6, %mm0 + addl $4, %ebx + + decl %eax + + movd %mm0, (%edx) + addl $4, %edx +L(start_src_aligned): + + + movq (%ebx), %mm1 + testl %edi, %edx + + psrlq %mm6, %mm5 C retval + jz L(start_dst_aligned) + + C dst isn't aligned, add 4 to make it so, and pretend the shift is + C 32 bits extra. Low limb of dst (marked xxx) handled here + C separately. 
+ C + C source ebx + C --+-------+-------+ + C | mm1 | + C --+-------+-------+ + C 4mod8 0mod8 + C + C dest edx + C --+-------+-------+-------+ + C | xxx | + C --+-------+-------+-------+ + C 4mod8 0mod8 4mod8 + + movq %mm1, %mm0 + addl $32, %ecx C new shift + + psrlq %mm6, %mm0 + + movd %ecx, %mm6 + + movd %mm0, (%edx) + addl $4, %edx +L(start_dst_aligned): + + + movq 8(%ebx), %mm3 + negl %ecx + + movq %mm3, %mm2 C mm2 src qword + addl $64, %ecx + + movd %ecx, %mm7 + psrlq %mm6, %mm1 + + leal -12(%ebx,%eax,4), %ebx + leal -20(%edx,%eax,4), %edx + + psllq %mm7, %mm3 + subl $7, %eax C size-7 + + por %mm1, %mm3 C mm3 ready to store + negl %eax C -(size-7) + + jns L(finish) + + + C This loop is the important bit, the rest is just support. Careful + C instruction scheduling achieves the claimed 1.75 c/l. The + C relevant parts of the pairing rules are: + C + C - mmx loads and stores execute only in the U pipe + C - only one mmx shift in a pair + C - wait one cycle before storing an mmx register result + C - the usual address generation interlock + C + C Two qword calculations are slightly interleaved. The instructions + C marked "C" belong to the second qword, and the "C prev" one is for + C the second qword from the previous iteration. 
+ + ALIGN(8) +L(unroll_loop): + C eax counter, limbs, negative + C ebx &src[size-12] + C ecx + C edx &dst[size-12] + C esi + C edi + C + C mm0 + C mm1 + C mm2 src qword from -8(%ebx,%eax,4) + C mm3 dst qword ready to store to -8(%edx,%eax,4) + C + C mm5 return value + C mm6 rshift + C mm7 lshift + + movq (%ebx,%eax,4), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) C prev + por %mm2, %mm0 + + movq 8(%ebx,%eax,4), %mm3 C + psrlq %mm6, %mm1 C + + movq %mm0, (%edx,%eax,4) + movq %mm3, %mm2 C + + psllq %mm7, %mm3 C + addl $4, %eax + + por %mm1, %mm3 C + js L(unroll_loop) + + +L(finish): + C eax 0 to 3 representing respectively 3 to 0 limbs remaining + + testb $2, %al + + jnz L(finish_no_two) + + movq (%ebx,%eax,4), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) C prev + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + addl $2, %eax +L(finish_no_two): + + + C eax 2 or 3 representing respectively 1 or 0 limbs remaining + C + C mm2 src prev qword, from -8(%ebx,%eax,4) + C mm3 dst qword, for -8(%edx,%eax,4) + + testb $1, %al + popl %edi + + movd %mm5, %eax C retval + jnz L(finish_zero) + + + C One extra limb, destination was aligned. + C + C source ebx + C +-------+---------------+-- + C | | mm2 | + C +-------+---------------+-- + C + C dest edx + C +-------+---------------+---------------+-- + C | | | mm3 | + C +-------+---------------+---------------+-- + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C One extra limb, destination was unaligned. 
+ C + C source ebx + C +-------+---------------+-- + C | | mm2 | + C +-------+---------------+-- + C + C dest edx + C +---------------+---------------+-- + C | | mm3 | + C +---------------+---------------+-- + C + C mm6 = shift+32 + C mm7 = ecx = 64-(shift+32) + + + C In both cases there's one extra limb of src to fetch and combine + C with mm2 to make a qword at 8(%edx), and in the aligned case + C there's a further extra limb of dst to be formed. + + + movd 8(%ebx), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, (%edx) + por %mm2, %mm0 + + psrlq %mm6, %mm1 + andl $32, %ecx + + popl %ebx + jz L(finish_one_unaligned) + + C dst was aligned, must store one extra limb + movd %mm1, 16(%edx) +L(finish_one_unaligned): + + movq %mm0, 8(%edx) + + emms + + ret + + +L(finish_zero): + + C No extra limbs, destination was aligned. + C + C source ebx + C +---------------+-- + C | mm2 | + C +---------------+-- + C + C dest edx+4 + C +---------------+---------------+-- + C | | mm3 | + C +---------------+---------------+-- + C + C mm6 = shift + C mm7 = ecx = 64-shift + + + C No extra limbs, destination was unaligned. + C + C source ebx + C +---------------+-- + C | mm2 | + C +---------------+-- + C + C dest edx+4 + C +-------+---------------+-- + C | | mm3 | + C +-------+---------------+-- + C + C mm6 = shift+32 + C mm7 = 64-(shift+32) + + + C The movd for the unaligned case is clearly the same data as the + C movq for the aligned case, it's just a choice between whether one + C or two limbs should be written. 
+ + + movq %mm3, 4(%edx) + psrlq %mm6, %mm2 + + movd %mm2, 12(%edx) + andl $32, %ecx + + popl %ebx + jz L(finish_zero_unaligned) + + movq %mm2, 12(%edx) +L(finish_zero_unaligned): + + emms + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/mul_1.asm b/rts/gmp/mpn/x86/pentium/mul_1.asm new file mode 100644 index 0000000000..08639eca09 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mul_1.asm @@ -0,0 +1,79 @@ +dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication. +dnl +dnl P5: 13.0 cycles/limb + +dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. 
+ + +include(`../config.m4') + + +C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, +C mp_limb_t multiplier); + +defframe(PARAM_MULTIPLIER,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_mul_1) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST, %edi + movl PARAM_SRC, %esi + movl PARAM_SIZE, %ecx + movl PARAM_MULTIPLIER, %ebp + + leal (%edi,%ecx,4), %edi + leal (%esi,%ecx,4), %esi + negl %ecx + xorl %ebx, %ebx + ALIGN(8) + +L(oop): adcl $0, %ebx + movl (%esi,%ecx,4), %eax + + mull %ebp + + addl %eax, %ebx + + movl %ebx, (%edi,%ecx,4) + incl %ecx + + movl %edx, %ebx + jnz L(oop) + + adcl $0, %ebx + movl %ebx, %eax + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/mul_basecase.asm b/rts/gmp/mpn/x86/pentium/mul_basecase.asm new file mode 100644 index 0000000000..d9f79a0831 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/mul_basecase.asm @@ -0,0 +1,135 @@ +dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication. +dnl +dnl P5: 14.2 cycles/crossproduct (approx) + + +dnl Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB.
If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_mul_basecase (mp_ptr wp, +C mp_srcptr xp, mp_size_t xsize, +C mp_srcptr yp, mp_size_t ysize); + +defframe(PARAM_YSIZE, 20) +defframe(PARAM_YP, 16) +defframe(PARAM_XSIZE, 12) +defframe(PARAM_XP, 8) +defframe(PARAM_WP, 4) + +defframe(VAR_COUNTER, -4) + + .text + ALIGN(8) +PROLOGUE(mpn_mul_basecase) + + pushl %eax C dummy push for allocating stack slot + pushl %esi + pushl %ebp + pushl %edi +deflit(`FRAME',16) + + movl PARAM_XP,%esi + movl PARAM_WP,%edi + movl PARAM_YP,%ebp + + movl (%esi),%eax C load xp[0] + mull (%ebp) C multiply by yp[0] + movl %eax,(%edi) C store to wp[0] + movl PARAM_XSIZE,%ecx C xsize + decl %ecx C If xsize = 1, ysize = 1 too + jz L(done) + + movl PARAM_XSIZE,%eax + pushl %ebx +FRAME_pushl() + movl %edx,%ebx + leal (%esi,%eax,4),%esi C make xp point at end + leal (%edi,%eax,4),%edi C offset wp by xsize + negl %ecx C negate j size/index for inner loop + xorl %eax,%eax C clear carry + + ALIGN(8) +L(oop1): adcl $0,%ebx + movl (%esi,%ecx,4),%eax C load next limb at xp[j] + mull (%ebp) + addl %ebx,%eax + movl %eax,(%edi,%ecx,4) + incl %ecx + movl %edx,%ebx + jnz L(oop1) + + adcl $0,%ebx + movl PARAM_YSIZE,%eax + movl %ebx,(%edi) C most significant limb of product + addl $4,%edi C increment wp + decl %eax + jz L(skip) + movl %eax,VAR_COUNTER C set index i to ysize-1 + +L(outer): + addl $4,%ebp C make ebp point to next y limb + movl PARAM_XSIZE,%ecx + negl %ecx + xorl %ebx,%ebx + + C code at 0x61 here, close enough to aligned +L(oop2): + adcl $0,%ebx + movl (%esi,%ecx,4),%eax + mull (%ebp) + addl %ebx,%eax + movl (%edi,%ecx,4),%ebx + adcl $0,%edx + addl %eax,%ebx + movl %ebx,(%edi,%ecx,4) + incl %ecx + movl %edx,%ebx + jnz L(oop2) + + adcl $0,%ebx + + movl %ebx,(%edi) + addl $4,%edi + movl VAR_COUNTER,%eax + decl %eax + movl %eax,VAR_COUNTER + jnz L(outer) + +L(skip): + popl %ebx + popl %edi
+ popl %ebp + popl %esi + addl $4,%esp + ret + +L(done): + movl %edx,4(%edi) C store to wp[1] + popl %edi + popl %ebp + popl %esi + popl %eax C dummy pop for deallocating stack slot + ret + +EPILOGUE() + diff --git a/rts/gmp/mpn/x86/pentium/rshift.asm b/rts/gmp/mpn/x86/pentium/rshift.asm new file mode 100644 index 0000000000..e8f5ae8ec8 --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/rshift.asm @@ -0,0 +1,236 @@ +dnl Intel Pentium mpn_rshift -- mpn right shift. +dnl +dnl cycles/limb +dnl P5,P54: 6.0 +dnl P55: 5.375 + + +dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software +dnl Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does, +C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere. 
+ +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_rshift) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp +deflit(`FRAME',16) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%ebp + movl PARAM_SHIFT,%ecx + +C We can use faster code for shift-by-1 under certain conditions. + cmp $1,%ecx + jne L(normal) + leal 4(%edi),%eax + cmpl %esi,%eax + jnc L(special) C jump if res_ptr + 1 >= s_ptr + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc L(special) C jump if s_ptr >= res_ptr + size + +L(normal): + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl( %cl, %edx, %eax) C compute carry limb + pushl %eax C push carry limb onto stack + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz L(end) + + movl (%edi),%eax C fetch destination cache line + + ALIGN(4) +L(oop): movl 28(%edi),%eax C fetch destination cache line + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl( %cl, %eax, %ebx) + shrdl( %cl, %edx, %eax) + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl( %cl, %ebx, %edx) + shrdl( %cl, %eax, %ebx) + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl( %cl, %edx, %eax) + shrdl( %cl, %ebx, %edx) + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl( %cl, %eax, %ebx) + shrdl( %cl, %edx, %eax) + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz L(oop) + +L(end): popl %ebp + andl $7,%ebp + jz L(end2) +L(oop2): + movl (%esi),%eax + shrdl( %cl,%eax,%edx) C compute result limb + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz L(oop2) + +L(end2): + shrl %cl,%edx C compute most significant limb + movl %edx,(%edi) C store it + + popl %eax C pop carry limb + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + +C We loop from most significant end of the arrays, which 
is only +C permissible if the source and destination don't overlap, since the +C function is documented to work for overlapping source and destination. + +L(special): + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + shrl %edx + incl %ebp + decl %ebp + jz L(Lend) + + movl (%edi),%eax C fetch destination cache line + + ALIGN(4) +L(Loop): + movl -28(%edi),%eax C fetch destination cache line + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl %eax + movl %ebx,(%edi) + rcrl %edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl %ebx + movl %edx,-8(%edi) + rcrl %eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl %edx + movl %eax,-16(%edi) + rcrl %ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl %eax + movl %ebx,-24(%edi) + rcrl %edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi C use leal not to clobber carry + leal -32(%edi),%edi + decl %ebp + jnz L(Loop) + +L(Lend): + popl %ebp + sbbl %eax,%eax C save carry in %eax + andl $7,%ebp + jz L(Lend2) + addl %eax,%eax C restore carry from eax +L(Loop2): + movl %edx,%ebx + movl (%esi),%edx + rcrl %edx + movl %ebx,(%edi) + + leal -4(%esi),%esi C use leal not to clobber carry + leal -4(%edi),%edi + decl %ebp + jnz L(Loop2) + + jmp L(L1) +L(Lend2): + addl %eax,%eax C restore carry from eax +L(L1): movl %edx,(%edi) C store last limb + + movl $0,%eax + rcrl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/pentium/sqr_basecase.asm b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm new file mode 100644 index 0000000000..c8584df13c --- /dev/null +++ b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm @@ -0,0 +1,520 @@ +dnl Intel P5 mpn_sqr_basecase -- square an mpn number. +dnl +dnl P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular +dnl product at around 20x20 limbs. 
+ + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size); +C +C Calculate src,size squared, storing the result in dst,2*size. +C +C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a +C lot of function call overheads are avoided, especially when the size is +C small. 
+ +defframe(PARAM_SIZE,12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_sqr_basecase) +deflit(`FRAME',0) + + movl PARAM_SIZE, %edx + movl PARAM_SRC, %eax + + cmpl $2, %edx + movl PARAM_DST, %ecx + + je L(two_limbs) + + movl (%eax), %eax + ja L(three_or_more) + +C ----------------------------------------------------------------------------- +C one limb only + C eax src + C ebx + C ecx dst + C edx + + mull %eax + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + + ret + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(two_limbs): + C eax src + C ebx + C ecx dst + C edx size + + pushl %ebp + pushl %edi + + pushl %esi + pushl %ebx + + movl %eax, %ebx + movl (%eax), %eax + + mull %eax C src[0]^2 + + movl %eax, (%ecx) C dst[0] + movl %edx, %esi C dst[1] + + movl 4(%ebx), %eax + + mull %eax C src[1]^2 + + movl %eax, %edi C dst[2] + movl %edx, %ebp C dst[3] + + movl (%ebx), %eax + + mull 4(%ebx) C src[0]*src[1] + + addl %eax, %esi + popl %ebx + + adcl %edx, %edi + + adcl $0, %ebp + addl %esi, %eax + + adcl %edi, %edx + movl %eax, 4(%ecx) + + adcl $0, %ebp + popl %esi + + movl %edx, 8(%ecx) + movl %ebp, 12(%ecx) + + popl %edi + popl %ebp + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(three_or_more): + C eax src low limb + C ebx + C ecx dst + C edx size + + cmpl $4, %edx + pushl %ebx +deflit(`FRAME',4) + + movl PARAM_SRC, %ebx + jae L(four_or_more) + + +C ----------------------------------------------------------------------------- +C three limbs + C eax src low limb + C ebx src + C ecx dst + C edx size + + pushl %ebp + pushl %edi + + mull %eax C src[0] ^ 2 + + movl %eax, (%ecx) + movl %edx, 4(%ecx) + + movl 4(%ebx), %eax + xorl %ebp, %ebp + + mull %eax C src[1] ^ 2 + + movl %eax, 8(%ecx) + movl %edx, 12(%ecx) + + movl 8(%ebx), %eax + pushl %esi C risk of cache bank clash + + mull %eax C src[2] ^ 2 + + movl %eax, 16(%ecx) + movl %edx, 
20(%ecx) + + movl (%ebx), %eax + + mull 4(%ebx) C src[0] * src[1] + + movl %eax, %esi + movl %edx, %edi + + movl (%ebx), %eax + + mull 8(%ebx) C src[0] * src[2] + + addl %eax, %edi + movl %edx, %ebp + + adcl $0, %ebp + movl 4(%ebx), %eax + + mull 8(%ebx) C src[1] * src[2] + + xorl %ebx, %ebx + addl %eax, %ebp + + C eax + C ebx zero, will be dst[5] + C ecx dst + C edx dst[4] + C esi dst[1] + C edi dst[2] + C ebp dst[3] + + adcl $0, %edx + addl %esi, %esi + + adcl %edi, %edi + + adcl %ebp, %ebp + + adcl %edx, %edx + movl 4(%ecx), %eax + + adcl $0, %ebx + addl %esi, %eax + + movl %eax, 4(%ecx) + movl 8(%ecx), %eax + + adcl %edi, %eax + movl 12(%ecx), %esi + + adcl %ebp, %esi + movl 16(%ecx), %edi + + movl %eax, 8(%ecx) + movl %esi, 12(%ecx) + + adcl %edx, %edi + popl %esi + + movl 20(%ecx), %eax + movl %edi, 16(%ecx) + + popl %edi + popl %ebp + + adcl %ebx, %eax C no carry out of this + popl %ebx + + movl %eax, 20(%ecx) + + ret + + +C ----------------------------------------------------------------------------- + ALIGN(8) +L(four_or_more): + C eax src low limb + C ebx src + C ecx dst + C edx size + C esi + C edi + C ebp + C + C First multiply src[0]*src[1..size-1] and store at dst[1..size]. + +deflit(`FRAME',4) + + pushl %edi +FRAME_pushl() + pushl %esi +FRAME_pushl() + + pushl %ebp +FRAME_pushl() + leal (%ecx,%edx,4), %edi C dst end of this mul1 + + leal (%ebx,%edx,4), %esi C src end + movl %ebx, %ebp C src + + negl %edx C -size + xorl %ebx, %ebx C clear carry limb and carry flag + + leal 1(%edx), %ecx C -(size-1) + +L(mul1): + C eax scratch + C ebx carry + C ecx counter, negative + C edx scratch + C esi &src[size] + C edi &dst[size] + C ebp src + + adcl $0, %ebx + movl (%esi,%ecx,4), %eax + + mull (%ebp) + + addl %eax, %ebx + + movl %ebx, (%edi,%ecx,4) + incl %ecx + + movl %edx, %ebx + jnz L(mul1) + + + C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for + C n=1..size-2. 
+ C + C The last two products, which are the end corner of the product + C triangle, are handled separately to save looping overhead. These + C are src[size-3]*src[size-2,size-1] and src[size-2]*src[size-1]. + C If size is 4 then it's only these that need to be done. + C + C In the outer loop %esi is a constant, and %edi just advances by 1 + C limb each time. The size of the operation decreases by 1 limb + C each time. + + C eax + C ebx carry (needing carry flag added) + C ecx + C edx + C esi &src[size] + C edi &dst[size] + C ebp + + adcl $0, %ebx + movl PARAM_SIZE, %edx + + movl %ebx, (%edi) + subl $4, %edx + + negl %edx + jz L(corner) + + +L(outer): + C ebx previous carry limb to store + C edx outer loop counter (negative) + C esi &src[size] + C edi dst, pointing at stored carry limb of previous loop + + pushl %edx C new outer loop counter + leal -2(%edx), %ecx + + movl %ebx, (%edi) + addl $4, %edi + + addl $4, %ebp + xorl %ebx, %ebx C initial carry limb, clear carry flag + +L(inner): + C eax scratch + C ebx carry (needing carry flag added) + C ecx counter, negative + C edx scratch + C esi &src[size] + C edi dst end of this addmul + C ebp &src[j] + + adcl $0, %ebx + movl (%esi,%ecx,4), %eax + + mull (%ebp) + + addl %ebx, %eax + movl (%edi,%ecx,4), %ebx + + adcl $0, %edx + addl %eax, %ebx + + movl %ebx, (%edi,%ecx,4) + incl %ecx + + movl %edx, %ebx + jnz L(inner) + + + adcl $0, %ebx + popl %edx C outer loop counter + + incl %edx + jnz L(outer) + + + movl %ebx, (%edi) + +L(corner): + C esi &src[size] + C edi &dst[2*size-4] + + movl -8(%esi), %eax + movl -4(%edi), %ebx C risk of data cache bank clash here + + mull -12(%esi) C src[size-2]*src[size-3] + + addl %eax, %ebx + movl %edx, %ecx + + adcl $0, %ecx + movl -4(%esi), %eax + + mull -12(%esi) C src[size-1]*src[size-3] + + addl %ecx, %eax + movl (%edi), %ecx + + adcl $0, %edx + movl %ebx, -4(%edi) + + addl %eax, %ecx + movl %edx, %ebx + + adcl $0, %ebx + movl -4(%esi), %eax + + mull -8(%esi) C 
src[size-1]*src[size-2] + + movl %ecx, 0(%edi) + addl %eax, %ebx + + adcl $0, %edx + movl PARAM_SIZE, %eax + + negl %eax + movl %ebx, 4(%edi) + + addl $1, %eax C -(size-1) and clear carry + movl %edx, 8(%edi) + + +C ----------------------------------------------------------------------------- +C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1]. + +L(lshift): + C eax counter, negative + C ebx next limb + C ecx + C edx + C esi + C edi &dst[2*size-4] + C ebp + + movl 12(%edi,%eax,8), %ebx + + rcll %ebx + movl 16(%edi,%eax,8), %ecx + + rcll %ecx + movl %ebx, 12(%edi,%eax,8) + + movl %ecx, 16(%edi,%eax,8) + incl %eax + + jnz L(lshift) + + + adcl %eax, %eax C high bit out + movl PARAM_SRC, %esi + + movl PARAM_SIZE, %ecx C risk of cache bank clash + movl %eax, 12(%edi) C dst most significant limb + + +C ----------------------------------------------------------------------------- +C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ..., +C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the +C low limb of src[0]^2. 
+ + movl (%esi), %eax C src[0] + leal (%esi,%ecx,4), %esi C src end + + negl %ecx + + mull %eax + + movl %eax, 16(%edi,%ecx,8) C dst[0] + movl %edx, %ebx + + addl $1, %ecx C -(size-1) and clear carry + +L(diag): + C eax scratch (low product) + C ebx carry limb + C ecx counter, negative + C edx scratch (high product) + C esi &src[size] + C edi &dst[2*size-4] + C ebp scratch (fetched dst limbs) + + movl (%esi,%ecx,4), %eax + adcl $0, %ebx + + mull %eax + + movl 16-4(%edi,%ecx,8), %ebp + + addl %ebp, %ebx + movl 16(%edi,%ecx,8), %ebp + + adcl %eax, %ebp + movl %ebx, 16-4(%edi,%ecx,8) + + movl %ebp, 16(%edi,%ecx,8) + incl %ecx + + movl %edx, %ebx + jnz L(diag) + + + adcl $0, %edx + movl 16-4(%edi), %eax C dst most significant limb + + addl %eax, %edx + popl %ebp + + movl %edx, 16-4(%edi) + popl %esi C risk of cache bank clash + + popl %edi + popl %ebx + + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/rshift.asm b/rts/gmp/mpn/x86/rshift.asm new file mode 100644 index 0000000000..c9881fd966 --- /dev/null +++ b/rts/gmp/mpn/x86/rshift.asm @@ -0,0 +1,92 @@ +dnl x86 mpn_rshift -- mpn right shift. + +dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, +dnl Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. 
If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); + +defframe(PARAM_SHIFT,16) +defframe(PARAM_SIZE, 12) +defframe(PARAM_SRC, 8) +defframe(PARAM_DST, 4) + + .text + ALIGN(8) +PROLOGUE(mpn_rshift) + + pushl %edi + pushl %esi + pushl %ebx +deflit(`FRAME',12) + + movl PARAM_DST,%edi + movl PARAM_SRC,%esi + movl PARAM_SIZE,%edx + movl PARAM_SHIFT,%ecx + + leal -4(%edi,%edx,4),%edi + leal (%esi,%edx,4),%esi + negl %edx + + movl (%esi,%edx,4),%ebx C read least significant limb + xorl %eax,%eax + shrdl( %cl, %ebx, %eax) C compute carry limb + incl %edx + jz L(end) + pushl %eax C push carry limb onto stack + testb $1,%dl + jnz L(1) C enter loop in the middle + movl %ebx,%eax + + ALIGN(8) +L(oop): movl (%esi,%edx,4),%ebx C load next higher limb + shrdl( %cl, %ebx, %eax) C compute result limb + movl %eax,(%edi,%edx,4) C store it + incl %edx +L(1): movl (%esi,%edx,4),%eax + shrdl( %cl, %eax, %ebx) + movl %ebx,(%edi,%edx,4) + incl %edx + jnz L(oop) + + shrl %cl,%eax C compute most significant limb + movl %eax,(%edi) C store it + + popl %eax C pop carry limb + + popl %ebx + popl %esi + popl %edi + ret + +L(end): shrl %cl,%ebx C compute most significant limb + movl %ebx,(%edi) C store it + + popl %ebx + popl %esi + popl %edi + ret + +EPILOGUE() diff --git a/rts/gmp/mpn/x86/udiv.asm b/rts/gmp/mpn/x86/udiv.asm new file mode 100644 index 0000000000..9fe022b107 --- /dev/null +++ b/rts/gmp/mpn/x86/udiv.asm @@ -0,0 +1,44 @@ +dnl x86 mpn_udiv_qrnnd -- 2 by 1 limb division + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low, +C mp_limb_t divisor); + +defframe(PARAM_DIVISOR, 16) +defframe(PARAM_LOW, 12) +defframe(PARAM_HIGH, 8) +defframe(PARAM_REMPTR, 4) + + TEXT + ALIGN(8) +PROLOGUE(mpn_udiv_qrnnd) +deflit(`FRAME',0) + movl PARAM_LOW, %eax + movl PARAM_HIGH, %edx + divl PARAM_DIVISOR + movl PARAM_REMPTR, %ecx + movl %edx, (%ecx) + ret +EPILOGUE() diff --git a/rts/gmp/mpn/x86/umul.asm b/rts/gmp/mpn/x86/umul.asm new file mode 100644 index 0000000000..3d289d1784 --- /dev/null +++ b/rts/gmp/mpn/x86/umul.asm @@ -0,0 +1,43 @@ +dnl mpn_umul_ppmm -- 1x1->2 limb multiplication + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +include(`../config.m4') + + +C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2); +C + +defframe(PARAM_M2, 12) +defframe(PARAM_M1, 8) +defframe(PARAM_LOWPTR, 4) + + TEXT + ALIGN(8) +PROLOGUE(mpn_umul_ppmm) +deflit(`FRAME',0) + movl PARAM_LOWPTR, %ecx + movl PARAM_M1, %eax + mull PARAM_M2 + movl %eax, (%ecx) + movl %edx, %eax + ret +EPILOGUE() diff --git a/rts/gmp/mpn/x86/x86-defs.m4 b/rts/gmp/mpn/x86/x86-defs.m4 new file mode 100644 index 0000000000..2dad698002 --- /dev/null +++ b/rts/gmp/mpn/x86/x86-defs.m4 @@ -0,0 +1,713 @@ +divert(-1) + +dnl m4 macros for x86 assembler. + + +dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. 
If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + + +dnl Notes: +dnl +dnl m4 isn't perfect for processing BSD style x86 assembler code, the main +dnl problems are, +dnl +dnl 1. Doing define(foo,123) and then using foo in an addressing mode like +dnl foo(%ebx) expands as a macro rather than a constant. This is worked +dnl around by using deflit() from asm-defs.m4, instead of define(). +dnl +dnl 2. Immediates in macro definitions need a space or `' to stop the $ +dnl looking like a macro parameter. For example, +dnl +dnl define(foo, `mov $ 123, %eax') +dnl +dnl This is only a problem in macro definitions, not in ordinary text, +dnl nor in macro parameters like text passed to forloop() or ifdef(). + + +deflit(BYTES_PER_MP_LIMB, 4) + + +dnl -------------------------------------------------------------------------- +dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking. +dnl Nesting and overlapping not allowed. +dnl + + +dnl Usage: PROLOGUE(functionname) +dnl +dnl Generate a function prologue. functionname gets GSYM_PREFIX added. +dnl Examples, +dnl +dnl PROLOGUE(mpn_add_n) +dnl PROLOGUE(somefun) + +define(`PROLOGUE', +m4_assert_numargs(1) +m4_assert_defined(`PROLOGUE_cpu') +`ifdef(`PROLOGUE_current_function', +`m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1) +)')dnl +m4_file_seen()dnl +define(`PROLOGUE_current_function',`$1')dnl +PROLOGUE_cpu(GSYM_PREFIX`'$1)') + + +dnl Usage: EPILOGUE() +dnl +dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1 +dnl instead of the long PROLOGUE_current_function symbol. 
+ +define(`EPILOGUE', +m4_assert_numargs(0) +m4_assert_defined(`EPILOGUE_cpu') +`ifdef(`PROLOGUE_current_function',, +`m4_error(`EPILOGUE'() with no `PROLOGUE'() +)')dnl +EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl +undefine(`PROLOGUE_current_function')') + +m4wrap_prepend( +`ifdef(`PROLOGUE_current_function', +`m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen +')')') + + +dnl Usage: PROLOGUE_assert_inside() +dnl +dnl Use this unquoted on a line on its own at the start of a macro +dnl definition to add some code to check the macro is only used inside a +dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is +dnl defined. + +define(PROLOGUE_assert_inside, +m4_assert_numargs(0) +``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '') + +define(PROLOGUE_assert_inside_internal, +m4_assert_numargs(1) +`ifdef(`PROLOGUE_current_function',, +`m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair +')')') + + +dnl Usage: L(labelname) +dnl LF(functionname,labelname) +dnl +dnl Generate a local label in the current or given function. For LF(), +dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE(). +dnl +dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn), +dnl +dnl L(bar) => L__gmpn_add_n__bar +dnl LF(somefun,bar) => Lsomefun__bar +dnl +dnl The function name and label name get two underscores between them rather +dnl than one to guard against clashing with a separate external symbol that +dnl happened to be called functionname_labelname. (Though this would only +dnl happen if the local label prefix is empty.) Underscores are used so +dnl the whole label will still be a valid C identifier and so can be easily +dnl used in gdb. + +dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the +dnl L macro and making an infinite recursion. 
+define(LF, +m4_assert_numargs(2) +m4_assert_defined(`LSYM_PREFIX') +`defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2') + +define(`L', +m4_assert_numargs(1) +PROLOGUE_assert_inside() +`LF(PROLOGUE_current_function,`$1')') + + +dnl Called: PROLOGUE_cpu(gsym) +dnl EPILOGUE_cpu(gsym) + +define(PROLOGUE_cpu, +m4_assert_numargs(1) + `GLOBL $1 + TYPE($1,`function') +$1:') + +define(EPILOGUE_cpu, +m4_assert_numargs(1) +` SIZE($1,.-$1)') + + + +dnl -------------------------------------------------------------------------- +dnl Various x86 macros. +dnl + + +dnl Usage: ALIGN_OFFSET(bytes,offset) +dnl +dnl Align to `offset' away from a multiple of `bytes'. +dnl +dnl This is useful for testing, for example align to something very strict +dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)". +dnl +dnl Generally you wouldn't execute across the padding, but it's done with +dnl nop's so it'll work. + +define(ALIGN_OFFSET, +m4_assert_numargs(2) +`ALIGN($1) +forloop(`i',1,$2,` nop +')') + + +dnl Usage: defframe(name,offset) +dnl +dnl Make a definition like the following with which to access a parameter +dnl or variable on the stack. +dnl +dnl define(name,`FRAME+offset(%esp)') +dnl +dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one +dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp). +dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the +dnl zero offset is wanted. +dnl +dnl The new macro also gets a check that when it's used FRAME is actually +dnl defined, and that the final %esp offset isn't negative, which would +dnl mean an attempt to access something below the current %esp. +dnl +dnl deflit() is used rather than a plain define(), so the new macro won't +dnl delete any following parenthesized expression. name(%edi) will come +dnl out say as 16(%esp)(%edi). This isn't valid assembler and should +dnl provoke an error, which is better than silently giving just 16(%esp). 
+dnl +dnl See README.family for more on the suggested way to access the stack +dnl frame. + +define(defframe, +m4_assert_numargs(2) +`deflit(`$1', +m4_assert_defined(`FRAME') +`defframe_check_notbelow(`$1',$2,FRAME)dnl +defframe_empty_if_zero(FRAME+($2))(%esp)')') + +dnl Called: defframe_empty_if_zero(expression) +define(defframe_empty_if_zero, +`ifelse(defframe_empty_if_zero_disabled,1, +`eval($1)', +`m4_empty_if_zero($1)')') + +dnl Called: defframe_check_notbelow(`name',offset,FRAME) +define(defframe_check_notbelow, +m4_assert_numargs(3) +`ifelse(eval(($3)+($2)<0),1, +`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes +')')') + + +dnl Usage: FRAME_pushl() +dnl FRAME_popl() +dnl FRAME_addl_esp(n) +dnl FRAME_subl_esp(n) +dnl +dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl +dnl %esp of n bytes. +dnl +dnl Using these macros is completely optional. Sometimes it makes more +dnl sense to put explicit deflit(`FRAME',N) forms, especially when there's +dnl jumps and different sequences of FRAME values need to be used in +dnl different places. + +define(FRAME_pushl, +m4_assert_numargs(0) +m4_assert_defined(`FRAME') +`deflit(`FRAME',eval(FRAME+4))') + +define(FRAME_popl, +m4_assert_numargs(0) +m4_assert_defined(`FRAME') +`deflit(`FRAME',eval(FRAME-4))') + +define(FRAME_addl_esp, +m4_assert_numargs(1) +m4_assert_defined(`FRAME') +`deflit(`FRAME',eval(FRAME-($1)))') + +define(FRAME_subl_esp, +m4_assert_numargs(1) +m4_assert_defined(`FRAME') +`deflit(`FRAME',eval(FRAME+($1)))') + + +dnl Usage: defframe_pushl(name) +dnl +dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack +dnl location just pushed. This should come after a pushl instruction. +dnl Putting it on the same line works and avoids lengthening the code. For +dnl example, +dnl +dnl pushl %eax defframe_pushl(VAR_COUNTER) +dnl +dnl Notice the defframe() is done with an unquoted -FRAME thus giving its +dnl current value without tracking future changes. 
+ +define(defframe_pushl, +`FRAME_pushl()defframe(`$1',-FRAME)') + + +dnl -------------------------------------------------------------------------- +dnl Assembler instruction macros. +dnl + + +dnl Usage: emms_or_femms +dnl femms_available_p +dnl +dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow +dnl femms instruction is available. emms_or_femms expands to femms if +dnl available, or emms if not. +dnl +dnl emms_or_femms is meant for use in the K6 directory where plain K6 +dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are +dnl supported together. +dnl +dnl On K7 femms is no longer faster and is just an alias for emms, so plain +dnl emms may as well be used. + +define(femms_available_p, +m4_assert_numargs(-1) +`m4_ifdef_anyof_p( + `HAVE_TARGET_CPU_k62', + `HAVE_TARGET_CPU_k63', + `HAVE_TARGET_CPU_athlon')') + +define(emms_or_femms, +m4_assert_numargs(-1) +`ifelse(femms_available_p,1,`femms',`emms')') + + +dnl Usage: femms +dnl +dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the +dnl following is a replacement using .byte. +dnl +dnl If femms isn't available, an emms is generated instead, for convenience +dnl when testing on a machine without femms. + +define(femms, +m4_assert_numargs(-1) +`ifelse(femms_available_p,1, +`.byte 15,14 C AMD 3DNow femms', +`emms`'dnl +m4_warning(`warning, using emms in place of femms, use for testing only +')')') + + +dnl Usage: jadcl0(op) +dnl +dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact +dnl replacement, since it doesn't set the flags like adcl does. +dnl +dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and +dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch +dnl misprediction penalty is small, and the multiply algorithm used leads +dnl to a carry bit on average only 1/4 of the time. +dnl +dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for +dnl comparison. 
For example, +dnl +dnl define(`jadcl0_disabled',1) +dnl +dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is +dnl the same size as an adcl. This makes it possible to use the exact same +dnl computed jump code when testing the relative speed of jnc/incl and adcl +dnl with jadcl0_disabled. + +define(jadcl0, +m4_assert_numargs(1) +`ifelse(jadcl0_disabled,1, + `adcl $`'0, $1', + `jnc 1f + incl $1 +1:dnl')') + + +dnl Usage: cmov_available_p +dnl +dnl Expand to 1 if cmov is available, 0 if not. + +define(cmov_available_p, +`m4_ifdef_anyof_p( + `HAVE_TARGET_CPU_pentiumpro', + `HAVE_TARGET_CPU_pentium2', + `HAVE_TARGET_CPU_pentium3', + `HAVE_TARGET_CPU_athlon')') + + +dnl Usage: x86_lookup(target, key,value, key,value, ...) +dnl x86_lookup_p(target, key,value, key,value, ...) +dnl +dnl Look for `target' among the `key' parameters. +dnl +dnl x86_lookup expands to the corresponding `value', or generates an error +dnl if `target' isn't found. +dnl +dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not. + +define(x86_lookup, +`ifelse(eval($#<3),1, +`m4_error(`unrecognised part of x86 instruction: $1 +')', +`ifelse(`$1',`$2', `$3', +`x86_lookup(`$1',shift(shift(shift($@))))')')') + +define(x86_lookup_p, +`ifelse(eval($#<3),1, `0', +`ifelse(`$1',`$2', `1', +`x86_lookup_p(`$1',shift(shift(shift($@))))')')') + + +dnl Usage: x86_opcode_reg32(reg) +dnl x86_opcode_reg32_p(reg) +dnl +dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given +dnl 32-bit register, eg. `%ebp' turns into 5. +dnl +dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0 +dnl if not. 
+ +define(x86_opcode_reg32, +m4_assert_numargs(1) +`x86_lookup(`$1',x86_opcode_reg32_list)') + +define(x86_opcode_reg32_p, +m4_assert_onearg() +`x86_lookup_p(`$1',x86_opcode_reg32_list)') + +define(x86_opcode_reg32_list, +``%eax',0, +`%ecx',1, +`%edx',2, +`%ebx',3, +`%esp',4, +`%ebp',5, +`%esi',6, +`%edi',7') + + +dnl Usage: x86_opcode_tttn(cond) +dnl +dnl Expand to the 4-bit "tttn" field value for the given x86 branch +dnl condition (like `c', `ae', etc). + +define(x86_opcode_tttn, +m4_assert_numargs(1) +`x86_lookup(`$1',x86_opcode_tttn_list)') + +define(x86_opcode_tttn_list, +``o', 0, +`no', 1, +`b', 2, `c', 2, `nae',2, +`nb', 3, `nc', 3, `ae', 3, +`e', 4, `z', 4, +`ne', 5, `nz', 5, +`be', 6, `na', 6, +`nbe', 7, `a', 7, +`s', 8, +`ns', 9, +`p', 10, `pe', 10, `npo',10, +`np', 11, `npe',11, `po', 11, +`l', 12, `nge',12, +`nl', 13, `ge', 13, +`le', 14, `ng', 14, +`nle',15, `g', 15') + + +dnl Usage: cmovCC(srcreg,dstreg) +dnl +dnl Generate a cmov instruction if the target supports cmov, or simulate it +dnl with a conditional jump if not (the latter being meant only for +dnl testing). For example, +dnl +dnl cmovz( %eax, %ebx) +dnl +dnl cmov instructions are generated using .byte sequences, since only +dnl recent versions of gas know cmov. +dnl +dnl The source operand can only be a plain register. (m4 code implementing +dnl full memory addressing modes exists, believe it or not, but isn't +dnl currently needed and isn't included.) +dnl +dnl All the standard conditions are defined. Attempting to use one without +dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke +dnl an error. This ensures the necessary .byte sequences aren't +dnl accidentally missed. + +dnl Called: define_cmov_many(cond,tttn,cond,tttn,...) 
+define(define_cmov_many, +`ifelse(m4_length(`$1'),0,, +`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')') + +dnl Called: define_cmov(cond,tttn) +define(define_cmov, +m4_assert_numargs(2) +`define(`cmov$1', +m4_instruction_wrapper() +m4_assert_numargs(2) +`cmov_internal'(m4_doublequote($`'0),``$1',`$2'',dnl +m4_doublequote($`'1),m4_doublequote($`'2)))') + +define_cmov_many(x86_opcode_tttn_list) + + +dnl Called: cmov_internal(name,cond,tttn,src,dst) +define(cmov_internal, +m4_assert_numargs(5) +`ifelse(cmov_available_p,1, +`cmov_bytes_tttn(`$1',`$3',`$4',`$5')', +`m4_warning(`warning, simulating cmov with jump, use for testing only +')cmov_simulate(`$2',`$4',`$5')')') + +dnl Called: cmov_simulate(cond,src,dst) +dnl If this is going to be used with memory operands for the source it will +dnl need to be changed to do a fetch even if the condition is false, so as +dnl to trigger exceptions the same way a real cmov does. +define(cmov_simulate, +m4_assert_numargs(3) + `j$1 1f C cmov$1 $2, $3 + jmp 2f +1: movl $2, $3 +2:') + +dnl Called: cmov_bytes_tttn(name,tttn,src,dst) +define(cmov_bytes_tttn, +m4_assert_numargs(4) +`.byte dnl +15, dnl +eval(64+$2), dnl +eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl + C `$1 $3, $4'') + + +dnl Usage: loop_or_decljnz label +dnl +dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever +dnl is better. "loop" is better on K6 and probably on 386, on other chips +dnl separate decl/jnz is better. +dnl +dnl This macro is just for mpn/x86/divrem_1.asm and mpn/x86/mod_1.asm where +dnl this loop_or_decljnz variation is enough to let the code be shared by +dnl all chips. 
+ +define(loop_or_decljnz, +`ifelse(loop_is_better_p,1, + `loop', + `decl %ecx + jnz')') + +define(loop_is_better_p, +`m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6', + `HAVE_TARGET_CPU_k62', + `HAVE_TARGET_CPU_k63', + `HAVE_TARGET_CPU_i386')') + + +dnl Usage: Zdisp(inst,op,op,op) +dnl +dnl Generate explicit .byte sequences if necessary to force a byte-sized +dnl zero displacement on an instruction. For example, +dnl +dnl Zdisp( movl, 0,(%esi), %eax) +dnl +dnl expands to +dnl +dnl .byte 139,70,0 C movl 0(%esi), %eax +dnl +dnl If the displacement given isn't 0, then normal assembler code is +dnl generated. For example, +dnl +dnl Zdisp( movl, 4,(%esi), %eax) +dnl +dnl expands to +dnl +dnl movl 4(%esi), %eax +dnl +dnl This means a single Zdisp() form can be used with an expression for the +dnl displacement, and .byte will be used only if necessary. The +dnl displacement argument is eval()ed. +dnl +dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is +dnl implemented with a table of instructions and encodings. A new entry is +dnl needed for any different operation or registers. 
+ +define(Zdisp, +`define(`Zdisp_found',0)dnl +Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl +Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl +Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl +Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl +Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl +Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl +Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl +Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl +Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl +Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl +Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl +Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl +Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl +Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl +Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl +Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl +Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl +Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl +Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl +Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl +Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl +Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl +Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl +Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl +Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl +Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl +ifelse(Zdisp_found,0, +`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4 +')')') + +define(Zdisp_match, +`ifelse(eval(m4_stringequal_p(`$1',`$6') + && m4_stringequal_p(`$2',0) + && 
m4_stringequal_p(`$3',`$8') + && m4_stringequal_p(`$4',`$9')),1, +`define(`Zdisp_found',1)dnl +ifelse(eval(`$7'),0, +` .byte $5 C `$1 0$3, $4'', +` $6 $7$8, $9')', + +`ifelse(eval(m4_stringequal_p(`$1',`$6') + && m4_stringequal_p(`$2',`$7') + && m4_stringequal_p(`$3',0) + && m4_stringequal_p(`$4',`$9')),1, +`define(`Zdisp_found',1)dnl +ifelse(eval(`$8'),0, +` .byte $5 C `$1 $2, 0$4'', +` $6 $7, $8$9')')')') + + +dnl Usage: shldl(count,src,dst) +dnl shrdl(count,src,dst) +dnl shldw(count,src,dst) +dnl shrdw(count,src,dst) +dnl +dnl Generate a double-shift instruction, possibly omitting a %cl count +dnl parameter if that's what the assembler requires, as indicated by +dnl WANT_SHLDL_CL in config.m4. For example, +dnl +dnl shldl( %cl, %eax, %ebx) +dnl +dnl turns into either +dnl +dnl shldl %cl, %eax, %ebx +dnl or +dnl shldl %eax, %ebx +dnl +dnl Immediate counts are always passed through unchanged. For example, +dnl +dnl shrdl( $2, %esi, %edi) +dnl becomes +dnl shrdl $2, %esi, %edi +dnl +dnl +dnl If you forget to use the macro form "shldl( ...)" and instead write +dnl just a plain "shldl ...", an error results. This ensures the necessary +dnl variant treatment of %cl isn't accidentally bypassed. 
+ +define(define_shd_instruction, +`define($1, +m4_instruction_wrapper() +m4_assert_numargs(3) +`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl +m4_doublequote($`'2),m4_doublequote($`'3)))') + +dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc +define_shd_instruction(shldl) +define_shd_instruction(shrdl) +define_shd_instruction(shldw) +define_shd_instruction(shrdw) + +dnl Called: shd_instruction(op,count,src,dst) +define(shd_instruction, +m4_assert_numargs(4) +m4_assert_defined(`WANT_SHLDL_CL') +`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1, +``$1' `$3', `$4'', +``$1' `$2', `$3', `$4'')') + + +dnl Usage: ASSERT(cond, instructions) +dnl +dnl If WANT_ASSERT is 1, output the given instructions and expect the given +dnl flags condition to then be satisfied. For example, +dnl +dnl ASSERT(ne, `cmpl %eax, %ebx') +dnl +dnl The instructions can be omitted to just assert a flags condition with +dnl no extra calculation. For example, +dnl +dnl ASSERT(nc) +dnl +dnl When `instructions' is not empty, a pushf/popf is added to preserve the +dnl flags, but the instructions themselves must preserve any registers that +dnl matter. FRAME is adjusted for the push and pop, so the instructions +dnl given can use defframe() stack variables. + +define(ASSERT, +m4_assert_numargs_range(1,2) +`ifelse(WANT_ASSERT,1, + `C ASSERT +ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')') + $2 + j`$1' 1f + ud2 C assertion failed +1: +ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')') +')') + + +dnl Usage: movl_text_address(label,register) +dnl +dnl Get the address of a text segment label, using either a plain movl or a +dnl position-independent calculation, as necessary. For example, +dnl +dnl movl_text_address(L(foo),%eax) +dnl +dnl This macro is only meant for use in ASSERT()s or when testing, since +dnl the PIC sequence it generates will want to be done with a ret balancing +dnl the call on CPUs with return address branch prediction. 
+dnl +dnl The addl generated here has a backward reference to 1b, and so won't +dnl suffer from the two forwards references bug in old gas (described in +dnl mpn/x86/README.family). + +define(movl_text_address, +`ifdef(`PIC', + `call 1f +1: popl $2 C %eip + addl `$'$1-1b, $2', + `movl `$'$1, $2')') + + +divert`'dnl diff --git a/rts/gmp/mpn/z8000/add_n.s b/rts/gmp/mpn/z8000/add_n.s new file mode 100644 index 0000000000..3a136107fe --- /dev/null +++ b/rts/gmp/mpn/z8000/add_n.s @@ -0,0 +1,53 @@ +! Z8000 __gmpn_add_n -- Add two limb vectors of equal, non-zero length. + +! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + unseg + .text + even + global ___gmpn_add_n +___gmpn_add_n: + pop r0,@r6 + pop r1,@r5 + add r0,r1 + ld @r7,r0 + dec r4 + jr eq,Lend +Loop: pop r0,@r6 + pop r1,@r5 + adc r0,r1 + inc r7,#2 + ld @r7,r0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! 
use 0 already in r4 + adc r2,r2 + ret t diff --git a/rts/gmp/mpn/z8000/gmp-mparam.h b/rts/gmp/mpn/z8000/gmp-mparam.h new file mode 100644 index 0000000000..4216df673c --- /dev/null +++ b/rts/gmp/mpn/z8000/gmp-mparam.h @@ -0,0 +1,27 @@ +/* gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#define BITS_PER_MP_LIMB 16 +#define BYTES_PER_MP_LIMB 2 +#define BITS_PER_LONGINT 32 +#define BITS_PER_INT 16 +#define BITS_PER_SHORTINT 16 +#define BITS_PER_CHAR 8 diff --git a/rts/gmp/mpn/z8000/mul_1.s b/rts/gmp/mpn/z8000/mul_1.s new file mode 100644 index 0000000000..20fadd340a --- /dev/null +++ b/rts/gmp/mpn/z8000/mul_1.s @@ -0,0 +1,68 @@ +! Z8000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store +! the result in a second limb vector. + +! Copyright (C) 1993, 1994, 1995, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! 
option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! size r5 +! s2_limb r4 + + unseg + .text + even + global ___gmpn_mul_1 +___gmpn_mul_1: + sub r2,r2 ! zero carry limb + and r4,r4 + jr mi,Lneg + +Lpos: pop r1,@r6 + ld r9,r1 + mult rr8,r4 + and r1,r1 ! shift msb of loaded limb into cy + jr mi,Lp ! branch if loaded limb's msb is set + add r8,r4 ! hi_limb += sign_comp2 +Lp: add r9,r2 ! lo_limb += cy_limb + xor r2,r2 + adc r2,r8 + ld @r7,r9 + inc r7,#2 + dec r5 + jr ne,Lpos + ret t + +Lneg: pop r1,@r6 + ld r9,r1 + mult rr8,r4 + add r8,r1 ! hi_limb += sign_comp1 + and r1,r1 + jr mi,Ln + add r8,r4 ! hi_limb += sign_comp2 +Ln: add r9,r2 ! lo_limb += cy_limb + xor r2,r2 + adc r2,r8 + ld @r7,r9 + inc r7,#2 + dec r5 + jr ne,Lneg + ret t diff --git a/rts/gmp/mpn/z8000/sub_n.s b/rts/gmp/mpn/z8000/sub_n.s new file mode 100644 index 0000000000..bd9a7ad409 --- /dev/null +++ b/rts/gmp/mpn/z8000/sub_n.s @@ -0,0 +1,54 @@ +! Z8000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and +! store difference in a third limb vector. + +! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! 
The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + unseg + .text + even + global ___gmpn_sub_n +___gmpn_sub_n: + pop r0,@r6 + pop r1,@r5 + sub r0,r1 + ld @r7,r0 + dec r4 + jr eq,Lend +Loop: pop r0,@r6 + pop r1,@r5 + sbc r0,r1 + inc r7,#2 + ld @r7,r0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + adc r2,r2 + ret t diff --git a/rts/gmp/mpn/z8000x/add_n.s b/rts/gmp/mpn/z8000x/add_n.s new file mode 100644 index 0000000000..7f130785c5 --- /dev/null +++ b/rts/gmp/mpn/z8000x/add_n.s @@ -0,0 +1,56 @@ +! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal, +! non-zero length. + +! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! 
You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + segm + .text + even + global ___gmpn_add_n +___gmpn_add_n: + popl rr0,@r6 + popl rr8,@r5 + addl rr0,rr8 + ldl @r7,rr0 + dec r4 + jr eq,Lend +Loop: popl rr0,@r6 + popl rr8,@r5 + adc r1,r9 + adc r0,r8 + inc r7,#4 + ldl @r7,rr0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + ld r3,r4 + adc r2,r2 + ret t diff --git a/rts/gmp/mpn/z8000x/sub_n.s b/rts/gmp/mpn/z8000x/sub_n.s new file mode 100644 index 0000000000..f416d1d6eb --- /dev/null +++ b/rts/gmp/mpn/z8000x/sub_n.s @@ -0,0 +1,56 @@ +! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the +! same length > 0 and store difference in a third limb vector. + +! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc. + +! This file is part of the GNU MP Library. + +! The GNU MP Library is free software; you can redistribute it and/or modify +! it under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation; either version 2.1 of the License, or (at your +! option) any later version. + +! The GNU MP Library is distributed in the hope that it will be useful, but +! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +! License for more details. + +! You should have received a copy of the GNU Lesser General Public License +! along with the GNU MP Library; see the file COPYING.LIB. If not, write to +! 
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +! MA 02111-1307, USA. + + +! INPUT PARAMETERS +! res_ptr r7 +! s1_ptr r6 +! s2_ptr r5 +! size r4 + +! If we are really crazy, we can use push to write a few result words +! backwards, using push just because it is faster than reg+disp. We'd +! then add 2x the number of words written to r7... + + segm + .text + even + global ___gmpn_sub_n +___gmpn_sub_n: + popl rr0,@r6 + popl rr8,@r5 + subl rr0,rr8 + ldl @r7,rr0 + dec r4 + jr eq,Lend +Loop: popl rr0,@r6 + popl rr8,@r5 + sbc r1,r9 + sbc r0,r8 + inc r7,#4 + ldl @r7,rr0 + dec r4 + jr ne,Loop +Lend: ld r2,r4 ! use 0 already in r4 + ld r3,r4 + adc r2,r2 + ret t diff --git a/rts/gmp/mpz/Makefile.am b/rts/gmp/mpz/Makefile.am new file mode 100644 index 0000000000..cd6fec4e21 --- /dev/null +++ b/rts/gmp/mpz/Makefile.am @@ -0,0 +1,58 @@ +## Process this file with automake to generate Makefile.in + +# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. 
+ + +AUTOMAKE_OPTIONS = gnu no-dependencies + +SUBDIRS = tests + +INCLUDES = -I$(top_srcdir) -DOPERATION_$* + +noinst_LTLIBRARIES = libmpz.la +libmpz_la_SOURCES = \ + abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \ + bin_ui.c bin_uiui.c cdiv_q.c \ + cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \ + clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \ + divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \ + fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \ + fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \ + fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \ + get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \ + invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \ + jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \ + lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \ + out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \ + powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \ + scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \ + set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \ + sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \ + tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \ + urandomb.c urandomm.c xor.c + +EXTRA_DIST = mul_siui.c +nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c +CLEANFILES = $(nodist_libmpz_la_SOURCES) + +mul_si.c: $(srcdir)/mul_siui.c + cp $(srcdir)/mul_siui.c mul_si.c +mul_ui.c: $(srcdir)/mul_siui.c + cp $(srcdir)/mul_siui.c mul_ui.c diff --git a/rts/gmp/mpz/Makefile.in b/rts/gmp/mpz/Makefile.in new file mode 100644 index 0000000000..e0f2cdc133 --- /dev/null +++ b/rts/gmp/mpz/Makefile.in @@ -0,0 +1,457 @@ +# Makefile.in generated automatically by automake 1.4a from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_FLAG = +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : + +@SET_MAKE@ +build_alias = @build_alias@ +build_triplet = @build@ +host_alias = @host_alias@ +host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ +AMDEP = @AMDEP@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AWK = @AWK@ +CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@ +CC = @CC@ +CCAS = @CCAS@ +CPP = @CPP@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +EXEEXT = @EXEEXT@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +M4 = @M4@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@ +STRIP = @STRIP@ +U = @U@ +VERSION = @VERSION@ +gmp_srclinks = @gmp_srclinks@ +install_sh = @install_sh@ +mpn_objects = @mpn_objects@ +mpn_objs_in_libgmp = @mpn_objs_in_libgmp@ + +# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc. +# +# This file is part of the GNU MP Library. +# +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or (at your +# option) any later version. +# +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library; see the file COPYING.LIB. If not, write to +# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +# MA 02111-1307, USA. + + +AUTOMAKE_OPTIONS = gnu no-dependencies + +SUBDIRS = + +INCLUDES = -I$(top_srcdir) -DOPERATION_$* + +noinst_LTLIBRARIES = libmpz.la +libmpz_la_SOURCES = \ + abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \ + bin_ui.c bin_uiui.c cdiv_q.c \ + cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \ + clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \ + divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \ + fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \ + fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \ + fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \ + get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \ + invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \ + jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \ + lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \ + out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \ + powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \ + scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \ + set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \ + sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \ + tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \ + urandomb.c urandomm.c xor.c + + +EXTRA_DIST = mul_siui.c +nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c +CLEANFILES = $(nodist_libmpz_la_SOURCES) +subdir = mpz +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I.. 
+CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +libmpz_la_LDFLAGS = +libmpz_la_LIBADD = +am_libmpz_la_OBJECTS = abs.lo add.lo add_ui.lo addmul_ui.lo and.lo \ +array_init.lo bin_ui.lo bin_uiui.lo cdiv_q.lo cdiv_q_ui.lo cdiv_qr.lo \ +cdiv_qr_ui.lo cdiv_r.lo cdiv_r_ui.lo cdiv_ui.lo clear.lo clrbit.lo \ +cmp.lo cmp_si.lo cmp_ui.lo cmpabs.lo cmpabs_ui.lo com.lo divexact.lo \ +dump.lo fac_ui.lo fdiv_q.lo fdiv_q_2exp.lo fdiv_q_ui.lo fdiv_qr.lo \ +fdiv_qr_ui.lo fdiv_r.lo fdiv_r_2exp.lo fdiv_r_ui.lo fdiv_ui.lo \ +fib_ui.lo fits_sint_p.lo fits_slong_p.lo fits_sshort_p.lo \ +fits_uint_p.lo fits_ulong_p.lo fits_ushort_p.lo gcd.lo gcd_ui.lo \ +gcdext.lo get_d.lo get_si.lo get_str.lo get_ui.lo getlimbn.lo \ +hamdist.lo init.lo inp_raw.lo inp_str.lo invert.lo ior.lo iset.lo \ +iset_d.lo iset_si.lo iset_str.lo iset_ui.lo jacobi.lo kronsz.lo \ +kronuz.lo kronzs.lo kronzu.lo lcm.lo legendre.lo mod.lo mul.lo \ +mul_2exp.lo neg.lo nextprime.lo out_raw.lo out_str.lo perfpow.lo \ +perfsqr.lo popcount.lo pow_ui.lo powm.lo powm_ui.lo pprime_p.lo \ +random.lo random2.lo realloc.lo remove.lo root.lo rrandomb.lo scan0.lo \ +scan1.lo set.lo set_d.lo set_f.lo set_q.lo set_si.lo set_str.lo \ +set_ui.lo setbit.lo size.lo sizeinbase.lo sqrt.lo sqrtrem.lo sub.lo \ +sub_ui.lo swap.lo tdiv_ui.lo tdiv_q.lo tdiv_q_2exp.lo tdiv_q_ui.lo \ +tdiv_qr.lo tdiv_qr_ui.lo tdiv_r.lo tdiv_r_2exp.lo tdiv_r_ui.lo \ +tstbit.lo ui_pow_ui.lo urandomb.lo urandomm.lo xor.lo +nodist_libmpz_la_OBJECTS = mul_si.lo mul_ui.lo +libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS) +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CFLAGS = @CFLAGS@ +CCLD = $(CC) +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(libmpz_la_SOURCES) +DIST_COMMON = README Makefile.am Makefile.in + + 
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +GZIP_ENV = --best +depcomp = +SOURCES = $(libmpz_la_SOURCES) $(nodist_libmpz_la_SOURCES) +OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu mpz/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLTLIBRARIES: + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + +distclean-noinstLTLIBRARIES: + +maintainer-clean-noinstLTLIBRARIES: + +mostlyclean-compile: + -rm -f *.o core *.core + -rm -f *.$(OBJEXT) + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + +maintainer-clean-libtool: + +libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES) + $(LINK) $(libmpz_la_LDFLAGS) $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS) +.c.o: + $(COMPILE) -c $< +.c.obj: + $(COMPILE) -c `cygpath -w $<` +.c.lo: + $(LTCOMPILE) -c -o $@ $< + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+ +all-recursive install-data-recursive install-exec-recursive \ +installdirs-recursive install-recursive uninstall-recursive \ +check-recursive installcheck-recursive info-recursive dvi-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +mostlyclean-recursive clean-recursive distclean-recursive \ +maintainer-clean-recursive: + @set fnord $(MAKEFLAGS); amf=$$2; \ + dot_seen=no; \ + rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ + rev="$$subdir $$rev"; \ + if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \ + done; \ + test "$$dot_seen" = "no" && rev=". $$rev"; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -f$$here/ID $$unique $(LISP) + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || etags $(ETAGS_ARGS) $$tags $$unique $(LISP) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pR $$d/$$file $(distdir); \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + for subdir in $(SUBDIRS); do \ + if test "$$subdir" = .; then :; else \ + test -d $(distdir)/$$subdir \ + || mkdir $(distdir)/$$subdir \ + || exit 1; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \ + || exit 1; \ + fi; \ + done +info-am: +info: info-recursive +dvi-am: +dvi: dvi-recursive +check-am: all-am +check: check-recursive +installcheck-am: +installcheck: installcheck-recursive +install-exec-am: +install-exec: install-exec-recursive + +install-data-am: +install-data: install-data-recursive + 
+install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-recursive +uninstall-am: +uninstall: uninstall-recursive +all-am: Makefile $(LTLIBRARIES) +all-redirect: all-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install +installdirs: installdirs-recursive +installdirs-am: + + +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: + -rm -f Makefile.in +mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \ + mostlyclean-libtool mostlyclean-tags \ + mostlyclean-generic + +mostlyclean: mostlyclean-recursive + +clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \ + clean-tags clean-generic mostlyclean-am + +clean: clean-recursive + +distclean-am: distclean-noinstLTLIBRARIES distclean-compile \ + distclean-libtool distclean-tags distclean-generic \ + clean-am + -rm -f libtool + +distclean: distclean-recursive + +maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \ + maintainer-clean-compile maintainer-clean-libtool \ + maintainer-clean-tags maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-recursive + +.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \ +clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile mostlyclean-libtool distclean-libtool \ +clean-libtool maintainer-clean-libtool install-recursive \ +uninstall-recursive install-data-recursive uninstall-data-recursive \ +install-exec-recursive uninstall-exec-recursive installdirs-recursive \ +uninstalldirs-recursive all-recursive check-recursive \ +installcheck-recursive info-recursive dvi-recursive \ +mostlyclean-recursive distclean-recursive clean-recursive \ +maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ +distclean-tags clean-tags maintainer-clean-tags distdir info-am info \ +dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ +install-exec install-data-am install-data install-am install \ +uninstall-am uninstall all-redirect all-am all install-strip \ +installdirs-am installdirs mostlyclean-generic distclean-generic \ +clean-generic maintainer-clean-generic clean mostlyclean distclean \ +maintainer-clean + + +mul_si.c: $(srcdir)/mul_siui.c + cp $(srcdir)/mul_siui.c mul_si.c +mul_ui.c: $(srcdir)/mul_siui.c + cp $(srcdir)/mul_siui.c mul_ui.c + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/rts/gmp/mpz/README b/rts/gmp/mpz/README new file mode 100644 index 0000000000..06b481d770 --- /dev/null +++ b/rts/gmp/mpz/README @@ -0,0 +1,23 @@ +This directory contains functions for GMP's integer function layer. + +In this version of GMP, integers are represented like in the figure below. +(Please note that the format might change between every version, and that +depending on the internal format in any way is a bad idea.) 
+ + most least +significant significant + limb limb + + _mp_d + / + / + \/ + ____ ____ ____ ____ ____ + |____|____|____|____|____| + + <------- _mp_size -------> + + +The most significant limb will be non-zero. The _mp_size field's sign +reflects the sign of the number. Its absolute value is the count of limbs +in the number. diff --git a/rts/gmp/mpz/abs.c b/rts/gmp/mpz/abs.c new file mode 100644 index 0000000000..0b5eab1ce6 --- /dev/null +++ b/rts/gmp/mpz/abs.c @@ -0,0 +1,51 @@ +/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_abs (mpz_ptr w, mpz_srcptr u) +#else +mpz_abs (w, u) + mpz_ptr w; + mpz_srcptr u; +#endif +{ /* Set W to the absolute value of U; limbs are copied only when W and U are different objects. */ + mp_ptr wp, up; + mp_size_t size; + + size = ABS (u->_mp_size); /* limb count of U with the sign dropped */ + + if (u != w) + { + if (w->_mp_alloc < size) + _mpz_realloc (w, size); /* grow W before its limb pointer is fetched */ + + wp = w->_mp_d; + up = u->_mp_d; + + MPN_COPY (wp, up, size); + } + + w->_mp_size = size; /* a non-negative size makes the result non-negative */ +} diff --git a/rts/gmp/mpz/add.c b/rts/gmp/mpz/add.c new file mode 100644 index 0000000000..a22c3778fb --- /dev/null +++ b/rts/gmp/mpz/add.c @@ -0,0 +1,123 @@ +/* mpz_add -- Add two integers. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_add (mpz_ptr w, mpz_srcptr u, mpz_srcptr v) +#else +mpz_add (w, u, v) + mpz_ptr w; + mpz_srcptr u; + mpz_srcptr v; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +madd (mpz_srcptr u, mpz_srcptr v, mpz_ptr w) +#else +madd (u, v, w) + mpz_srcptr u; + mpz_srcptr v; + mpz_ptr w; +#endif +#endif /* BERKELEY_MP */ +{ /* Set W = U + V. The same body is compiled as madd (u, v, w) when BERKELEY_MP is defined. */ + mp_srcptr up, vp; + mp_ptr wp; + mp_size_t usize, vsize, wsize; + mp_size_t abs_usize; + mp_size_t abs_vsize; + + usize = u->_mp_size; + vsize = v->_mp_size; + abs_usize = ABS (usize); + abs_vsize = ABS (vsize); + + if (abs_usize < abs_vsize) + { + /* Swap U and V. */ + MPZ_SRCPTR_SWAP (u, v); + MP_SIZE_T_SWAP (usize, vsize); + MP_SIZE_T_SWAP (abs_usize, abs_vsize); + } + + /* True: ABS_USIZE >= ABS_VSIZE. */ + + /* If W lacks room for the result (and a possible carry limb), grow it. */ + wsize = abs_usize + 1; + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + /* These must be after realloc (u or v may be the same as w). */ + up = u->_mp_d; + vp = v->_mp_d; + wp = w->_mp_d; + + if ((usize ^ vsize) < 0) + { + /* U and V have different signs. Need to compare them to determine + which operand to subtract from which. */ + + /* This test is right since ABS_USIZE >= ABS_VSIZE. */ + if (abs_usize != abs_vsize) + { + mpn_sub (wp, up, abs_usize, vp, abs_vsize); + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize < 0) + wsize = -wsize; + } + else if (mpn_cmp (up, vp, abs_usize) < 0) + { + mpn_sub_n (wp, vp, up, abs_usize); /* the limbs of V compare larger, so subtract the other way round */ + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize >= 0) + wsize = -wsize; + } + else + { + mpn_sub_n (wp, up, vp, abs_usize); + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize < 0) + wsize = -wsize; + } + } + else + { + /* U and V have the same sign. Add them. 
*/ + mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize); + wp[abs_usize] = cy_limb; /* the carry-out is stored as the top limb */ + wsize = abs_usize + cy_limb; /* the size grows by the carry value */ + if (usize < 0) + wsize = -wsize; + } + + w->_mp_size = wsize; +} diff --git a/rts/gmp/mpz/add_ui.c b/rts/gmp/mpz/add_ui.c new file mode 100644 index 0000000000..28dbd71f45 --- /dev/null +++ b/rts/gmp/mpz/add_ui.c @@ -0,0 +1,84 @@ +/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer. + +Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_add_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v) +#else +mpz_add_ui (w, u, v) + mpz_ptr w; + mpz_srcptr u; + unsigned long int v; +#endif +{ /* Set W = U + V, where V is an unsigned long. */ + mp_srcptr up; + mp_ptr wp; + mp_size_t usize, wsize; + mp_size_t abs_usize; + + usize = u->_mp_size; + abs_usize = ABS (usize); + + /* If W lacks room for the result (and a possible carry limb), grow it. */ + wsize = abs_usize + 1; + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + /* These must be after realloc (U may be the same as W). 
*/ + up = u->_mp_d; + wp = w->_mp_d; + + if (abs_usize == 0) + { + wp[0] = v; + w->_mp_size = v != 0; /* U is zero: the result is V itself, with size 0 or 1 */ + return; + } + + if (usize >= 0) + { + mp_limb_t cy; + cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v); + wp[abs_usize] = cy; + wsize = abs_usize + cy; + } + else + { + /* The signs are different. Need exact comparison to determine + which operand to subtract from which. */ + if (abs_usize == 1 && up[0] < v) + { + wp[0] = v - up[0]; + wsize = 1; + } + else + { + mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v); + /* The size can decrease by at most one limb. */ + wsize = -(abs_usize - (wp[abs_usize - 1] == 0)); + } + } + + w->_mp_size = wsize; +} diff --git a/rts/gmp/mpz/addmul_ui.c b/rts/gmp/mpz/addmul_ui.c new file mode 100644 index 0000000000..7b38d3624d --- /dev/null +++ b/rts/gmp/mpz/addmul_ui.c @@ -0,0 +1,214 @@ +/* mpz_addmul_ui(prodsum, multiplier, small_multiplicand) -- + Add MULTIPLICATOR times SMALL_MULTIPLICAND to PRODSUM. + +Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +static mp_limb_t mpn_neg1 _PROTO ((mp_ptr, mp_size_t)); + +#if 0 +#undef MPN_NORMALIZE +#define MPN_NORMALIZE(DST, NLIMBS) \ + do { \ + while (--(NLIMBS) >= 0 && (DST)[NLIMBS] == 0) \ + ; \ + (NLIMBS)++; \ + } while (0) +#undef MPN_NORMALIZE_NOT_ZERO +#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \ + do { \ + while ((DST)[--(NLIMBS)] == 0) \ + ; \ + (NLIMBS)++; \ + } while (0) +#endif + +void +#if __STDC__ +mpz_addmul_ui (mpz_ptr rz, mpz_srcptr az, unsigned long int bu) +#else +mpz_addmul_ui (rz, az, bu) + mpz_ptr rz; + mpz_srcptr az; + unsigned long int bu; +#endif +{ /* Add AZ * BU to RZ in place. */ + mp_size_t rn, an; + mp_ptr rp, ap; + + an = SIZ (az); + + /* If either factor is zero, the result is unaffected. */ + if (bu == 0 || an == 0) + return; + + rn = SIZ (rz); + + if (rn == 0) + { + mp_limb_t cy; + + an = ABS (an); + if (ALLOC (rz) <= an) + _mpz_realloc (rz, an + 1); + rp = PTR (rz); + ap = PTR (az); + cy = mpn_mul_1 (rp, ap, an, (mp_limb_t) bu); + rp[an] = cy; + an += cy != 0; + SIZ (rz) = SIZ (az) >= 0 ? an : -an; /* RZ was zero: the product takes the sign of AZ */ + return; + } + + if ((an ^ rn) >= 0) + { + /* Signs of the operands are the same--really add. */ + an = ABS (an); + rn = ABS (rn); + if (rn > an) + { + mp_limb_t cy; + if (ALLOC (rz) <= rn) + _mpz_realloc (rz, rn + 1); + rp = PTR (rz); + ap = PTR (az); + cy = mpn_addmul_1 (rp, ap, an, (mp_limb_t) bu); + cy = mpn_add_1 (rp + an, rp + an, rn - an, cy); + rp[rn] = cy; + rn += cy != 0; + SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn; + return; + } + else + { + mp_limb_t cy; + if (ALLOC (rz) <= an) + _mpz_realloc (rz, an + 1); + rp = PTR (rz); + ap = PTR (az); + cy = mpn_addmul_1 (rp, ap, rn, (mp_limb_t) bu); + if (an != rn) + { + mp_limb_t cy2; + cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu); + cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy); + } + rn = an; + rp[rn] = cy; + rn += cy != 0; + SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn; + return; + } + } + else + { + /* Signs of the operands are different--actually subtract. 
*/ + an = ABS (an); + rn = ABS (rn); + if (rn > an) + { + mp_limb_t cy; + rp = PTR (rz); + ap = PTR (az); + cy = mpn_submul_1 (rp, ap, an, (mp_limb_t) bu); + cy = mpn_sub_1 (rp + an, rp + an, rn - an, cy); + if (cy != 0) + { + mpn_neg1 (rp, rn); + MPN_NORMALIZE_NOT_ZERO (rp, rn); + } + else + { + MPN_NORMALIZE (rp, rn); + rn = -rn; /* pre-negated so the flip below leaves the sign of RZ unchanged */ + } + + SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn; + return; + } + else + { + /* Tricky case. We need to subtract an operand that might be larger + than the minuend. To avoid allocating temporary space, we compute + a*b-r instead of r-a*b and then negate. */ + mp_limb_t cy; + if (ALLOC (rz) <= an) + _mpz_realloc (rz, an + 1); + rp = PTR (rz); + ap = PTR (az); + cy = mpn_submul_1 (rp, ap, rn, (mp_limb_t) bu); + if (an != rn) + { + mp_limb_t cy2; + cy -= mpn_neg1 (rp, rn); + cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu); + if (cy == ~(mp_limb_t) 0) + cy = cy2 - mpn_sub_1 (rp + rn, rp + rn, an - rn, (mp_limb_t) 1); + else + cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy); + rp[an] = cy; + rn = an + (cy != 0); + rn -= rp[rn - 1] == 0; + } + else if (cy != 0) + { + cy -= mpn_neg1 (rp, rn); + rp[an] = cy; + rn = an + 1; + MPN_NORMALIZE_NOT_ZERO (rp, rn); + } + else + { + rn = an; + MPN_NORMALIZE (rp, rn); + rn = -rn; + } + + SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn; + return; + } + } +} + +static mp_limb_t +#if __STDC__ +mpn_neg1 (mp_ptr rp, mp_size_t rn) +#else +mpn_neg1 (rp, rn) + mp_ptr rp; + mp_size_t rn; +#endif +{ /* Negate the RN limbs at RP in place: low zero limbs are skipped, the first non-zero limb is negated and every limb above it is complemented. Returns 1 if a non-zero limb was found, 0 otherwise. */ + mp_size_t i; + + while (rn != 0 && rp[0] == 0) + rp++, rn--; + + if (rn != 0) + { + rp[0] = -rp[0]; + for (i = 1; i < rn; i++) + rp[i] = ~rp[i]; + return 1; + } + return 0; +} diff --git a/rts/gmp/mpz/and.c b/rts/gmp/mpz/and.c new file mode 100644 index 0000000000..354e9455bf --- /dev/null +++ b/rts/gmp/mpz/and.c @@ -0,0 +1,278 @@ +/* mpz_and -- Logical and. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2) +#else +mpz_and (res, op1, op2) + mpz_ptr res; + mpz_srcptr op1; + mpz_srcptr op2; +#endif +{ /* Set RES to the bitwise AND of OP1 and OP2. Negative operands are handled as ones-extended values, one case per sign combination. */ + mp_srcptr op1_ptr, op2_ptr; + mp_size_t op1_size, op2_size; + mp_ptr res_ptr; + mp_size_t res_size; + mp_size_t i; + TMP_DECL (marker); + + TMP_MARK (marker); + op1_size = op1->_mp_size; + op2_size = op2->_mp_size; + + op1_ptr = op1->_mp_d; + op2_ptr = op2->_mp_d; + res_ptr = res->_mp_d; + + if (op1_size >= 0) + { + if (op2_size >= 0) + { + res_size = MIN (op1_size, op2_size); + /* First loop finds the size of the result. */ + for (i = res_size - 1; i >= 0; i--) + if ((op1_ptr[i] & op2_ptr[i]) != 0) + break; + res_size = i + 1; + + /* Handle allocation, now that we know exactly how much space is + needed for the result. */ + if (res->_mp_alloc < res_size) + { + _mpz_realloc (res, res_size); + op1_ptr = op1->_mp_d; + op2_ptr = op2->_mp_d; + res_ptr = res->_mp_d; + } + + /* Second loop computes the real result. 
*/ + for (i = res_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] & op2_ptr[i]; + + res->_mp_size = res_size; + return; /* NOTE(review): leaves without TMP_FREE; nothing was TMP_ALLOCed on this path */ + } + else /* op2_size < 0 */ + { + /* Fall through to the code at the end of the function. */ + } + } + else + { + if (op2_size < 0) + { + mp_ptr opx; + mp_limb_t cy; + mp_size_t res_alloc; + + /* Both operands are negative, and so is the result. + -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) = + = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 = + = ((OP1 - 1) | (OP2 - 1)) + 1 */ + + /* It might seem as if we could end up with an (invalid) result with + a leading zero-limb here when one of the operands is of the + type 1,,0,,..,,.0. But some analysis shows that we surely + would get carry into the zero-limb in this situation... */ + + op1_size = -op1_size; + op2_size = -op2_size; + + res_alloc = 1 + MAX (op1_size, op2_size); + + opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1); + op1_ptr = opx; + + opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1); + op2_ptr = opx; + + if (res->_mp_alloc < res_alloc) + { + _mpz_realloc (res, res_alloc); + res_ptr = res->_mp_d; + /* Don't re-read OP1_PTR and OP2_PTR. They point to + temporary space--never to the space RES->_mp_d used + to point to before reallocation. */ + } + + if (op1_size >= op2_size) + { + MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, + op1_size - op2_size); + for (i = op2_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] | op2_ptr[i]; + res_size = op1_size; + } + else + { + MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, + op2_size - op1_size); + for (i = op1_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] | op2_ptr[i]; + res_size = op2_size; + } + + cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1); + if (cy) + { + res_ptr[res_size] = cy; + res_size++; + } + + res->_mp_size = -res_size; + TMP_FREE (marker); + return; + } + else + { + /* We should compute -OP1 & OP2. 
Swap OP1 and OP2 and fall + through to the code that handles OP1 & -OP2. */ + MPZ_SRCPTR_SWAP (op1, op2); + MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size); + } + + } + + { +#if ANDNEW + mp_size_t op2_lim; + mp_size_t count; + + /* OP2 must be negated as with infinite precision. + + Scan from the low end for a non-zero limb. The first non-zero + limb is simply negated (two's complement). Any subsequent + limbs are one's complemented. Of course, we don't need to + handle more limbs than there are limbs in the other, positive + operand as the result for those limbs is going to become zero + anyway. */ + + /* Scan for the least significant non-zero OP2 limb, and zero the + result meanwhile for those limb positions. (We will surely + find a non-zero limb, so we can write the loop with one + termination condition only.) */ + for (i = 0; op2_ptr[i] == 0; i++) + res_ptr[i] = 0; + op2_lim = i; + + op2_size = -op2_size; + + if (op1_size <= op2_size) + { + /* The ones-extended OP2 is >= the zero-extended OP1. + RES_SIZE <= OP1_SIZE. Find the exact size. */ + for (i = op1_size - 1; i > op2_lim; i--) + if ((op1_ptr[i] & ~op2_ptr[i]) != 0) + break; + res_size = i + 1; + for (i = res_size - 1; i > op2_lim; i--) + res_ptr[i] = op1_ptr[i] & ~op2_ptr[i]; + res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim]; + /* Yes, this *can* happen! */ + MPN_NORMALIZE (res_ptr, res_size); + } + else + { + /* The ones-extended OP2 is < the zero-extended OP1. + RES_SIZE == OP1_SIZE, since OP1 is normalized. */ + res_size = op1_size; + MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size); + for (i = op2_size - 1; i > op2_lim; i--) + res_ptr[i] = op1_ptr[i] & ~op2_ptr[i]; + res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim]; + } + + res->_mp_size = res_size; +#else + + /* OP1 is positive and zero-extended, + OP2 is negative and ones-extended. + The result will be positive. + OP1 & -OP2 = OP1 & ~(OP2 - 1). 
*/ + + mp_ptr opx; + + op2_size = -op2_size; + opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1); + op2_ptr = opx; + + if (op1_size > op2_size) + { + /* The result has the same size as OP1, since OP1 is normalized + and longer than the ones-extended OP2. */ + res_size = op1_size; + + /* Handle allocation, now that we know exactly how much space is + needed for the result. */ + if (res->_mp_alloc < res_size) + { + _mpz_realloc (res, res_size); + res_ptr = res->_mp_d; + op1_ptr = op1->_mp_d; + /* Don't re-read OP2_PTR. It points to temporary space--never + to the space RES->_mp_d used to point to before reallocation. */ + } + + MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, + res_size - op2_size); + for (i = op2_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] & ~op2_ptr[i]; + + res->_mp_size = res_size; + } + else + { + /* Find out the exact result size. Ignore the high limbs of OP2, + OP1 is zero-extended and would make the result zero. */ + for (i = op1_size - 1; i >= 0; i--) + if ((op1_ptr[i] & ~op2_ptr[i]) != 0) + break; + res_size = i + 1; + + /* Handle allocation, now that we know exactly how much space is + needed for the result. */ + if (res->_mp_alloc < res_size) + { + _mpz_realloc (res, res_size); + res_ptr = res->_mp_d; + op1_ptr = op1->_mp_d; + /* Don't re-read OP2_PTR. It points to temporary space--never + to the space RES->_mp_d used to point to before reallocation. */ + } + + for (i = res_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] & ~op2_ptr[i]; + + res->_mp_size = res_size; + } +#endif + } + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/array_init.c b/rts/gmp/mpz/array_init.c new file mode 100644 index 0000000000..1c22046986 --- /dev/null +++ b/rts/gmp/mpz/array_init.c @@ -0,0 +1,48 @@ +/* mpz_array_init (array, array_size, size_per_elem) -- + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits) +#else +mpz_array_init (arr, arr_size, nbits) + mpz_ptr arr; + mp_size_t arr_size; + mp_size_t nbits; +#endif +{ /* Initialize the ARR_SIZE elements of ARR to zero, each pointing at its own NLIMBS-limb slice of a single shared allocation. */ + register mp_ptr p; + register size_t i; /* NOTE(review): unsigned index compared against the mp_size_t ARR_SIZE below -- confirm ARR_SIZE is never negative */ + mp_size_t nlimbs; + + nlimbs = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; /* NBITS rounded up to whole limbs */ + p = (mp_ptr) (*_mp_allocate_func) (arr_size * nlimbs * BYTES_PER_MP_LIMB); /* NOTE(review): the byte count is an unchecked product of the caller-supplied sizes */ + + for (i = 0; i < arr_size; i++) + { + arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little: the slices below are only NLIMBS limbs apart. */ + arr[i]._mp_size = 0; + arr[i]._mp_d = p + i * nlimbs; + } +} diff --git a/rts/gmp/mpz/bin_ui.c b/rts/gmp/mpz/bin_ui.c new file mode 100644 index 0000000000..a7a6c98218 --- /dev/null +++ b/rts/gmp/mpz/bin_ui.c @@ -0,0 +1,141 @@ +/* mpz_bin_uiui - compute n over k. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* This is a poor implementation. Look at bin_uiui.c for improvement ideas. + In fact consider calling mpz_bin_uiui() when the arguments fit, leaving + the code here only for big n. + + The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol + 1 section 1.2.6 part G. */ + + +/* Divide r in place by the single limb kacc and drop a top limb that became zero. The division is expected to be exact (see ASSERT_NOCARRY). Enhancement: use mpn_divexact_1 when it exists */ +#define DIVIDE() \ + ASSERT (SIZ(r) > 0); \ + ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \ + PTR(r), SIZ(r), kacc)); \ + SIZ(r) -= (PTR(r)[SIZ(r)-1] == 0); + +void +#if __STDC__ +mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k) +#else +mpz_bin_ui (r, n, k) + mpz_ptr r; + mpz_srcptr n; + unsigned long int k; +#endif +{ /* Set R to the binomial coefficient bin(N, K), where N is an mpz and K an unsigned long. */ + mpz_t ni; + mp_limb_t i; + mpz_t nacc; + mp_limb_t kacc; + mp_size_t negate; + + if (mpz_sgn (n) < 0) + { + /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */ + mpz_init (ni); + mpz_neg (ni, n); + mpz_sub_ui (ni, ni, 1L); + negate = (k & 1); /* (-1)^k */ + } + else + { + /* bin(n,k) == 0 if k>n + (no test for this under the n<0 case, since -n+k-1 >= k there) */ + if (mpz_cmp_ui (n, k) < 0) + { + mpz_set_ui (r, 0L); + return; + } + + /* set ni = n-k */ + mpz_init (ni); + mpz_sub_ui (ni, n, k); + negate = 0; + } + + /* Now wanting bin(ni+k,k), with ni non-negative, and "negate" is the sign (0 + for positive, 1 for negative). */ + mpz_set_ui (r, 1L); + + /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. 
In this case it's + whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k + = ni, and new ni of ni+k-ni = k. */ + if (mpz_cmp_ui (ni, k) < 0) + { + unsigned long tmp; + tmp = k; + k = mpz_get_ui (ni); + mpz_set_ui (ni, tmp); + } + + kacc = 1; + mpz_init_set_ui (nacc, 1); + + for (i = 1; i <= k; i++) + { + mp_limb_t k1, k0; + +#if 0 + mp_limb_t nacclow; + int c; + + nacclow = PTR(nacc)[0]; + for (c = 0; (((kacc | nacclow) & 1) == 0); c++) + { + kacc >>= 1; + nacclow >>= 1; + } + mpz_div_2exp (nacc, nacc, c); +#endif + + mpz_add_ui (ni, ni, 1); + mpz_mul (nacc, nacc, ni); + umul_ppmm (k1, k0, kacc, i); + if (k1 != 0) + { + /* Accumulator overflow. Perform bignum step. */ + mpz_mul (r, r, nacc); + mpz_set_ui (nacc, 1); + DIVIDE (); + kacc = i; /* r was divided by the old kacc; the divisor restarts at the current factor */ + } + else + { + /* Save new products in accumulators to keep accumulating. */ + kacc = k0; + } + } + + mpz_mul (r, r, nacc); + DIVIDE (); + SIZ(r) = (SIZ(r) ^ -negate) + negate; /* negates the size when negate is 1, leaves it alone when negate is 0 */ + + mpz_clear (nacc); + mpz_clear (ni); +} diff --git a/rts/gmp/mpz/bin_uiui.c b/rts/gmp/mpz/bin_uiui.c new file mode 100644 index 0000000000..b37541ba54 --- /dev/null +++ b/rts/gmp/mpz/bin_uiui.c @@ -0,0 +1,120 @@ +/* mpz_bin_uiui - compute n over k. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* Avoid reallocs by rounding up any new size */ +#define ROUNDUP_MASK 15 + +/* Grow r, multiply it in place by the limb nacc, divide it in place by the limb kacc, then fix up the size. Enhancement: use mpn_divexact_1 when it exists */ +#define MULDIV() \ + MPZ_REALLOC (r, (SIZ(r)+1)|ROUNDUP_MASK); \ + PTR(r)[SIZ(r)] = mpn_mul_1 (PTR(r), PTR(r), SIZ(r), nacc); \ + ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \ + PTR(r), SIZ(r)+1, kacc)); \ + SIZ(r) += (PTR(r)[SIZ(r)] != 0); + +void +#if __STDC__ +mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) +#else +mpz_bin_uiui (r, n, k) + mpz_ptr r; + unsigned long int n; + unsigned long int k; +#endif +{ /* Set R to the binomial coefficient bin(N, K) for unsigned long N and K. */ + unsigned long int i, j; + mp_limb_t nacc, kacc; + unsigned long int cnt; + + /* bin(n,k) = 0 if k>n. */ + if (n < k) + { + mpz_set_ui (r, 0); + return; + } + + /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ + k = MIN (k, n-k); + + /* bin(n,0) = 1 */ + if (k == 0) + { + mpz_set_ui (r, 1); + return; + } + + j = n - k + 1; + mpz_set_ui (r, j); + + /* Initialize accumulators. */ + nacc = 1; + kacc = 1; + + cnt = 0; + for (i = 2; i <= k; i++) + { + mp_limb_t n1, n0, k1, k0; + + j++; +#if 0 + /* Remove common multiples of 2. This will allow us to accumulate + more in nacc and kacc before we need a bignum step. It would make + sense to cancel factors of 3, 5, etc too, but this would be best + handled by sieving out factors. Alternatively, we could perform a + gcd of the accumulators just as they have overflowed, and keep + accumulating until the gcd doesn't remove a significant factor. */ + while (((nacc | kacc) & 1) == 0) + { + nacc >>= 1; + kacc >>= 1; + } +#else + cnt = ((nacc | kacc) & 1) ^ 1; /* 1 when both accumulators are even, else 0: at most one common factor of 2 is removed per iteration */ + nacc >>= cnt; + kacc >>= cnt; +#endif + /* Accumulate next multiples. */ + umul_ppmm (n1, n0, nacc, j); + umul_ppmm (k1, k0, kacc, i); + if (n1 != 0) + { + /* Accumulator overflow. Perform bignum step. 
*/ + MULDIV (); + nacc = j; + kacc = i; + } + else + { + if (k1 != 0) abort (); /* the k accumulator is not expected to overflow when the n accumulator did not */ + /* Save new products in accumulators to keep accumulating. */ + nacc = n0; + kacc = k0; + } + } + + /* Take care of whatever is left in accumulators. */ + MULDIV (); +} diff --git a/rts/gmp/mpz/cdiv_q.c b/rts/gmp/mpz/cdiv_q.c new file mode 100644 index 0000000000..b15ba8aaa9 --- /dev/null +++ b/rts/gmp/mpz/cdiv_q.c @@ -0,0 +1,51 @@ +/* mpz_cdiv_q -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_cdiv_q (quot, dividend, divisor) + mpz_ptr quot; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t dividend_size = dividend->_mp_size; + mp_size_t divisor_size = divisor->_mp_size; + mpz_t rem; + TMP_DECL (marker); + + TMP_MARK (marker); + + MPZ_TMP_INIT (rem, ABS (divisor_size)); + + mpz_tdiv_qr (quot, rem, dividend, divisor); + + if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0) + mpz_add_ui (quot, quot, 1L); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/cdiv_q_ui.c b/rts/gmp/mpz/cdiv_q_ui.c new file mode 100644 index 0000000000..74f3a90b83 --- /dev/null +++ b/rts/gmp/mpz/cdiv_q_ui.c @@ -0,0 +1,67 @@ +/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. In order to make it + always fit into the return type, the negative of the true remainder is + returned. + +Copyright (C) 1994, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_cdiv_q_ui (quot, dividend, divisor) + mpz_ptr quot; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, + (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size >= 0) + { + mpn_incr_u (quot_ptr, (mp_limb_t) 1); + remainder_limb = divisor - remainder_limb; + } + + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? size : -size; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/cdiv_qr.c b/rts/gmp/mpz/cdiv_qr.c new file mode 100644 index 0000000000..29c7c41a4e --- /dev/null +++ b/rts/gmp/mpz/cdiv_qr.c @@ -0,0 +1,64 @@ +/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_cdiv_qr (quot, rem, dividend, divisor) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t divisor_size = divisor->_mp_size; + mp_size_t xsize; + mpz_t temp_divisor; /* N.B.: lives until function returns! */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* We need the original value of the divisor after the quotient and + remainder have been preliminary calculated. We have to copy it to + temporary space if it's the same variable as either QUOT or REM. */ + if (quot == divisor || rem == divisor) + { + MPZ_TMP_INIT (temp_divisor, ABS (divisor_size)); + mpz_set (temp_divisor, divisor); + divisor = temp_divisor; + } + + xsize = dividend->_mp_size ^ divisor_size;; + mpz_tdiv_qr (quot, rem, dividend, divisor); + + if (xsize >= 0 && rem->_mp_size != 0) + { + mpz_add_ui (quot, quot, 1L); + mpz_sub (rem, rem, divisor); + } + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/cdiv_qr_ui.c b/rts/gmp/mpz/cdiv_qr_ui.c new file mode 100644 index 0000000000..a7873c6e20 --- /dev/null +++ b/rts/gmp/mpz/cdiv_qr_ui.c @@ -0,0 +1,71 @@ +/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. In order to make it + always fit into the return type, the negative of the true remainder is + returned. + +Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_cdiv_qr_ui (quot, rem, dividend, divisor) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, + (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size >= 0) + { + mpn_incr_u (quot_ptr, (mp_limb_t) 1); + remainder_limb = divisor - remainder_limb; + } + + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? 
size : -size; + + rem->_mp_d[0] = remainder_limb; + rem->_mp_size = -(remainder_limb != 0); + + return remainder_limb; +} diff --git a/rts/gmp/mpz/cdiv_r.c b/rts/gmp/mpz/cdiv_r.c new file mode 100644 index 0000000000..e96ce7e677 --- /dev/null +++ b/rts/gmp/mpz/cdiv_r.c @@ -0,0 +1,59 @@ +/* mpz_cdiv_r -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. + +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_cdiv_r (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t divisor_size = divisor->_mp_size; + mpz_t temp_divisor; /* N.B.: lives until function returns! */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* We need the original value of the divisor after the remainder has been + preliminary calculated. We have to copy it to temporary space if it's + the same variable as REM. 
*/ + if (rem == divisor) + { + + MPZ_TMP_INIT (temp_divisor, ABS (divisor_size)); + mpz_set (temp_divisor, divisor); + divisor = temp_divisor; + } + + mpz_tdiv_r (rem, dividend, divisor); + + if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0) + mpz_sub (rem, rem, divisor); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/cdiv_r_ui.c b/rts/gmp/mpz/cdiv_r_ui.c new file mode 100644 index 0000000000..e17e2381c0 --- /dev/null +++ b/rts/gmp/mpz/cdiv_r_ui.c @@ -0,0 +1,57 @@ +/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. In order to make it + always fit into the return type, the negative of the true remainder is + returned. + +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_cdiv_r_ui (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size >= 0) + remainder_limb = divisor - remainder_limb; + + rem->_mp_d[0] = remainder_limb; + rem->_mp_size = -(remainder_limb != 0); + + return remainder_limb; +} diff --git a/rts/gmp/mpz/cdiv_ui.c b/rts/gmp/mpz/cdiv_ui.c new file mode 100644 index 0000000000..63547a78c0 --- /dev/null +++ b/rts/gmp/mpz/cdiv_ui.c @@ -0,0 +1,50 @@ +/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity. The + remainder gets the opposite sign as the denominator. In order to make it + always fit into the return type, the negative of the true remainder is + returned. + +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_cdiv_ui (dividend, divisor) + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size >= 0) + remainder_limb = divisor - remainder_limb; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/clear.c b/rts/gmp/mpz/clear.c new file mode 100644 index 0000000000..5224553f9e --- /dev/null +++ b/rts/gmp/mpz/clear.c @@ -0,0 +1,35 @@ +/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of + an integer. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_clear (mpz_ptr m) +#else +mpz_clear (m) + mpz_ptr m; +#endif +{ + (*_mp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB); +} diff --git a/rts/gmp/mpz/clrbit.c b/rts/gmp/mpz/clrbit.c new file mode 100644 index 0000000000..865d84902f --- /dev/null +++ b/rts/gmp/mpz/clrbit.c @@ -0,0 +1,114 @@ +/* mpz_clrbit -- clear a specified bit. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_clrbit (mpz_ptr d, unsigned long int bit_index) +#else +mpz_clrbit (d, bit_index) + mpz_ptr d; + unsigned long int bit_index; +#endif +{ + mp_size_t dsize = d->_mp_size; + mp_ptr dp = d->_mp_d; + mp_size_t limb_index; + + limb_index = bit_index / BITS_PER_MP_LIMB; + if (dsize >= 0) + { + if (limb_index < dsize) + { + dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB)); + MPN_NORMALIZE (dp, dsize); + d->_mp_size = dsize; + } + else + ; + } + else + { + mp_size_t zero_bound; + + /* Simulate two's complement arithmetic, i.e. simulate + 1. Set OP = ~(OP - 1) [with infinitely many leading ones]. + 2. clear the bit. + 3. 
Set OP = ~OP + 1. */ + + dsize = -dsize; + + /* No upper bound on this loop, we're sure there's a non-zero limb + sooner ot later. */ + for (zero_bound = 0; ; zero_bound++) + if (dp[zero_bound] != 0) + break; + + if (limb_index > zero_bound) + { + if (limb_index < dsize) + dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB); + else + { + /* Ugh. The bit should be cleared outside of the end of the + number. We have to increase the size of the number. */ + if (d->_mp_alloc < limb_index + 1) + { + _mpz_realloc (d, limb_index + 1); + dp = d->_mp_d; + } + MPN_ZERO (dp + dsize, limb_index - dsize); + dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB); + d->_mp_size = -(limb_index + 1); + } + } + else if (limb_index == zero_bound) + { + dp[limb_index] = ((dp[limb_index] - 1) + | ((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1; + if (dp[limb_index] == 0) + { + mp_size_t i; + for (i = limb_index + 1; i < dsize; i++) + { + dp[i] += 1; + if (dp[i] != 0) + goto fin; + } + /* We got carry all way out beyond the end of D. Increase + its size (and allocation if necessary). */ + dsize++; + if (d->_mp_alloc < dsize) + { + _mpz_realloc (d, dsize); + dp = d->_mp_d; + } + dp[i] = 1; + d->_mp_size = -dsize; + fin:; + } + } + else + ; + } +} diff --git a/rts/gmp/mpz/cmp.c b/rts/gmp/mpz/cmp.c new file mode 100644 index 0000000000..60628348e5 --- /dev/null +++ b/rts/gmp/mpz/cmp.c @@ -0,0 +1,75 @@ +/* mpz_cmp(u,v) -- Compare U, V. Return postive, zero, or negative + based on if U > V, U == V, or U < V. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#ifdef BERKELEY_MP +#include "mp.h" +#endif +#include "gmp.h" +#include "gmp-impl.h" + +#ifndef BERKELEY_MP +int +#if __STDC__ +mpz_cmp (mpz_srcptr u, mpz_srcptr v) +#else +mpz_cmp (u, v) + mpz_srcptr u; + mpz_srcptr v; +#endif +#else /* BERKELEY_MP */ +int +#if __STDC__ +mcmp (mpz_srcptr u, mpz_srcptr v) +#else +mcmp (u, v) + mpz_srcptr u; + mpz_srcptr v; +#endif +#endif /* BERKELEY_MP */ +{ + mp_size_t usize = u->_mp_size; + mp_size_t vsize = v->_mp_size; + mp_size_t size; + mp_srcptr up, vp; + int cmp; + + if (usize != vsize) + return usize - vsize; + + if (usize == 0) + return 0; + + size = ABS (usize); + + up = u->_mp_d; + vp = v->_mp_d; + + cmp = mpn_cmp (up, vp, size); + + if (cmp == 0) + return 0; + + if ((cmp < 0) == (usize < 0)) + return 1; + else + return -1; +} diff --git a/rts/gmp/mpz/cmp_si.c b/rts/gmp/mpz/cmp_si.c new file mode 100644 index 0000000000..0c2212fbe9 --- /dev/null +++ b/rts/gmp/mpz/cmp_si.c @@ -0,0 +1,64 @@ +/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V. + Return positive, zero, or negative based on if U > V, U == V, or U < V. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +_mpz_cmp_si (mpz_srcptr u, signed long int v_digit) +#else +_mpz_cmp_si (u, v_digit) + mpz_srcptr u; + signed long int v_digit; +#endif +{ + mp_size_t usize = u->_mp_size; + mp_size_t vsize; + mp_limb_t u_digit; + + vsize = 0; + if (v_digit > 0) + vsize = 1; + else if (v_digit < 0) + { + vsize = -1; + v_digit = -v_digit; + } + + if (usize != vsize) + return usize - vsize; + + if (usize == 0) + return 0; + + u_digit = u->_mp_d[0]; + + if (u_digit == (mp_limb_t) (unsigned long) v_digit) + return 0; + + if (u_digit > (mp_limb_t) (unsigned long) v_digit) + return usize; + else + return -usize; +} diff --git a/rts/gmp/mpz/cmp_ui.c b/rts/gmp/mpz/cmp_ui.c new file mode 100644 index 0000000000..fd84f301c1 --- /dev/null +++ b/rts/gmp/mpz/cmp_ui.c @@ -0,0 +1,53 @@ +/* mpz_cmp_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive, + zero, or negative based on if a > b, a == b, or a < b. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +_mpz_cmp_ui (mpz_srcptr u, unsigned long int v_digit) +#else +_mpz_cmp_ui (u, v_digit) + mpz_srcptr u; + unsigned long int v_digit; +#endif +{ + mp_size_t usize = u->_mp_size; + + if (usize == 0) + return -(v_digit != 0); + + if (usize == 1) + { + mp_limb_t u_digit; + + u_digit = u->_mp_d[0]; + if (u_digit > v_digit) + return 1; + if (u_digit < v_digit) + return -1; + return 0; + } + + return (usize > 0) ? 1 : -1; +} diff --git a/rts/gmp/mpz/cmpabs.c b/rts/gmp/mpz/cmpabs.c new file mode 100644 index 0000000000..037d7a9145 --- /dev/null +++ b/rts/gmp/mpz/cmpabs.c @@ -0,0 +1,57 @@ +/* mpz_cmpabs(u,v) -- Compare U, V. Return postive, zero, or negative + based on if U > V, U == V, or U < V. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_cmpabs (mpz_srcptr u, mpz_srcptr v) +#else +mpz_cmpabs (u, v) + mpz_srcptr u; + mpz_srcptr v; +#endif +{ + mp_size_t usize = u->_mp_size; + mp_size_t vsize = v->_mp_size; + mp_size_t size; + mp_srcptr up, vp; + int cmp; + + usize = ABS (usize); + vsize = ABS (vsize); + + if (usize != vsize) + return usize - vsize; + + if (usize == 0) + return 0; + + up = u->_mp_d; + vp = v->_mp_d; + + cmp = mpn_cmp (up, vp, usize); + + return cmp; +} diff --git a/rts/gmp/mpz/cmpabs_ui.c b/rts/gmp/mpz/cmpabs_ui.c new file mode 100644 index 0000000000..db816b5820 --- /dev/null +++ b/rts/gmp/mpz/cmpabs_ui.c @@ -0,0 +1,56 @@ +/* mpz_cmpabs_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive, + zero, or negative based on if a > b, a == b, or a < b. + +Copyright (C) 1991, 1993, 1994, 1995, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_cmpabs_ui (mpz_srcptr u, unsigned long int v_digit) +#else +mpz_cmpabs_ui (u, v_digit) + mpz_srcptr u; + unsigned long int v_digit; +#endif +{ + mp_size_t usize = u->_mp_size; + + if (usize == 0) + return -(v_digit != 0); + + usize = ABS (usize); + + if (usize == 1) + { + mp_limb_t u_digit; + + u_digit = u->_mp_d[0]; + if (u_digit > v_digit) + return 1; + if (u_digit < v_digit) + return -1; + return 0; + } + + return 1; +} diff --git a/rts/gmp/mpz/com.c b/rts/gmp/mpz/com.c new file mode 100644 index 0000000000..18d6427779 --- /dev/null +++ b/rts/gmp/mpz/com.c @@ -0,0 +1,93 @@ +/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of + SRC to DST. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_com (mpz_ptr dst, mpz_srcptr src) +#else +mpz_com (dst, src) + mpz_ptr dst; + mpz_srcptr src; +#endif +{ + mp_size_t size = src->_mp_size; + mp_srcptr src_ptr; + mp_ptr dst_ptr; + + if (size >= 0) + { + /* As with infinite precision: one's complement, two's complement. 
+ But this can be simplified using the identity -x = ~x + 1. + So we're going to compute (~~x) + 1 = x + 1! */ + + if (dst->_mp_alloc < size + 1) + _mpz_realloc (dst, size + 1); + + src_ptr = src->_mp_d; + dst_ptr = dst->_mp_d; + + if (size == 0) + { + /* Special case, as mpn_add wants the first arg's size >= the + second arg's size. */ + dst_ptr[0] = 1; + dst->_mp_size = -1; + return; + } + + { + mp_limb_t cy; + + cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1); + if (cy) + { + dst_ptr[size] = cy; + size++; + } + } + + /* Store a negative size, to indicate ones-extension. */ + dst->_mp_size = -size; + } + else + { + /* As with infinite precision: two's complement, then one's complement. + But that can be simplified using the identity -x = ~(x - 1). + So we're going to compute ~~(x - 1) = x - 1! */ + size = -size; + + if (dst->_mp_alloc < size) + _mpz_realloc (dst, size); + + src_ptr = src->_mp_d; + dst_ptr = dst->_mp_d; + + mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1); + size -= dst_ptr[size - 1] == 0; + + /* Store a positive size, to indicate zero-extension. */ + dst->_mp_size = size; + } +} diff --git a/rts/gmp/mpz/divexact.c b/rts/gmp/mpz/divexact.c new file mode 100644 index 0000000000..c2970454fd --- /dev/null +++ b/rts/gmp/mpz/divexact.c @@ -0,0 +1,125 @@ +/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0. + +Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu) + + Funding for this work has been partially provided by Conselho Nacional + de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant + 301314194-2, and was done while I was a visiting reseacher in the Instituto + de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS). + + References: + T. Jebelean, An algorithm for exact division, Journal of Symbolic + Computation, v. 15, 1993, pp. 169-180. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den) +#else +mpz_divexact (quot, num, den) + mpz_ptr quot; + mpz_srcptr num; + mpz_srcptr den; +#endif +{ + mp_ptr qp, tp; + mp_size_t qsize, tsize; + mp_srcptr np, dp; + mp_size_t nsize, dsize; + TMP_DECL (marker); + + nsize = ABS (num->_mp_size); + dsize = ABS (den->_mp_size); + + qsize = nsize - dsize + 1; + if (quot->_mp_alloc < qsize) + _mpz_realloc (quot, qsize); + + np = num->_mp_d; + dp = den->_mp_d; + qp = quot->_mp_d; + + if (nsize == 0) + { + if (dsize == 0) + DIVIDE_BY_ZERO; + quot->_mp_size = 0; + return; + } + + if (dsize <= 1) + { + if (dsize == 1) + { + mpn_divmod_1 (qp, np, nsize, dp[0]); + qsize -= qp[qsize - 1] == 0; + quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize; + return; + } + + /* Generate divide-by-zero error since dsize == 0. */ + DIVIDE_BY_ZERO; + } + + TMP_MARK (marker); + + /* QUOT <-- NUM/2^r, T <-- DEN/2^r where = r number of twos in DEN. 
*/ + while (dp[0] == 0) + np += 1, nsize -= 1, dp += 1, dsize -= 1; + tsize = MIN (qsize, dsize); + if ((dp[0] & 1) != 0) + { + if (quot == den) /* QUOT and DEN overlap. */ + { + tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + MPN_COPY (tp, dp, tsize); + } + else + tp = (mp_ptr) dp; + if (qp != np) + MPN_COPY_INCR (qp, np, qsize); + } + else + { + unsigned int r; + tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB); + count_trailing_zeros (r, dp[0]); + mpn_rshift (tp, dp, tsize, r); + if (dsize > tsize) + tp[tsize - 1] |= dp[tsize] << (BITS_PER_MP_LIMB - r); + mpn_rshift (qp, np, qsize, r); + if (nsize > qsize) + qp[qsize - 1] |= np[qsize] << (BITS_PER_MP_LIMB - r); + } + + /* Now QUOT <-- QUOT/T. */ + mpn_bdivmod (qp, qp, qsize, tp, tsize, qsize * BITS_PER_MP_LIMB); + MPN_NORMALIZE (qp, qsize); + + quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize; + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/dump.c b/rts/gmp/mpz/dump.c new file mode 100644 index 0000000000..dc318ac8cf --- /dev/null +++ b/rts/gmp/mpz/dump.c @@ -0,0 +1,44 @@ +/* mpz_dump - Dump an integer to stdout. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO + CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS + FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_dump (mpz_srcptr u) +#else +mpz_dump (u) + mpz_srcptr u; +#endif +{ + char *str; + + str = mpz_get_str (0, 10, u); + printf ("%s\n", str); + (*_mp_free_func) (str, 0);/* ??? broken alloc interface, pass what size ??? */ +} diff --git a/rts/gmp/mpz/fac_ui.c b/rts/gmp/mpz/fac_ui.c new file mode 100644 index 0000000000..85f40f271c --- /dev/null +++ b/rts/gmp/mpz/fac_ui.c @@ -0,0 +1,157 @@ +/* mpz_fac_ui(result, n) -- Set RESULT to N!. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#ifdef DBG +#include <stdio.h> +#endif + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpz_fac_ui (mpz_ptr result, unsigned long int n) +#else +mpz_fac_ui (result, n) + mpz_ptr result; + unsigned long int n; +#endif +{ +#if SIMPLE_FAC + + /* Be silly. Just multiply the numbers in ascending order. 
O(n**2). */ + + unsigned long int k; + + mpz_set_ui (result, 1L); + + for (k = 2; k <= n; k++) + mpz_mul_ui (result, result, k); +#else + + /* Be smarter. Multiply groups of numbers in ascending order until the + product doesn't fit in a limb. Multiply these partial product in a + balanced binary tree fashion, to make the operand have as equal sizes + as possible. When the operands have about the same size, mpn_mul + becomes faster. */ + + unsigned long int p, k; + mp_limb_t p1, p0; + + /* Stack of partial products, used to make the computation balanced + (i.e. make the sizes of the multiplication operands equal). The + topmost position of MP_STACK will contain a one-limb partial product, + the second topmost will contain a two-limb partial product, and so + on. MP_STACK[0] will contain a partial product with 2**t limbs. + To compute n! MP_STACK needs to be less than + log(n)**2/log(BITS_PER_MP_LIMB), so 30 is surely enough. */ +#define MP_STACK_SIZE 30 + mpz_t mp_stack[MP_STACK_SIZE]; + + /* TOP is an index into MP_STACK, giving the topmost element. + TOP_LIMIT_SO_FAR is the largets value it has taken so far. */ + int top, top_limit_so_far; + + /* Count of the total number of limbs put on MP_STACK so far. This + variable plays an essential role in making the compututation balanced. + See below. */ + unsigned int tree_cnt; + + top = top_limit_so_far = -1; + tree_cnt = 0; + p = 1; + for (k = 2; k <= n; k++) + { + /* Multiply the partial product in P with K. */ + umul_ppmm (p1, p0, (mp_limb_t) p, (mp_limb_t) k); + + /* Did we get overflow into the high limb, i.e. is the partial + product now more than one limb? */ + if (p1 != 0) + { + tree_cnt++; + + if (tree_cnt % 2 == 0) + { + mp_size_t i; + + /* TREE_CNT is even (i.e. we have generated an even number of + one-limb partial products), which means that we have a + single-limb product on the top of MP_STACK. 
*/ + + mpz_mul_ui (mp_stack[top], mp_stack[top], p); + + /* If TREE_CNT is divisable by 4, 8,..., we have two + similar-sized partial products with 2, 4,... limbs at + the topmost two positions of MP_STACK. Multiply them + to form a new partial product with 4, 8,... limbs. */ + for (i = 4; (tree_cnt & (i - 1)) == 0; i <<= 1) + { + mpz_mul (mp_stack[top - 1], + mp_stack[top], mp_stack[top - 1]); + top--; + } + } + else + { + /* Put the single-limb partial product in P on the stack. + (The next time we get a single-limb product, we will + multiply the two together.) */ + top++; + if (top > top_limit_so_far) + { + if (top > MP_STACK_SIZE) + abort(); + /* The stack is now bigger than ever, initialize the top + element. */ + mpz_init_set_ui (mp_stack[top], p); + top_limit_so_far++; + } + else + mpz_set_ui (mp_stack[top], p); + } + + /* We ignored the last result from umul_ppmm. Put K in P as the + first component of the next single-limb partial product. */ + p = k; + } + else + /* We didn't get overflow in umul_ppmm. Put p0 in P and try + with one more value of K. */ + p = p0; /* bogus if long != mp_limb_t */ + } + + /* We have partial products in mp_stack[0..top], in descending order. + We also have a small partial product in p. + Their product is the final result. */ + if (top < 0) + mpz_set_ui (result, p); + else + mpz_mul_ui (result, mp_stack[top--], p); + while (top >= 0) + mpz_mul (result, result, mp_stack[top--]); + + /* Free the storage allocated for MP_STACK. */ + for (top = top_limit_so_far; top >= 0; top--) + mpz_clear (mp_stack[top]); +#endif +} diff --git a/rts/gmp/mpz/fdiv_q.c b/rts/gmp/mpz/fdiv_q.c new file mode 100644 index 0000000000..9d75ca33d2 --- /dev/null +++ b/rts/gmp/mpz/fdiv_q.c @@ -0,0 +1,51 @@ +/* mpz_fdiv_q -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_fdiv_q (quot, dividend, divisor) + mpz_ptr quot; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t dividend_size = dividend->_mp_size; + mp_size_t divisor_size = divisor->_mp_size; + mpz_t rem; + TMP_DECL (marker); + + TMP_MARK (marker); + + MPZ_TMP_INIT (rem, ABS (divisor_size)); + + mpz_tdiv_qr (quot, rem, dividend, divisor); + + if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0) + mpz_sub_ui (quot, quot, 1L); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/fdiv_q_2exp.c b/rts/gmp/mpz/fdiv_q_2exp.c new file mode 100644 index 0000000000..8e02180ecc --- /dev/null +++ b/rts/gmp/mpz/fdiv_q_2exp.c @@ -0,0 +1,104 @@ +/* mpz_fdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient + towards -infinity. + +Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt) +#else +mpz_fdiv_q_2exp (w, u, cnt) + mpz_ptr w; + mpz_srcptr u; + unsigned long int cnt; +#endif +{ + mp_size_t usize = u->_mp_size; + mp_size_t wsize; + mp_size_t abs_usize = ABS (usize); + mp_size_t limb_cnt; + mp_ptr wp; + mp_limb_t round = 0; + + limb_cnt = cnt / BITS_PER_MP_LIMB; + wsize = abs_usize - limb_cnt; + if (wsize <= 0) + { + wp = w->_mp_d; + wsize = 0; + /* Set ROUND since we know we skip some non-zero words in this case. + Well, if U is zero, we don't, but then this will be taken care of + below, since rounding only really takes place for negative U. */ + round = 1; + wp[0] = 1; + w->_mp_size = -(usize < 0); + return; + } + else + { + mp_size_t i; + mp_ptr up; + + /* Make sure there is enough space. We make an extra limb + here to account for possible rounding at the end. */ + if (w->_mp_alloc < wsize + 1) + _mpz_realloc (w, wsize + 1); + + wp = w->_mp_d; + up = u->_mp_d; + + /* Set ROUND if we are about skip some non-zero limbs. 
*/ + for (i = 0; i < limb_cnt && round == 0; i++) + round = up[i]; + + cnt %= BITS_PER_MP_LIMB; + if (cnt != 0) + { + round |= mpn_rshift (wp, up + limb_cnt, wsize, cnt); + wsize -= wp[wsize - 1] == 0; + } + else + { + MPN_COPY_INCR (wp, up + limb_cnt, wsize); + } + } + + if (usize < 0 && round != 0) + { + mp_limb_t cy; + if (wsize != 0) + { + cy = mpn_add_1 (wp, wp, wsize, (mp_limb_t) 1); + wp[wsize] = cy; + wsize += cy; + } + else + { + /* We shifted something negative to zero. The result is -1. */ + wp[0] = 1; + wsize = 1; + } + } + w->_mp_size = usize >= 0 ? wsize : -wsize; +} diff --git a/rts/gmp/mpz/fdiv_q_ui.c b/rts/gmp/mpz/fdiv_q_ui.c new file mode 100644 index 0000000000..55d2498693 --- /dev/null +++ b/rts/gmp/mpz/fdiv_q_ui.c @@ -0,0 +1,65 @@ +/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_fdiv_q_ui (quot, dividend, divisor) + mpz_ptr quot; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, + (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size < 0) + { + mpn_incr_u (quot_ptr, (mp_limb_t) 1); + remainder_limb = divisor - remainder_limb; + } + + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? size : -size; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/fdiv_qr.c b/rts/gmp/mpz/fdiv_qr.c new file mode 100644 index 0000000000..06ce50607b --- /dev/null +++ b/rts/gmp/mpz/fdiv_qr.c @@ -0,0 +1,64 @@ +/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_fdiv_qr (quot, rem, dividend, divisor) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t divisor_size = divisor->_mp_size; + mp_size_t xsize; + mpz_t temp_divisor; /* N.B.: lives until function returns! */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* We need the original value of the divisor after the quotient and + remainder have been preliminary calculated. We have to copy it to + temporary space if it's the same variable as either QUOT or REM. */ + if (quot == divisor || rem == divisor) + { + MPZ_TMP_INIT (temp_divisor, ABS (divisor_size)); + mpz_set (temp_divisor, divisor); + divisor = temp_divisor; + } + + xsize = dividend->_mp_size ^ divisor_size;; + mpz_tdiv_qr (quot, rem, dividend, divisor); + + if (xsize < 0 && rem->_mp_size != 0) + { + mpz_sub_ui (quot, quot, 1L); + mpz_add (rem, rem, divisor); + } + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/fdiv_qr_ui.c b/rts/gmp/mpz/fdiv_qr_ui.c new file mode 100644 index 0000000000..600c0dacfc --- /dev/null +++ b/rts/gmp/mpz/fdiv_qr_ui.c @@ -0,0 +1,69 @@ +/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_fdiv_qr_ui (quot, rem, dividend, divisor) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, + (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size < 0) + { + mpn_incr_u (quot_ptr, (mp_limb_t) 1); + remainder_limb = divisor - remainder_limb; + } + + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? size : -size; + + rem->_mp_d[0] = remainder_limb; + rem->_mp_size = remainder_limb != 0; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/fdiv_r.c b/rts/gmp/mpz/fdiv_r.c new file mode 100644 index 0000000000..a3652838d2 --- /dev/null +++ b/rts/gmp/mpz/fdiv_r.c @@ -0,0 +1,58 @@ +/* mpz_fdiv_r -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_fdiv_r (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t divisor_size = divisor->_mp_size; + mpz_t temp_divisor; /* N.B.: lives until function returns! */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* We need the original value of the divisor after the remainder has been + preliminary calculated. We have to copy it to temporary space if it's + the same variable as REM. */ + if (rem == divisor) + { + MPZ_TMP_INIT (temp_divisor, ABS (divisor_size)); + mpz_set (temp_divisor, divisor); + divisor = temp_divisor; + } + + mpz_tdiv_r (rem, dividend, divisor); + + if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0) + mpz_add (rem, rem, divisor); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/fdiv_r_2exp.c b/rts/gmp/mpz/fdiv_r_2exp.c new file mode 100644 index 0000000000..081ce19203 --- /dev/null +++ b/rts/gmp/mpz/fdiv_r_2exp.c @@ -0,0 +1,156 @@ +/* mpz_fdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder. 
+ +Copyright (C) 1991, 1993, 1994, 1995, 1998, 1999, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_fdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt) +#else +mpz_fdiv_r_2exp (res, in, cnt) + mpz_ptr res; + mpz_srcptr in; + unsigned long int cnt; +#endif +{ + mp_size_t in_size = ABS (in->_mp_size); + mp_size_t res_size; + mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB; + mp_srcptr in_ptr = in->_mp_d; + + if (in_size > limb_cnt) + { + /* The input operand is (probably) greater than 2**CNT. */ + mp_limb_t x; + + x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1); + if (x != 0) + { + res_size = limb_cnt + 1; + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + res->_mp_d[limb_cnt] = x; + } + else + { + res_size = limb_cnt; + MPN_NORMALIZE (in_ptr, res_size); + + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + limb_cnt = res_size; + } + } + else + { + /* The input operand is smaller than 2**CNT. We perform a no-op, + apart from that we might need to copy IN to RES, and may need + to round the result. 
*/ + res_size = in_size; + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + limb_cnt = res_size; + } + + if (res != in) + MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt); + in_size = in->_mp_size; + res->_mp_size = res_size; + if (in_size < 0 && res_size != 0) + { + /* Result should be 2^CNT - RES */ + mpz_t tmp; + TMP_DECL (marker); + TMP_MARK (marker); + MPZ_TMP_INIT (tmp, cnt/BITS_PER_MP_LIMB + 2); + mpz_set_ui (tmp, 1L); + mpz_mul_2exp (tmp, tmp, cnt); + mpz_sub (res, tmp, res); + TMP_FREE (marker); + } +} + +/* This is an alternative ending of the above function using just low-level + functions. Tested, but perhaps excessive? */ +#if 0 + if (in->_mp_size < 0 && res_size != 0) + { + /* Result should be 2^CNT - RES */ + + mp_ptr rp; + + limb_cnt = cnt / BITS_PER_MP_LIMB; + + if (res->_mp_alloc <= limb_cnt) + _mpz_realloc (res, limb_cnt + 1); + rp = PTR(res); + if (res_size > limb_cnt) + { + mpn_nz_neg (rp, rp, res_size); + rp[limb_cnt] &= ~(~(mp_limb_t) 0 << cnt % BITS_PER_MP_LIMB); + MPN_NORMALIZE_NOT_ZERO (rp, res_size); + } + else + { + mp_size_t i; + mpn_nz_neg (rp, rp, res_size); + for (i = res_size; i < limb_cnt; i++) + rp[i] = ~ (mp_limb_t) 0; + res_size = limb_cnt; + if (cnt % BITS_PER_MP_LIMB != 0) + { + rp[res_size] = ((mp_limb_t) 1 << (cnt % BITS_PER_MP_LIMB)) - 1; + res_size++; + } + else + MPN_NORMALIZE_NOT_ZERO (rp, res_size); + } + } + SIZ(res) = res_size; +} + +static void +mpn_nz_neg (rp, sp, n) + mp_ptr rp, sp; + mp_size_t n; +{ + mp_size_t i; + mp_limb_t x; + + x = sp[0]; + rp[0] = -x; + for (i = 1; x == 0; i++) + { + x = sp[i]; + rp[i] = -x; + } + + for (; i < n; i++) + { + rp[i] = ~sp[i]; + } +} +#endif diff --git a/rts/gmp/mpz/fdiv_r_ui.c b/rts/gmp/mpz/fdiv_r_ui.c new file mode 100644 index 0000000000..dd5c743d27 --- /dev/null +++ b/rts/gmp/mpz/fdiv_r_ui.c @@ -0,0 +1,55 @@ +/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. 
+ +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_fdiv_r_ui (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size < 0) + remainder_limb = divisor - remainder_limb; + + rem->_mp_d[0] = remainder_limb; + rem->_mp_size = remainder_limb != 0; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/fdiv_ui.c b/rts/gmp/mpz/fdiv_ui.c new file mode 100644 index 0000000000..f937b5f6d0 --- /dev/null +++ b/rts/gmp/mpz/fdiv_ui.c @@ -0,0 +1,48 @@ +/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity. + The remainder gets the same sign as the denominator. + +Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_fdiv_ui (dividend, divisor) + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + if (remainder_limb != 0 && dividend_size < 0) + remainder_limb = divisor - remainder_limb; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/fib_ui.c b/rts/gmp/mpz/fib_ui.c new file mode 100644 index 0000000000..4bebb80d94 --- /dev/null +++ b/rts/gmp/mpz/fib_ui.c @@ -0,0 +1,165 @@ +/* mpz_fib_ui(result, n) -- Set RESULT to the Nth Fibonacci number. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* This is fast, but could be made somewhat faster and neater. + The timing is somewhat fluctuating for even/odd sizes because + of the extra hair used to save variables and operations. Here + are a few things one might want to address: + 1. Avoid using 4 intermediate variables in mpz_fib_bigcase. + 2. Call mpn functions directly. Straightforward for these functions. + 3. Merge the three functions into one. + +Said by Kevin: + Consider using the Lucas numbers L[n] as an auxiliary sequence, making + it possible to do the "doubling" operation in mpz_fib_bigcase with two + squares rather than two multiplies. The formulas are a little more + complicated, something like the following (untested). + + F[2n] = ((F[n]+L[n])^2 - 6*F[n]^2 - 4*(-1)^n) / 2 + L[2n] = 5*F[n]^2 + 2*(-1)^n + + F[2n+1] = (F[2n] + L[2n]) / 2 + L[2n+1] = (5*F[2n] + L[2n]) / 2 + + The Lucas number that comes for free here could even be returned. + + Maybe there's formulas with two squares using just F[n], but I don't + know of any. +*/ + +/* Determine the needed storage for Fib(n). 
*/ +#define FIB_SIZE(n) (((mp_size_t) ((n)*0.695)) / BITS_PER_MP_LIMB + 2) + +static void mpz_fib_bigcase _PROTO ((mpz_t, mpz_t, unsigned long int)); +static void mpz_fib_basecase _PROTO ((mpz_t, mpz_t, unsigned long int)); + + +#ifndef FIB_THRESHOLD +#define FIB_THRESHOLD 60 +#endif + +void +#if __STDC__ +mpz_fib_ui (mpz_t r, unsigned long int n) +#else +mpz_fib_ui (r, n) + mpz_t r; + unsigned long int n; +#endif +{ + if (n == 0) + mpz_set_ui (r, 0); + else + { + mpz_t t1; + mpz_init (t1); + if (n < FIB_THRESHOLD) + mpz_fib_basecase (t1, r, n); + else + mpz_fib_bigcase (t1, r, n); + mpz_clear (t1); + } +} + +static void +#if __STDC__ +mpz_fib_basecase (mpz_t t1, mpz_t t2, unsigned long int n) +#else +mpz_fib_basecase (t1, t2, n) + mpz_t t1; + mpz_t t2; + unsigned long int n; +#endif +{ + unsigned long int m, i; + + mpz_set_ui (t1, 0); + mpz_set_ui (t2, 1); + m = n/2; + for (i = 0; i < m; i++) + { + mpz_add (t1, t1, t2); + mpz_add (t2, t1, t2); + } + if ((n & 1) == 0) + { + mpz_sub (t1, t2, t1); + mpz_sub (t2, t2, t1); /* trick: recover t1 value just overwritten */ + } +} + +static void +#if __STDC__ +mpz_fib_bigcase (mpz_t t1, mpz_t t2, unsigned long int n) +#else +mpz_fib_bigcase (t1, t2, n) + mpz_t t1; + mpz_t t2; + unsigned long int n; +#endif +{ + unsigned long int n2; + int ni, i; + mpz_t x1, x2, u1, u2; + + ni = 0; + for (n2 = n; n2 >= FIB_THRESHOLD; n2 /= 2) + ni++; + + mpz_fib_basecase (t1, t2, n2); + + mpz_init (x1); + mpz_init (x2); + mpz_init (u1); + mpz_init (u2); + + for (i = ni - 1; i >= 0; i--) + { + mpz_mul_2exp (x1, t1, 1); + mpz_mul_2exp (x2, t2, 1); + + mpz_add (x1, x1, t2); + mpz_sub (x2, x2, t1); + + mpz_mul (u1, t2, x1); + mpz_mul (u2, t1, x2); + + if (((n >> i) & 1) == 0) + { + mpz_sub (t1, u1, u2); + mpz_set (t2, u1); + } + else + { + mpz_set (t1, u1); + mpz_mul_2exp (t2, u1, 1); + mpz_sub (t2, t2, u2); + } + } + + mpz_clear (x1); + mpz_clear (x2); + mpz_clear (u1); + mpz_clear (u2); +} diff --git a/rts/gmp/mpz/fits_sint_p.c 
b/rts/gmp/mpz/fits_sint_p.c
new file mode 100644
index 0000000000..82e32a24d5
--- /dev/null
+++ b/rts/gmp/mpz/fits_sint_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as a signed int,
+   zero otherwise.  */
+int
+#if __STDC__
+mpz_fits_sint_p (mpz_srcptr src)
+#else
+mpz_fits_sint_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison.  */
+  if (size == 0)
+    return 1;
+
+  mpl = PTR(src)[0];
+  if (size > 0)
+    {
+      if (size > 1)
+	return 0;
+      /* Positive: magnitude must be below 2^(bits in int - 1).  */
+      return mpl < ~((~(unsigned int) 0) >> 1);
+    }
+  else
+    {
+      if (size < -1)
+	return 0;
+      /* Negative: magnitude may also equal 2^(bits in int - 1).  */
+      return mpl <= ~((~(unsigned int) 0) >> 1);
+    }
+}
diff --git a/rts/gmp/mpz/fits_slong_p.c b/rts/gmp/mpz/fits_slong_p.c
new file mode 100644
index 0000000000..e0669b5aaa
--- /dev/null
+++ b/rts/gmp/mpz/fits_slong_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as a signed long
+   int, zero otherwise.  */
+int
+#if __STDC__
+mpz_fits_slong_p (mpz_srcptr src)
+#else
+mpz_fits_slong_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison.  */
+  if (size == 0)
+    return 1;
+
+  mpl = PTR(src)[0];
+  if (size > 0)
+    {
+      if (size > 1)
+	return 0;
+      /* Positive: magnitude must be below 2^(bits in long - 1).  */
+      return mpl < ~((~(unsigned long int) 0) >> 1);
+    }
+  else
+    {
+      if (size < -1)
+	return 0;
+      /* Negative: magnitude may also equal 2^(bits in long - 1).  */
+      return mpl <= ~((~(unsigned long int) 0) >> 1);
+    }
+}
diff --git a/rts/gmp/mpz/fits_sshort_p.c b/rts/gmp/mpz/fits_sshort_p.c
new file mode 100644
index 0000000000..5b8e31afae
--- /dev/null
+++ b/rts/gmp/mpz/fits_sshort_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as a signed short
+   int, zero otherwise.  */
+int
+#if __STDC__
+mpz_fits_sshort_p (mpz_srcptr src)
+#else
+mpz_fits_sshort_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison.  */
+  if (size == 0)
+    return 1;
+
+  mpl = PTR(src)[0];
+  if (size > 0)
+    {
+      if (size > 1)
+	return 0;
+      /* Positive: magnitude at most the largest short.  */
+      return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1);
+    }
+  else
+    {
+      if (size < -1)
+	return 0;
+      /* Negative: magnitude may be one larger than the largest short.  */
+      return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1) + 1;
+    }
+}
diff --git a/rts/gmp/mpz/fits_uint_p.c b/rts/gmp/mpz/fits_uint_p.c
new file mode 100644
index 0000000000..72f62fa723
--- /dev/null
+++ b/rts/gmp/mpz/fits_uint_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB.
If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as an unsigned
+   int, zero otherwise.  Negative values never fit.  */
+int
+#if __STDC__
+mpz_fits_uint_p (mpz_srcptr src)
+#else
+mpz_fits_uint_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison.  */
+  if (size == 0)
+    return 1;
+
+  if (size < 0 || size > 1)
+    return 0;
+
+  mpl = PTR(src)[0];
+  return mpl <= (~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ulong_p.c b/rts/gmp/mpz/fits_ulong_p.c
new file mode 100644
index 0000000000..92eb42e86e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ulong_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as an unsigned
+   long int, zero otherwise.  Negative values never fit.  */
+int
+#if __STDC__
+mpz_fits_ulong_p (mpz_srcptr src)
+#else
+mpz_fits_ulong_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison (this
+     matters whenever a limb is wider than an unsigned long).  */
+  if (size == 0)
+    return 1;
+
+  if (size < 0 || size > 1)
+    return 0;
+
+  mpl = PTR(src)[0];
+  return mpl <= (~(unsigned long int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ushort_p.c b/rts/gmp/mpz/fits_ushort_p.c
new file mode 100644
index 0000000000..bde0edae6e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ushort_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return non-zero if the value of SRC can be represented as an unsigned
+   short int, zero otherwise.  Negative values never fit.  */
+int
+#if __STDC__
+mpz_fits_ushort_p (mpz_srcptr src)
+#else
+mpz_fits_ushort_p (src)
+     mpz_srcptr src;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t mpl;
+
+  size = SIZ(src);
+
+  /* Zero always fits.  It has to be decided from the size field alone:
+     a zero value has no significant limbs, so limb 0 may hold stale or
+     never-written data and must not take part in the comparison.  */
+  if (size == 0)
+    return 1;
+
+  if (size < 0 || size > 1)
+    return 0;
+
+  mpl = PTR(src)[0];
+  return mpl <= ((unsigned short int) ~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/gcd.c b/rts/gmp/mpz/gcd.c
new file mode 100644
index 0000000000..0d950dd609
--- /dev/null
+++ b/rts/gmp/mpz/gcd.c
@@ -0,0 +1,180 @@
+/* mpz/gcd.c: Calculate the greatest common divisor of two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+/* Set G to the greatest common divisor of U and V.  Only the magnitudes of
+   the operands are used, so the result is never negative.  The same body
+   is compiled either as mpz_gcd (g, u, v) or, when BERKELEY_MP is defined,
+   as gcd (u, v, g).  */
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_gcd (g, u, v)
+     mpz_ptr g;
+     mpz_srcptr u;
+     mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
+#else
+gcd (u, v, g)
+     mpz_ptr g;
+     mpz_srcptr u;
+     mpz_srcptr v;
+#endif
+#endif /* BERKELEY_MP */
+
+{
+  unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
+  mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
+  mp_ptr tp;
+  mp_ptr up = u->_mp_d;
+  mp_size_t usize = ABS (u->_mp_size);
+  mp_ptr vp = v->_mp_d;
+  mp_size_t vsize = ABS (v->_mp_size);
+  mp_size_t gsize;
+  TMP_DECL (marker);
+
+  /* GCD(0, V) == V. */
+  if (usize == 0)
+    {
+      g->_mp_size = vsize;
+      /* G already holds the limbs when it aliases V.  */
+      if (g == v)
+	return;
+      if (g->_mp_alloc < vsize)
+	_mpz_realloc (g, vsize);
+      MPN_COPY (g->_mp_d, vp, vsize);
+      return;
+    }
+
+  /* GCD(U, 0) == U. */
+  if (vsize == 0)
+    {
+      g->_mp_size = usize;
+      /* G already holds the limbs when it aliases U.  */
+      if (g == u)
+	return;
+      if (g->_mp_alloc < usize)
+	_mpz_realloc (g, usize);
+      MPN_COPY (g->_mp_d, up, usize);
+      return;
+    }
+
+  /* Single-limb operand: the result fits one limb, use mpn_gcd_1.  */
+  if (usize == 1)
+    {
+      g->_mp_size = 1;
+      g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+      return;
+    }
+
+  if (vsize == 1)
+    {
+      g->_mp_size = 1;
+      g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+      return;
+    }
+
+  /* Temporary allocations start here; the early returns above happen
+     before the mark, so they need no TMP_FREE.  */
+  TMP_MARK (marker);
+
+  /* Eliminate low zero bits from U and V and move to temporary storage. */
+  while (*up == 0)
+    up++;
+  u_zero_limbs = up - u->_mp_d;
+  usize -= u_zero_limbs;
+  count_trailing_zeros (u_zero_bits, *up);
+  tp = up;
+  up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);
+  if (u_zero_bits != 0)
+    {
+      mpn_rshift (up, tp, usize, u_zero_bits);
+      /* The shift may have emptied the top limb.  */
+      usize -= up[usize - 1] == 0;
+    }
+  else
+    MPN_COPY (up, tp, usize);
+
+  while (*vp == 0)
+    vp++;
+  v_zero_limbs = vp - v->_mp_d;
+  vsize -= v_zero_limbs;
+  count_trailing_zeros (v_zero_bits, *vp);
+  tp = vp;
+  vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);
+  if (v_zero_bits != 0)
+    {
+      mpn_rshift (vp, tp, vsize, v_zero_bits);
+      /* The shift may have emptied the top limb.  */
+      vsize -= vp[vsize - 1] == 0;
+    }
+  else
+    MPN_COPY (vp, tp, vsize);
+
+  /* The power of two in the result is the smaller of the two operands'
+     trailing-zero counts, compared limbs first and then bits.  */
+  if (u_zero_limbs > v_zero_limbs)
+    {
+      g_zero_limbs = v_zero_limbs;
+      g_zero_bits = v_zero_bits;
+    }
+  else if (u_zero_limbs < v_zero_limbs)
+    {
+      g_zero_limbs = u_zero_limbs;
+      g_zero_bits = u_zero_bits;
+    }
+  else /* Equal. */
+    {
+      g_zero_limbs = u_zero_limbs;
+      g_zero_bits = MIN (u_zero_bits, v_zero_bits);
+    }
+
+  /* Call mpn_gcd. The 2nd argument must not have more bits than the 1st. */
+  vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
+    ? mpn_gcd (vp, vp, vsize, up, usize)
+    : mpn_gcd (vp, up, usize, vp, vsize);
+
+  /* Here G <-- V << (g_zero_limbs*BITS_PER_MP_LIMB + g_zero_bits). */
+  gsize = vsize + g_zero_limbs;
+  if (g_zero_bits != 0)
+    {
+      mp_limb_t cy_limb;
+      /* One more limb is needed if the shift pushes bits out of the top.  */
+      gsize += (vp[vsize - 1] >> (BITS_PER_MP_LIMB - g_zero_bits)) != 0;
+      if (g->_mp_alloc < gsize)
+	_mpz_realloc (g, gsize);
+      MPN_ZERO (g->_mp_d, g_zero_limbs);
+
+      tp = g->_mp_d + g_zero_limbs;
+      cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
+      if (cy_limb != 0)
+	tp[vsize] = cy_limb;
+    }
+  else
+    {
+      if (g->_mp_alloc < gsize)
+	_mpz_realloc (g, gsize);
+      MPN_ZERO (g->_mp_d, g_zero_limbs);
+      MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+    }
+
+  g->_mp_size = gsize;
+  TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/gcd_ui.c b/rts/gmp/mpz/gcd_ui.c
new file mode 100644
index 0000000000..f3bec58829
--- /dev/null
+++ b/rts/gmp/mpz/gcd_ui.c
@@ -0,0 +1,65 @@
+/* mpz_gcd_ui -- Calculate the greatest common divisor of two integers.
+
+Copyright (C) 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compute the greatest common divisor of |U| and V.  If W is not NULL the
+   result is stored there as well.  The return value is the gcd when it
+   fits in a single limb; for gcd(U,0) with a multi-limb U it is 0, since
+   the true result |U| cannot be returned as an unsigned long.  */
+unsigned long int
+#if __STDC__
+mpz_gcd_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_gcd_ui (w, u, v)
+     mpz_ptr w;
+     mpz_srcptr u;
+     unsigned long int v;
+#endif
+{
+  mp_size_t size;
+  mp_limb_t res;
+
+  size = ABS (u->_mp_size);
+
+  if (size == 0)
+    res = v;			/* gcd(0,v) = v */
+  else if (v == 0)
+    {
+      /* gcd(u,0) = |u|, which may span several limbs.  W is optional, so
+	 every access to it has to stay inside the NULL check.  */
+      if (w != NULL)
+	{
+	  if (u != w)
+	    {
+	      if (w->_mp_alloc < size)
+		_mpz_realloc (w, size);
+
+	      MPN_COPY (w->_mp_d, u->_mp_d, size);
+	    }
+	  w->_mp_size = size;
+	}
+      /* We can't return any useful result for gcd(big,0). */
+      return size > 1 ? 0 : u->_mp_d[0];
+    }
+  else
+    res = mpn_gcd_1 (u->_mp_d, size, (mp_limb_t) v);
+
+  if (w != NULL)
+    {
+      w->_mp_d[0] = res;
+      /* gcd(0,0) is 0 and must be stored with size 0, not as a one-limb
+	 number whose only limb is zero.  */
+      w->_mp_size = res != 0;
+    }
+  return res;
+}
diff --git a/rts/gmp/mpz/gcdext.c b/rts/gmp/mpz/gcdext.c
new file mode 100644
index 0000000000..3ba04c84ff
--- /dev/null
+++ b/rts/gmp/mpz/gcdext.c
@@ -0,0 +1,137 @@
+/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
+   g = as + bt.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set G to gcd(A, B) and compute cofactors S and T with G = A*S + B*T.
+   Either of S and T may be NULL, in which case that cofactor is not
+   stored.  */
+void
+#if __STDC__
+mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
+#else
+mpz_gcdext (g, s, t, a, b)
+     mpz_ptr g;
+     mpz_ptr s;
+     mpz_ptr t;
+     mpz_srcptr a;
+     mpz_srcptr b;
+#endif
+{
+  mp_size_t asize, bsize, usize, vsize;
+  mp_srcptr ap, bp;
+  mp_ptr up, vp;
+  mp_size_t gsize, ssize, tmp_ssize;
+  mp_ptr gp, sp, tmp_gp, tmp_sp;
+  mpz_srcptr u, v;
+  mpz_ptr ss, tt;
+  __mpz_struct stmp, gtmp;
+  TMP_DECL (marker);
+
+  TMP_MARK (marker);
+
+  /* mpn_gcdext requires that U >= V. Therefore, we often have to swap U and
+     V. This in turn leads to a lot of complications. The computed cofactor
+     will be the wrong one, so we have to fix that up at the end. */
+
+  asize = ABS (SIZ (a));
+  bsize = ABS (SIZ (b));
+  ap = PTR (a);
+  bp = PTR (b);
+  if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))
+    {
+      usize = asize;
+      vsize = bsize;
+      up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+      vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+      MPN_COPY (up, ap, usize);
+      MPN_COPY (vp, bp, vsize);
+      u = a;
+      v = b;
+      ss = s;
+      tt = t;
+    }
+  else
+    {
+      /* Operands swapped, so the cofactor destinations swap as well.  */
+      usize = bsize;
+      vsize = asize;
+      up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+      vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+      MPN_COPY (up, bp, usize);
+      MPN_COPY (vp, ap, vsize);
+      u = b;
+      v = a;
+      ss = t;
+      tt = s;
+    }
+
+  tmp_gp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+  tmp_sp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+
+  if (vsize == 0)
+    {
+      /* gcd(U, 0) = |U| with cofactor magnitude 1.  */
+      tmp_sp[0] = 1;
+      tmp_ssize = 1;
+      MPN_COPY (tmp_gp, up, usize);
+      gsize = usize;
+    }
+  else
+    gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
+  ssize = ABS (tmp_ssize);
+
+  /* Wrap the raw limb results in temporary mpz structures so the mpz
+     routines below can operate on them.  */
+  PTR (&gtmp) = tmp_gp;
+  SIZ (&gtmp) = gsize;
+
+  PTR (&stmp) = tmp_sp;
+  /* The cofactor was computed for |U|; give it the sign that makes it
+     right for U itself.  */
+  SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;
+
+  if (tt != NULL)
+    {
+      if (SIZ (v) == 0)
+	SIZ (tt) = 0;
+      else
+	{
+	  /* Second cofactor from the identity: tt = (g - ss*u) / v.  */
+	  mpz_t x;
+	  MPZ_TMP_INIT (x, ssize + usize + 1);
+	  mpz_mul (x, &stmp, u);
+	  mpz_sub (x, &gtmp, x);
+	  mpz_tdiv_q (tt, x, v);
+	}
+    }
+
+  if (ss != NULL)
+    {
+      if (ALLOC (ss) < ssize)
+	_mpz_realloc (ss, ssize);
+      sp = PTR (ss);
+      MPN_COPY (sp, tmp_sp, ssize);
+      SIZ (ss) = SIZ (&stmp);
+    }
+
+  if (ALLOC (g) < gsize)
+    _mpz_realloc (g, gsize);
+  gp = PTR (g);
+  MPN_COPY (gp, tmp_gp, gsize);
+  SIZ (g) = gsize;
+
+  TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/get_d.c b/rts/gmp/mpz/get_d.c
new file mode 100644
index 0000000000..6a7c5856bb
--- /dev/null
+++ b/rts/gmp/mpz/get_d.c
@@ -0,0 +1,128 @@
+/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.
+
+Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Return 1 if all N limbs at P are zero (also when N is 0), else 0.  */
+static int
+#if __STDC__
+mpn_zero_p (mp_ptr p, mp_size_t n)
+#else
+mpn_zero_p (p, n)
+     mp_ptr p;
+     mp_size_t n;
+#endif
+{
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      if (p[i] != 0)
+	return 0;
+    }
+
+  return 1;
+}
+
+
+/* Return SRC converted to double.  One- and two-limb values are converted
+   directly.  Larger values are reduced to their most significant bits,
+   aligned so the top bit of the top limb is set, and then scaled back
+   with __gmp_scale2.
+   NOTE(review): for three or more limbs RES is assigned only when
+   BITS_PER_MP_LIMB is 32 or 64; other limb sizes look unsupported here --
+   confirm.  */
+double
+#if __STDC__
+mpz_get_d (mpz_srcptr src)
+#else
+mpz_get_d (src)
+     mpz_srcptr src;
+#endif
+{
+  double res;
+  mp_size_t size;
+  int negative;
+  mp_ptr qp;
+  mp_limb_t hz, lz;
+  int cnt;
+
+  size = SIZ(src);
+  if (size == 0)
+    return 0.0;
+
+  negative = size < 0;
+  size = ABS (size);
+  qp = PTR(src);
+
+  if (size == 1)
+    {
+      res = qp[size - 1];
+    }
+  else if (size == 2)
+    {
+      res = MP_BASE_AS_DOUBLE * qp[size - 1] + qp[size - 2];
+    }
+  else
+    {
+      /* CNT is the left shift that normalizes the top limb.  */
+      count_leading_zeros (cnt, qp[size - 1]);
+
+#if BITS_PER_MP_LIMB == 32
+      /* HZ:LZ receive the top 64 bits of the normalized value.  */
+      if (cnt == 0)
+	{
+	  hz = qp[size - 1];
+	  lz = qp[size - 2];
+	}
+      else
+	{
+	  hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+	  lz = (qp[size - 2] << cnt) | (qp[size - 3] >> BITS_PER_MP_LIMB - cnt);
+	}
+#if _GMP_IEEE_FLOATS
+      /* Take bits from less significant limbs, but only if they may affect
+	 the result. */
+      /* The low 11 bits equal to 0x400 look like an exact rounding tie for
+	 a 53-bit mantissa; any non-zero bit below them bumps LZ by one so
+	 the conversion rounds up -- presumably; verify against the IEEE
+	 double format.  */
+      if ((lz & 0x7ff) == 0x400)
+	{
+	  if (cnt != 0)
+	    lz += ((qp[size - 3] << cnt) != 0 || ! mpn_zero_p (qp, size - 3));
+	  else
+	    lz += (! mpn_zero_p (qp, size - 2));
+	}
+#endif
+      res = MP_BASE_AS_DOUBLE * hz + lz;
+      res = __gmp_scale2 (res, (size - 2) * BITS_PER_MP_LIMB - cnt);
+#endif
+#if BITS_PER_MP_LIMB == 64
+      /* A single normalized limb HZ carries the top 64 bits.  */
+      if (cnt == 0)
+	hz = qp[size - 1];
+      else
+	hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+#if _GMP_IEEE_FLOATS
+      /* Same tie test as in the 32-bit case, applied to HZ.  */
+      if ((hz & 0x7ff) == 0x400)
+	{
+	  if (cnt != 0)
+	    hz += ((qp[size - 2] << cnt) != 0 || ! mpn_zero_p (qp, size - 2));
+	  else
+	    hz += (! mpn_zero_p (qp, size - 1));
+	}
+#endif
+      res = hz;
+      res = __gmp_scale2 (res, (size - 1) * BITS_PER_MP_LIMB - cnt);
+#endif
+    }
+
+  return negative ? -res : res;
+}
diff --git a/rts/gmp/mpz/get_si.c b/rts/gmp/mpz/get_si.c
new file mode 100644
index 0000000000..8a5d0e4803
--- /dev/null
+++ b/rts/gmp/mpz/get_si.c
@@ -0,0 +1,43 @@
+/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return a signed long built from the sign of OP and the low
+   BITS_PER_MP_LIMB-1 bits of its least significant limb; higher limbs are
+   ignored.  Returns 0 when OP is zero.  */
+signed long int
+#if __STDC__
+mpz_get_si (mpz_srcptr op)
+#else
+mpz_get_si (op)
+     mpz_srcptr op;
+#endif
+{
+  mp_size_t size = op->_mp_size;
+  mp_limb_t low_limb = op->_mp_d[0];
+
+  if (size > 0)
+    return low_limb % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+  else if (size < 0)
+    /* This convoluted expression is necessary to properly handle 0x80000000 */
+    return ~((low_limb - 1) % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)));
+  else
+    return 0;
+}
diff --git a/rts/gmp/mpz/get_str.c b/rts/gmp/mpz/get_str.c
new file mode 100644
index 0000000000..c7278afb52
--- /dev/null
+++ b/rts/gmp/mpz/get_str.c
@@ -0,0 +1,118 @@
+/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision
+   number MP_SRC to a string STRING of base BASE. If STRING is NULL
+   allocate space for the result.
In any case, return a pointer to the
+   result. If STRING is not NULL, the caller must ensure enough space is
+   available to store the result.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Convert X to a NUL-terminated string of digits in base |BASE|.
+
+   BASE 0 means 10.  A positive BASE selects lower-case letters for digits
+   above 9, a negative BASE selects upper-case letters.  |BASE| must lie
+   in 2..36; any other value makes the function return a null pointer
+   without touching RES_STR.
+
+   If RES_STR is a null pointer the result is placed in a block obtained
+   from _mp_allocate_func, otherwise it is written to RES_STR, which the
+   caller must have made large enough.  Returns a pointer to the start of
+   the result string.  */
+char *
+#if __STDC__
+mpz_get_str (char *res_str, int base, mpz_srcptr x)
+#else
+mpz_get_str (res_str, base, x)
+     char *res_str;
+     int base;
+     mpz_srcptr x;
+#endif
+{
+  mp_ptr xp;
+  mp_size_t x_size = x->_mp_size;
+  unsigned char *str;
+  char *return_str;
+  size_t str_size;
+  char *num_to_text;
+  size_t i;
+  TMP_DECL (marker);
+
+  /* The digit alphabets below have 36 symbols, so a larger base would index
+     past their end, and base 1 is not a positional system.  Check before
+     negating BASE and before anything has been allocated.  */
+  if (base == 1 || base == -1 || base > 36 || base < -36)
+    return 0;
+
+  TMP_MARK (marker);
+  if (base >= 0)
+    {
+      if (base == 0)
+	base = 10;
+      num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+    }
+  else
+    {
+      base = -base;
+      num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+
+  /* We always allocate space for the string. If the caller passed a
+     NULL pointer for RES_STR, we allocate permanent space and return
+     a pointer to that to the caller. */
+  str_size = ((size_t) (ABS (x_size) * BITS_PER_MP_LIMB
+			* __mp_bases[base].chars_per_bit_exactly)) + 3;
+  if (res_str == 0)
+    {
+      /* We didn't get a string from the user. Allocate one (and return
+	 a pointer to it). */
+      res_str = (char *) (*_mp_allocate_func) (str_size);
+      /* Make str, the variable used for raw result from mpn_get_str,
+	 point to the same string, but just after a possible minus sign. */
+      str = (unsigned char *) res_str + 1;
+    }
+  else
+    {
+      /* Use TMP_ALLOC to get temporary space, since we need a few extra bytes
+	 that we can't expect the caller to supply us with. */
+      str = (unsigned char *) TMP_ALLOC (str_size);
+    }
+
+  return_str = res_str;
+
+  if (x_size == 0)
+    {
+      res_str[0] = '0';
+      res_str[1] = 0;
+      TMP_FREE (marker);
+      return res_str;
+    }
+  if (x_size < 0)
+    {
+      *res_str++ = '-';
+      x_size = -x_size;
+    }
+
+  /* Move the number to convert into temporary space, since mpn_get_str
+     clobbers its argument + needs one extra high limb.... */
+  xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);
+  MPN_COPY (xp, x->_mp_d, x_size);
+
+  str_size = mpn_get_str (str, base, xp, x_size);
+
+  /* mpn_get_str might make some leading zeros. Skip them. */
+  while (*str == 0)
+    {
+      str_size--;
+      str++;
+    }
+
+  /* Translate result to printable chars and move result to RES_STR.  When
+     the buffer was allocated above, STR lies inside it at or after the
+     write position, so each raw digit is read before its slot is
+     overwritten.  */
+  for (i = 0; i < str_size; i++)
+    res_str[i] = num_to_text[str[i]];
+  res_str[str_size] = 0;
+
+  TMP_FREE (marker);
+  return return_str;
+}
diff --git a/rts/gmp/mpz/get_ui.c b/rts/gmp/mpz/get_ui.c
new file mode 100644
index 0000000000..a8ec9e01a4
--- /dev/null
+++ b/rts/gmp/mpz/get_ui.c
@@ -0,0 +1,37 @@
+/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return the least significant limb of INTEGER, or 0 when INTEGER is zero.
+   The sign and any higher limbs are ignored.  */
+unsigned long int
+#if __STDC__
+mpz_get_ui (mpz_srcptr integer)
+#else
+mpz_get_ui (integer)
+     mpz_srcptr integer;
+#endif
+{
+  /* The limb array is only consulted for a non-zero value.  */
+  return integer->_mp_size != 0 ? integer->_mp_d[0] : 0;
+}
diff --git a/rts/gmp/mpz/getlimbn.c b/rts/gmp/mpz/getlimbn.c
new file mode 100644
index 0000000000..b772ed05c4
--- /dev/null
+++ b/rts/gmp/mpz/getlimbn.c
@@ -0,0 +1,38 @@
+/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.
+
+Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return limb number N (counting from 0 at the least significant end) of
+   INTEGER.  An index that is negative or not below the number of limbs in
+   use yields 0.  */
+mp_limb_t
+#if __STDC__
+mpz_getlimbn (mpz_srcptr integer, mp_size_t n)
+#else
+mpz_getlimbn (integer, n)
+     mpz_srcptr integer;
+     mp_size_t n;
+#endif
+{
+  if (n >= 0 && n < ABS (integer->_mp_size))
+    return integer->_mp_d[n];
+
+  return 0;
+}
diff --git a/rts/gmp/mpz/hamdist.c b/rts/gmp/mpz/hamdist.c
new file mode 100644
index 0000000000..b039a653d2
--- /dev/null
+++ b/rts/gmp/mpz/hamdist.c
@@ -0,0 +1,62 @@
+/* mpz_hamdist(mpz_ptr op1, mpz_ptr op2) -- Compute the hamming distance
+   between OP1 and OP2. If one of the operands is negative, return ~0. (We
+   could make the function well-defined when both operands are negative, but
+   that would probably not be worth the trouble.)
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Return the number of bit positions in which U and V differ.  If either
+   operand is negative the result is ~0 (all bits set).  */
+unsigned long int
+#if __STDC__
+mpz_hamdist (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_hamdist (u, v)
+     mpz_srcptr u;
+     mpz_srcptr v;
+#endif
+{
+  mp_size_t un, vn, shared;
+  unsigned long int excess;
+
+  un = u->_mp_size;
+  vn = v->_mp_size;
+
+  if (un < 0 || vn < 0)
+    return ~ (unsigned long int) 0;
+
+  /* Limbs that only the longer operand has face implicit zeros in the
+     other one, so every set bit among them is a difference.  */
+  if (un > vn)
+    {
+      excess = mpn_popcount (u->_mp_d + vn, un - vn);
+      shared = vn;
+    }
+  else
+    {
+      excess = mpn_popcount (v->_mp_d + un, vn - un);
+      shared = un;
+    }
+
+  /* The limbs both operands have are compared pairwise.  */
+  return excess + mpn_hamdist (u->_mp_d, v->_mp_d, shared);
+}
diff --git a/rts/gmp/mpz/init.c b/rts/gmp/mpz/init.c
new file mode 100644
index 0000000000..2e8e4d2cbd
--- /dev/null
+++ b/rts/gmp/mpz/init.c
@@ -0,0 +1,36 @@
+/* mpz_init() -- Make a new multiple precision number with value 0.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_init (mpz_ptr x) +#else +mpz_init (x) + mpz_ptr x; +#endif +{ + x->_mp_alloc = 1; + x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB); + x->_mp_size = 0; +} diff --git a/rts/gmp/mpz/inp_raw.c b/rts/gmp/mpz/inp_raw.c new file mode 100644 index 0000000000..15e601229d --- /dev/null +++ b/rts/gmp/mpz/inp_raw.c @@ -0,0 +1,101 @@ +/* mpz_inp_raw -- Input a mpz_t in raw, but endianess, and wordsize + independent format (as output by mpz_out_raw). + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include <stdio.h> + +#include "gmp.h" +#include "gmp-impl.h" + +size_t +#if __STDC__ +mpz_inp_raw (mpz_ptr x, FILE *stream) +#else +mpz_inp_raw (x, stream) + mpz_ptr x; + FILE *stream; +#endif +{ + int i; + mp_size_t s; + mp_size_t xsize; + mp_ptr xp; + unsigned int c; + mp_limb_t x_limb; + mp_size_t in_bytesize; + int neg_flag; + + if (stream == 0) + stream = stdin; + + /* Read 4-byte size */ + in_bytesize = 0; + for (i = 4 - 1; i >= 0; i--) + { + c = fgetc (stream); + in_bytesize = (in_bytesize << BITS_PER_CHAR) | c; + } + + /* Size is stored as a 32 bit word; sign extend in_bytesize for non-32 bit + machines. */ + if (sizeof (mp_size_t) > 4) + in_bytesize |= (-(in_bytesize < 0)) << 31; + + neg_flag = in_bytesize < 0; + in_bytesize = ABS (in_bytesize); + xsize = (in_bytesize + BYTES_PER_MP_LIMB - 1) / BYTES_PER_MP_LIMB; + + if (xsize == 0) + { + x->_mp_size = 0; + return 4; /* we've read 4 bytes */ + } + + if (x->_mp_alloc < xsize) + _mpz_realloc (x, xsize); + xp = x->_mp_d; + + x_limb = 0; + for (i = (in_bytesize - 1) % BYTES_PER_MP_LIMB; i >= 0; i--) + { + c = fgetc (stream); + x_limb = (x_limb << BITS_PER_CHAR) | c; + } + xp[xsize - 1] = x_limb; + + for (s = xsize - 2; s >= 0; s--) + { + x_limb = 0; + for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--) + { + c = fgetc (stream); + x_limb = (x_limb << BITS_PER_CHAR) | c; + } + xp[s] = x_limb; + } + + if (c == EOF) + return 0; /* error */ + + MPN_NORMALIZE (xp, xsize); + x->_mp_size = neg_flag ? -xsize : xsize; + return in_bytesize + 4; +} diff --git a/rts/gmp/mpz/inp_str.c b/rts/gmp/mpz/inp_str.c new file mode 100644 index 0000000000..7aa5e1fc30 --- /dev/null +++ b/rts/gmp/mpz/inp_str.c @@ -0,0 +1,167 @@ +/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base + BASE from stdio stream STREAM and store the result in DEST_INTEGER. + +Copyright (C) 1991, 1993, 1994, 1996, 1998, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> +#include <ctype.h> +#include "gmp.h" +#include "gmp-impl.h" + +static int +#if __STDC__ +digit_value_in_base (int c, int base) +#else +digit_value_in_base (c, base) + int c; + int base; +#endif +{ + int digit; + + if (isdigit (c)) + digit = c - '0'; + else if (islower (c)) + digit = c - 'a' + 10; + else if (isupper (c)) + digit = c - 'A' + 10; + else + return -1; + + if (digit < base) + return digit; + return -1; +} + +size_t +#if __STDC__ +mpz_inp_str (mpz_ptr x, FILE *stream, int base) +#else +mpz_inp_str (x, stream, base) + mpz_ptr x; + FILE *stream; + int base; +#endif +{ + char *str; + size_t alloc_size, str_size; + int c; + int negative; + mp_size_t xsize; + size_t nread; + + if (stream == 0) + stream = stdin; + + nread = 0; + + /* Skip whitespace. */ + do + { + c = getc (stream); + nread++; + } + while (isspace (c)); + + negative = 0; + if (c == '-') + { + negative = 1; + c = getc (stream); + nread++; + } + + if (digit_value_in_base (c, base == 0 ? 10 : base) < 0) + return 0; /* error if no digits */ + + /* If BASE is 0, try to find out the base by looking at the initial + characters. 
*/ + if (base == 0) + { + base = 10; + if (c == '0') + { + base = 8; + c = getc (stream); + nread++; + if (c == 'x' || c == 'X') + { + base = 16; + c = getc (stream); + nread++; + } + else if (c == 'b' || c == 'B') + { + base = 2; + c = getc (stream); + nread++; + } + } + } + + /* Skip leading zeros. */ + while (c == '0') + { + c = getc (stream); + nread++; + } + + alloc_size = 100; + str = (char *) (*_mp_allocate_func) (alloc_size); + str_size = 0; + + for (;;) + { + int dig; + if (str_size >= alloc_size) + { + size_t old_alloc_size = alloc_size; + alloc_size = alloc_size * 3 / 2; + str = (char *) (*_mp_reallocate_func) (str, old_alloc_size, alloc_size); + } + dig = digit_value_in_base (c, base); + if (dig < 0) + break; + str[str_size++] = dig; + c = getc (stream); + } + + ungetc (c, stream); + + /* Make sure the string is not empty, mpn_set_str would fail. */ + if (str_size == 0) + { + x->_mp_size = 0; + (*_mp_free_func) (str, alloc_size); + return nread; + } + + xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly)) + / BITS_PER_MP_LIMB + 2); + if (x->_mp_alloc < xsize) + _mpz_realloc (x, xsize); + + /* Convert the byte array in base BASE to our bignum format. */ + xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base); + x->_mp_size = negative ? -xsize : xsize; + + (*_mp_free_func) (str, alloc_size); + return str_size + nread; +} diff --git a/rts/gmp/mpz/invert.c b/rts/gmp/mpz/invert.c new file mode 100644 index 0000000000..749a0969fc --- /dev/null +++ b/rts/gmp/mpz/invert.c @@ -0,0 +1,77 @@ +/* mpz_invert (inv, x, n). Find multiplicative inverse of X in Z(N). + If X has an inverse, return non-zero and store inverse in INVERSE, + otherwise, return 0 and put garbage in INVERSE. + +Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n) +#else +mpz_invert (inverse, x, n) + mpz_ptr inverse; + mpz_srcptr x, n; +#endif +{ + mpz_t gcd, tmp; + mp_size_t xsize, nsize, size; + TMP_DECL (marker); + + xsize = SIZ (x); + nsize = SIZ (n); + xsize = ABS (xsize); + nsize = ABS (nsize); + size = MAX (xsize, nsize) + 1; + + /* No inverse exists if the leftside operand is 0. Likewise, no + inverse exists if the mod operand is 1. */ + if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1)) + return 0; + + TMP_MARK (marker); + + MPZ_TMP_INIT (gcd, size); + MPZ_TMP_INIT (tmp, size); + mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n); + + /* If no inverse existed, return with an indication of that. */ + if (gcd->_mp_size != 1 || (gcd->_mp_d)[0] != 1) + { + TMP_FREE (marker); + return 0; + } + + /* Make sure we return a positive inverse. 
*/ + if (SIZ (tmp) < 0) + { + if (SIZ (n) < 0) + mpz_sub (inverse, tmp, n); + else + mpz_add (inverse, tmp, n); + } + else + mpz_set (inverse, tmp); + + TMP_FREE (marker); + return 1; +} diff --git a/rts/gmp/mpz/ior.c b/rts/gmp/mpz/ior.c new file mode 100644 index 0000000000..0bb5a806dc --- /dev/null +++ b/rts/gmp/mpz/ior.c @@ -0,0 +1,244 @@ +/* mpz_ior -- Logical inclusive or. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2) +#else +mpz_ior (res, op1, op2) + mpz_ptr res; + mpz_srcptr op1; + mpz_srcptr op2; +#endif +{ + mp_srcptr op1_ptr, op2_ptr; + mp_size_t op1_size, op2_size; + mp_ptr res_ptr; + mp_size_t res_size; + mp_size_t i; + TMP_DECL (marker); + + TMP_MARK (marker); + op1_size = op1->_mp_size; + op2_size = op2->_mp_size; + + op1_ptr = op1->_mp_d; + op2_ptr = op2->_mp_d; + res_ptr = res->_mp_d; + + if (op1_size >= 0) + { + if (op2_size >= 0) + { + if (op1_size >= op2_size) + { + if (res->_mp_alloc < op1_size) + { + _mpz_realloc (res, op1_size); + op1_ptr = op1->_mp_d; + op2_ptr = op2->_mp_d; + res_ptr = res->_mp_d; + } + + if (res_ptr != op1_ptr) + MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, + op1_size - op2_size); + for (i = op2_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] | op2_ptr[i]; + res_size = op1_size; + } + else + { + if (res->_mp_alloc < op2_size) + { + _mpz_realloc (res, op2_size); + op1_ptr = op1->_mp_d; + op2_ptr = op2->_mp_d; + res_ptr = res->_mp_d; + } + + if (res_ptr != op2_ptr) + MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, + op2_size - op1_size); + for (i = op1_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] | op2_ptr[i]; + res_size = op2_size; + } + + res->_mp_size = res_size; + return; + } + else /* op2_size < 0 */ + { + /* Fall through to the code at the end of the function. */ + } + } + else + { + if (op2_size < 0) + { + mp_ptr opx; + mp_limb_t cy; + + /* Both operands are negative, so will be the result. + -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) = + = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 = + = ((OP1 - 1) & (OP2 - 1)) + 1 */ + + op1_size = -op1_size; + op2_size = -op2_size; + + res_size = MIN (op1_size, op2_size); + + /* Possible optimization: Decrease mpn_sub precision, + as we won't use the entire res of both. 
*/ + opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1); + op1_ptr = opx; + + opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1); + op2_ptr = opx; + + if (res->_mp_alloc < res_size) + { + _mpz_realloc (res, res_size); + res_ptr = res->_mp_d; + /* Don't re-read OP1_PTR and OP2_PTR. They point to + temporary space--never to the space RES->_mp_d used + to point to before reallocation. */ + } + + /* First loop finds the size of the result. */ + for (i = res_size - 1; i >= 0; i--) + if ((op1_ptr[i] & op2_ptr[i]) != 0) + break; + res_size = i + 1; + + if (res_size != 0) + { + /* Second loop computes the real result. */ + for (i = res_size - 1; i >= 0; i--) + res_ptr[i] = op1_ptr[i] & op2_ptr[i]; + + cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1); + if (cy) + { + res_ptr[res_size] = cy; + res_size++; + } + } + else + { + res_ptr[0] = 1; + res_size = 1; + } + + res->_mp_size = -res_size; + TMP_FREE (marker); + return; + } + else + { + /* We should compute -OP1 | OP2. Swap OP1 and OP2 and fall + through to the code that handles OP1 | -OP2. */ + MPZ_SRCPTR_SWAP (op1, op2); + MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size); + } + } + + { + mp_ptr opx; + mp_limb_t cy; + mp_size_t res_alloc; + mp_size_t count; + + /* Operand 2 negative, so will be the result. + -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) = + = ~(OP1 | ~(OP2 - 1)) + 1 = + = (~OP1 & (OP2 - 1)) + 1 */ + + op2_size = -op2_size; + + res_alloc = op2_size; + + opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB); + mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1); + op2_ptr = opx; + op2_size -= op2_ptr[op2_size - 1] == 0; + + if (res->_mp_alloc < res_alloc) + { + _mpz_realloc (res, res_alloc); + op1_ptr = op1->_mp_d; + res_ptr = res->_mp_d; + /* Don't re-read OP2_PTR. It points to temporary space--never + to the space RES->_mp_d used to point to before reallocation. 
*/ + } + + if (op1_size >= op2_size) + { + /* We can just ignore the part of OP1 that stretches above OP2, + because the result limbs are zero there. */ + + /* First loop finds the size of the result. */ + for (i = op2_size - 1; i >= 0; i--) + if ((~op1_ptr[i] & op2_ptr[i]) != 0) + break; + res_size = i + 1; + count = res_size; + } + else + { + res_size = op2_size; + + /* Copy the part of OP2 that stretches above OP1, to RES. */ + MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size); + count = op1_size; + } + + if (res_size != 0) + { + /* Second loop computes the real result. */ + for (i = count - 1; i >= 0; i--) + res_ptr[i] = ~op1_ptr[i] & op2_ptr[i]; + + cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1); + if (cy) + { + res_ptr[res_size] = cy; + res_size++; + } + } + else + { + res_ptr[0] = 1; + res_size = 1; + } + + res->_mp_size = -res_size; + } + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/iset.c b/rts/gmp/mpz/iset.c new file mode 100644 index 0000000000..114bc2d542 --- /dev/null +++ b/rts/gmp/mpz/iset.c @@ -0,0 +1,49 @@ +/* mpz_init_set (src_integer) -- Make a new multiple precision number with + a value copied from SRC_INTEGER. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_init_set (mpz_ptr w, mpz_srcptr u) +#else +mpz_init_set (w, u) + mpz_ptr w; + mpz_srcptr u; +#endif +{ + mp_ptr wp, up; + mp_size_t usize, size; + + usize = u->_mp_size; + size = ABS (usize); + + w->_mp_alloc = MAX (size, 1); + w->_mp_d = (mp_ptr) (*_mp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB); + + wp = w->_mp_d; + up = u->_mp_d; + + MPN_COPY (wp, up, size); + w->_mp_size = usize; +} diff --git a/rts/gmp/mpz/iset_d.c b/rts/gmp/mpz/iset_d.c new file mode 100644 index 0000000000..502a8933e2 --- /dev/null +++ b/rts/gmp/mpz/iset_d.c @@ -0,0 +1,39 @@ +/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double + value VAL. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_init_set_d (mpz_ptr dest, double val) +#else +mpz_init_set_d (dest, val) + mpz_ptr dest; + double val; +#endif +{ + dest->_mp_alloc = 1; + dest->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB); + dest->_mp_size = 0; + mpz_set_d (dest, val); +} diff --git a/rts/gmp/mpz/iset_si.c b/rts/gmp/mpz/iset_si.c new file mode 100644 index 0000000000..842db140ef --- /dev/null +++ b/rts/gmp/mpz/iset_si.c @@ -0,0 +1,49 @@ +/* mpz_init_set_si(val) -- Make a new multiple precision number with + value val. + +Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_init_set_si (mpz_ptr x, signed long int val) +#else +mpz_init_set_si (x, val) + mpz_ptr x; + signed long int val; +#endif +{ + x->_mp_alloc = 1; + x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB); + if (val > 0) + { + x->_mp_d[0] = val; + x->_mp_size = 1; + } + else if (val < 0) + { + x->_mp_d[0] = (unsigned long) -val; + x->_mp_size = -1; + } + else + x->_mp_size = 0; +} diff --git a/rts/gmp/mpz/iset_str.c b/rts/gmp/mpz/iset_str.c new file mode 100644 index 0000000000..dfb8c6b230 --- /dev/null +++ b/rts/gmp/mpz/iset_str.c @@ -0,0 +1,47 @@ +/* mpz_init_set_str(string, base) -- Convert the \0-terminated string + STRING in base BASE to a multiple precision integer. Return a MP_INT + structure representing the integer. Allow white space in the + string. If BASE == 0 determine the base in the C standard way, + i.e. 0xhh...h means base 16, 0oo...o means base 8, otherwise + assume base 10. + +Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_init_set_str (mpz_ptr x, const char *str, int base) +#else +mpz_init_set_str (x, str, base) + mpz_ptr x; + const char *str; + int base; +#endif +{ + x->_mp_alloc = 1; + x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB); + + /* if str has no digits mpz_set_str leaves x->_mp_size unset */ + x->_mp_size = 0; + + return mpz_set_str (x, str, base); +} diff --git a/rts/gmp/mpz/iset_ui.c b/rts/gmp/mpz/iset_ui.c new file mode 100644 index 0000000000..759182c556 --- /dev/null +++ b/rts/gmp/mpz/iset_ui.c @@ -0,0 +1,39 @@ +/* mpz_init_set_ui(val) -- Make a new multiple precision number with + value val. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_init_set_ui (mpz_ptr x, unsigned long int val) +#else +mpz_init_set_ui (x, val) + mpz_ptr x; + unsigned long int val; +#endif +{ + x->_mp_alloc = 1; + x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB); + x->_mp_d[0] = val; + x->_mp_size = val != 0; +} diff --git a/rts/gmp/mpz/jacobi.c b/rts/gmp/mpz/jacobi.c new file mode 100644 index 0000000000..9d49e1d0c6 --- /dev/null +++ b/rts/gmp/mpz/jacobi.c @@ -0,0 +1,53 @@ +/* mpz_jacobi (op1, op2). + Contributed by Bennet Yee (bsy) at Carnegie-Mellon University + +Copyright (C) 1991, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" + +/* Precondition: both p and q are positive */ + +int +#if __STDC__ +mpz_jacobi (mpz_srcptr pi, mpz_srcptr qi) +#else +mpz_jacobi (pi, qi) + mpz_srcptr pi, qi; +#endif +{ +#if GCDCHECK + int retval; + mpz_t gcdval; + + mpz_init (gcdval); + mpz_gcd (gcdval, pi, qi); + if (!mpz_cmp_ui (gcdval, 1L)) + { + /* J(ab,cb) = J(ab,c)J(ab,b) = J(ab,c)J(0,b) = J(ab,c)*0 */ + retval = 0; + } + else + retval = mpz_legendre (pi, qi); + mpz_clear (gcdval); + return retval; +#else + return mpz_legendre (pi, qi); +#endif +} diff --git a/rts/gmp/mpz/kronsz.c b/rts/gmp/mpz/kronsz.c new file mode 100644 index 0000000000..c8c6752224 --- /dev/null +++ b/rts/gmp/mpz/kronsz.c @@ -0,0 +1,126 @@ +/* mpz_si_kronecker -- Kronecker/Jacobi symbol. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
+*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +int +#if __STDC__ +mpz_si_kronecker (long a, mpz_srcptr b) +#else +mpz_si_kronecker (a, b) + long a; + mpz_srcptr b; +#endif +{ + int b_abs_size; + mp_srcptr b_ptr; + mp_limb_t b_low; + int twos; + int result_bit1; + + b_abs_size = ABSIZ (b); + if (b_abs_size == 0) + return JACOBI_S0 (a); /* (a/0) */ + + b_ptr = PTR(b); + b_low = b_ptr[0]; + + /* (0/b) = 1 if b=+/-1, 0 otherwise */ + if (a == 0) + return (b_abs_size == 1) & (b_low == 1); + + /* account for the effect of the sign of b, so can then ignore it */ + result_bit1 = JACOBI_BSGN_SZ_BIT1 (a, b); + + if ((b_low & 1) == 0) + { + /* b even */ + + if ((a & 1) == 0) + return 0; /* (a/b)=0 if both a,b even */ + + /* Require MP_BITS_PER_LIMB even, so that (a/2)^MP_BITS_PER_LIMB = 1, + and so that therefore there's no need to account for how many zero + limbs are stripped. */ + ASSERT ((BITS_PER_MP_LIMB & 1) == 0); + + MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size); + b_low = b_ptr[0]; + + if ((b_low & 1) == 0) + { + /* odd a, even b */ + + mp_limb_t b_shl_bit1; + + count_trailing_zeros (twos, b_low); + + /* b_shl_bit1 is b>>twos, but with only bit 1 guaranteed */ + if (twos == BITS_PER_MP_LIMB-1) + b_shl_bit1 = (b_abs_size == 1) ? 0 : (b_ptr[1] << 1); + else + b_shl_bit1 = (b_low >> twos); + + result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_shl_bit1); + a = ABS(a); + + if (a == 1) + return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */ + + /* twos (a/2), reciprocity to (b/a), and (b/a) = (b mod a / b) */ + return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size, + twos, a), + a, + result_bit1 + ^ JACOBI_TWOS_U_BIT1 (twos, a) + ^ JACOBI_RECIP_UU_BIT1 (a, b_shl_bit1)); + } + } + + /* b odd */ + + result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low); + a = ABS(a); + + /* (a/1) = 1 for any a */ + if (b_abs_size == 1 && b_low == 1) + return JACOBI_BIT1_TO_PN (result_bit1); + + /* Note a is cast to unsigned because 0x80..00 doesn't fit in a signed. 
*/ + if ((a & 1) == 0) + { + count_trailing_zeros (twos, a); + a = ((unsigned long) a) >> twos; + result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low); + } + + if (a == 1) + return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */ + + /* reciprocity to (b/a), and (b/a) == (b mod a / a) */ + return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a), a, + result_bit1 ^ JACOBI_RECIP_UU_BIT1 (a, b_low)); +} diff --git a/rts/gmp/mpz/kronuz.c b/rts/gmp/mpz/kronuz.c new file mode 100644 index 0000000000..b877e6f64c --- /dev/null +++ b/rts/gmp/mpz/kronuz.c @@ -0,0 +1,115 @@ +/* mpz_ui_kronecker -- Kronecker/Jacobi symbol. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
+*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +int +#if __STDC__ +mpz_ui_kronecker (unsigned long a, mpz_srcptr b) +#else +mpz_ui_kronecker (a, b) + unsigned long a; + mpz_srcptr b; +#endif +{ + int b_abs_size; + mp_srcptr b_ptr; + mp_limb_t b_low; + int twos; + int result_bit1; + + /* (a/0) */ + b_abs_size = ABSIZ (b); + if (b_abs_size == 0) + return JACOBI_U0 (a); + + /* (a/-1)=1 when a>=0, so the sign of b is ignored */ + b_ptr = PTR(b); + b_low = b_ptr[0]; + + /* (0/1)=1; (0/-1)=1; (0/b)=0 for b!=+/-1 + (1/b)=1, for any b */ + if (a <= 1) + return (a == 1) | ((b_abs_size == 1) & (b_low == 1)); + + if (b_low & 1) + { + /* (a/1) = 1 for any a */ + if (b_abs_size == 1 && b_low == 1) + return 1; + + count_trailing_zeros (twos, a); + a >>= twos; + if (a == 1) + return JACOBI_TWOS_U (twos, b_low); /* powers of (2/b) only */ + + /* powers of (2/b); reciprocity to (b/a); (b/a) == (b mod a / a) */ + return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a), + a, + JACOBI_TWOS_U_BIT1 (twos, b_low) + ^ JACOBI_RECIP_UU_BIT1 (b_low, a)); + } + + /* b is even; (a/2)=0 if a is even */ + if ((a & 1) == 0) + return 0; + + /* Require MP_BITS_PER_LIMB even, so (a/2)^MP_BITS_PER_LIMB = 1, and so we + don't have to pay attention to how many trailing zero limbs are + stripped. 
*/ + ASSERT ((BITS_PER_MP_LIMB & 1) == 0); + + MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size); + b_low = b_ptr[0]; + + if (b_low & 1) + /* reciprocity to (b/a); (b/a) == (b mod a / a) */ + return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a), + a, + JACOBI_RECIP_UU_BIT1 (b_low, a)); + + count_trailing_zeros (twos, b_low); + + /* reciprocity to get (b/a) */ + if (twos == BITS_PER_MP_LIMB-1) + { + if (b_abs_size == 1) + { + /* b==0x800...00, one limb high bit only, so (a/2)^(BPML-1) */ + return JACOBI_TWOS_U (BITS_PER_MP_LIMB-1, a); + } + + /* b_abs_size > 1 */ + result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_ptr[1] << 1); + } + else + result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_low >> twos); + + /* powers of (a/2); reciprocity to (b/a); (b/a) == (b mod a / a) */ + return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size, twos, a), + a, + JACOBI_TWOS_U_BIT1 (twos, a) ^ result_bit1); +} diff --git a/rts/gmp/mpz/kronzs.c b/rts/gmp/mpz/kronzs.c new file mode 100644 index 0000000000..edfb465976 --- /dev/null +++ b/rts/gmp/mpz/kronzs.c @@ -0,0 +1,74 @@ +/* mpz_kronecker_si -- Kronecker/Jacobi symbol. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
+*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* This function is expected to be often used with b odd, so there's a test + for this before invoking count_trailing_zeros(). + + After the absolute value of b is established it's treated as an unsigned + long, because 0x80..00 doesn't fit in a signed long. */ + +int +#if __STDC__ +mpz_kronecker_si (mpz_srcptr a, long b) +#else +mpz_kronecker_si (a, b) + mpz_srcptr a; + long b; +#endif +{ + int result_bit1; + int twos; + + if (b == 0) + return JACOBI_Z0 (a); + + result_bit1 = JACOBI_BSGN_ZS_BIT1(a, b); + b = ABS (b); + + if (b == 1) + return JACOBI_BIT1_TO_PN (result_bit1); /* (a/1) = 1 for any a */ + + if (b & 1) + return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1); + + /* result 0 if both a,b even */ + if (mpz_even_p (a)) + return 0; + + /* (a/2)=(2/a) when a odd */ + count_trailing_zeros (twos, b); + result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]); + + b = ((unsigned long) b) >> twos; + if (b == 1) + return JACOBI_BIT1_TO_PN (result_bit1); + else + return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1); +} + + diff --git a/rts/gmp/mpz/kronzu.c b/rts/gmp/mpz/kronzu.c new file mode 100644 index 0000000000..749be5df07 --- /dev/null +++ b/rts/gmp/mpz/kronzu.c @@ -0,0 +1,66 @@ +/* mpz_kronecker_ui -- Kronecker/Jacobi symbol. */ + +/* +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + + +/* This function is expected to be often used with b an odd prime, so the + code for odd b is nice and short. */ + +int +#if __STDC__ +mpz_kronecker_ui (mpz_srcptr a, unsigned long b) +#else +mpz_kronecker_ui (a, b) + mpz_srcptr a; + unsigned long b; +#endif +{ + int twos; + + if (b & 1) + { + if (b != 1) + return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, 0); + else + return 1; /* (a/1)=1 for any a */ + } + + if (b == 0) + return JACOBI_Z0 (a); + + /* (a/2)=0 if a even */ + if (mpz_even_p (a)) + return 0; + + /* (a/2)=(2/a) when a odd */ + count_trailing_zeros (twos, b); + b >>= twos; + if (b == 1) + return JACOBI_TWOS_U (twos, PTR(a)[0]); + + return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, + JACOBI_TWOS_U_BIT1(twos, PTR(a)[0])); +} diff --git a/rts/gmp/mpz/lcm.c b/rts/gmp/mpz/lcm.c new file mode 100644 index 0000000000..7495882ae5 --- /dev/null +++ b/rts/gmp/mpz/lcm.c @@ -0,0 +1,61 @@ +/* mpz/lcm.c: Calculate the least common multiple of two integers. + +Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void *_mpz_realloc (); + +void +#if __STDC__ +mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v) +#else +mpz_lcm (r, u, v) + mpz_ptr r; + mpz_srcptr u; + mpz_srcptr v; +#endif +{ + mpz_t g; + mp_size_t usize, vsize, size; + TMP_DECL (marker); + + TMP_MARK (marker); + + usize = ABS (SIZ (u)); + vsize = ABS (SIZ (v)); + + if (usize == 0 || vsize == 0) + { + SIZ (r) = 0; + return; + } + + size = MAX (usize, vsize); + MPZ_TMP_INIT (g, size); + + mpz_gcd (g, u, v); + mpz_divexact (g, u, g); + mpz_mul (r, g, v); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/legendre.c b/rts/gmp/mpz/legendre.c new file mode 100644 index 0000000000..ab665f70d0 --- /dev/null +++ b/rts/gmp/mpz/legendre.c @@ -0,0 +1,184 @@ +/* mpz_legendre (op1, op2). + Contributed by Bennet Yee (bsy) at Carnegie-Mellon University + +Copyright (C) 1992, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" + +#if defined (DEBUG) +#include <stdio.h> +#endif + +/* Precondition: both p and q are positive */ + +int +#if __STDC__ +mpz_legendre (mpz_srcptr pi, mpz_srcptr qi) +#else +mpz_legendre (pi, qi) +mpz_srcptr pi, qi; +#endif +{ + mpz_t p, q, qdiv2; +#ifdef Q_MINUS_1 + mpz_t q_minus_1; +#endif + mpz_ptr mtmp; + register mpz_ptr pptr, qptr; + register int retval = 1; + register unsigned long int s; + + pptr = p; + mpz_init_set (pptr, pi); + qptr = q; + mpz_init_set (qptr, qi); + +#ifdef Q_MINUS_1 + mpz_init (q_minus_1); +#endif + mpz_init (qdiv2); + +tail_recurse2: +#ifdef DEBUG + printf ("tail_recurse2: p="); + mpz_out_str (stdout, 10, pptr); + printf ("\nq="); + mpz_out_str (stdout, 10, qptr); + putchar ('\n'); +#endif + s = mpz_scan1 (qptr, 0); + if (s) mpz_tdiv_q_2exp (qptr, qptr, s); /* J(a,2) = 1 */ +#ifdef DEBUG + printf ("2 factor decomposition: p="); + mpz_out_str (stdout, 10, pptr); + printf ("\nq="); + mpz_out_str (stdout, 10, qptr); + putchar ('\n'); +#endif + /* postcondition q odd */ + if (!mpz_cmp_ui (qptr, 1L)) /* J(a,1) = 1 */ + goto done; + mpz_mod (pptr, pptr, qptr); /* J(a,q) = J(b,q) when a == b mod q */ +#ifdef DEBUG + printf ("mod out by q: p="); + mpz_out_str (stdout, 10, pptr); + printf ("\nq="); + mpz_out_str (stdout, 10, qptr); + putchar ('\n'); +#endif + /* quick calculation to get approximate size first */ + /* precondition: p < q */ + if ((mpz_sizeinbase (pptr, 2) + 1 >= mpz_sizeinbase (qptr,2)) + && (mpz_tdiv_q_2exp (qdiv2, qptr, 1L), mpz_cmp (pptr, qdiv2) > 0)) + { + /* p > q/2 */ + mpz_sub (pptr, qptr, pptr); + /* J(-1,q) = (-1)^((q-1)/2), q odd */ + if (mpz_get_ui (qptr) & 2) + retval = -retval; + } + /* p < q/2 */ +#ifdef Q_MINUS_1 + mpz_sub_ui (q_minus_q, qptr, 1L); +#endif +tail_recurse: /* we use tail_recurse only if q has not changed */ +#ifdef DEBUG + printf ("tail_recurse1: p="); + mpz_out_str (stdout, 10, pptr); + printf ("\nq="); + mpz_out_str (stdout, 10, qptr); + putchar ('\n'); +#endif + /* + * 
J(0,q) = 0 + * this occurs only if gcd(p,q) != 1 which is never true for + * Legendre function. + */ + if (!mpz_cmp_ui (pptr, 0L)) + { + retval = 0; + goto done; + } + + if (!mpz_cmp_ui (pptr, 1L)) + { + /* J(1,q) = 1 */ + /* retval *= 1; */ + goto done; + } +#ifdef Q_MINUS_1 + if (!mpz_cmp (pptr, q_minus_1)) + { + /* J(-1,q) = (-1)^((q-1)/2) */ + if (mpz_get_ui (qptr) & 2) + retval = -retval; + /* else retval *= 1; */ + goto done; + } +#endif + /* + * we do not handle J(xy,q) except for x==2 + * since we do not want to factor + */ + if ((s = mpz_scan1 (pptr, 0)) != 0) + { + /* + * J(2,q) = (-1)^((q^2-1)/8) + * + * Note that q odd guarantees that q^2-1 is divisible by 8: + * Let a: q=2a+1. q^2 = 4a^2+4a+1, (q^2-1)/8 = a(a+1)/2, qed + * + * Now, note that this means that the low two bits of _a_ + * (or the low bits of q shifted over by 1 determines + * the factor). + */ + mpz_tdiv_q_2exp (pptr, pptr, s); + + /* even powers of 2 gives J(2,q)^{2n} = 1 */ + if (s & 1) + { + s = mpz_get_ui (qptr) >> 1; + s = s * (s + 1); + if (s & 2) + retval = -retval; + } + goto tail_recurse; + } + /* + * we know p is odd since we have cast out 2s + * precondition that q is odd guarantees both odd. + * + * quadratic reciprocity + * J(p,q) = (-1)^((p-1)(q-1)/4) * J(q,p) + */ + if ((s = mpz_scan1 (pptr, 1)) <= 2 && (s + mpz_scan1 (qptr, 1)) <= 2) + retval = -retval; + + mtmp = pptr; pptr = qptr; qptr = mtmp; + goto tail_recurse2; +done: + mpz_clear (p); + mpz_clear (q); + mpz_clear (qdiv2); +#ifdef Q_MINUS_1 + mpz_clear (q_minus_1); +#endif + return retval; +} diff --git a/rts/gmp/mpz/mod.c b/rts/gmp/mpz/mod.c new file mode 100644 index 0000000000..87033b333b --- /dev/null +++ b/rts/gmp/mpz/mod.c @@ -0,0 +1,63 @@ +/* mpz_mod -- The mathematical mod function. + +Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor) +#else +mpz_mod (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + mpz_srcptr divisor; +#endif +{ + mp_size_t divisor_size = divisor->_mp_size; + mpz_t temp_divisor; /* N.B.: lives until function returns! */ + TMP_DECL (marker); + + TMP_MARK (marker); + + /* We need the original value of the divisor after the remainder has been + preliminary calculated. We have to copy it to temporary space if it's + the same variable as REM. */ + if (rem == divisor) + { + MPZ_TMP_INIT (temp_divisor, ABS (divisor_size)); + mpz_set (temp_divisor, divisor); + divisor = temp_divisor; + } + + mpz_tdiv_r (rem, dividend, divisor); + + if (rem->_mp_size != 0) + { + if (dividend->_mp_size < 0) + if (divisor->_mp_size < 0) + mpz_sub (rem, rem, divisor); + else + mpz_add (rem, rem, divisor); + } + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/mul.c b/rts/gmp/mpz/mul.c new file mode 100644 index 0000000000..7854788e50 --- /dev/null +++ b/rts/gmp/mpz/mul.c @@ -0,0 +1,131 @@ +/* mpz_mul -- Multiply two integers. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v) +#else +mpz_mul (w, u, v) + mpz_ptr w; + mpz_srcptr u; + mpz_srcptr v; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w) +#else +mult (u, v, w) + mpz_srcptr u; + mpz_srcptr v; + mpz_ptr w; +#endif +#endif /* BERKELEY_MP */ +{ + mp_size_t usize = u->_mp_size; + mp_size_t vsize = v->_mp_size; + mp_size_t wsize; + mp_size_t sign_product; + mp_ptr up, vp; + mp_ptr wp; + mp_ptr free_me = NULL; + size_t free_me_size; + mp_limb_t cy_limb; + TMP_DECL (marker); + + TMP_MARK (marker); + sign_product = usize ^ vsize; + usize = ABS (usize); + vsize = ABS (vsize); + + if (usize < vsize) + { + /* Swap U and V. */ + {const __mpz_struct *t = u; u = v; v = t;} + {mp_size_t t = usize; usize = vsize; vsize = t;} + } + + up = u->_mp_d; + vp = v->_mp_d; + wp = w->_mp_d; + + /* Ensure W has space enough to store the result. 
*/ + wsize = usize + vsize; + if (w->_mp_alloc < wsize) + { + if (wp == up || wp == vp) + { + free_me = wp; + free_me_size = w->_mp_alloc; + } + else + (*_mp_free_func) (wp, w->_mp_alloc * BYTES_PER_MP_LIMB); + + w->_mp_alloc = wsize; + wp = (mp_ptr) (*_mp_allocate_func) (wsize * BYTES_PER_MP_LIMB); + w->_mp_d = wp; + } + else + { + /* Make U and V not overlap with W. */ + if (wp == up) + { + /* W and U are identical. Allocate temporary space for U. */ + up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB); + /* Is V identical too? Keep it identical with U. */ + if (wp == vp) + vp = up; + /* Copy to the temporary space. */ + MPN_COPY (up, wp, usize); + } + else if (wp == vp) + { + /* W and V are identical. Allocate temporary space for V. */ + vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB); + /* Copy to the temporary space. */ + MPN_COPY (vp, wp, vsize); + } + } + + if (vsize == 0) + { + wsize = 0; + } + else + { + cy_limb = mpn_mul (wp, up, usize, vp, vsize); + wsize = usize + vsize; + wsize -= cy_limb == 0; + } + + w->_mp_size = sign_product < 0 ? -wsize : wsize; + if (free_me != NULL) + (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB); + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/mul_2exp.c b/rts/gmp/mpz/mul_2exp.c new file mode 100644 index 0000000000..abea5fed2c --- /dev/null +++ b/rts/gmp/mpz/mul_2exp.c @@ -0,0 +1,76 @@ +/* mpz_mul_2exp -- Multiply a bignum by 2**CNT + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt) +#else +mpz_mul_2exp (w, u, cnt) + mpz_ptr w; + mpz_srcptr u; + unsigned long int cnt; +#endif +{ + mp_size_t usize = u->_mp_size; + mp_size_t abs_usize = ABS (usize); + mp_size_t wsize; + mp_size_t limb_cnt; + mp_ptr wp; + mp_limb_t wlimb; + + if (usize == 0) + { + w->_mp_size = 0; + return; + } + + limb_cnt = cnt / BITS_PER_MP_LIMB; + wsize = abs_usize + limb_cnt + 1; + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + wp = w->_mp_d; + wsize = abs_usize + limb_cnt; + + cnt %= BITS_PER_MP_LIMB; + if (cnt != 0) + { + wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt); + if (wlimb != 0) + { + wp[wsize] = wlimb; + wsize++; + } + } + else + { + MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize); + } + + /* Zero all whole limbs at low end. Do it here and not before calling + mpn_lshift, not to lose for U == W. */ + MPN_ZERO (wp, limb_cnt); + + w->_mp_size = usize >= 0 ? wsize : -wsize; +} diff --git a/rts/gmp/mpz/mul_siui.c b/rts/gmp/mpz/mul_siui.c new file mode 100644 index 0000000000..9849cd41b0 --- /dev/null +++ b/rts/gmp/mpz/mul_siui.c @@ -0,0 +1,81 @@ +/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to + MULTIPLICATOR times SMALL_MULTIPLICAND. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + + +#ifdef OPERATION_mul_ui +#define FUNCTION mpz_mul_ui +#define MULTIPLICAND_UNSIGNED unsigned +#define MULTIPLICAND_ABS(x) x +#else +#ifdef OPERATION_mul_si +#define FUNCTION mpz_mul_si +#define MULTIPLICAND_UNSIGNED +#define MULTIPLICAND_ABS(x) ABS(x) +#else +Error, error, unrecognised OPERATION +#endif +#endif + + +void +#if __STDC__ +FUNCTION (mpz_ptr prod, mpz_srcptr mult, + MULTIPLICAND_UNSIGNED long int small_mult) +#else +FUNCTION (prod, mult, small_mult) + mpz_ptr prod; + mpz_srcptr mult; + MULTIPLICAND_UNSIGNED long int small_mult; +#endif +{ + mp_size_t size = mult->_mp_size; + mp_size_t sign_product = size; + mp_limb_t cy; + mp_size_t prod_size; + mp_ptr prod_ptr; + + if (size == 0 || small_mult == 0) + { + prod->_mp_size = 0; + return; + } + size = ABS (size); + + prod_size = size + 1; + if (prod->_mp_alloc < prod_size) + _mpz_realloc (prod, prod_size); + + prod_ptr = prod->_mp_d; + + cy = mpn_mul_1 (prod_ptr, mult->_mp_d, size, + (mp_limb_t) MULTIPLICAND_ABS (small_mult)); + if (cy != 0) + { + prod_ptr[size] = cy; + size++; + } + + prod->_mp_size = ((sign_product < 0) ^ (small_mult < 0)) ? 
-size : size; +} diff --git a/rts/gmp/mpz/neg.c b/rts/gmp/mpz/neg.c new file mode 100644 index 0000000000..566c3a95aa --- /dev/null +++ b/rts/gmp/mpz/neg.c @@ -0,0 +1,53 @@ +/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_neg (mpz_ptr w, mpz_srcptr u) +#else +mpz_neg (w, u) + mpz_ptr w; + mpz_srcptr u; +#endif +{ + mp_ptr wp, up; + mp_size_t usize, size; + + usize = u->_mp_size; + + if (u != w) + { + size = ABS (usize); + + if (w->_mp_alloc < size) + _mpz_realloc (w, size); + + wp = w->_mp_d; + up = u->_mp_d; + + MPN_COPY (wp, up, size); + } + + w->_mp_size = -usize; +} diff --git a/rts/gmp/mpz/nextprime.c b/rts/gmp/mpz/nextprime.c new file mode 100644 index 0000000000..f024dd1206 --- /dev/null +++ b/rts/gmp/mpz/nextprime.c @@ -0,0 +1,120 @@ +/* mpz_nextprime(p,t) - compute the next prime > t and store that in p. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_nextprime (mpz_ptr p, mpz_srcptr t) +#else +mpz_nextprime (p, t) + mpz_ptr p; + mpz_srcptr t; +#endif +{ + mpz_add_ui (p, t, 1L); + while (! mpz_probab_prime_p (p, 5)) + mpz_add_ui (p, p, 1L); +} + +#if 0 +/* This code is not yet tested. Will be enabled in 3.1. 
*/ + +status unsigned short primes[] = +{ +3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97, +101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181, +191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277, +281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383, +389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487, +491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601, +607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709, +719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827, +829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947, +953,967,971,977,983,991,997 +}; + +#define NUMBER_OF_PRIMES 167 + +void +#if __STDC__ +mpz_nextprime (mpz_ptr p, mpz_srcptr n) +#else +mpz_nextprime (p, n) + mpz_ptr p; + mpz_srcptr n; +#endif +{ + mpz_t tmp; + unsigned short *moduli; + unsigned long difference; + int i; + int composite; + + /* First handle tiny numbers */ + if (mpz_cmp_ui (n, 2) < 0) + { + mpz_set_ui (p, 2); + return; + } + mpz_add_ui (p, n, 1); + mpz_setbit (p, 0); + + if (mpz_cmp_ui (p, 7) <= 0) + return; + + prime_limit = NUMBER_OF_PRIMES - 1; + if (mpz_cmp_ui (p, primes[prime_limit]) <= 0) + /* Just use first three entries (3,5,7) of table for small numbers */ + prime_limit = 3; + if (prime_limit) + { + /* Compute residues modulo small odd primes */ + moduli = (unsigned short *) TMP_ALLOC (prime_limit * sizeof moduli[0]); + for (i = 0; i < prime_limit; i++) + moduli[i] = mpz_fdiv_ui (p, primes[i]); + } + for (difference = 0; ; difference += 2) + { + composite = 0; + + /* First check residues */ + for (i = 0; i < prime_limit; i++) + { + int acc, pr; + composite |= (moduli[i] == 0); + acc = moduli[i] + 2; + pr = primes[i]; + moduli[i] = acc >= pr ? 
acc - pr : acc; + } + if (composite) + continue; + + mpz_add_ui (p, p, difference); + difference = 0; + + /* Miller-Rabin test */ + if (mpz_millerrabin (p, 2)) + break; + } +} +#endif diff --git a/rts/gmp/mpz/out_raw.c b/rts/gmp/mpz/out_raw.c new file mode 100644 index 0000000000..62709479c5 --- /dev/null +++ b/rts/gmp/mpz/out_raw.c @@ -0,0 +1,89 @@ +/* mpz_out_raw -- Output a mpz_t in binary. Use an endianess and word size + independent format. + +Copyright (C) 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> + +#include "gmp.h" +#include "gmp-impl.h" + +size_t +#if __STDC__ +mpz_out_raw (FILE *stream, mpz_srcptr x) +#else +mpz_out_raw (stream, x) + FILE *stream; + mpz_srcptr x; +#endif +{ + int i; + mp_size_t s; + mp_size_t xsize = ABS (x->_mp_size); + mp_srcptr xp = x->_mp_d; + mp_size_t out_bytesize; + mp_limb_t hi_limb; + int n_bytes_in_hi_limb; + + if (stream == 0) + stream = stdout; + + if (xsize == 0) + { + for (i = 4 - 1; i >= 0; i--) + fputc (0, stream); + return ferror (stream) ? 
0 : 4; + } + + hi_limb = xp[xsize - 1]; + for (i = BYTES_PER_MP_LIMB - 1; i > 0; i--) + { + if ((hi_limb >> i * BITS_PER_CHAR) != 0) + break; + } + n_bytes_in_hi_limb = i + 1; + out_bytesize = BYTES_PER_MP_LIMB * (xsize - 1) + n_bytes_in_hi_limb; + if (x->_mp_size < 0) + out_bytesize = -out_bytesize; + + /* Make the size 4 bytes on all machines, to make the format portable. */ + for (i = 4 - 1; i >= 0; i--) + fputc ((out_bytesize >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), + stream); + + /* Output from the most significant limb to the least significant limb, + with each limb also output in decreasing significance order. */ + + /* Output the most significant limb separately, since we will only + output some of its bytes. */ + for (i = n_bytes_in_hi_limb - 1; i >= 0; i--) + fputc ((hi_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream); + + /* Output the remaining limbs. */ + for (s = xsize - 2; s >= 0; s--) + { + mp_limb_t x_limb; + + x_limb = xp[s]; + for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--) + fputc ((x_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream); + } + return ferror (stream) ? 0 : ABS (out_bytesize) + 4; +} diff --git a/rts/gmp/mpz/out_str.c b/rts/gmp/mpz/out_str.c new file mode 100644 index 0000000000..bf971b0057 --- /dev/null +++ b/rts/gmp/mpz/out_str.c @@ -0,0 +1,108 @@ +/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec. + integer INTEGER in base BASE. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" + +size_t +#if __STDC__ +mpz_out_str (FILE *stream, int base, mpz_srcptr x) +#else +mpz_out_str (stream, base, x) + FILE *stream; + int base; + mpz_srcptr x; +#endif +{ + mp_ptr xp; + mp_size_t x_size = x->_mp_size; + unsigned char *str; + size_t str_size; + size_t i; + size_t written; + char *num_to_text; + TMP_DECL (marker); + + if (stream == 0) + stream = stdout; + + if (base >= 0) + { + if (base == 0) + base = 10; + num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz"; + } + else + { + base = -base; + num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + } + + if (x_size == 0) + { + fputc ('0', stream); + return ferror (stream) ? 0 : 1; + } + + written = 0; + + if (x_size < 0) + { + fputc ('-', stream); + x_size = -x_size; + written = 1; + } + + TMP_MARK (marker); + str_size = ((size_t) (x_size * BITS_PER_MP_LIMB + * __mp_bases[base].chars_per_bit_exactly)) + 3; + str = (unsigned char *) TMP_ALLOC (str_size); + + /* Move the number to convert into temporary space, since mpn_get_str + clobbers its argument + needs one extra high limb.... */ + xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (xp, x->_mp_d, x_size); + + str_size = mpn_get_str (str, base, xp, x_size); + + /* mpn_get_str might make some leading zeros. Skip them. */ + while (*str == 0) + { + str_size--; + str++; + } + + /* Translate to printable chars. 
*/ + for (i = 0; i < str_size; i++) + str[i] = num_to_text[str[i]]; + str[str_size] = 0; + + { + size_t fwret; + fwret = fwrite ((char *) str, 1, str_size, stream); + written += fwret; + } + + TMP_FREE (marker); + return ferror (stream) ? 0 : written; +} diff --git a/rts/gmp/mpz/perfpow.c b/rts/gmp/mpz/perfpow.c new file mode 100644 index 0000000000..e71670a0be --- /dev/null +++ b/rts/gmp/mpz/perfpow.c @@ -0,0 +1,272 @@ +/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power, + zero otherwise. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* + We are to determine if c is a perfect power, c = a ^ b. + Assume c is divisible by 2^n and that codd = c/2^n is odd. + Assume a is divisible by 2^m and that aodd = a/2^m is odd. + It is always true that m divides n. + + * If n is prime, either 1) a is 2*aodd and b = n + or 2) a = c and b = 1. + So for n prime, we readily have a solution. + * If n is factorable into the non-trivial factors p1,p2,... + Since m divides n, m has a subset of n's factors and b = n / m. + + BUG: Should handle negative numbers, since they can be odd perfect powers. 
/* This is a naive approach to recognizing perfect powers.
   Many things can be improved.  In particular, we should use p-adic
   arithmetic for computing possible roots.  */

#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

static unsigned long int gcd _PROTO ((unsigned long int a, unsigned long int b));
static int isprime _PROTO ((unsigned long int t));

/* Small primes used for trial division, terminated by a 0 sentinel.
   Entry 0 (the prime 2) is skipped by the caller because the power of two
   is stripped separately with mpz_scan1/mpz_tdiv_q_2exp.  */
static const unsigned short primes[] =
{  2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
  59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131,
 137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,
 227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,
 313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,
 419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,
 509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,
 617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,
 727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,
 947,953,967,971,977,983,991,997,0
};
/* The first prime that is NOT in the table above.  Once a candidate root
   drops below this value it would have to contain a tabulated prime factor,
   which has already been divided out.  */
#define SMALLEST_OMITTED_PRIME 1009


/* Return non-zero if U is a perfect power, zero otherwise.

   Every U <= 1 (including all negative numbers) is reported as "not a
   perfect power" by the first test below.

   Outline:
     1. n2 = exponent of 2 in U; u2 = the odd cofactor U / 2^n2.
     2. Trial-divide u2 by the tabulated odd primes.  For each prime that
        divides u2, its multiplicity n is folded into n2 with gcd, so n2
        ends up as the gcd of all exponents found so far (gcd (0, n) == n).
     3. As soon as n2 is 1 the answer is "no"; as soon as n2 is prime the
        only candidate exponent is n2 itself (label n2prime).
     4. Otherwise test the remaining cofactor for being an exact nth power
        for prime nth (all primes when n2 == 0, prime divisors of n2
        otherwise).

   Temporaries q and u2 live in TMP_ALLOC space, so every return after
   TMP_MARK is preceded by TMP_FREE.  */
int
#if __STDC__
mpz_perfect_power_p (mpz_srcptr u)
#else
mpz_perfect_power_p (u)
     mpz_srcptr u;
#endif
{
  unsigned long int prime;
  unsigned long int n, n2;
  int i;
  unsigned long int rem;
  mpz_t u2, q;
  int exact;
  mp_size_t uns;
  TMP_DECL (marker);

  if (mpz_cmp_ui (u, 1) <= 0)
    return 0;

  /* n2 = index of the lowest set bit, i.e. the exponent of 2 in u.  A
     single factor of 2 can never be part of a perfect power.  */
  n2 = mpz_scan1 (u, 0);
  if (n2 == 1)
    return 0;

  TMP_MARK (marker);

  /* Limb count of u once the low n2 bits have been shifted out.  */
  uns = ABSIZ (u) - n2 / BITS_PER_MP_LIMB;
  MPZ_TMP_INIT (q, uns);
  MPZ_TMP_INIT (u2, uns);

  mpz_tdiv_q_2exp (u2, u, n2);

  /* isprime (0) is false, so an odd u (n2 == 0) falls through here.  */
  if (isprime (n2))
    goto n2prime;

  for (i = 1; primes[i] != 0; i++)
    {
      prime = primes[i];
      rem = mpz_tdiv_ui (u2, prime);
      if (rem == 0)     /* divisible? */
        {
          /* The prime must divide at least twice, otherwise u cannot be a
             perfect power.  */
          rem = mpz_tdiv_q_ui (q, u2, prime * prime);
          if (rem != 0)
            {
              TMP_FREE (marker);
              return 0;
            }
          mpz_swap (q, u2);
          /* Keep dividing; n counts the multiplicity of prime in u.  */
          for (n = 2;;)
            {
              rem = mpz_tdiv_q_ui (q, u2, prime);
              if (rem != 0)
                break;
              mpz_swap (q, u2);
              n++;
            }

          n2 = gcd (n2, n);
          if (n2 == 1)
            {
              TMP_FREE (marker);
              return 0;
            }

          /* As soon as n2 becomes a prime number, stop factoring.
             Either we have u=x^n2 or u is not a perfect power.  */
          if (isprime (n2))
            goto n2prime;
        }
    }

  /* Everything factored over the table; n2 is composite (>= 4) here, so
     all exponents share a common divisor > 1.  */
  if (mpz_cmp_ui (u2, 1) == 0)
    {
      TMP_FREE (marker);
      return 1;
    }

  if (n2 == 0)
    {
      unsigned long int nth;
      /* We did not find any factors above.  We have to consider all values
         of n.  */
      for (nth = 2;; nth++)
        {
          if (! isprime (nth))
            continue;
#if 0
          exact = mpz_padic_root (q, u2, nth, PTH);
          if (exact)
#endif
            /* NOTE(review): mpz_root is assumed to store the truncated
               nth root in q and return non-zero when it is exact.  */
            exact = mpz_root (q, u2, nth);
          if (exact)
            {
              TMP_FREE (marker);
              return 1;
            }
          /* Root smaller than any possible remaining prime factor: no
             larger exponent can succeed either.  */
          if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
            {
              TMP_FREE (marker);
              return 0;
            }
        }
    }
  else
    {
      unsigned long int nth;
      /* We found some factors above.  We just need to consider values of n
         that divides n2.  */
      for (nth = 2; nth <= n2; nth++)
        {
          if (! isprime (nth))
            continue;
          if (n2 % nth != 0)
            continue;
#if 0
          exact = mpz_padic_root (q, u2, nth, PTH);
          if (exact)
#endif
            exact = mpz_root (q, u2, nth);
          if (exact)
            {
              TMP_FREE (marker);
              return 1;
            }
          if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
            {
              TMP_FREE (marker);
              return 0;
            }
        }

      TMP_FREE (marker);
      return 0;
    }

n2prime:
  /* n2 is prime: u is a perfect power iff the cofactor u2 is an exact
     n2-th power.  The root itself is not needed, hence NULL.  */
  exact = mpz_root (NULL, u2, n2);
  TMP_FREE (marker);
  return exact;
}

/* Binary (shift-and-subtract) greatest common divisor of A and B.
   gcd (0, b) == b and gcd (a, 0) == a.  */
static unsigned long int
#if __STDC__
gcd (unsigned long int a, unsigned long int b)
#else
gcd (a, b)
     unsigned long int a, b;
#endif
{
  int an2, bn2, n2;

  if (a == 0)
    return b;
  if (b == 0)
    return a;

  /* Strip the powers of two; the common part (the smaller count) is
     multiplied back in at the end.  */
  count_trailing_zeros (an2, a);
  a >>= an2;

  count_trailing_zeros (bn2, b);
  b >>= bn2;

  n2 = MIN (an2, bn2);

  /* Both operands are odd here, so their difference is even and non-zero;
     the do-while therefore always shifts at least once.  */
  while (a != b)
    {
      if (a > b)
        {
          a -= b;
          do
            a >>= 1;
          while ((a & 1) == 0);
        }
      else /*  b > a.  */
        {
          b -= a;
          do
            b >>= 1;
          while ((b & 1) == 0);
        }
    }

  return a << n2;
}

/* Return 1 if T is prime, 0 otherwise, using trial division by odd
   numbers.  0 and 1 are reported as non-prime.  */
static int
#if __STDC__
isprime (unsigned long int t)
#else
isprime (t)
     unsigned long int t;
#endif
{
  unsigned long int q, r, d;

  /* Handles 0, 1, 2 and every even number.  */
  if (t < 3 || (t & 1) == 0)
    return t == 2;

  /* The loop ends with "composite" when some d divides t (r == 0), and
     with "prime" once the quotient drops below the divisor, i.e. d has
     passed sqrt (t).  */
  for (d = 3, r = 1; r != 0; d += 2)
    {
      q = t / d;
      r = t - q * d;
      if (q < d)
        return 1;
    }
  return 0;
}
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_perfect_square_p (mpz_srcptr a) +#else +mpz_perfect_square_p (a) + mpz_srcptr a; +#endif +{ + mp_size_t asize = a->_mp_size; + + /* No negative numbers are perfect squares. */ + if (asize < 0) + return 0; + + /* Zero is a perfect square. */ + if (asize == 0) + return 1; + + return mpn_perfect_square_p (a->_mp_d, asize); +} diff --git a/rts/gmp/mpz/popcount.c b/rts/gmp/mpz/popcount.c new file mode 100644 index 0000000000..3105258e26 --- /dev/null +++ b/rts/gmp/mpz/popcount.c @@ -0,0 +1,42 @@ +/* mpz_popcount(mpz_ptr op) -- Population count of OP. If the operand is + negative, return ~0 (a novel representation of infinity). + +Copyright (C) 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_popcount (mpz_srcptr u) +#else +mpz_popcount (u) + mpz_srcptr u; +#endif +{ + mp_size_t usize; + + usize = u->_mp_size; + + if ((usize) < 0) + return ~ (unsigned long int) 0; + + return mpn_popcount (u->_mp_d, usize); +} diff --git a/rts/gmp/mpz/pow_ui.c b/rts/gmp/mpz/pow_ui.c new file mode 100644 index 0000000000..96ca114e4d --- /dev/null +++ b/rts/gmp/mpz/pow_ui.c @@ -0,0 +1,129 @@ +/* mpz_pow_ui(res, base, exp) -- Set RES to BASE**EXP. + +Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#ifdef BERKELEY_MP +#include "mp.h" +#endif +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e) +#else +mpz_pow_ui (r, b, e) + mpz_ptr r; + mpz_srcptr b; + unsigned long int e; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +rpow (const MINT *b, signed short int e, MINT *r) +#else +rpow (b, e, r) + const MINT *b; + signed short int e; + MINT *r; +#endif +#endif /* BERKELEY_MP */ +{ + mp_ptr rp, bp, tp, xp; + mp_size_t ralloc, rsize, bsize; + int cnt, i; + mp_limb_t blimb; + TMP_DECL (marker); + + bsize = ABS (b->_mp_size); + + /* Single out cases that give result == 0 or 1. These tests are here + to simplify the general code below, not to optimize. */ + if (e == 0) + { + r->_mp_d[0] = 1; + r->_mp_size = 1; + return; + } + if (bsize == 0 +#ifdef BERKELEY_MP + || e < 0 +#endif + ) + { + r->_mp_size = 0; + return; + } + + bp = b->_mp_d; + + blimb = bp[bsize - 1]; + if (bsize == 1 && blimb < 0x100) + { + /* Estimate space requirements accurately. Using the code from the + `else' path would over-estimate space requirements wildly. */ + float lb = __mp_bases[blimb].chars_per_bit_exactly; + ralloc = 3 + ((mp_size_t) (e / lb) / BITS_PER_MP_LIMB); + } + else + { + /* Over-estimate space requirements somewhat. */ + count_leading_zeros (cnt, blimb); + ralloc = bsize * e - cnt * e / BITS_PER_MP_LIMB + 2; + } + + TMP_MARK (marker); + + /* The two areas are used to alternatingly hold the input and recieve the + product for mpn_mul. (This scheme is used to fulfill the requirements + of mpn_mul; that the product space may not be the same as any of the + input operands.) 
*/ + rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB); + tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB); + + MPN_COPY (rp, bp, bsize); + rsize = bsize; + count_leading_zeros (cnt, e); + + for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--) + { + mpn_mul_n (tp, rp, rp, rsize); + rsize = 2 * rsize; + rsize -= tp[rsize - 1] == 0; + xp = tp; tp = rp; rp = xp; + + if ((e & ((mp_limb_t) 1 << i)) != 0) + { + rsize = rsize + bsize - (mpn_mul (tp, rp, rsize, bp, bsize) == 0); + xp = tp; tp = rp; rp = xp; + } + } + + /* Now then we know the exact space requirements, reallocate if + necessary. */ + if (r->_mp_alloc < rsize) + _mpz_realloc (r, rsize); + + MPN_COPY (r->_mp_d, rp, rsize); + r->_mp_size = (e & 1) == 0 || b->_mp_size >= 0 ? rsize : -rsize; + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/powm.c b/rts/gmp/mpz/powm.c new file mode 100644 index 0000000000..e6af855a71 --- /dev/null +++ b/rts/gmp/mpz/powm.c @@ -0,0 +1,364 @@ +/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc. +Contributed by Paul Zimmermann. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + + +/* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */ +static void +#if __STDC__ +mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim) +#else +mpz_redc (c, a, b, m, Nprim) + mpz_ptr c; + mpz_srcptr a; + mpz_srcptr b; + mpz_srcptr m; + mp_limb_t Nprim; +#endif +{ + mp_ptr cp, mp = PTR (m); + mp_limb_t cy, cout = 0; + mp_limb_t q; + size_t j, n = ABSIZ (m); + + ASSERT (ALLOC (c) >= 2 * n); + + mpz_mul (c, a, b); + cp = PTR (c); + j = ABSIZ (c); + MPN_ZERO (cp + j, 2 * n - j); + for (j = 0; j < n; j++) + { + q = cp[0] * Nprim; + cy = mpn_addmul_1 (cp, mp, n, q); + cout += mpn_add_1 (cp + n, cp + n, n - j, cy); + cp++; + } + cp -= n; + if (cout) + { + cy = cout - mpn_sub_n (cp, cp + n, mp, n); + while (cy) + cy -= mpn_sub_n (cp, cp, mp, n); + } + else + MPN_COPY (cp, cp + n, n); + MPN_NORMALIZE (cp, n); + SIZ (c) = SIZ (c) < 0 ? 
-n : n; +} + +/* average number of calls to redc for an exponent of n bits + with the sliding window algorithm of base 2^k: the optimal is + obtained for the value of k which minimizes 2^(k-1)+n/(k+1): + + n\k 4 5 6 7 8 + 128 156* 159 171 200 261 + 256 309 307* 316 343 403 + 512 617 607* 610 632 688 + 1024 1231 1204 1195* 1207 1256 + 2048 2461 2399 2366 2360* 2396 + 4096 4918 4787 4707 4665* 4670 +*/ + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod) +#else +mpz_powm (res, base, e, mod) + mpz_ptr res; + mpz_srcptr base; + mpz_srcptr e; + mpz_srcptr mod; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res) +#else +pow (base, e, mod, res) + mpz_srcptr base; + mpz_srcptr e; + mpz_srcptr mod; + mpz_ptr res; +#endif +#endif /* BERKELEY_MP */ +{ + mp_limb_t invm, *ep, c, mask; + mpz_t xx, *g; + mp_size_t n, i, K, j, l, k; + int sh; + int use_redc; + +#ifdef POWM_DEBUG + mpz_t exp; + mpz_init (exp); +#endif + + n = ABSIZ (mod); + + if (n == 0) + DIVIDE_BY_ZERO; + + if (SIZ (e) == 0) + { + /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 + depending on if MOD equals 1. */ + SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1; + PTR(res)[0] = 1; + return; + } + + /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. + In REDC each modular multiplication costs about 2*n^2 limbs operations, + whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a + multiplication using Karatsuba, and a division is assumed to cost 2*K(n), + for example using Burnikel-Ziegler's algorithm. This gives a theoretical + threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ + 2.66. */ + /* For now, also disable REDC when MOD is even, as the inverse can't + handle that. 
*/ + +#ifndef POWM_THRESHOLD +#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3) +#endif + + use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0); + if (use_redc) + { + /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ + modlimb_invert (invm, PTR(mod)[0]); + invm = -invm; + } + + /* determines optimal value of k */ + l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */ + k = 1; + K = 2; + while (2 * l > K * (2 + k * (3 + k))) + { + k++; + K *= 2; + } + + g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t)); + /* compute x*R^n where R=2^BITS_PER_MP_LIMB */ + mpz_init (g[0]); + if (use_redc) + { + mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB); + mpz_mod (g[0], g[0], mod); + } + else + mpz_mod (g[0], base, mod); + + /* compute xx^g for odd g < 2^k */ + mpz_init (xx); + if (use_redc) + { + _mpz_realloc (xx, 2 * n); + mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */ + } + else + { + mpz_mul (xx, g[0], g[0]); + mpz_mod (xx, xx, mod); + } + for (i = 1; i < K / 2; i++) + { + mpz_init (g[i]); + if (use_redc) + { + _mpz_realloc (g[i], 2 * n); + mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */ + } + else + { + mpz_mul (g[i], g[i - 1], xx); + mpz_mod (g[i], g[i], mod); + } + } + + /* now starts the real stuff */ + mask = (mp_limb_t) ((1<<k) - 1); + ep = PTR (e); + i = ABSIZ (e) - 1; /* current index */ + c = ep[i]; /* current limb */ + count_leading_zeros (sh, c); + sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */ + sh -= k; /* index of lower bit of ep[i] to take into account */ + if (sh < 0) + { /* k-sh extra bits are needed */ + if (i > 0) + { + i--; + c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); + sh += BITS_PER_MP_LIMB; + } + } + else + c = c >> sh; +#ifdef POWM_DEBUG + printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm); + mpz_set_ui (exp, c); +#endif + j=0; + while (c % 2 == 0) + { + j++; + c = (c >> 1); + } + mpz_set (xx, g[c >> 1]); + while (j--) + { + if (use_redc) + 
mpz_redc (xx, xx, xx, mod, invm); + else + { + mpz_mul (xx, xx, xx); + mpz_mod (xx, xx, mod); + } + } + +#ifdef POWM_DEBUG + printf ("x^"); mpz_out_str (0, 10, exp); + printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); + putchar ('\n'); +#endif + + while (i > 0 || sh > 0) + { + c = ep[i]; + sh -= k; + l = k; /* number of bits treated */ + if (sh < 0) + { + if (i > 0) + { + i--; + c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); + sh += BITS_PER_MP_LIMB; + } + else + { + l += sh; /* may be less bits than k here */ + c = c & ((1<<l) - 1); + } + } + else + c = c >> sh; + c = c & mask; + + /* this while loop implements the sliding window improvement */ + while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0)) + { + if (use_redc) mpz_redc (xx, xx, xx, mod, invm); + else + { + mpz_mul (xx, xx, xx); + mpz_mod (xx, xx, mod); + } + if (sh) + { + sh--; + c = (c<<1) + ((ep[i]>>sh) & 1); + } + else + { + i--; + sh = BITS_PER_MP_LIMB - 1; + c = (c<<1) + (ep[i]>>sh); + } + } + +#ifdef POWM_DEBUG + printf ("l=%u c=%lu\n", l, c); + mpz_mul_2exp (exp, exp, k); + mpz_add_ui (exp, exp, c); +#endif + + /* now replace xx by xx^(2^k)*x^c */ + if (c != 0) + { + j = 0; + while (c % 2 == 0) + { + j++; + c = c >> 1; + } + /* c0 = c * 2^j, i.e. 
xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */ + l -= j; + while (l--) + if (use_redc) mpz_redc (xx, xx, xx, mod, invm); + else + { + mpz_mul (xx, xx, xx); + mpz_mod (xx, xx, mod); + } + if (use_redc) + mpz_redc (xx, xx, g[c >> 1], mod, invm); + else + { + mpz_mul (xx, xx, g[c >> 1]); + mpz_mod (xx, xx, mod); + } + } + else + j = l; /* case c=0 */ + while (j--) + { + if (use_redc) + mpz_redc (xx, xx, xx, mod, invm); + else + { + mpz_mul (xx, xx, xx); + mpz_mod (xx, xx, mod); + } + } +#ifdef POWM_DEBUG + printf ("x^"); mpz_out_str (0, 10, exp); + printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); + putchar ('\n'); +#endif + } + + /* now convert back xx to xx/R^n */ + if (use_redc) + { + mpz_set_ui (g[0], 1); + mpz_redc (xx, xx, g[0], mod, invm); + if (mpz_cmp (xx, mod) >= 0) + mpz_sub (xx, xx, mod); + } + mpz_set (res, xx); + + mpz_clear (xx); + for (i = 0; i < K / 2; i++) + mpz_clear (g[i]); + (*_mp_free_func) (g, K / 2 * sizeof (mpz_t)); +} diff --git a/rts/gmp/mpz/powm_ui.c b/rts/gmp/mpz/powm_ui.c new file mode 100644 index 0000000000..00f70bd563 --- /dev/null +++ b/rts/gmp/mpz/powm_ui.c @@ -0,0 +1,248 @@ +/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Set RES to (BASE raised to EXP) modulo MOD, for an unsigned long
   exponent, by left-to-right binary exponentiation on raw limb arrays.

   - MOD == 0 triggers DIVIDE_BY_ZERO.
   - Only the magnitude of MOD is used (msize = ABS (mod->_mp_size)).
   - For a negative BASE and odd EXP the non-zero residue r is replaced by
     |MOD| - r, so the stored result is non-negative.
   - RES is grown to 2 * msize limbs, which the intermediate products need.
   - RES may be the same object as BASE or MOD; see the aliasing code.  */
void
#if __STDC__
mpz_powm_ui (mpz_ptr res, mpz_srcptr base, unsigned long int exp, mpz_srcptr mod)
#else
mpz_powm_ui (res, base, exp, mod)
     mpz_ptr res;
     mpz_srcptr base;
     unsigned long int exp;
     mpz_srcptr mod;
#endif
{
  mp_ptr rp, mp, bp;
  mp_size_t msize, bsize, rsize;
  mp_size_t size;
  int mod_shift_cnt;
  int negative_result;
  mp_limb_t *free_me = NULL;
  size_t free_me_size;          /* only meaningful when free_me != NULL */
  TMP_DECL (marker);

  msize = ABS (mod->_mp_size);
  size = 2 * msize;

  rp = res->_mp_d;

  if (msize == 0)
    DIVIDE_BY_ZERO;

  if (exp == 0)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
         depending on if MOD equals 1.  */
      res->_mp_size = (msize == 1 && (mod->_mp_d)[0] == 1) ? 0 : 1;
      rp[0] = 1;
      return;
    }

  TMP_MARK (marker);

  /* Normalize MOD (i.e. make its most significant bit set) as required by
     mpn_divmod.  This will make the intermediate values in the calculation
     slightly larger, but the correct result is obtained after a final
     reduction using the original MOD value.  */

  /* mp is always a private copy of MOD from here on, so later writes
     through rp cannot damage it even when RES and MOD are one object.  */
  mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
  count_leading_zeros (mod_shift_cnt, mod->_mp_d[msize - 1]);
  if (mod_shift_cnt != 0)
    mpn_lshift (mp, mod->_mp_d, msize, mod_shift_cnt);
  else
    MPN_COPY (mp, mod->_mp_d, msize);

  bsize = ABS (base->_mp_size);
  if (bsize > msize)
    {
      /* The base is larger than the module.  Reduce it.  */

      /* Allocate (BSIZE + 1) with space for remainder and quotient.
         (The quotient is (bsize - msize + 1) limbs.)  */
      bp = (mp_ptr) TMP_ALLOC ((bsize + 1) * BYTES_PER_MP_LIMB);
      MPN_COPY (bp, base->_mp_d, bsize);
      /* We don't care about the quotient, store it above the remainder,
         at BP + MSIZE.  */
      mpn_divmod (bp + msize, bp, bsize, mp, msize);
      bsize = msize;
      /* Canonicalize the base, since we are going to multiply with it
         quite a few times.  */
      MPN_NORMALIZE (bp, bsize);
    }
  else
    bp = base->_mp_d;

  if (bsize == 0)
    {
      /* Base is zero (possibly only after reduction): result is zero.  */
      res->_mp_size = 0;
      TMP_FREE (marker);
      return;
    }

  if (res->_mp_alloc < size)
    {
      /* We have to allocate more space for RES.  If any of the input
         parameters are identical to RES, defer deallocation of the old
         space.  */

      /* NOTE(review): mp was obtained from TMP_ALLOC above, so the
         `rp == mp' test looks like it can never be true; only the
         `rp == bp' case (RES aliasing an unreduced BASE) appears
         reachable.  Confirm before simplifying.  */
      if (rp == mp || rp == bp)
        {
          free_me = rp;
          free_me_size = res->_mp_alloc;
        }
      else
        (*_mp_free_func) (rp, res->_mp_alloc * BYTES_PER_MP_LIMB);

      rp = (mp_ptr) (*_mp_allocate_func) (size * BYTES_PER_MP_LIMB);
      res->_mp_alloc = size;
      res->_mp_d = rp;
    }
  else
    {
      /* Make BASE, EXP and MOD not overlap with RES.  */
      if (rp == bp)
        {
          /* RES and BASE are identical.  Allocate temp. space for BASE.  */
          bp = (mp_ptr) TMP_ALLOC (bsize * BYTES_PER_MP_LIMB);
          MPN_COPY (bp, rp, bsize);
        }
      /* NOTE(review): same remark as above, mp is already a private
         copy at this point.  */
      if (rp == mp)
        {
          /* RES and MOD are identical.  Allocate temporary space for MOD.  */
          mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
          MPN_COPY (mp, rp, msize);
        }
    }

  /* The accumulator starts out as BASE; this accounts for the most
     significant set bit of EXP.  */
  MPN_COPY (rp, bp, bsize);
  rsize = bsize;

  {
    mp_ptr xp = (mp_ptr) TMP_ALLOC (2 * (msize + 1) * BYTES_PER_MP_LIMB);
    int c;
    mp_limb_t e;
    mp_limb_t carry_limb;

    negative_result = (exp & 1) && base->_mp_size < 0;

    e = exp;
    count_leading_zeros (c, e);
    e = (e << c) << 1;          /* shift the exp bits to the left, lose msb */
    c = BITS_PER_MP_LIMB - 1 - c;       /* number of exponent bits left */

    /* Main loop.

       Make the result be pointed to alternately by XP and RP.  This
       helps us avoid block copying, which would otherwise be necessary
       with the overlap restrictions of mpn_divmod.  With 50% probability
       the result after this loop will be in the area originally pointed
       by RP (==RES->_mp_d), and with 50% probability in the area originally
       pointed to by XP.  */

    while (c != 0)
      {
        mp_ptr tp;
        mp_size_t xsize;

        /* Square and reduce.  After a reduction the size is simply set to
           msize; high zero limbs are tolerated in the intermediates.  */
        mpn_mul_n (xp, rp, rp, rsize);
        xsize = 2 * rsize;
        xsize -= xp[xsize - 1] == 0;
        if (xsize > msize)
          {
            mpn_divmod (xp + msize, xp, xsize, mp, msize);
            xsize = msize;
          }

        tp = rp; rp = xp; xp = tp;
        rsize = xsize;

        /* The current exponent bit sits in the top bit of e.  */
        if ((mp_limb_signed_t) e < 0)
          {
            /* Multiply by the base and reduce.  */
            mpn_mul (xp, rp, rsize, bp, bsize);
            xsize = rsize + bsize;
            xsize -= xp[xsize - 1] == 0;
            if (xsize > msize)
              {
                mpn_divmod (xp + msize, xp, xsize, mp, msize);
                xsize = msize;
              }

            tp = rp; rp = xp; xp = tp;
            rsize = xsize;
          }
        e <<= 1;
        c--;
      }

    /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
       steps.  Adjust the result by reducing it with the original MOD.

       Also make sure the result is put in RES->_mp_d (where it already
       might be, see above).  */

    if (mod_shift_cnt != 0)
      {
        /* Shift the result by the same amount as MOD so that one more
           division by the shifted MOD, followed by a right shift, gives
           the residue for the original MOD.  */
        carry_limb = mpn_lshift (res->_mp_d, rp, rsize, mod_shift_cnt);
        rp = res->_mp_d;
        if (carry_limb != 0)
          {
            rp[rsize] = carry_limb;
            rsize++;
          }
      }
    else
      {
        MPN_COPY (res->_mp_d, rp, rsize);
        rp = res->_mp_d;
      }

    if (rsize >= msize)
      {
        mpn_divmod (rp + msize, rp, rsize, mp, msize);
        rsize = msize;
      }

    /* Remove any leading zero words from the result.  */
    if (mod_shift_cnt != 0)
      mpn_rshift (rp, rp, rsize, mod_shift_cnt);
    MPN_NORMALIZE (rp, rsize);
  }

  if (negative_result && rsize != 0)
    {
      /* Undo the normalization of our MOD copy, then replace r by
         |MOD| - r.  */
      if (mod_shift_cnt != 0)
        mpn_rshift (mp, mp, msize, mod_shift_cnt);
      mpn_sub (rp, mp, msize, rp, rsize);
      rsize = msize;
      MPN_NORMALIZE (rp, rsize);
    }
  res->_mp_size = rsize;

  /* Release the old RES storage whose deallocation was deferred.  */
  if (free_me != NULL)
    (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
  TMP_FREE (marker);
}
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

static int isprime _PROTO ((unsigned long int t));
static int mpz_millerrabin _PROTO ((mpz_srcptr n, int reps));

/* Probabilistic primality test.

   Returns 0 if N is definitely composite, 1 if N is probably prime, and
   2 if N is certainly prime.  REPS is the number of Miller-Rabin passes.

   Stages:
     1. |N| <= 1000000 is decided exactly by trial division (result 0 or 2).
     2. Even N is rejected.
     3. N is reduced modulo PP and the residue is checked against a fixed
        list of small primes.
     4. Further small primes are multiplied together until the product
        would overflow one limb; N is reduced modulo each such product and
        the residue is checked against the primes in that batch.
     5. Whatever survives goes to mpz_millerrabin.  */
int
#if __STDC__
mpz_probab_prime_p (mpz_srcptr n, int reps)
#else
mpz_probab_prime_p (n, reps)
     mpz_srcptr n;
     int reps;
#endif
{
  mp_limb_t r;

  /* Handle small and negative n.  */
  if (mpz_cmp_ui (n, 1000000L) <= 0)
    {
      int is_prime;
      if (mpz_sgn (n) < 0)
        {
          /* Negative number.  Negate and call ourselves.  */
          mpz_t n2;
          mpz_init (n2);
          mpz_neg (n2, n);
          is_prime = mpz_probab_prime_p (n2, reps);
          mpz_clear (n2);
          return is_prime;
        }
      /* Exact answer by trial division: 2 means "surely prime".  */
      is_prime = isprime (mpz_get_ui (n));
      return is_prime ? 2 : 0;
    }

  /* If n is now even, it is not a prime.  */
  if ((mpz_get_ui (n) & 1) == 0)
    return 0;

  /* Check if n has small factors.  */
  /* NOTE(review): PP is presumably the product of the odd primes tested
     just below (and PP_INVERTED its precomputed inverse); both come from
     headers not visible here.  n > 1000000 at this point, so SIZ (n) is
     positive.  */
  if (UDIV_TIME > (2 * UMUL_TIME + 6))
    r = mpn_preinv_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
  else
    r = mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP);
  if (r % 3 == 0 || r % 5 == 0 || r % 7 == 0 || r % 11 == 0 || r % 13 == 0
      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
#if BITS_PER_MP_LIMB == 64
      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
      || r % 47 == 0 || r % 53 == 0
#endif
      )
    {
      return 0;
    }

  /* Do more dividing.  We collect small primes, using umul_ppmm, until we
     overflow a single limb.  We divide our number by the small primes product,
     and look for factors in the remainder.  */
  {
    unsigned long int ln2;
    unsigned long int q;
    mp_limb_t p1, p0, p;
    unsigned int primes[15];
    int nprimes;

    nprimes = 0;
    p = 1;
    /* Upper bound for the trial primes: (bit length of n / 30) squared.  */
    ln2 = mpz_sizeinbase (n, 2) / 30; ln2 = ln2 * ln2;
    /* Start with the first prime not covered by the fixed test above.  */
    for (q = BITS_PER_MP_LIMB == 64 ? 59 : 31; q < ln2; q += 2)
      {
        if (isprime (q))
          {
            umul_ppmm (p1, p0, p, q);
            if (p1 != 0)
              {
                /* p * q no longer fits in one limb: test the batch
                   collected so far, then start a new batch with q.  */
                r = mpn_mod_1 (PTR(n), SIZ(n), p);
                while (--nprimes >= 0)
                  if (r % primes[nprimes] == 0)
                    {
                      /* Consistency check: the prime must really divide n.  */
                      if (mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) primes[nprimes]) != 0)
                        abort ();
                      return 0;
                    }
                p = q;
                nprimes = 0;
              }
            else
              {
                p = p0;
              }
            primes[nprimes++] = q;
          }
      }
    /* Primes still in the last, unfinished batch are not tested here; the
       Miller-Rabin stage below handles whatever gets this far.  */
  }

  /* Perform a number of Miller-Rabin tests.  */
  return mpz_millerrabin (n, reps);
}

/* Return 1 if T is prime, 0 otherwise, using trial division by odd
   numbers.  0 and 1 are reported as non-prime.  */
static int
#if __STDC__
isprime (unsigned long int t)
#else
isprime (t)
     unsigned long int t;
#endif
{
  unsigned long int q, r, d;

  /* Handles 0, 1, 2 and every even number.  */
  if (t < 3 || (t & 1) == 0)
    return t == 2;

  /* Stop with "composite" when some d divides t (r == 0), and with
     "prime" once the quotient drops below the divisor.  */
  for (d = 3, r = 1; r != 0; d += 2)
    {
      q = t / d;
      r = t - q * d;
      if (q < d)
        return 1;
    }
  return 0;
}

static int millerrabin _PROTO ((mpz_srcptr n, mpz_srcptr nm1,
                                mpz_ptr x, mpz_ptr y,
                                mpz_srcptr q, unsigned long int k));

/* Run a base-210 Fermat test followed by up to REPS Miller-Rabin rounds
   with random bases on N.  Returns 0 as soon as a test proves N composite,
   1 otherwise.  All mpz temporaries live in TMP_ALLOC space.  */
static int
#if __STDC__
mpz_millerrabin (mpz_srcptr n, int reps)
#else
mpz_millerrabin (n, reps)
     mpz_srcptr n;
     int reps;
#endif
{
  int r;
  mpz_t nm1, x, y, q;
  unsigned long int k;
  gmp_randstate_t rstate;
  int is_prime;
  TMP_DECL (marker);
  TMP_MARK (marker);

  MPZ_TMP_INIT (nm1, SIZ (n) + 1);
  mpz_sub_ui (nm1, n, 1L);              /* nm1 = n - 1 */

  MPZ_TMP_INIT (x, SIZ (n));
  MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */

  /* Perform a Fermat test.  */
  mpz_set_ui (x, 210L);
  mpz_powm (y, x, nm1, n);
  if (mpz_cmp_ui (y, 1L) != 0)
    {
      TMP_FREE (marker);
      return 0;
    }

  MPZ_TMP_INIT (q, SIZ (n));

  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
  k = mpz_scan1 (nm1, 0L);
  mpz_tdiv_q_2exp (q, nm1, k);

  gmp_randinit (rstate, GMP_RAND_ALG_DEFAULT, 32L);

  is_prime = 1;
  for (r = 0; r < reps && is_prime; r++)
    {
      /* Draw a random base with one bit fewer than n (hence below n) and
         retry until it is greater than 1.  */
      do
        mpz_urandomb (x, rstate, mpz_sizeinbase (n, 2) - 1);
      while (mpz_cmp_ui (x, 1L) <= 0);

      is_prime = millerrabin (n, nm1, x, y, q, k);
    }

  gmp_randclear (rstate);

  TMP_FREE (marker);
  return is_prime;
}

/* One Miller-Rabin round for base X, with N - 1 = 2^K * Q and Q odd.
   NM1 must be N - 1; Y is scratch space.  Returns 1 if N passes for this
   base (probably prime), 0 if X is a witness that N is composite.  */
static int
#if __STDC__
millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
             mpz_srcptr q, unsigned long int k)
#else
millerrabin (n, nm1, x, y, q, k)
     mpz_srcptr n;
     mpz_srcptr nm1;
     mpz_ptr x;
     mpz_ptr y;
     mpz_srcptr q;
     unsigned long int k;
#endif
{
  unsigned long int i;

  /* y = x^q mod n */
  mpz_powm (y, x, q, n);

  if (mpz_cmp_ui (y, 1L) == 0 || mpz_cmp (y, nm1) == 0)
    return 1;

  /* Square repeatedly: reaching n-1 passes the round; reaching 1 without
     having passed through n-1 exposes a non-trivial square root of 1, so
     n is composite.  */
  for (i = 1; i < k; i++)
    {
      mpz_powm_ui (y, y, 2L, n);
      if (mpz_cmp (y, nm1) == 0)
        return 1;
      if (mpz_cmp_ui (y, 1L) == 0)
        return 0;
    }
  return 0;
}
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "urandom.h" + +void +#if __STDC__ +mpz_random (mpz_ptr x, mp_size_t size) +#else +mpz_random (x, size) + mpz_ptr x; + mp_size_t size; +#endif +{ + mp_size_t i; + mp_limb_t ran; + mp_ptr xp; + mp_size_t abs_size; + + abs_size = ABS (size); + + if (x->_mp_alloc < abs_size) + _mpz_realloc (x, abs_size); + + xp = x->_mp_d; + + for (i = 0; i < abs_size; i++) + { + ran = urandom (); + xp[i] = ran; + } + + MPN_NORMALIZE (xp, abs_size); + x->_mp_size = size < 0 ? -abs_size : abs_size; +} diff --git a/rts/gmp/mpz/random2.c b/rts/gmp/mpz/random2.c new file mode 100644 index 0000000000..a90af115e9 --- /dev/null +++ b/rts/gmp/mpz/random2.c @@ -0,0 +1,48 @@ +/* mpz_random2 -- Generate a positive random mpz_t of specified size, with + long runs of consecutive ones and zeros in the binary representation. + Meant for testing of other MP routines. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_random2 (mpz_ptr x, mp_size_t size) +#else +mpz_random2 (x, size) + mpz_ptr x; + mp_size_t size; +#endif +{ + mp_size_t abs_size; + + abs_size = ABS (size); + if (abs_size != 0) + { + if (x->_mp_alloc < abs_size) + _mpz_realloc (x, abs_size); + + mpn_random2 (x->_mp_d, abs_size); + } + + x->_mp_size = size; +} diff --git a/rts/gmp/mpz/realloc.c b/rts/gmp/mpz/realloc.c new file mode 100644 index 0000000000..0b9e447ec3 --- /dev/null +++ b/rts/gmp/mpz/realloc.c @@ -0,0 +1,52 @@ +/* _mpz_realloc -- make the mpz_t have NEW_SIZE digits allocated. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void * +#if __STDC__ +_mpz_realloc (mpz_ptr m, mp_size_t new_size) +#else +_mpz_realloc (m, new_size) + mpz_ptr m; + mp_size_t new_size; +#endif +{ + /* Never allocate zero space. 
*/ + if (new_size == 0) + new_size = 1; + + m->_mp_d = (mp_ptr) (*_mp_reallocate_func) (m->_mp_d, + m->_mp_alloc * BYTES_PER_MP_LIMB, + new_size * BYTES_PER_MP_LIMB); + m->_mp_alloc = new_size; + +#if 0 + /* This might break some code that reads the size field after + reallocation, in the case the reallocated destination and a + source argument are identical. */ + if (ABS (m->_mp_size) > new_size) + m->_mp_size = 0; +#endif + + return (void *) m->_mp_d; +} diff --git a/rts/gmp/mpz/remove.c b/rts/gmp/mpz/remove.c new file mode 100644 index 0000000000..bc6675f972 --- /dev/null +++ b/rts/gmp/mpz/remove.c @@ -0,0 +1,93 @@ +/* mpz_remove -- divide out a factor and return its multiplicity. + +Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f) +#else +mpz_remove (dest, src, f) + mpz_ptr dest; + mpz_srcptr src; + mpz_srcptr f; +#endif +{ /* Set DEST to SRC with every factor F divided out and return how many factors were removed. F <= 1 or SRC == 0 raises DIVIDE_BY_ZERO. */ + mpz_t fpow[40]; /* fpow[i] holds f^(2^i); inexhaustible...until year 2020 or so */ + mpz_t x, rem; + unsigned long int pwr; + int p; + + if (mpz_cmp_ui (f, 1) <= 0 || mpz_sgn (src) == 0) + DIVIDE_BY_ZERO; + if (mpz_cmp_ui (f, 2) == 0) + { + unsigned long int s0; + s0 = mpz_scan1 (src, 0); + mpz_div_2exp (dest, src, s0); + return s0; + } + + /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an + upper bound of the result we're seeking. We could also shift down the + operands so that they become odd, to make intermediate values smaller. */ + + mpz_init (rem); + mpz_init (x); + + pwr = 0; + mpz_init (fpow[0]); + mpz_set (fpow[0], f); + mpz_set (dest, src); + + /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */ + for (p = 0;; p++) + { + mpz_tdiv_qr (x, rem, dest, fpow[p]); + if (SIZ (rem) != 0) + break; + mpz_init (fpow[p + 1]); + mpz_mul (fpow[p + 1], fpow[p], fpow[p]); + mpz_set (dest, x); + } + + pwr = (1UL << p) - 1; /* 1 + 2 + ... + 2^(p-1) factors removed so far; shift as unsigned long to match PWR instead of overflowing int */ + + mpz_clear (fpow[p]); + + /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a + zero remainder. */ + while (--p >= 0) + { + mpz_tdiv_qr (x, rem, dest, fpow[p]); + if (SIZ (rem) == 0) + { + pwr += 1UL << p; + mpz_set (dest, x); + } + mpz_clear (fpow[p]); + } + + mpz_clear (x); + mpz_clear (rem); + return pwr; +} diff --git a/rts/gmp/mpz/root.c b/rts/gmp/mpz/root.c new file mode 100644 index 0000000000..0920bf22d3 --- /dev/null +++ b/rts/gmp/mpz/root.c @@ -0,0 +1,183 @@ +/* mpz_root(root, u, nth) -- Set ROOT to floor(U^(1/nth)). + Return an indication if the result is exact. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* Naive implementation of nth root extraction. It would probably be a + better idea to use a division-free Newton iteration. It is insane + to use full precision from iteration 1. The mpz_scan1 trick compensates + to some extent. It would be natural to avoid representing the low zero + bits mpz_scan1 is counting, and at the same time call mpn directly. 
*/ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +int +#if __STDC__ +mpz_root (mpz_ptr r, mpz_srcptr c, unsigned long int nth) +#else +mpz_root (r, c, nth) + mpz_ptr r; + mpz_srcptr c; + unsigned long int nth; +#endif +{ + mpz_t x, t0, t1, t2; + __mpz_struct ccs, *cc = &ccs; + unsigned long int nbits; + int bit; + int exact; + int i; + unsigned long int lowz; + unsigned long int rl; + + /* even roots of negatives provoke an exception */ + if (mpz_sgn (c) < 0 && (nth & 1) == 0) + SQRT_OF_NEGATIVE; + + /* root extraction interpreted as c^(1/nth) means a zeroth root should + provoke a divide by zero, do this even if c==0 */ + if (nth == 0) + DIVIDE_BY_ZERO; + + if (mpz_sgn (c) == 0) + { + if (r != NULL) + mpz_set_ui (r, 0); + return 1; /* exact result */ + } + + PTR(cc) = PTR(c); + SIZ(cc) = ABSIZ(c); + + nbits = (mpz_sizeinbase (cc, 2) - 1) / nth; + if (nbits == 0) + { + if (r != NULL) + mpz_set_ui (r, 1); + if (mpz_sgn (c) < 0) + { + if (r != NULL) + SIZ(r) = -SIZ(r); + return mpz_cmp_si (c, -1L) == 0; + } + return mpz_cmp_ui (c, 1L) == 0; + } + + mpz_init (x); + mpz_init (t0); + mpz_init (t1); + mpz_init (t2); + + /* Create a one-bit approximation. */ + mpz_set_ui (x, 0); + mpz_setbit (x, nbits); + + /* Make the approximation better, one bit at a time. This odd-looking + termination criteria makes large nth get better initial approximation, + which avoids slow convergence for such values. */ + bit = nbits - 1; + for (i = 1; (nth >> i) != 0; i++) + { + mpz_setbit (x, bit); + mpz_tdiv_q_2exp (t0, x, bit); + mpz_pow_ui (t1, t0, nth); + mpz_mul_2exp (t1, t1, bit * nth); + if (mpz_cmp (cc, t1) < 0) + mpz_clrbit (x, bit); + + bit--; /* check/set next bit */ + if (bit < 0) + { + /* We're done. */ + mpz_pow_ui (t1, x, nth); + goto done; + } + } + mpz_setbit (x, bit); + mpz_set_ui (t2, 0); mpz_setbit (t2, bit); mpz_add (x, x, t2); + +#if DEBUG + /* Check that the starting approximation is >= than the root. 
*/ + mpz_pow_ui (t1, x, nth); + if (mpz_cmp (cc, t1) >= 0) + abort (); +#endif + + mpz_add_ui (x, x, 1); + + /* Main loop */ + do + { + lowz = mpz_scan1 (x, 0); + mpz_tdiv_q_2exp (t0, x, lowz); + mpz_pow_ui (t1, t0, nth - 1); + mpz_mul_2exp (t1, t1, lowz * (nth - 1)); + mpz_tdiv_q (t2, cc, t1); + mpz_sub (t2, x, t2); + rl = mpz_tdiv_q_ui (t2, t2, nth); + mpz_sub (x, x, t2); + } + while (mpz_sgn (t2) != 0); + + /* If we got a non-zero remainder in the last division, we know our root + is too large. */ + mpz_sub_ui (x, x, (mp_limb_t) (rl != 0)); + + /* Adjustment loop. If we spend more care on rounding in the loop above, + we could probably get rid of this, or greatly simplify it. */ + { + int bad = 0; + lowz = mpz_scan1 (x, 0); + mpz_tdiv_q_2exp (t0, x, lowz); + mpz_pow_ui (t1, t0, nth); + mpz_mul_2exp (t1, t1, lowz * nth); + while (mpz_cmp (cc, t1) < 0) + { + bad++; + if (bad > 2) + abort (); /* abort if our root is far off */ + mpz_sub_ui (x, x, 1); + lowz = mpz_scan1 (x, 0); + mpz_tdiv_q_2exp (t0, x, lowz); + mpz_pow_ui (t1, t0, nth); + mpz_mul_2exp (t1, t1, lowz * nth); + } + } + + done: + exact = mpz_cmp (t1, cc) == 0; + + if (r != NULL) + { + mpz_set (r, x); + if (mpz_sgn (c) < 0) + SIZ(r) = -SIZ(r); + } + + mpz_clear (t2); + mpz_clear (t1); + mpz_clear (t0); + mpz_clear (x); + + return exact; +} diff --git a/rts/gmp/mpz/rrandomb.c b/rts/gmp/mpz/rrandomb.c new file mode 100644 index 0000000000..7d78243674 --- /dev/null +++ b/rts/gmp/mpz/rrandomb.c @@ -0,0 +1,117 @@ +/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with + long runs of consecutive ones and zeros in the binary representation. + Meant for testing of other MP routines. + +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +static void gmp_rrandomb _PROTO ((mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)); + +void +#if __STDC__ +mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, unsigned long int nbits) +#else +mpz_rrandomb (x, rstate, nbits) + mpz_ptr x; + gmp_randstate_t rstate; + unsigned long int nbits; +#endif +{ + mp_size_t nl = 0; + + if (nbits != 0) + { + mp_ptr xp; + nl = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; + if (x->_mp_alloc < nl) + _mpz_realloc (x, nl); + + xp = PTR(x); + gmp_rrandomb (xp, rstate, nbits); + MPN_NORMALIZE (xp, nl); + } + + SIZ(x) = nl; +} + +#define BITS_PER_CHUNK 4 + +static void +#if __STDC__ +gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits) +#else +gmp_rrandomb (rp, rstate, nbits) + mp_ptr rp; + gmp_randstate_t rstate; + unsigned long int nbits; +#endif +{ + int nb; + int bit_pos; + mp_size_t limb_pos; + mp_limb_t ran, ranm; + mp_limb_t acc; + mp_size_t n; + + bit_pos = nbits % BITS_PER_MP_LIMB; + limb_pos = nbits / BITS_PER_MP_LIMB; + if (bit_pos == 0) + { + bit_pos = BITS_PER_MP_LIMB; + limb_pos--; + } + + acc = 0; + while (limb_pos >= 0) + { + _gmp_rand (&ranm, rstate, BITS_PER_CHUNK + 1); + ran = ranm; + nb = 
(ran >> 1) + 1; + if ((ran & 1) != 0) + { + /* Generate a string of ones. */ + if (nb > bit_pos) + { + rp[limb_pos--] = acc | ((((mp_limb_t) 1) << bit_pos) - 1); + bit_pos += BITS_PER_MP_LIMB; + bit_pos -= nb; + acc = (~(mp_limb_t) 0) << bit_pos; + } + else + { + bit_pos -= nb; + acc |= ((((mp_limb_t) 1) << nb) - 1) << bit_pos; + } + } + else + { + /* Generate a string of zeroes. */ + if (nb > bit_pos) + { + rp[limb_pos--] = acc; + acc = 0; + bit_pos += BITS_PER_MP_LIMB; + } + bit_pos -= nb; + } + } +} diff --git a/rts/gmp/mpz/scan0.c b/rts/gmp/mpz/scan0.c new file mode 100644 index 0000000000..6c59cf8939 --- /dev/null +++ b/rts/gmp/mpz/scan0.c @@ -0,0 +1,35 @@ +/* mpz_scan0(op, startbit) -- Scan for the next set bit, starting at startbit. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_scan0 (mpz_srcptr u, unsigned long int starting_bit) +#else +mpz_scan0 (u, starting_bit) + mpz_srcptr u; + unsigned long int starting_bit; +#endif +{ /* Delegates to mpn_scan0 on the limb array, starting at STARTING_BIT; only u->_mp_d is passed, so the size and sign of U are not consulted here. NOTE(review): this file's header comment says "next set bit", word for word the same as scan1's -- presumably scan0 searches for a clear (0) bit; confirm against mpn_scan0. */ + return mpn_scan0 (u->_mp_d, starting_bit); +} diff --git a/rts/gmp/mpz/scan1.c b/rts/gmp/mpz/scan1.c new file mode 100644 index 0000000000..3b84e3420c --- /dev/null +++ b/rts/gmp/mpz/scan1.c @@ -0,0 +1,35 @@ +/* mpz_scan1(op, startbit) -- Scan for the next set bit, starting at startbit. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_scan1 (mpz_srcptr u, unsigned long int starting_bit) +#else +mpz_scan1 (u, starting_bit) + mpz_srcptr u; + unsigned long int starting_bit; +#endif +{ /* Delegates to mpn_scan1 on the limb array, starting at STARTING_BIT; only u->_mp_d is passed, so the size and sign of U are not consulted here. */ + return mpn_scan1 (u->_mp_d, starting_bit); +} diff --git a/rts/gmp/mpz/set.c b/rts/gmp/mpz/set.c new file mode 100644 index 0000000000..06b2eef511 --- /dev/null +++ b/rts/gmp/mpz/set.c @@ -0,0 +1,48 @@ +/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set (mpz_ptr w, mpz_srcptr u) +#else +mpz_set (w, u) + mpz_ptr w; + mpz_srcptr u; +#endif +{ + mp_ptr wp, up; + mp_size_t usize, size; + + usize = u->_mp_size; + size = ABS (usize); + + if (w->_mp_alloc < size) + _mpz_realloc (w, size); + + wp = w->_mp_d; + up = u->_mp_d; + + MPN_COPY (wp, up, size); + w->_mp_size = usize; +} diff --git a/rts/gmp/mpz/set_d.c b/rts/gmp/mpz/set_d.c new file mode 100644 index 0000000000..e90ed9bc2f --- /dev/null +++ b/rts/gmp/mpz/set_d.c @@ -0,0 +1,96 @@ +/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL. + +Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set_d (mpz_ptr r, double d) +#else +mpz_set_d (r, d) + mpz_ptr r; + double d; +#endif +{ /* Set R from the double D. Magnitudes below MP_BASE_AS_DOUBLE are stored directly in one limb; larger ones are split into limbs by __gmp_extract_double. NOTE(review): NaN and infinities are not checked for here -- confirm how they are meant to be handled. */ + int negative; + mp_limb_t tp[3]; /* limbs filled in by __gmp_extract_double */ + mp_ptr rp; + mp_size_t rn; /* limb count returned by __gmp_extract_double */ + + negative = d < 0; + d = ABS (d); + + /* Handle small arguments quickly. */ + if (d < MP_BASE_AS_DOUBLE) + { + mp_limb_t tmp; + tmp = d; /* double -> limb conversion drops the fractional part */ + PTR(r)[0] = tmp; /* no allocation check; presumably relies on R always having at least one limb -- TODO confirm */ + SIZ(r) = negative ? -(tmp != 0) : (tmp != 0); + return; + } + + rn = __gmp_extract_double (tp, d); + + if (ALLOC(r) < rn) + _mpz_realloc (r, rn); + + rp = PTR (r); + +#if BITS_PER_MP_LIMB == 32 + switch (rn) + { + default: + MPN_ZERO (rp, rn - 3); /* rn > 3: zero the low rn-3 limbs, then place the three extracted limbs on top */ + rp += rn - 3; + /* fall through */ + case 3: + rp[2] = tp[2]; + rp[1] = tp[1]; + rp[0] = tp[0]; + break; + case 2: + rp[1] = tp[2]; + rp[0] = tp[1]; + break; + case 1: + /* handled in "small arguments" case above */ + abort (); + } +#else + switch (rn) + { + default: + MPN_ZERO (rp, rn - 2); /* rn > 2: zero the low rn-2 limbs, then place the two extracted limbs on top */ + rp += rn - 2; + /* fall through */ + case 2: + rp[1] = tp[1], rp[0] = tp[0]; + break; + case 1: + /* handled in "small arguments" case above */ + abort (); + } +#endif + + SIZ(r) = negative ? -rn : rn; +} diff --git a/rts/gmp/mpz/set_f.c b/rts/gmp/mpz/set_f.c new file mode 100644 index 0000000000..2273953dfd --- /dev/null +++ b/rts/gmp/mpz/set_f.c @@ -0,0 +1,64 @@ +/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. 
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set_f (mpz_ptr w, mpf_srcptr u) +#else +mpz_set_f (w, u) + mpz_ptr w; + mpf_srcptr u; +#endif +{ + mp_ptr wp, up; + mp_size_t usize, size; + mp_exp_t exp; + + usize = SIZ (u); + size = ABS (usize); + exp = EXP (u); + + if (w->_mp_alloc < exp) + _mpz_realloc (w, exp); + + wp = w->_mp_d; + up = u->_mp_d; + + if (exp <= 0) + { + SIZ (w) = 0; + return; + } + if (exp < size) + { + MPN_COPY (wp, up + size - exp, exp); + } + else + { + MPN_ZERO (wp, exp - size); + MPN_COPY (wp + exp - size, up, size); + } + + w->_mp_size = usize >= 0 ? exp : -exp; +} diff --git a/rts/gmp/mpz/set_q.c b/rts/gmp/mpz/set_q.c new file mode 100644 index 0000000000..72d3222a80 --- /dev/null +++ b/rts/gmp/mpz/set_q.c @@ -0,0 +1,36 @@ +/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from + SRC_rational. + +Copyright (C) 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set_q (mpz_ptr w, mpq_srcptr u) +#else +mpz_set_q (w, u) + mpz_ptr w; + mpq_srcptr u; +#endif +{ + mpz_tdiv_q (w, mpq_numref (u), mpq_denref (u)); +} diff --git a/rts/gmp/mpz/set_si.c b/rts/gmp/mpz/set_si.c new file mode 100644 index 0000000000..9ba2fbaf30 --- /dev/null +++ b/rts/gmp/mpz/set_si.c @@ -0,0 +1,48 @@ +/* mpz_set_si(integer, val) -- Assign INTEGER with a small value VAL. + +Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set_si (mpz_ptr dest, signed long int val) +#else +mpz_set_si (dest, val) + mpz_ptr dest; + signed long int val; +#endif +{ + /* We don't check if the allocation is enough, since the rest of the + package ensures it's at least 1, which is what we need here. */ + if (val > 0) + { + dest->_mp_d[0] = val; + dest->_mp_size = 1; + } + else if (val < 0) + { + dest->_mp_d[0] = (unsigned long) -val; + dest->_mp_size = -1; + } + else + dest->_mp_size = 0; +} diff --git a/rts/gmp/mpz/set_str.c b/rts/gmp/mpz/set_str.c new file mode 100644 index 0000000000..3ab79c0e89 --- /dev/null +++ b/rts/gmp/mpz/set_str.c @@ -0,0 +1,157 @@ +/* mpz_set_str(mp_dest, string, base) -- Convert the \0-terminated + string STRING in base BASE to multiple precision integer in + MP_DEST. Allow white space in the string. If BASE == 0 determine + the base in the C standard way, i.e. 0xhh...h means base 16, + 0oo...o means base 8, otherwise assume base 10. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 2000 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include <string.h> +#include <ctype.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +static int +#if __STDC__ +digit_value_in_base (int c, int base) +#else +digit_value_in_base (c, base) + int c; + int base; +#endif +{ + int digit; + + if (isdigit (c)) + digit = c - '0'; + else if (islower (c)) + digit = c - 'a' + 10; + else if (isupper (c)) + digit = c - 'A' + 10; + else + return -1; + + if (digit < base) + return digit; + return -1; +} + +int +#if __STDC__ +mpz_set_str (mpz_ptr x, const char *str, int base) +#else +mpz_set_str (x, str, base) + mpz_ptr x; + const char *str; + int base; +#endif +{ + size_t str_size; + char *s, *begs; + size_t i; + mp_size_t xsize; + int c; + int negative; + TMP_DECL (marker); + + /* Skip whitespace. */ + do + c = *str++; + while (isspace (c)); + + negative = 0; + if (c == '-') + { + negative = 1; + c = *str++; + } + + if (digit_value_in_base (c, base == 0 ? 10 : base) < 0) + return -1; /* error if no digits */ + + /* If BASE is 0, try to find out the base by looking at the initial + characters. */ + if (base == 0) + { + base = 10; + if (c == '0') + { + base = 8; + c = *str++; + if (c == 'x' || c == 'X') + { + base = 16; + c = *str++; + } + else if (c == 'b' || c == 'B') + { + base = 2; + c = *str++; + } + } + } + + /* Skip leading zeros. */ + while (c == '0') + c = *str++; + /* Make sure the string does not become empty, mpn_set_str would fail. */ + if (c == 0) + { + x->_mp_size = 0; + return 0; + } + + TMP_MARK (marker); + str_size = strlen (str - 1); + s = begs = (char *) TMP_ALLOC (str_size + 1); + + /* Remove spaces from the string and convert the result from ASCII to a + byte array. 
*/ + for (i = 0; i < str_size; i++) + { + if (!isspace (c)) + { + int dig = digit_value_in_base (c, base); + if (dig < 0) + { + TMP_FREE (marker); + return -1; + } + *s++ = dig; + } + c = *str++; + } + + str_size = s - begs; + + xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly)) + / BITS_PER_MP_LIMB + 2); + if (x->_mp_alloc < xsize) + _mpz_realloc (x, xsize); + + /* Convert the byte array in base BASE to our bignum format. */ + xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base); + x->_mp_size = negative ? -xsize : xsize; + + TMP_FREE (marker); + return 0; +} diff --git a/rts/gmp/mpz/set_ui.c b/rts/gmp/mpz/set_ui.c new file mode 100644 index 0000000000..d6097c170a --- /dev/null +++ b/rts/gmp/mpz/set_ui.c @@ -0,0 +1,43 @@ +/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_set_ui (mpz_ptr dest, unsigned long int val) +#else +mpz_set_ui (dest, val) + mpz_ptr dest; + unsigned long int val; +#endif +{ + /* We don't check if the allocation is enough, since the rest of the + package ensures it's at least 1, which is what we need here. */ + if (val > 0) + { + dest->_mp_d[0] = val; + dest->_mp_size = 1; + } + else + dest->_mp_size = 0; +} diff --git a/rts/gmp/mpz/setbit.c b/rts/gmp/mpz/setbit.c new file mode 100644 index 0000000000..d4249a434e --- /dev/null +++ b/rts/gmp/mpz/setbit.c @@ -0,0 +1,119 @@ +/* mpz_setbit -- set a specified bit. + +Copyright (C) 1991, 1993, 1994, 1995, 1997, 1999 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_setbit (mpz_ptr d, unsigned long int bit_index) +#else +mpz_setbit (d, bit_index) + mpz_ptr d; + unsigned long int bit_index; +#endif +{ /* Set bit BIT_INDEX of D. D is stored as sign and magnitude; a negative D is treated as two's complement with infinitely many leading ones, so the magnitude is adjusted to give the same result. */ + mp_size_t dsize = d->_mp_size; + mp_ptr dp = d->_mp_d; + mp_size_t limb_index; + + limb_index = bit_index / BITS_PER_MP_LIMB; + if (dsize >= 0) + { + if (limb_index < dsize) + { + dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB); + d->_mp_size = dsize; + } + else + { + /* Ugh. The bit should be set outside of the end of the + number. We have to increase the size of the number. */ + if (d->_mp_alloc < limb_index + 1) + { + _mpz_realloc (d, limb_index + 1); + dp = d->_mp_d; + } + MPN_ZERO (dp + dsize, limb_index - dsize); + dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB); + d->_mp_size = limb_index + 1; + } + } + else + { + mp_size_t zero_bound; + + /* Simulate two's complement arithmetic, i.e. simulate + 1. Set OP = ~(OP - 1) [with infinitely many leading ones]. + 2. Set the bit. + 3. Set OP = ~OP + 1. */ + + dsize = -dsize; + + /* No upper bound on this loop, we're sure there's a non-zero limb + sooner or later. */ + for (zero_bound = 0; ; zero_bound++) + if (dp[zero_bound] != 0) + break; + + if (limb_index > zero_bound) + { + if (limb_index < dsize) + { dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB)); /* above the lowest non-zero limb, setting the two's complement bit clears the magnitude bit; that may zero the top limb, so renormalize (the non-zero limb at zero_bound stops the scan) */ MPN_NORMALIZE (dp, dsize); d->_mp_size = -dsize; } + else + ; + } + else if (limb_index == zero_bound) + { + dp[limb_index] = ((dp[limb_index] - 1) + & ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1; + if (dp[limb_index] == 0) + { + mp_size_t i; + for (i = limb_index + 1; i < dsize; i++) + { + dp[i] += 1; + if (dp[i] != 0) + goto fin; + } + /* We got carry all the way out beyond the end of D. Increase + its size (and allocation if necessary). 
*/ + dsize++; + if (d->_mp_alloc < dsize) + { + _mpz_realloc (d, dsize); + dp = d->_mp_d; + } + dp[i] = 1; + d->_mp_size = -dsize; + fin:; + } + } + else + { + mpn_decr_u (dp + limb_index, + (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB)); + dsize -= dp[dsize - 1] == 0; + d->_mp_size = -dsize; + } + } +} diff --git a/rts/gmp/mpz/size.c b/rts/gmp/mpz/size.c new file mode 100644 index 0000000000..6574756783 --- /dev/null +++ b/rts/gmp/mpz/size.c @@ -0,0 +1,35 @@ +/* mpz_size(x) -- return the number of limbs currently used by the + value of integer X. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +size_t +#if __STDC__ +mpz_size (mpz_srcptr x) +#else +mpz_size (x) + mpz_srcptr x; +#endif +{ + return ABS (x->_mp_size); +} diff --git a/rts/gmp/mpz/sizeinbase.c b/rts/gmp/mpz/sizeinbase.c new file mode 100644 index 0000000000..734f9c4532 --- /dev/null +++ b/rts/gmp/mpz/sizeinbase.c @@ -0,0 +1,60 @@ +/* mpz_sizeinbase(x, base) -- return an approximation to the number of + characters the integer X would have printed in base BASE. The + approximation is never too small. 
+ +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +size_t +#if __STDC__ +mpz_sizeinbase (mpz_srcptr x, int base) +#else +mpz_sizeinbase (x, base) + mpz_srcptr x; + int base; +#endif +{ + mp_size_t size = ABS (x->_mp_size); + int lb_base, cnt; + size_t totbits; + + /* Special case for X == 0. */ + if (size == 0) + return 1; + + /* Calculate the total number of significant bits of X. */ + count_leading_zeros (cnt, x->_mp_d[size - 1]); + totbits = size * BITS_PER_MP_LIMB - cnt; + + if ((base & (base - 1)) == 0) + { + /* Special case for powers of 2, giving exact result. */ + + count_leading_zeros (lb_base, base); + lb_base = BITS_PER_MP_LIMB - lb_base - 1; + + return (totbits + lb_base - 1) / lb_base; + } + else + return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1; +} diff --git a/rts/gmp/mpz/sqrt.c b/rts/gmp/mpz/sqrt.c new file mode 100644 index 0000000000..fe82fe407a --- /dev/null +++ b/rts/gmp/mpz/sqrt.c @@ -0,0 +1,86 @@ +/* mpz_sqrt(root, u) -- Set ROOT to floor(sqrt(U)). + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_sqrt (mpz_ptr root, mpz_srcptr op) +#else +mpz_sqrt (root, op) + mpz_ptr root; + mpz_srcptr op; +#endif +{ + mp_size_t op_size, root_size; + mp_ptr root_ptr, op_ptr; + mp_ptr free_me = NULL; + mp_size_t free_me_size; + TMP_DECL (marker); + + TMP_MARK (marker); + op_size = op->_mp_size; + if (op_size < 0) + SQRT_OF_NEGATIVE; + + /* The size of the root is accurate after this simple calculation. */ + root_size = (op_size + 1) / 2; + + root_ptr = root->_mp_d; + op_ptr = op->_mp_d; + + if (root->_mp_alloc < root_size) + { + if (root_ptr == op_ptr) + { + free_me = root_ptr; + free_me_size = root->_mp_alloc; + } + else + (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB); + + root->_mp_alloc = root_size; + root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB); + root->_mp_d = root_ptr; + } + else + { + /* Make OP not overlap with ROOT. */ + if (root_ptr == op_ptr) + { + /* ROOT and OP are identical. Allocate temporary space for OP. */ + op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB); + /* Copy to the temporary space. 
Hack: Avoid temporary variable + by using ROOT_PTR. */ + MPN_COPY (op_ptr, root_ptr, op_size); + } + } + + mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size); + + root->_mp_size = root_size; + + if (free_me != NULL) + (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB); + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/sqrtrem.c b/rts/gmp/mpz/sqrtrem.c new file mode 100644 index 0000000000..99a6453122 --- /dev/null +++ b/rts/gmp/mpz/sqrtrem.c @@ -0,0 +1,111 @@ +/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM + to the remainder, i.e. X - ROOT**2. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include <stdio.h> /* for NULL */ +#include "gmp.h" +#include "gmp-impl.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op) +#else +mpz_sqrtrem (root, rem, op) + mpz_ptr root; + mpz_ptr rem; + mpz_srcptr op; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem) +#else +msqrt (op, root, rem) + mpz_srcptr op; + mpz_ptr root; + mpz_ptr rem; +#endif +#endif /* BERKELEY_MP */ +{ + mp_size_t op_size, root_size, rem_size; + mp_ptr root_ptr, op_ptr; + mp_ptr free_me = NULL; + mp_size_t free_me_size; + TMP_DECL (marker); + + TMP_MARK (marker); + op_size = op->_mp_size; + if (op_size < 0) + SQRT_OF_NEGATIVE; + + if (rem->_mp_alloc < op_size) + _mpz_realloc (rem, op_size); + + /* The size of the root is accurate after this simple calculation. */ + root_size = (op_size + 1) / 2; + + root_ptr = root->_mp_d; + op_ptr = op->_mp_d; + + if (root->_mp_alloc < root_size) + { + if (root_ptr == op_ptr) + { + free_me = root_ptr; + free_me_size = root->_mp_alloc; + } + else + (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB); + + root->_mp_alloc = root_size; + root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB); + root->_mp_d = root_ptr; + } + else + { + /* Make OP not overlap with ROOT. */ + if (root_ptr == op_ptr) + { + /* ROOT and OP are identical. Allocate temporary space for OP. */ + op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB); + /* Copy to the temporary space. Hack: Avoid temporary variable + by using ROOT_PTR. */ + MPN_COPY (op_ptr, root_ptr, op_size); + } + } + + rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size); + + root->_mp_size = root_size; + + /* Write remainder size last, to enable us to define this function to + give only the square root remainder, if the user calls if with + ROOT == REM. 
*/ + rem->_mp_size = rem_size; + + if (free_me != NULL) + (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB); + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/sub.c b/rts/gmp/mpz/sub.c new file mode 100644 index 0000000000..f3ae7c23a0 --- /dev/null +++ b/rts/gmp/mpz/sub.c @@ -0,0 +1,123 @@ +/* mpz_sub -- Subtract two integers. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + +#ifndef BERKELEY_MP +void +#if __STDC__ +mpz_sub (mpz_ptr w, mpz_srcptr u, mpz_srcptr v) +#else +mpz_sub (w, u, v) + mpz_ptr w; + mpz_srcptr u; + mpz_srcptr v; +#endif +#else /* BERKELEY_MP */ +void +#if __STDC__ +msub (mpz_srcptr u, mpz_srcptr v, mpz_ptr w) +#else +msub (u, v, w) + mpz_srcptr u; + mpz_srcptr v; + mpz_ptr w; +#endif +#endif /* BERKELEY_MP */ +{ + mp_srcptr up, vp; + mp_ptr wp; + mp_size_t usize, vsize, wsize; + mp_size_t abs_usize; + mp_size_t abs_vsize; + + usize = u->_mp_size; + vsize = -v->_mp_size; /* The "-" makes the difference from mpz_add */ + abs_usize = ABS (usize); + abs_vsize = ABS (vsize); + + if (abs_usize < abs_vsize) + { + /* Swap U and V. 
*/ + MPZ_SRCPTR_SWAP (u, v); + MP_SIZE_T_SWAP (usize, vsize); + MP_SIZE_T_SWAP (abs_usize, abs_vsize); + } + + /* True: ABS_USIZE >= ABS_VSIZE. */ + + /* If not space for w (and possible carry), increase space. */ + wsize = abs_usize + 1; + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + /* These must be after realloc (u or v may be the same as w). */ + up = u->_mp_d; + vp = v->_mp_d; + wp = w->_mp_d; + + if ((usize ^ vsize) < 0) + { + /* U and V have different sign. Need to compare them to determine + which operand to subtract from which. */ + + /* This test is right since ABS_USIZE >= ABS_VSIZE. */ + if (abs_usize != abs_vsize) + { + mpn_sub (wp, up, abs_usize, vp, abs_vsize); + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize < 0) + wsize = -wsize; + } + else if (mpn_cmp (up, vp, abs_usize) < 0) + { + mpn_sub_n (wp, vp, up, abs_usize); + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize >= 0) + wsize = -wsize; + } + else + { + mpn_sub_n (wp, up, vp, abs_usize); + wsize = abs_usize; + MPN_NORMALIZE (wp, wsize); + if (usize < 0) + wsize = -wsize; + } + } + else + { + /* U and V have same sign. Add them. */ + mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize); + wp[abs_usize] = cy_limb; + wsize = abs_usize + cy_limb; + if (usize < 0) + wsize = -wsize; + } + + w->_mp_size = wsize; +} diff --git a/rts/gmp/mpz/sub_ui.c b/rts/gmp/mpz/sub_ui.c new file mode 100644 index 0000000000..327add8503 --- /dev/null +++ b/rts/gmp/mpz/sub_ui.c @@ -0,0 +1,84 @@ +/* mpz_sub_ui -- Subtract an unsigned one-word integer from an MP_INT. + +Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_sub_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v) +#else +mpz_sub_ui (w, u, v) + mpz_ptr w; + mpz_srcptr u; + unsigned long int v; +#endif +{ + mp_srcptr up; + mp_ptr wp; + mp_size_t usize, wsize; + mp_size_t abs_usize; + + usize = u->_mp_size; + abs_usize = ABS (usize); + + /* If not space for W (and possible carry), increase space. */ + wsize = abs_usize + 1; + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->_mp_d; + wp = w->_mp_d; + + if (abs_usize == 0) + { + wp[0] = v; + w->_mp_size = -(v != 0); + return; + } + + if (usize < 0) + { + mp_limb_t cy; + cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v); + wp[abs_usize] = cy; + wsize = -(abs_usize + cy); + } + else + { + /* The signs are different. Need exact comparison to determine + which operand to subtract from which. */ + if (abs_usize == 1 && up[0] < v) + { + wp[0] = v - up[0]; + wsize = -1; + } + else + { + mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v); + /* Size can decrease with at most one limb. */ + wsize = abs_usize - (wp[abs_usize - 1] == 0); + } + } + + w->_mp_size = wsize; +} diff --git a/rts/gmp/mpz/swap.c b/rts/gmp/mpz/swap.c new file mode 100644 index 0000000000..0070d6ff24 --- /dev/null +++ b/rts/gmp/mpz/swap.c @@ -0,0 +1,52 @@ +/* mpz_swap (dest_integer, src_integer) -- Swap U and V. + +Copyright (C) 1997, 1998 Free Software Foundation, Inc. 
+ +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_swap (mpz_ptr u, mpz_ptr v) +#else +mpz_swap (u, v) + mpz_ptr u; + mpz_ptr v; +#endif +{ + mp_ptr up, vp; + mp_size_t usize, vsize; + mp_size_t ualloc, valloc; + + ualloc = u->_mp_alloc; + valloc = v->_mp_alloc; + v->_mp_alloc = ualloc; + u->_mp_alloc = valloc; + + usize = u->_mp_size; + vsize = v->_mp_size; + v->_mp_size = usize; + u->_mp_size = vsize; + + up = u->_mp_d; + vp = v->_mp_d; + v->_mp_d = up; + u->_mp_d = vp; +} diff --git a/rts/gmp/mpz/tdiv_q.c b/rts/gmp/mpz/tdiv_q.c new file mode 100644 index 0000000000..21db4ab385 --- /dev/null +++ b/rts/gmp/mpz/tdiv_q.c @@ -0,0 +1,91 @@ +/* mpz_tdiv_q -- divide two integers and produce a quotient. + +Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den) +#else +mpz_tdiv_q (quot, num, den) + mpz_ptr quot; + mpz_srcptr num; + mpz_srcptr den; +#endif +{ + mp_size_t ql; + mp_size_t ns, ds, nl, dl; + mp_ptr np, dp, qp, rp; + TMP_DECL (marker); + + ns = SIZ (num); + ds = SIZ (den); + nl = ABS (ns); + dl = ABS (ds); + ql = nl - dl + 1; + + if (dl == 0) + DIVIDE_BY_ZERO; + + if (ql <= 0) + { + SIZ (quot) = 0; + return; + } + + MPZ_REALLOC (quot, ql); + + TMP_MARK (marker); + qp = PTR (quot); + rp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB); + np = PTR (num); + dp = PTR (den); + + /* FIXME: We should think about how to handle the temporary allocation. + Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to + allocate temp space. */ + + /* Copy denominator to temporary space if it overlaps with the quotient. */ + if (dp == qp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, dp, dl); + dp = tp; + } + /* Copy numerator to temporary space if it overlaps with the quotient. */ + if (np == qp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, np, nl); + np = tp; + } + + mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl); + + ql -= qp[ql - 1] == 0; + + SIZ (quot) = (ns ^ ds) >= 0 ? 
ql : -ql; + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/tdiv_q_2exp.c b/rts/gmp/mpz/tdiv_q_2exp.c new file mode 100644 index 0000000000..03d1e01f89 --- /dev/null +++ b/rts/gmp/mpz/tdiv_q_2exp.c @@ -0,0 +1,68 @@ +/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient + towards zero. + +Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt) +#else +mpz_tdiv_q_2exp (w, u, cnt) + mpz_ptr w; + mpz_srcptr u; + unsigned long int cnt; +#endif +{ + mp_size_t usize, wsize; + mp_size_t limb_cnt; + + usize = u->_mp_size; + limb_cnt = cnt / BITS_PER_MP_LIMB; + wsize = ABS (usize) - limb_cnt; + if (wsize <= 0) + w->_mp_size = 0; + else + { + mp_ptr wp; + mp_srcptr up; + + if (w->_mp_alloc < wsize) + _mpz_realloc (w, wsize); + + wp = w->_mp_d; + up = u->_mp_d; + + cnt %= BITS_PER_MP_LIMB; + if (cnt != 0) + { + mpn_rshift (wp, up + limb_cnt, wsize, cnt); + wsize -= wp[wsize - 1] == 0; + } + else + { + MPN_COPY_INCR (wp, up + limb_cnt, wsize); + } + + w->_mp_size = usize >= 0 ? 
wsize : -wsize; + } +} diff --git a/rts/gmp/mpz/tdiv_q_ui.c b/rts/gmp/mpz/tdiv_q_ui.c new file mode 100644 index 0000000000..a2e3462b76 --- /dev/null +++ b/rts/gmp/mpz/tdiv_q_ui.c @@ -0,0 +1,64 @@ +/* mpz_tdiv_q_ui(quot, dividend, divisor_limb) + -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT. + +Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_tdiv_q_ui (quot, dividend, divisor) + mpz_ptr quot; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + /* No need for temporary allocation and copying if QUOT == DIVIDEND as + the divisor is just one limb, and thus no intermediate remainders + need to be stored. 
*/ + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb + = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, (mp_limb_t) divisor); + + /* The quotient is SIZE limbs, but the most significant might be zero. */ + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? size : -size; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/tdiv_qr.c b/rts/gmp/mpz/tdiv_qr.c new file mode 100644 index 0000000000..d66f57d9e5 --- /dev/null +++ b/rts/gmp/mpz/tdiv_qr.c @@ -0,0 +1,130 @@ +/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR, + and REM to DIVIDEND mod DIVISOR. + +Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" +#ifdef BERKELEY_MP +#include "mp.h" +#endif + + +#ifndef BERKELEY_MP + +void +#if __STDC__ +mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den) +#else +mpz_tdiv_qr (quot, rem, num, den) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr num; + mpz_srcptr den; +#endif + +#else /* BERKELEY_MP */ + +void +#if __STDC__ +mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem) +#else +mdiv (num, den, quot, rem) + mpz_srcptr num; + mpz_srcptr den; + mpz_ptr quot; + mpz_ptr rem; +#endif + +#endif /* BERKELEY_MP */ +{ + mp_size_t ql; + mp_size_t ns, ds, nl, dl; + mp_ptr np, dp, qp, rp; + TMP_DECL (marker); + + ns = SIZ (num); + ds = SIZ (den); + nl = ABS (ns); + dl = ABS (ds); + ql = nl - dl + 1; + + if (dl == 0) + DIVIDE_BY_ZERO; + + MPZ_REALLOC (rem, dl); + + if (ql <= 0) + { + if (num != rem) + { + mp_ptr np, rp; + np = PTR (num); + rp = PTR (rem); + MPN_COPY (rp, np, nl); + SIZ (rem) = SIZ (num); + } + /* This needs to follow the assignment to rem, in case the + numerator and quotient are the same. */ + SIZ (quot) = 0; + return; + } + + MPZ_REALLOC (quot, ql); + + TMP_MARK (marker); + qp = PTR (quot); + rp = PTR (rem); + np = PTR (num); + dp = PTR (den); + + /* FIXME: We should think about how to handle the temporary allocation. + Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to + allocate temp space. */ + + /* Copy denominator to temporary space if it overlaps with the quotient + or remainder. */ + if (dp == rp || dp == qp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, dp, dl); + dp = tp; + } + /* Copy numerator to temporary space if it overlaps with the quotient or + remainder. 
*/ + if (np == rp || np == qp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, np, nl); + np = tp; + } + + mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl); + + ql -= qp[ql - 1] == 0; + MPN_NORMALIZE (rp, dl); + + SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql; + SIZ (rem) = ns >= 0 ? dl : -dl; + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/tdiv_qr_ui.c b/rts/gmp/mpz/tdiv_qr_ui.c new file mode 100644 index 0000000000..10368cd340 --- /dev/null +++ b/rts/gmp/mpz/tdiv_qr_ui.c @@ -0,0 +1,76 @@ +/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) -- + Set QUOT to DIVIDEND / SHORT_DIVISOR + and REM to DIVIDEND mod SHORT_DIVISOR. + +Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_tdiv_qr_ui (quot, rem, dividend, divisor) + mpz_ptr quot; + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_ptr quot_ptr; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + /* No need for temporary allocation and copying if QUOT == DIVIDEND as + the divisor is just one limb, and thus no intermediate remainders + need to be stored. */ + + if (quot->_mp_alloc < size) + _mpz_realloc (quot, size); + + quot_ptr = quot->_mp_d; + + remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, + (mp_limb_t) divisor); + + if (remainder_limb == 0) + rem->_mp_size = 0; + else + { + /* Store the single-limb remainder. We don't check if there's space + for just one limb, since no function ever makes zero space. */ + rem->_mp_size = dividend_size >= 0 ? 1 : -1; + rem->_mp_d[0] = remainder_limb; + } + + /* The quotient is SIZE limbs, but the most significant might be zero. */ + size -= size != 0 && quot_ptr[size - 1] == 0; + quot->_mp_size = dividend_size >= 0 ? size : -size; + + return remainder_limb; +} diff --git a/rts/gmp/mpz/tdiv_r.c b/rts/gmp/mpz/tdiv_r.c new file mode 100644 index 0000000000..9eb87dfabf --- /dev/null +++ b/rts/gmp/mpz/tdiv_r.c @@ -0,0 +1,98 @@ +/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR. + +Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den) +#else +mpz_tdiv_r (rem, num, den) + mpz_ptr rem; + mpz_srcptr num; + mpz_srcptr den; +#endif +{ + mp_size_t ql; + mp_size_t ns, ds, nl, dl; + mp_ptr np, dp, qp, rp; + TMP_DECL (marker); + + ns = SIZ (num); + ds = SIZ (den); + nl = ABS (ns); + dl = ABS (ds); + ql = nl - dl + 1; + + if (dl == 0) + DIVIDE_BY_ZERO; + + MPZ_REALLOC (rem, dl); + + if (ql <= 0) + { + if (num != rem) + { + mp_ptr np, rp; + np = PTR (num); + rp = PTR (rem); + MPN_COPY (rp, np, nl); + SIZ (rem) = SIZ (num); + } + return; + } + + TMP_MARK (marker); + qp = (mp_ptr) TMP_ALLOC (ql * BYTES_PER_MP_LIMB); + rp = PTR (rem); + np = PTR (num); + dp = PTR (den); + + /* FIXME: We should think about how to handle the temporary allocation. + Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to + allocate temp space. */ + + /* Copy denominator to temporary space if it overlaps with the remainder. */ + if (dp == rp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, dp, dl); + dp = tp; + } + /* Copy numerator to temporary space if it overlaps with the remainder. */ + if (np == rp) + { + mp_ptr tp; + tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB); + MPN_COPY (tp, np, nl); + np = tp; + } + + mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl); + + MPN_NORMALIZE (rp, dl); + + SIZ (rem) = ns >= 0 ? 
dl : -dl; + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/tdiv_r_2exp.c b/rts/gmp/mpz/tdiv_r_2exp.c new file mode 100644 index 0000000000..91de170f5c --- /dev/null +++ b/rts/gmp/mpz/tdiv_r_2exp.c @@ -0,0 +1,79 @@ +/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder. + +Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt) +#else +mpz_tdiv_r_2exp (res, in, cnt) + mpz_ptr res; + mpz_srcptr in; + unsigned long int cnt; +#endif +{ + mp_size_t in_size = ABS (in->_mp_size); + mp_size_t res_size; + mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB; + mp_srcptr in_ptr = in->_mp_d; + + if (in_size > limb_cnt) + { + /* The input operand is (probably) greater than 2**CNT. 
*/ + mp_limb_t x; + + x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1); + if (x != 0) + { + res_size = limb_cnt + 1; + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + res->_mp_d[limb_cnt] = x; + } + else + { + res_size = limb_cnt; + MPN_NORMALIZE (in_ptr, res_size); + + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + limb_cnt = res_size; + } + } + else + { + /* The input operand is smaller than 2**CNT. We perform a no-op, + apart from that we might need to copy IN to RES. */ + res_size = in_size; + if (res->_mp_alloc < res_size) + _mpz_realloc (res, res_size); + + limb_cnt = res_size; + } + + if (res != in) + MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt); + res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size; +} diff --git a/rts/gmp/mpz/tdiv_r_ui.c b/rts/gmp/mpz/tdiv_r_ui.c new file mode 100644 index 0000000000..2ea411fda1 --- /dev/null +++ b/rts/gmp/mpz/tdiv_r_ui.c @@ -0,0 +1,63 @@ +/* mpz_tdiv_r_ui(rem, dividend, divisor_limb) + -- Set REM to DIVDEND mod DIVISOR_LIMB. + +Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_tdiv_r_ui (rem, dividend, divisor) + mpz_ptr rem; + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + /* No need for temporary allocation and copying if QUOT == DIVIDEND as + the divisor is just one limb, and thus no intermediate remainders + need to be stored. */ + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + if (remainder_limb == 0) + rem->_mp_size = 0; + else + { + /* Store the single-limb remainder. We don't check if there's space + for just one limb, since no function ever makes zero space. */ + rem->_mp_size = dividend_size >= 0 ? 1 : -1; + rem->_mp_d[0] = remainder_limb; + } + + return remainder_limb; +} diff --git a/rts/gmp/mpz/tdiv_ui.c b/rts/gmp/mpz/tdiv_ui.c new file mode 100644 index 0000000000..7a40a6a7f7 --- /dev/null +++ b/rts/gmp/mpz/tdiv_ui.c @@ -0,0 +1,53 @@ +/* mpz_tdiv_ui(dividend, divisor_limb) + -- Return DIVDEND mod DIVISOR_LIMB. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +unsigned long int +#if __STDC__ +mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor) +#else +mpz_tdiv_ui (dividend, divisor) + mpz_srcptr dividend; + unsigned long int divisor; +#endif +{ + mp_size_t dividend_size; + mp_size_t size; + mp_limb_t remainder_limb; + + if (divisor == 0) + DIVIDE_BY_ZERO; + + dividend_size = dividend->_mp_size; + size = ABS (dividend_size); + + /* No need for temporary allocation and copying if QUOT == DIVIDEND as + the divisor is just one limb, and thus no intermediate remainders + need to be stored. */ + + remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor); + + return remainder_limb; +} diff --git a/rts/gmp/mpz/tstbit.c b/rts/gmp/mpz/tstbit.c new file mode 100644 index 0000000000..b0a8b0b31a --- /dev/null +++ b/rts/gmp/mpz/tstbit.c @@ -0,0 +1,70 @@ +/* mpz_tstbit -- test a specified bit. Simulate 2's complement representation. + +Copyright (C) 1997 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +int +#if __STDC__ +mpz_tstbit (mpz_srcptr d, unsigned long int bit_index) +#else +mpz_tstbit (d, bit_index) + mpz_srcptr d; + unsigned long int bit_index; +#endif +{ + mp_size_t dsize = d->_mp_size; + mp_ptr dp = d->_mp_d; + mp_size_t limb_index; + + limb_index = bit_index / BITS_PER_MP_LIMB; + if (dsize >= 0) + { + if (limb_index < dsize) + return (dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1; + else + /* Testing a bit outside of a positive number. */ + return 0; + } + else + { + mp_size_t zero_bound; + + dsize = -dsize; + + /* Locate the least significant non-zero limb. */ + for (zero_bound = 0; dp[zero_bound] == 0; zero_bound++) + ; + + if (limb_index > zero_bound) + { + if (limb_index < dsize) + return (~dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1; + else + /* Testing a bit outside of a negative number. */ + return 1; + } + else if (limb_index == zero_bound) + return (-dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1; + else + return 0; + } +} diff --git a/rts/gmp/mpz/ui_pow_ui.c b/rts/gmp/mpz/ui_pow_ui.c new file mode 100644 index 0000000000..edd2dee625 --- /dev/null +++ b/rts/gmp/mpz/ui_pow_ui.c @@ -0,0 +1,139 @@ +/* mpz_ui_pow_ui(res, base, exp) -- Set RES to BASE**EXP. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* Multi-limb continuation of mpz_ui_pow_ui, defined below.  */
static void mpz_pow2 _PROTO ((mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl));

/* Set R to B raised to the power E.

   The work is split in two.  A single-limb loop squares the base for as
   long as it stays below half a limb, consuming the low bits of E and
   collecting their contribution in RL.  If E is used up there, the result
   is the single limb RL.  Otherwise mpz_pow2 raises the squared base to
   the remaining exponent and multiplies by RL.

   The single-limb paths store into r->_mp_d[0] without checking the
   allocation of R.  */
void
#if __STDC__
mpz_ui_pow_ui (mpz_ptr r, unsigned long int b, unsigned long int e)
#else
mpz_ui_pow_ui (r, b, e)
     mpz_ptr r;
     unsigned long int b;
     unsigned long int e;
#endif
{
  mp_limb_t blimb = b;
  mp_limb_t rl;

  if (e == 0)
    {
      /* For x^0 we return 1, even if x is 0. */
      r->_mp_d[0] = 1;
      r->_mp_size = 1;
      return;
    }

  /* Compute b^e as (b^n)^(e div n) * b^(e mod n), where n is chosen such that
     the latter factor is the largest number small enough to fit in a limb. */

  rl = 1;
  /* Each pass handles one low bit of E: multiply RL by the current power of
     the base when the bit is set, then square the base and drop the bit.
     The loop stops once the base no longer fits in half a limb.  */
  while (e != 0 && blimb < ((mp_limb_t) 1 << BITS_PER_MP_LIMB/2))
    {
      if ((e & 1) != 0)
        rl = rl * blimb;
      blimb = blimb * blimb;
      e = e >> 1;
    }

  /* rl is now b^(e mod n). (I.e., the latter factor above.) */

  if (e == 0)
    {
      /* The whole exponent was consumed above; RL is the answer.  A zero RL
         is stored as size 0.  */
      r->_mp_d[0] = rl;
      r->_mp_size = rl != 0;
      return;
    }

  mpz_pow2 (r, blimb, e, rl);
}

/* Multi-precision part of the exponentiation code.

   Sets R to BLIMB**E * RL.  E must be non-zero: its leading-zero count is
   used to locate the top set bit.  The power is built in temporary storage
   by scanning the bits of E below the top one from high to low, squaring
   at every bit and multiplying by BLIMB where the bit is set.  */
static void
#if __STDC__
mpz_pow2 (mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl)
#else
mpz_pow2 (r, blimb, e, rl)
     mpz_ptr r;
     mp_limb_t blimb;
     unsigned long int e;
     mp_limb_t rl;
#endif
{
  mp_ptr rp, tp;
  mp_size_t ralloc, rsize;
  int cnt, i;
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* Over-estimate temporary space requirements somewhat. */
  count_leading_zeros (cnt, blimb);
  ralloc = e - cnt * e / BITS_PER_MP_LIMB + 1;

  /* The two areas are used to alternatingly hold the input and receive the
     product for mpn_mul. (Needed since mpn_mul_n requires that the product
     is distinct from either input operand.) */
  rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
  tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);

  /* Start from BLIMB**1, which accounts for the top set bit of E.  */
  rp[0] = blimb;
  rsize = 1;

  count_leading_zeros (cnt, e);
  for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--)
    {
      /* Square into the other buffer, trim a zero top limb, then swap so
         that RP again names the current value.  */
      mpn_mul_n (tp, rp, rp, rsize);
      rsize = 2 * rsize;
      rsize -= tp[rsize - 1] == 0;
      MP_PTR_SWAP (rp, tp);

      if ((e & ((mp_limb_t) 1 << i)) != 0)
        {
          /* Bit I of E is set: multiply by the base, appending the carry
             limb when it is non-zero.  */
          mp_limb_t cy;
          cy = mpn_mul_1 (rp, rp, rsize, blimb);
          rp[rsize] = cy;
          rsize += cy != 0;
        }
    }

  /* We will need rsize or rsize+1 limbs for the result. */
  if (r->_mp_alloc <= rsize)
    _mpz_realloc (r, rsize + 1);

  /* Multiply the two factors (in rp,rsize and rl) and put the final result
     in place. */
  {
    mp_limb_t cy;
    cy = mpn_mul_1 (r->_mp_d, rp, rsize, rl);
    (r->_mp_d)[rsize] = cy;
    rsize += cy != 0;
  }

  r->_mp_size = rsize;
  TMP_FREE (marker);
}
diff --git a/rts/gmp/mpz/urandomb.c b/rts/gmp/mpz/urandomb.c
new file mode 100644
index 0000000000..caca086e05
--- /dev/null
+++ b/rts/gmp/mpz/urandomb.c
@@ -0,0 +1,49 @@
/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom
   integer in the range 0 to 2^N - 1, inclusive, using STATE as the
   random state previously initialized by a call to gmp_randinit().

Copyright (C) 1999, 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +mpz_urandomb (mpz_t rop, gmp_randstate_t rstate, unsigned long int nbits) +#else +mpz_urandomb (rop, rstate, nbits) + mpz_t rop; + gmp_randstate_t rstate; + unsigned long int nbits; +#endif +{ + mp_ptr rp; + mp_size_t size; + + size = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; + if (ALLOC (rop) < size) + _mpz_realloc (rop, size); + + rp = PTR (rop); + + _gmp_rand (rp, rstate, nbits); + MPN_NORMALIZE (rp, size); + SIZ (rop) = size; +} diff --git a/rts/gmp/mpz/urandomm.c b/rts/gmp/mpz/urandomm.c new file mode 100644 index 0000000000..69e1bae78a --- /dev/null +++ b/rts/gmp/mpz/urandomm.c @@ -0,0 +1,78 @@ +/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom + integer in the range 0 to N-1, using STATE as the random state + previously initialized by a call to gmp_randinit(). + +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +void +#if __STDC__ +mpz_urandomm (mpz_t rop, gmp_randstate_t rstate, mpz_t n) +#else +mpz_urandomm (rop, rstate, n) + mpz_t rop; + gmp_randstate_t rstate; + mpz_t n; +#endif +{ + mpz_t t, p, m; + mp_ptr tp; + mp_size_t nbits, size; + int count; + TMP_DECL (marker); + + TMP_MARK (marker); + + /* FIXME: Should check for n == 0 and report error */ + + size = SIZ (n); + count_leading_zeros (count, PTR (n)[size - 1]); + nbits = size * BITS_PER_MP_LIMB - count; + + /* Allocate enough for any mpz function called since a realloc of + these will fail. */ + MPZ_TMP_INIT (t, size); + MPZ_TMP_INIT (m, size + 1); + MPZ_TMP_INIT (p, size + 1); + + /* Let m = highest possible random number plus 1. */ + mpz_set_ui (m, 0); + mpz_setbit (m, nbits); + + /* Let p = floor(m / n) * n. */ + mpz_fdiv_q (p, m, n); + mpz_mul (p, p, n); + + tp = PTR (t); + do + { + _gmp_rand (tp, rstate, nbits); + MPN_NORMALIZE (tp, size); /* FIXME: Really necessary? */ + SIZ (t) = size; + } + while (mpz_cmp (t, p) >= 0); + + mpz_mod (rop, t, n); + + TMP_FREE (marker); +} diff --git a/rts/gmp/mpz/xor.c b/rts/gmp/mpz/xor.c new file mode 100644 index 0000000000..69898d1791 --- /dev/null +++ b/rts/gmp/mpz/xor.c @@ -0,0 +1,217 @@ +/* mpz_xor -- Logical xor. + +Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"

/* Set RES to the bitwise exclusive-or of OP1 and OP2, treating negative
   operands as two's complement numbers.

   Operands are stored as sign and magnitude, so there are three cases:
     - both non-negative: xor the magnitudes directly;
     - both negative:     xor (|OP1| - 1) with (|OP2| - 1), result >= 0;
     - mixed signs:       xor the non-negative operand with (|neg| - 1),
                          add 1, and negate.
   RES may be the same object as either operand; limb pointers that may
   refer to RES are re-read after every reallocation of RES.  */
void
#if __STDC__
mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
#else
mpz_xor (res, op1, op2)
     mpz_ptr res;
     mpz_srcptr op1;
     mpz_srcptr op2;
#endif
{
  mp_srcptr op1_ptr, op2_ptr;
  mp_size_t op1_size, op2_size;
  mp_ptr res_ptr;
  mp_size_t res_size, res_alloc;
  mp_size_t i;
  TMP_DECL (marker);

  TMP_MARK (marker);
  op1_size = op1->_mp_size;
  op2_size = op2->_mp_size;

  op1_ptr = op1->_mp_d;
  op2_ptr = op2->_mp_d;
  res_ptr = res->_mp_d;

  if (op1_size >= 0)
    {
      if (op2_size >= 0)
        {
          /* Both operands non-negative: the longer operand supplies the
             high limbs unchanged, the common low limbs are xor-ed.  */
          if (op1_size >= op2_size)
            {
              if (res->_mp_alloc < op1_size)
                {
                  _mpz_realloc (res, op1_size);
                  op1_ptr = op1->_mp_d;
                  op2_ptr = op2->_mp_d;
                  res_ptr = res->_mp_d;
                }

              /* When RES is OP1 the high limbs are already in place.  */
              if (res_ptr != op1_ptr)
                MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
                          op1_size - op2_size);
              for (i = op2_size - 1; i >= 0; i--)
                res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
              res_size = op1_size;
            }
          else
            {
              if (res->_mp_alloc < op2_size)
                {
                  _mpz_realloc (res, op2_size);
                  op1_ptr = op1->_mp_d;
                  op2_ptr = op2->_mp_d;
                  res_ptr = res->_mp_d;
                }

              /* When RES is OP2 the high limbs are already in place.  */
              if (res_ptr != op2_ptr)
                MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
                          op2_size - op1_size);
              for (i = op1_size - 1; i >= 0; i--)
                res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
              res_size = op2_size;
            }

          MPN_NORMALIZE (res_ptr, res_size);
          res->_mp_size = res_size;
          /* NOTE(review): this path returns without TMP_FREE (marker),
             unlike the other two; no TMP_ALLOC has been made here.  */
          return;
        }
      else /* op2_size < 0 */
        {
          /* Fall through to the code at the end of the function. */
        }
    }
  else
    {
      if (op2_size < 0)
        {
          mp_ptr opx;
          mp_limb_t cy;         /* NOTE(review): not used in this branch.  */

          /* Both operands are negative, the result will be positive.
              (-OP1) ^ (-OP2) =
             = ~(OP1 - 1) ^ ~(OP2 - 1) =
             = (OP1 - 1) ^ (OP2 - 1) */

          op1_size = -op1_size;
          op2_size = -op2_size;

          /* Possible optimization: Decrease mpn_sub precision,
             as we won't use the entire res of both. */
          opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);
          mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
          op1_ptr = opx;

          opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
          mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
          op2_ptr = opx;

          res_alloc = MAX (op1_size, op2_size);
          if (res->_mp_alloc < res_alloc)
            {
              _mpz_realloc (res, res_alloc);
              res_ptr = res->_mp_d;
              /* Don't re-read OP1_PTR and OP2_PTR. They point to
                 temporary space--never to the space RES->_mp_d used
                 to point to before reallocation. */
            }

          /* Both inputs are temporaries now, so the high limbs of the
             longer one are always copied.  */
          if (op1_size > op2_size)
            {
              MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
                        op1_size - op2_size);
              for (i = op2_size - 1; i >= 0; i--)
                res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
              res_size = op1_size;
            }
          else
            {
              MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
                        op2_size - op1_size);
              for (i = op1_size - 1; i >= 0; i--)
                res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
              res_size = op2_size;
            }

          MPN_NORMALIZE (res_ptr, res_size);
          res->_mp_size = res_size;
          TMP_FREE (marker);
          return;
        }
      else
        {
          /* We should compute -OP1 ^ OP2. Swap OP1 and OP2 and fall
             through to the code that handles OP1 ^ -OP2. */
          MPZ_SRCPTR_SWAP (op1, op2);
          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
        }
    }

  /* Mixed signs: from here on OP1 is the non-negative operand and OP2 the
     negative one.  */
  {
    mp_ptr opx;
    mp_limb_t cy;
    mp_size_t count;            /* NOTE(review): not used.  */

    /* Operand 2 negative, so will be the result.
       -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
       = ~(OP1 ^ ~(OP2 - 1)) + 1 =
       = (OP1 ^ (OP2 - 1)) + 1 */

    op2_size = -op2_size;

    opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
    mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
    op2_ptr = opx;

    /* One extra limb for the carry of the final increment.  */
    res_alloc = MAX (op1_size, op2_size) + 1;
    if (res->_mp_alloc < res_alloc)
      {
        _mpz_realloc (res, res_alloc);
        op1_ptr = op1->_mp_d;
        res_ptr = res->_mp_d;
        /* Don't re-read OP2_PTR. It points to temporary space--never
           to the space RES->_mp_d used to point to before reallocation. */
      }

    if (op1_size > op2_size)
      {
        MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
        for (i = op2_size - 1; i >= 0; i--)
          res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
        res_size = op1_size;
      }
    else
      {
        MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
        for (i = op1_size - 1; i >= 0; i--)
          res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
        res_size = op2_size;
      }

    /* Add the 1 from the identity above; a carry out extends the result
       by one limb.  */
    cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
    if (cy)
      {
        res_ptr[res_size] = cy;
        res_size++;
      }

    MPN_NORMALIZE (res_ptr, res_size);
    res->_mp_size = -res_size;
    TMP_FREE (marker);
  }
}
diff --git a/rts/gmp/rand.c b/rts/gmp/rand.c
new file mode 100644
index 0000000000..d1f9354511
--- /dev/null
+++ b/rts/gmp/rand.c
@@ -0,0 +1,171 @@
/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.

Copyright (C) 1999, 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include <stdio.h> /* for NULL */ +#if __STDC__ +# include <stdarg.h> +#else +# include <varargs.h> +#endif + +#include "gmp.h" +#include "gmp-impl.h" + +/* Array of CL-schemes, ordered in increasing order of the first + member (the 'm2exp' value). The end of the array is indicated with + an entry containing all zeros. */ + +/* All multipliers are in the range 0.01*m and 0.99*m, and are +congruent to 5 (mod 8). +They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1. +(Up to and including 196 bits, merit is >= 3.) */ + +struct __gmp_rand_lc_scheme_struct +{ + unsigned long int m2exp; /* Modulus is 2 ^ m2exp. */ + char *astr; /* Multiplier in string form. */ + unsigned long int c; /* Adder. */ +}; + +struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] = +{ + {32, "43840821", 1}, + {33, "85943917", 1}, + {34, "171799469", 1}, + {35, "343825285", 1}, + {36, "687285701", 1}, + {37, "1374564613", 1}, + {38, "2749193437", 1}, + {39, "5497652029", 1}, + {40, "10995212661", 1}, + {56, "47988680294711517", 1}, + {64, "13469374875402548381", 1}, + {100, "203786806069096950756900463357", 1}, + {128, "96573135900076068624591706046897650309", 1}, + {156, "43051576988660538262511726153887323360449035333", 1}, + {196, "1611627857640767981443524165616850972435303571524033586421", 1}, + {200, "491824250216153841876046962368396460896019632211283945747141", 1}, + {256, "79336254595106925775099152154558630917988041692672147726148065355845551082677", 1}, + {0, NULL, 0} /* End of array. */ +}; + +void +#if __STDC__ +gmp_randinit (gmp_randstate_t rstate, + gmp_randalg_t alg, + ...) 
+#else +gmp_randinit (va_alist) + va_dcl +#endif +{ + va_list ap; +#if __STDC__ +#else + __gmp_randstate_struct *rstate; + gmp_randalg_t alg; +#endif + +#if __STDC__ + va_start (ap, alg); +#else + va_start (ap); + + rstate = va_arg (ap, __gmp_randstate_struct *); + alg = va_arg (ap, gmp_randalg_t); +#endif + + switch (alg) + { + case GMP_RAND_ALG_LC: /* Linear congruential. */ + { + unsigned long int size; + struct __gmp_rand_lc_scheme_struct *sp; + mpz_t a; + + size = va_arg (ap, unsigned long int); + + /* Pick a scheme. */ + for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++) + if (sp->m2exp / 2 >= size) + break; + + if (sp->m2exp == 0) /* Nothing big enough found. */ + { + gmp_errno |= GMP_ERROR_INVALID_ARGUMENT; + return; + } + + /* Install scheme. */ + mpz_init_set_str (a, sp->astr, 0); + gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp); + mpz_clear (a); + break; + } + +#if 0 + case GMP_RAND_ALG_BBS: /* Blum, Blum, and Shub. */ + { + mpz_t p, q; + mpz_t ztmp; + + /* FIXME: Generate p and q. They must be ``large'' primes, + congruent to 3 mod 4. Should we ensure that they meet some + of the criterias for being ``hard primes''?*/ + + /* These are around 128 bits. */ + mpz_init_set_str (p, "148028650191182616877187862194899201391", 10); + mpz_init_set_str (q, "315270837425234199477225845240496832591", 10); + + /* Allocate algorithm specific data. */ + rstate->data.bbs = (__gmp_rand_data_bbs *) + (*_mp_allocate_func) (sizeof (__gmp_rand_data_bbs)); + + mpz_init (rstate->data.bbs->bi); /* The Blum integer. */ + mpz_mul (rstate->data.bbs->bi, p, q); + + /* Find a seed, x, with gcd (x, bi) == 1. */ + mpz_init (ztmp); + while (1) + { + mpz_gcd (ztmp, seed, rstate->data.bbs->bi); + if (!mpz_cmp_ui (ztmp, 1)) + break; + mpz_add_ui (seed, seed, 1); + } + + rstate->alg = alg; + rstate->size = size; /* FIXME: Remove. */ + mpz_set (rstate->seed, seed); + + mpz_clear (p); + mpz_clear (q); + mpz_clear (ztmp); + break; + } +#endif /* 0 */ + + default: /* Bad choice. 
*/ + gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT; + } + + va_end (ap); +} diff --git a/rts/gmp/randclr.c b/rts/gmp/randclr.c new file mode 100644 index 0000000000..5cb0291165 --- /dev/null +++ b/rts/gmp/randclr.c @@ -0,0 +1,54 @@ +/* gmp_randclear (state) -- Clear and deallocate random state STATE. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +gmp_randclear (gmp_randstate_t rstate) +#else +gmp_randclear (rstate) + gmp_randstate_t rstate; +#endif +{ + mpz_clear (rstate->seed); + + switch (rstate->alg) + { + case GMP_RAND_ALG_LC: + mpz_clear (rstate->algdata.lc->a); + if (rstate->algdata.lc->m2exp == 0) + mpz_clear (rstate->algdata.lc->m); + (*_mp_free_func) (rstate->algdata.lc, sizeof (*rstate->algdata.lc)); + break; + +#if 0 + case GMP_RAND_ALG_BBS: + mpz_clear (rstate->algdata.bbs->bi); + (*_mp_free_func) (rstate->algdata.bbs, sizeof (*rstate->algdata.bbs)); + break; +#endif /* 0 */ + + default: + gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT; + } +} diff --git a/rts/gmp/randlc.c b/rts/gmp/randlc.c new file mode 100644 index 0000000000..7079db827e --- /dev/null +++ b/rts/gmp/randlc.c @@ -0,0 +1,56 @@ +/* gmp_randinit_lc (state, a, c, m) -- Initialize a random state for a + linear congruential generator with multiplier A, adder C, and + modulus M. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. 
*/

#include "gmp.h"
#include "gmp-impl.h"

/* Intended to initialize RSTATE for a linear congruential generator with
   multiplier A, adder C and an arbitrary modulus M.

   Unfinished: the function calls abort () as its first statement, so none
   of the initialization below is ever reached.  */
void
#if __STDC__
gmp_randinit_lc (gmp_randstate_t rstate,
                 mpz_t a,
                 unsigned long int c,
                 mpz_t m)
#else
gmp_randinit_lc (rstate, a, c, m)
     gmp_randstate_t rstate;
     mpz_t a;
     unsigned long int c;
     mpz_t m;
#endif
{
  /* FIXME: Not finished. We don't handle this in _gmp_rand() yet. */
  abort ();

  /* Unreachable while the abort () above is in place.  The seed starts at
     1 and is given as many limbs as M has.  */
  mpz_init_set_ui (rstate->seed, 1);
  _mpz_realloc (rstate->seed, ABSIZ (m));

  /* Allocate algorithm specific data. */
  rstate->algdata.lc = (__gmp_randata_lc *)
    (*_mp_allocate_func) (sizeof (__gmp_randata_lc));

  mpz_init_set (rstate->algdata.lc->a, a);
  rstate->algdata.lc->c = c;
  mpz_init_set (rstate->algdata.lc->m, m);
  /* NOTE(review): the m2exp field of the freshly allocated block is never
     assigned here, although gmp_randclear tests it -- to be settled when
     this function is finished.  */

  rstate->alg = GMP_RAND_ALG_LC;
}
diff --git a/rts/gmp/randlc2x.c b/rts/gmp/randlc2x.c
new file mode 100644
index 0000000000..dbd5f041ee
--- /dev/null
+++ b/rts/gmp/randlc2x.c
@@ -0,0 +1,59 @@
/* gmp_randinit_lc_2exp (state, a, c, m2exp) -- Initialize random
   state STATE for a linear congruential generator with multiplier A,
   adder C, and modulus 2 ^ M2EXP.

Copyright (C) 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +gmp_randinit_lc_2exp (gmp_randstate_t rstate, + mpz_t a, + unsigned long int c, + unsigned long int m2exp) +#else +gmp_randinit_lc_2exp (rstate, a, c, m2exp) + gmp_randstate_t rstate; + mpz_t a; + unsigned long int c; + unsigned long int m2exp; +#endif +{ + mpz_init_set_ui (rstate->seed, 1); + _mpz_realloc (rstate->seed, m2exp / BITS_PER_MP_LIMB + + (m2exp % BITS_PER_MP_LIMB != 0)); + + /* Allocate algorithm specific data. */ + rstate->algdata.lc = (__gmp_randata_lc *) + (*_mp_allocate_func) (sizeof (__gmp_randata_lc)); + + mpz_init_set (rstate->algdata.lc->a, a); + rstate->algdata.lc->c = c; + + /* Cover weird case where m2exp is 0, which means that m is used + instead of m2exp. */ + if (m2exp == 0) + mpz_init_set_ui (rstate->algdata.lc->m, 0); + rstate->algdata.lc->m2exp = m2exp; + + rstate->alg = GMP_RAND_ALG_LC; +} diff --git a/rts/gmp/randraw.c b/rts/gmp/randraw.c new file mode 100644 index 0000000000..c0c3889d33 --- /dev/null +++ b/rts/gmp/randraw.c @@ -0,0 +1,360 @@ +/* _gmp_rand (rp, state, nbits) -- Generate a random bitstream of + length NBITS in RP. RP must have enough space allocated to hold + NBITS. + +Copyright (C) 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* For linear congruential (LC), we use one of algorithms (1) or (2).
   (gmp-3.0 uses algorithm (1) with 'm' as a power of 2.)

LC algorithm (1).

        X = (aX + c) mod m

[D. Knuth, "The Art of Computer Programming: Volume 2, Seminumerical Algorithms",
Third Edition, Addison Wesley, 1998, pp. 184-185.]

        X is the seed and the result
        a is chosen so that
            a mod 8 = 5 [3.2.1.2] and [3.2.1.3]
            .01m < a < .99m
            its binary or decimal digits is not a simple, regular pattern
            it has no large quotients when Euclid's algorithm is used to find
              gcd(a, m) [3.3.3]
            it passes the spectral test [3.3.4]
            it passes several tests of [3.3.2]
        c has no factor in common with m (c=1 or c=a can be good)
        m is large (2^30)
          is a power of 2 [3.2.1.1]

The least significant digits of the generated number are not very
random. It should be regarded as a random fraction X/m. To get a
random integer between 0 and n-1, multiply X/m by n and truncate.
(Don't use X/n [ex 3.4.1-3])

The ``accuracy'' in t dimensions is one part in ``the t'th root of m'' [3.3.4].

Don't generate more than about m/1000 numbers without changing a, c, or m.

The sequence length depends on chosen a,c,m.


LC algorithm (2).

        X = a * (X mod q) - r * (long) (X/q)
        if X<0 then X+=m

[Knuth, pp. 185-186.]

        X is the seed and the result
          as a seed is nonzero and less than m
        a is a primitive root of m (which means that a^2 <= m)
        q is (long) m / a
        r is m mod a
        m is a prime number near the largest easily computed integer

which gives

        X = a * (X % ((long) m / a)) -
            (M % a) * ((long) (X / ((long) m / a)))

Since m is prime, the least-significant bits of X are just as random as
the most-significant bits. */

/* Blum, Blum, and Shub.

   [Bruce Schneier, "Applied Cryptography", Second Edition, John Wiley
   & Sons, Inc., 1996, pp. 417-418.]

   "Find two large prime numbers, p and q, which are congruent to 3
   modulo 4. The product of those numbers, n, is a blum integer.
   Choose another random integer, x, which is relatively prime to n.
   Compute
        x[0] = x^2 mod n
   That's the seed for the generator."

   To generate a random bit, compute
        x[i] = x[i-1]^2 mod n
   The least significant bit of x[i] is the one we want.

   We can use more than one bit from x[i], namely the
        log2(bitlength of x[i])
   least significant bits of x[i].

   So, for a 32-bit seed we get 5 bits per computation.

   The non-predictability of this generator is based on the difficulty
   of factoring n.
 */

/* -------------------------------------------------- */

/* lc (rp, state) -- Generate next number in LC sequence. Return the
   number of valid bits in the result. NOTE: If 'm' is a power of 2
   (m2exp != 0), discard the lower half of the result.

   The state's seed is replaced by the new sequence value.  Only the
   power-of-two modulus case is implemented: with m2exp == 0 the function
   stops in abort () or ASSERT_ALWAYS.  */

static
unsigned long int
#if __STDC__
lc (mp_ptr rp, gmp_randstate_t rstate)
#else
lc (rp, rstate)
     mp_ptr rp;
     gmp_randstate_t rstate;
#endif
{
  mp_ptr tp, seedp, ap;
  mp_size_t ta;
  mp_size_t tn, seedn, an;
  mp_size_t retval;
  int shiftcount = 0;           /* Never modified below.  */
  unsigned long int m2exp;
  mp_limb_t c;
  TMP_DECL (mark);

  m2exp = rstate->algdata.lc->m2exp;
  c = (mp_limb_t) rstate->algdata.lc->c;

  seedp = PTR (rstate->seed);
  seedn = SIZ (rstate->seed);

  if (seedn == 0)
    {
      /* Seed is 0. Result is C % M. */
      *rp = c;

      if (m2exp != 0)
        {
          /* M is a power of 2. */
          if (m2exp < BITS_PER_MP_LIMB)
            {
              /* Only necessary when M may be smaller than C. */
              *rp &= (((mp_limb_t) 1 << m2exp) - 1);
            }
        }
      else
        {
          /* M is not a power of 2. */
          abort (); /* FIXME. */
        }

      /* Save result as next seed. */
      *seedp = *rp;
      SIZ (rstate->seed) = 1;
      /* NOTE(review): this path reports a full limb of valid bits and does
         not discard the lower half, unlike the general path below, which
         returns (m2exp + 1) / 2 -- confirm that this is intended.  */
      return BITS_PER_MP_LIMB;
    }

  ap = PTR (rstate->algdata.lc->a);
  an = SIZ (rstate->algdata.lc->a);

  /* Allocate temporary storage. Let there be room for calculation of
     (A * seed + C) % M, or M if bigger than that. */

  ASSERT_ALWAYS (m2exp != 0); /* FIXME. */

  TMP_MARK (mark);
  /* One limb more than the product A * seed needs.  */
  ta = an + seedn + 1;
  tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);
  MPN_ZERO (tp, ta);

  /* t = a * seed */
  /* The operand with more limbs is passed first.  */
  if (seedn >= an)
    mpn_mul_basecase (tp, seedp, seedn, ap, an);
  else
    mpn_mul_basecase (tp, ap, an, seedp, seedn);
  tn = an + seedn;

  /* t = t + c */
  mpn_incr_u (tp, c);

  /* t = t % m */
  if (m2exp != 0)
    {
      /* M is a power of 2. The mod operation is trivial. */

      /* Mask the limb holding bit M2EXP and cut the length to the limbs
         that cover M2EXP bits.  */
      tp[m2exp / BITS_PER_MP_LIMB] &= ((mp_limb_t) 1 << m2exp % BITS_PER_MP_LIMB) - 1;
      tn = (m2exp + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
    }
  else
    {
      abort (); /* FIXME. */
    }

  /* Save result as next seed. */
  /* The size is stored as TN without normalization.  */
  MPN_COPY (PTR (rstate->seed), tp, tn);
  SIZ (rstate->seed) = tn;

  if (m2exp != 0)
    {
      /* Discard the lower half of the result. */
      unsigned long int discardb = m2exp / 2;
      mp_size_t discardl = discardb / BITS_PER_MP_LIMB;

      /* Drop whole limbs first, then shift out the remaining bits.  */
      tn -= discardl;
      if (tn > 0)
        {
          if (discardb % BITS_PER_MP_LIMB != 0)
            {
              mpn_rshift (tp, tp + discardl, tn, discardb % BITS_PER_MP_LIMB);
              /* NOTE(review): the copy length is computed from DISCARDB,
                 not from TN -- confirm against the callers' buffer size.  */
              MPN_COPY (rp, tp, (discardb + BITS_PER_MP_LIMB -1) / BITS_PER_MP_LIMB);
            }
          else /* Even limb boundary. */
            MPN_COPY_INCR (rp, tp + discardl, tn);
        }
    }
  else
    {
      MPN_COPY (rp, tp, tn);
    }

  TMP_FREE (mark);

  /* Return number of valid bits in the result. */
  if (m2exp != 0)
    retval = (m2exp + 1) / 2;
  else
    retval = SIZ (rstate->algdata.lc->m) * BITS_PER_MP_LIMB - shiftcount;
  return retval;
}

#ifdef RAWRANDEBUG
/* Set even bits to EVENBITS and odd bits to ! EVENBITS in RP.
   Number of bits is m2exp in state. */
/* FIXME: Remove.
*/ +unsigned long int +lc_test (mp_ptr rp, gmp_randstate_t s, const int evenbits) +{ + unsigned long int rn, nbits; + int f; + + nbits = s->algdata.lc->m2exp / 2; + rn = nbits / BITS_PER_MP_LIMB + (nbits % BITS_PER_MP_LIMB != 0); + MPN_ZERO (rp, rn); + + for (f = 0; f < nbits; f++) + { + mpn_lshift (rp, rp, rn, 1); + if (f % 2 == ! evenbits) + rp[0] += 1; + } + + return nbits; +} +#endif /* RAWRANDEBUG */ + +void +#if __STDC__ +_gmp_rand (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits) +#else +_gmp_rand (rp, rstate, nbits) + mp_ptr rp; + gmp_randstate_t rstate; + unsigned long int nbits; +#endif +{ + mp_size_t rn; /* Size of R in limbs (computed here, not read again below). */ + + rn = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; + + switch (rstate->alg) + { + case GMP_RAND_ALG_LC: + { + unsigned long int rbitpos; + int chunk_nbits; + mp_ptr tp; + mp_size_t tn; + TMP_DECL (lcmark); + + TMP_MARK (lcmark); + + chunk_nbits = rstate->algdata.lc->m2exp / 2; + tn = (chunk_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; + + tp = (mp_ptr) TMP_ALLOC (tn * BYTES_PER_MP_LIMB); + + rbitpos = 0; + while (rbitpos + chunk_nbits <= nbits) + { + mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB; + + if (rbitpos % BITS_PER_MP_LIMB != 0) + { + mp_limb_t savelimb, rcy; + /* Target of new chunk is not limb aligned. Use temp space + and align things by shifting it up. The low target limb + is saved first because mpn_lshift overwrites it. */ + lc (tp, rstate); + savelimb = r2p[0]; + rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB); + r2p[0] |= savelimb; +/* bogus -- NOTE(review): original author's marker on this carry-out test; confirm before relying on it */ if ((chunk_nbits % BITS_PER_MP_LIMB + rbitpos % BITS_PER_MP_LIMB) + > BITS_PER_MP_LIMB) + r2p[tn] = rcy; + } + else + { + /* Target of new chunk is limb aligned. Let `lc' put bits + directly into our target variable. */ + lc (r2p, rstate); + } + rbitpos += chunk_nbits; + } + + /* Handle last [0..chunk_nbits) bits.
*/ + if (rbitpos != nbits) + { + mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB; + int last_nbits = nbits - rbitpos; + tn = (last_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB; + lc (tp, rstate); + if (rbitpos % BITS_PER_MP_LIMB != 0) + { + mp_limb_t savelimb, rcy; + /* Target of new chunk is not limb aligned. Use temp space + and align things by shifting it up. */ + savelimb = r2p[0]; + rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB); + r2p[0] |= savelimb; + if (rbitpos + tn * BITS_PER_MP_LIMB - rbitpos % BITS_PER_MP_LIMB < nbits) + r2p[tn] = rcy; + } + else + { + MPN_COPY (r2p, tp, tn); + } + /* Mask off top bits if needed, so that no bits at or above + position nbits remain set in the top limb. */ + if (nbits % BITS_PER_MP_LIMB != 0) + rp[nbits / BITS_PER_MP_LIMB] + &= ~ ((~(mp_limb_t) 0) << nbits % BITS_PER_MP_LIMB); + } + + TMP_FREE (lcmark); + break; + } + + default: + gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT; + break; + } +} diff --git a/rts/gmp/randsd.c b/rts/gmp/randsd.c new file mode 100644 index 0000000000..3bed14b578 --- /dev/null +++ b/rts/gmp/randsd.c @@ -0,0 +1,37 @@ +/* gmp_randseed (state, seed) -- Set initial seed SEED in random state + STATE. + +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +gmp_randseed (gmp_randstate_t rstate, + mpz_t seed) +#else +gmp_randseed (rstate, seed) + gmp_randstate_t rstate; + mpz_t seed; +#endif +{ + mpz_set (rstate->seed, seed); /* copy SEED into the state's seed */ +} diff --git a/rts/gmp/randsdui.c b/rts/gmp/randsdui.c new file mode 100644 index 0000000000..92f412f3ea --- /dev/null +++ b/rts/gmp/randsdui.c @@ -0,0 +1,37 @@ +/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random + state STATE. + +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +void +#if __STDC__ +gmp_randseed_ui (gmp_randstate_t rstate, + unsigned long int seed) +#else +gmp_randseed_ui (rstate, seed) + gmp_randstate_t rstate; + unsigned long int seed; /* same type as in the prototyped definition */ +#endif +{ + mpz_set_ui (rstate->seed, seed); /* store the unsigned long SEED as the state's seed */ +} diff --git a/rts/gmp/stack-alloc.c b/rts/gmp/stack-alloc.c new file mode 100644 index 0000000000..9ab98fe5f9 --- /dev/null +++ b/rts/gmp/stack-alloc.c @@ -0,0 +1,136 @@ +/* Stack allocation routines. This is intended for machines without support + for the `alloca' function. + +Copyright (C) 1996, 1997, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library.
+ +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "stack-alloc.h" + +#define __need_size_t +#include <stddef.h> +#undef __need_size_t + +/* gmp-impl.h and stack-alloc.h conflict when not USE_STACK_ALLOC, so these + declarations are copied here */ +#if __STDC__ +extern void * (*__gmp_allocate_func) (size_t); +extern void (*__gmp_free_func) (void *, size_t); +#else +extern void * (*__gmp_allocate_func) (); +extern void (*__gmp_free_func) (); +#endif + +typedef struct tmp_stack tmp_stack; + +static unsigned long max_total_allocation = 0; +static unsigned long current_total_allocation = 0; + +static tmp_stack xxx = {&xxx, &xxx, 0}; +static tmp_stack *current = &xxx; + +/* The rounded size of the header of each allocation block. */ +#define HSIZ ((sizeof (tmp_stack) + __TMP_ALIGN - 1) & -__TMP_ALIGN) + +/* Allocate a block of exactly <size> bytes. This should only be called + through the TMP_ALLOC macro, which takes care of rounding/alignment. 
*/ +void * +#if __STDC__ +__gmp_tmp_alloc (unsigned long size) +#else +__gmp_tmp_alloc (size) + unsigned long size; +#endif +{ + void *that; + + if (size > (char *) current->end - (char *) current->alloc_point) /* current chunk cannot hold the request */ + { + void *chunk; + tmp_stack *header; + unsigned long chunk_size; + unsigned long now; + + /* Allocate a chunk that makes the total current allocation somewhat + larger than the maximum allocation ever. If size is very large, we + allocate that much. In both branches chunk_size includes HSIZ + bytes for the chunk header in addition to the usable space. */ + + now = current_total_allocation + size; + if (now > max_total_allocation) + { + /* We need more temporary memory than ever before. Increase + for future needs (by half again of the new total). */ + now = now * 3 / 2; + chunk_size = now - current_total_allocation + HSIZ; + current_total_allocation = now; + max_total_allocation = current_total_allocation; + } + else + { + chunk_size = max_total_allocation - current_total_allocation + HSIZ; + current_total_allocation = max_total_allocation; + } + + chunk = (*__gmp_allocate_func) (chunk_size); /* result is used without a null check */ + header = (tmp_stack *) chunk; + header->end = (char *) chunk + chunk_size; + header->alloc_point = (char *) chunk + HSIZ; /* usable space starts after the header */ + header->prev = current; /* link back to the previous chunk */ + current = header; + } + + that = current->alloc_point; /* hand out the next free bytes of the current chunk */ + current->alloc_point = (char *) that + size; + return that; +} + +/* Typically called at function entry. <mark> is assigned so that + __gmp_tmp_free can later be used to reclaim all subsequently allocated + storage.
*/ +void +#if __STDC__ +__gmp_tmp_mark (tmp_marker *mark) +#else +__gmp_tmp_mark (mark) + tmp_marker *mark; +#endif +{ + mark->which_chunk = current; /* chunk in use at the time of the mark */ + mark->alloc_point = current->alloc_point; /* and how far into it we had allocated */ +} + +/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark. + Chunks created after the mark are handed back to __gmp_free_func; the + marked chunk itself is kept and its allocation point is rewound. */ +void +#if __STDC__ +__gmp_tmp_free (tmp_marker *mark) +#else +__gmp_tmp_free (mark) + tmp_marker *mark; +#endif +{ + while (mark->which_chunk != current) + { + tmp_stack *tmp; + + tmp = current; + current = tmp->prev; /* unlink the newest chunk */ + current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ); /* chunk size minus its header */ + (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp); /* full chunk size, header included */ + } + current->alloc_point = mark->alloc_point; +} diff --git a/rts/gmp/stack-alloc.h b/rts/gmp/stack-alloc.h new file mode 100644 index 0000000000..f59beec266 --- /dev/null +++ b/rts/gmp/stack-alloc.h @@ -0,0 +1,64 @@ +/* Stack allocation routines. This is intended for machines without support + for the `alloca' function. + +Copyright (C) 1996, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA.
*/ + +struct tmp_stack +{ + void *end; /* one past the last byte of this chunk */ + void *alloc_point; /* next free byte in this chunk */ + struct tmp_stack *prev; /* chunk that was current before this one */ +}; + +struct tmp_marker +{ + struct tmp_stack *which_chunk; /* chunk that was current when marked */ + void *alloc_point; /* its alloc_point when marked */ +}; + +typedef struct tmp_marker tmp_marker; + +#if defined (__cplusplus) +extern "C" { +#endif + +#if __STDC__ +void *__gmp_tmp_alloc (unsigned long); +void __gmp_tmp_mark (tmp_marker *); +void __gmp_tmp_free (tmp_marker *); +#else +void *__gmp_tmp_alloc (); +void __gmp_tmp_mark (); +void __gmp_tmp_free (); +#endif + +#if defined (__cplusplus) +} +#endif + +#ifndef __TMP_ALIGN +#define __TMP_ALIGN 8 +#endif + +#define TMP_DECL(marker) tmp_marker marker +#define TMP_ALLOC(size) \ + __gmp_tmp_alloc (((unsigned long) (size) + __TMP_ALIGN - 1) & -__TMP_ALIGN) +#define TMP_MARK(marker) __gmp_tmp_mark (&(marker)) +#define TMP_FREE(marker) __gmp_tmp_free (&(marker)) diff --git a/rts/gmp/stamp-h.in b/rts/gmp/stamp-h.in new file mode 100644 index 0000000000..9788f70238 --- /dev/null +++ b/rts/gmp/stamp-h.in @@ -0,0 +1 @@ +timestamp diff --git a/rts/gmp/stamp-vti b/rts/gmp/stamp-vti new file mode 100644 index 0000000000..e3186186b2 --- /dev/null +++ b/rts/gmp/stamp-vti @@ -0,0 +1,3 @@ +@set UPDATED 5 October 2000 +@set EDITION 3.1.1 +@set VERSION 3.1.1 diff --git a/rts/gmp/urandom.h b/rts/gmp/urandom.h new file mode 100644 index 0000000000..313479e8b7 --- /dev/null +++ b/rts/gmp/urandom.h @@ -0,0 +1,86 @@ +/* urandom.h -- define urandom returning a full unsigned long random value. + +Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version.
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#if defined (__hpux) || defined (__svr4__) || defined (__SVR4) +/* HPUX lacks random(). */ +static inline mp_limb_t +urandom () +{ + return mrand48 (); +} +#define __URANDOM +#endif + +#if defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__)) +/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff */ +static inline unsigned long +urandom () +{ + return rand () ^ (rand () << 16) ^ (rand() << 32); +} +#define __URANDOM +#endif + +#if defined (__alpha) && !defined (__URANDOM) +/* DEC OSF/1 1.2 random() returns a double. */ +long mrand48 (); +static inline mp_limb_t +urandom () +{ + return mrand48 () | (mrand48 () << 32); +} +#define __URANDOM +#endif + +#if BITS_PER_MP_LIMB == 32 && !defined (__URANDOM) +#if defined (__cplusplus) +extern "C" { +#endif +long random (); +#if defined (__cplusplus) +} +#endif +static inline mp_limb_t +urandom () +{ + /* random() returns 31 bits, we want 32. */ + return random () ^ (random () << 1); +} +#define __URANDOM +#endif + +#if BITS_PER_MP_LIMB == 64 && !defined (__URANDOM) +#if defined (__cplusplus) +extern "C" { +#endif +long random (); +#if defined (__cplusplus) +} +#endif +static inline mp_limb_t +urandom () +{ + /* random() returns 31 bits, we want 64. 
*/ + return random () ^ ((mp_limb_t) random () << 31) ^ ((mp_limb_t) random () << 62); +} +#define __URANDOM +#endif + diff --git a/rts/gmp/version.c b/rts/gmp/version.c new file mode 100644 index 0000000000..9d544ee1d8 --- /dev/null +++ b/rts/gmp/version.c @@ -0,0 +1,26 @@ +/* gmp_version -- version number compiled into the library */ + +/* +Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +const char *gmp_version = VERSION; diff --git a/rts/gmp/version.texi b/rts/gmp/version.texi new file mode 100644 index 0000000000..e3186186b2 --- /dev/null +++ b/rts/gmp/version.texi @@ -0,0 +1,3 @@ +@set UPDATED 5 October 2000 +@set EDITION 3.1.1 +@set VERSION 3.1.1 |