summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Manual.pdf bin 501334 -> 0 bytes
-rw-r--r-- README 10
-rwxr-xr-x compile 347
-rw-r--r-- configure.ac 11
-rwxr-xr-x depcomp 791
-rw-r--r-- examples/gf_example_2.c 4
-rw-r--r-- examples/gf_example_5.c 1
-rw-r--r-- examples/gf_example_6.c 1
-rw-r--r-- examples/gf_example_7.c 1
-rw-r--r-- include/gf_complete.h 15
-rw-r--r-- include/gf_int.h 4
-rw-r--r-- m4/ltoptions.m4 19
-rw-r--r-- src/gf.c 68
-rw-r--r-- src/gf_general.c 9
-rw-r--r-- src/gf_general.h 61
-rw-r--r-- src/gf_int.h 200
-rw-r--r-- src/gf_method.c 7
-rw-r--r-- src/gf_rand.h 22
-rw-r--r-- src/gf_w128.c 63
-rw-r--r-- src/gf_w16.c 77
-rw-r--r-- src/gf_w32.c 229
-rw-r--r-- src/gf_w4.c 207
-rw-r--r-- src/gf_w64.c 90
-rw-r--r-- src/gf_w8.c 102
-rw-r--r-- src/gf_wgen.c 13
-rwxr-xr-x test-driver 139
-rw-r--r-- test/gf_unit.c 20
-rw-r--r-- tools/Makefile.am 2
-rw-r--r-- tools/gf_add.c 2
-rw-r--r-- tools/gf_inline_time.c 5
-rw-r--r-- tools/gf_methods.c 7
-rw-r--r-- tools/gf_poly.c 6
-rw-r--r-- tools/gf_time.c 15
-rwxr-xr-x tools/run-tests.sh 9
34 files changed, 1804 insertions, 753 deletions
diff --git a/Manual.pdf b/Manual.pdf
deleted file mode 100644
index 59968bb..0000000
--- a/Manual.pdf
+++ /dev/null
Binary files differ
diff --git a/README b/README
index e37682d..f34fa08 100644
--- a/README
+++ b/README
@@ -8,8 +8,10 @@ Authors: James S. Plank (University of Tennessee)
Adam W. Disney (University of Tennessee)
Allen C. McBride (University of Tennessee)
-The user's manual is in the file Manual.pdf. You may also get a copy of that
-manual at http://www.cs.utk.edu/~plank/plank/papers/GF-Complete-Manual-1.02.pdf.
+The programmer's manual and tutorial is provided in two places:
+
+1.) A copy is hosted on BitBucket at https://bitbucket.org/jimplank/gf-complete/downloads/GF-Complete-Manual.pdf
+2.) A copy is also available at http://www.cs.utk.edu/~plank/plank/papers/GF-Complete-Manual-1.02.pdf
The online home for GF-Complete is:
@@ -25,3 +27,7 @@ To compile, do:
./configure
make
sudo make install
+
+To run the tests, do:
+
+ make check
diff --git a/compile b/compile
new file mode 100755
index 0000000..531136b
--- /dev/null
+++ b/compile
@@ -0,0 +1,347 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2012-10-14.11; # UTC
+
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" "" $nl"
+
+file_conv=
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+ file=$1
+ case $file in
+ / | /[!/]*) # absolute file, and not a UNC file
+ if test -z "$file_conv"; then
+ # lazily determine how to convert abs files
+ case `uname -s` in
+ MINGW*)
+ file_conv=mingw
+ ;;
+ CYGWIN*)
+ file_conv=cygwin
+ ;;
+ *)
+ file_conv=wine
+ ;;
+ esac
+ fi
+ case $file_conv/,$2, in
+ *,$file_conv,*)
+ ;;
+ mingw/*)
+ file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+ ;;
+ cygwin/*)
+ file=`cygpath -m "$file" || echo "$file"`
+ ;;
+ wine/*)
+ file=`winepath -w "$file" || echo "$file"`
+ ;;
+ esac
+ ;;
+ esac
+}
+
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+ func_file_conv "$1"
+ if test -z "$lib_path"; then
+ lib_path=$file
+ else
+ lib_path="$lib_path;$file"
+ fi
+ linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+ lib=$1
+ found=no
+ save_IFS=$IFS
+ IFS=';'
+ for dir in $lib_path $LIB
+ do
+ IFS=$save_IFS
+ if $shared && test -f "$dir/$lib.dll.lib"; then
+ found=yes
+ lib=$dir/$lib.dll.lib
+ break
+ fi
+ if test -f "$dir/$lib.lib"; then
+ found=yes
+ lib=$dir/$lib.lib
+ break
+ fi
+ if test -f "$dir/lib$lib.a"; then
+ found=yes
+ lib=$dir/lib$lib.a
+ break
+ fi
+ done
+ IFS=$save_IFS
+
+ if test "$found" != yes; then
+ lib=$lib.lib
+ fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+ # Assume a capable shell
+ lib_path=
+ shared=:
+ linker_opts=
+ for arg
+ do
+ if test -n "$eat"; then
+ eat=
+ else
+ case $1 in
+ -o)
+ # configure might choose to run compile as 'compile cc -o foo foo.c'.
+ eat=1
+ case $2 in
+ *.o | *.[oO][bB][jJ])
+ func_file_conv "$2"
+ set x "$@" -Fo"$file"
+ shift
+ ;;
+ *)
+ func_file_conv "$2"
+ set x "$@" -Fe"$file"
+ shift
+ ;;
+ esac
+ ;;
+ -I)
+ eat=1
+ func_file_conv "$2" mingw
+ set x "$@" -I"$file"
+ shift
+ ;;
+ -I*)
+ func_file_conv "${1#-I}" mingw
+ set x "$@" -I"$file"
+ shift
+ ;;
+ -l)
+ eat=1
+ func_cl_dashl "$2"
+ set x "$@" "$lib"
+ shift
+ ;;
+ -l*)
+ func_cl_dashl "${1#-l}"
+ set x "$@" "$lib"
+ shift
+ ;;
+ -L)
+ eat=1
+ func_cl_dashL "$2"
+ ;;
+ -L*)
+ func_cl_dashL "${1#-L}"
+ ;;
+ -static)
+ shared=false
+ ;;
+ -Wl,*)
+ arg=${1#-Wl,}
+ save_ifs="$IFS"; IFS=','
+ for flag in $arg; do
+ IFS="$save_ifs"
+ linker_opts="$linker_opts $flag"
+ done
+ IFS="$save_ifs"
+ ;;
+ -Xlinker)
+ eat=1
+ linker_opts="$linker_opts $2"
+ ;;
+ -*)
+ set x "$@" "$1"
+ shift
+ ;;
+ *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+ func_file_conv "$1"
+ set x "$@" -Tp"$file"
+ shift
+ ;;
+ *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+ func_file_conv "$1" mingw
+ set x "$@" "$file"
+ shift
+ ;;
+ *)
+ set x "$@" "$1"
+ shift
+ ;;
+ esac
+ fi
+ shift
+ done
+ if test -n "$linker_opts"; then
+ linker_opts="-link$linker_opts"
+ fi
+ exec "$@" $linker_opts
+ exit 1
+}
+
+eat=
+
+case $1 in
+ '')
+ echo "$0: No command. Try '$0 --help' for more information." 1>&2
+ exit 1;
+ ;;
+ -h | --h*)
+ cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+ exit $?
+ ;;
+ -v | --v*)
+ echo "compile $scriptversion"
+ exit $?
+ ;;
+ cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
+ func_cl_wrapper "$@" # Doesn't return...
+ ;;
+esac
+
+ofile=
+cfile=
+
+for arg
+do
+ if test -n "$eat"; then
+ eat=
+ else
+ case $1 in
+ -o)
+ # configure might choose to run compile as 'compile cc -o foo foo.c'.
+ # So we strip '-o arg' only if arg is an object.
+ eat=1
+ case $2 in
+ *.o | *.obj)
+ ofile=$2
+ ;;
+ *)
+ set x "$@" -o "$2"
+ shift
+ ;;
+ esac
+ ;;
+ *.c)
+ cfile=$1
+ set x "$@" "$1"
+ shift
+ ;;
+ *)
+ set x "$@" "$1"
+ shift
+ ;;
+ esac
+ fi
+ shift
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+ # If no '-o' option was seen then we might have been invoked from a
+ # pattern rule where we don't need one. That is ok -- this is a
+ # normal compilation that the losing compiler can handle. If no
+ # '.c' file was seen then we are probably linking. That is also
+ # ok.
+ exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file. Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+ if mkdir "$lockdir" >/dev/null 2>&1; then
+ break
+ fi
+ sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?
+
+if test -f "$cofile"; then
+ test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then
+ test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/configure.ac b/configure.ac
index 058e71f..9f33852 100644
--- a/configure.ac
+++ b/configure.ac
@@ -17,13 +17,20 @@ AC_CONFIG_MACRO_DIR([m4])
AM_MAINTAINER_MODE([disable])
# Override default CFLAGS
-CFLAGS="-O3 -g"
-CXXFLAGS="-O3 -g"
+CFLAGS="-Wall -Wpointer-arith -O3 -g"
dnl Compiling with per-target flags requires AM_PROG_CC_C_O.
AC_PROG_CC
AX_EXT()
+AC_ARG_ENABLE([sse],
+ AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]),
+ [if test "x$enableval" = "xno" ; then
+ SIMD_FLAGS=""
+ echo "DISABLED SSE!!!"
+ fi]
+)
+
AC_CONFIG_FILES([Makefile src/Makefile tools/Makefile test/Makefile examples/Makefile])
AC_OUTPUT
diff --git a/depcomp b/depcomp
new file mode 100755
index 0000000..4ebd5b3
--- /dev/null
+++ b/depcomp
@@ -0,0 +1,791 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2013-05-30.07; # UTC
+
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+ '')
+ echo "$0: No command. Try '$0 --help' for more information." 1>&2
+ exit 1;
+ ;;
+ -h | --h*)
+ cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+ depmode Dependency tracking mode.
+ source Source file read by 'PROGRAMS ARGS'.
+ object Object file output by 'PROGRAMS ARGS'.
+ DEPDIR directory where to store dependencies.
+ depfile Dependency file to output.
+ tmpdepfile Temporary file to use when outputting dependencies.
+ libtool Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+ exit $?
+ ;;
+ -v | --v*)
+ echo "depcomp $scriptversion"
+ exit $?
+ ;;
+esac
+
+# Get the directory component of the given path, and save it in the
+# global variable '$dir'. Note that this directory component will
+# be either empty or ending with a '/' character. This is deliberate.
+set_dir_from ()
+{
+ case $1 in
+ */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
+ *) dir=;;
+ esac
+}
+
+# Get the suffix-stripped basename of the given path, and save it in the
+# global variable '$base'.
+set_base_from ()
+{
+ base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
+}
+
+# If no dependency file was actually created by the compiler invocation,
+# we still have to create a dummy depfile, to avoid errors with the
+# Makefile "include basename.Plo" scheme.
+make_dummy_depfile ()
+{
+ echo "#dummy" > "$depfile"
+}
+
+# Factor out some common post-processing of the generated depfile.
+# Requires the auxiliary global variable '$tmpdepfile' to be set.
+aix_post_process_depfile ()
+{
+ # If the compiler actually managed to produce a dependency file,
+ # post-process it.
+ if test -f "$tmpdepfile"; then
+ # Each line is of the form 'foo.o: dependency.h'.
+ # Do two passes, one to just change these to
+ # $object: dependency.h
+ # and one to simply output
+ # dependency.h:
+ # which is needed to avoid the deleted-header problem.
+ { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
+ sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
+ } > "$depfile"
+ rm -f "$tmpdepfile"
+ else
+ make_dummy_depfile
+ fi
+}
+
+# A tabulation character.
+tab=' '
+# A newline character.
+nl='
+'
+# Character ranges might be problematic outside the C locale.
+# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
+lower=abcdefghijklmnopqrstuvwxyz
+digits=0123456789
+alpha=${upper}${lower}
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+ echo "depcomp: Variables source, object and depmode must be set" 1>&2
+ exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+ sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+ # HP compiler uses -M and no extra arg.
+ gccflag=-M
+ depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+ # This is just like dashmstdout with a different argument.
+ dashmflag=-xM
+ depmode=dashmstdout
+fi
+
+cygpath_u="cygpath -u -f -"
+if test "$depmode" = msvcmsys; then
+ # This is just like msvisualcpp but w/o cygpath translation.
+ # Just convert the backslash-escaped backslashes to single forward
+ # slashes to satisfy depend.m4
+ cygpath_u='sed s,\\\\,/,g'
+ depmode=msvisualcpp
+fi
+
+if test "$depmode" = msvc7msys; then
+ # This is just like msvc7 but w/o cygpath translation.
+ # Just convert the backslash-escaped backslashes to single forward
+ # slashes to satisfy depend.m4
+ cygpath_u='sed s,\\\\,/,g'
+ depmode=msvc7
+fi
+
+if test "$depmode" = xlc; then
+ # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
+ gccflag=-qmakedep=gcc,-MF
+ depmode=gcc
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want. Yay! Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff. Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am. Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+ for arg
+ do
+ case $arg in
+ -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+ *) set fnord "$@" "$arg" ;;
+ esac
+ shift # fnord
+ shift # $arg
+ done
+ "$@"
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ mv "$tmpdepfile" "$depfile"
+ ;;
+
+gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
+## There are various ways to get dependency output from gcc. Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+## up in a subdir. Having to rename by hand is ugly.
+## (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+## -MM, not -M (despite what the docs say). Also, it might not be
+## supported by the other compilers which use the 'gcc' depmode.
+## - Using -M directly means running the compiler twice (even worse
+## than renaming).
+ if test -z "$gccflag"; then
+ gccflag=-MD,
+ fi
+ "$@" -Wp,"$gccflag$tmpdepfile"
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ # The second -e expression handles DOS-style file names with drive
+ # letters.
+ sed -e 's/^[^:]*: / /' \
+ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header). We avoid this by adding
+## dummy dependencies for each header file. Too bad gcc doesn't do
+## this for us directly.
+## Some versions of gcc put a space before the ':'. On the theory
+## that the space means something, we add a space to the output as
+## well. hp depmode also adds that space, but also prefixes the VPATH
+## to the object. Take care to not repeat it in the output.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ tr ' ' "$nl" < "$tmpdepfile" \
+ | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
+ | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+hp)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+sgi)
+ if test "$libtool" = yes; then
+ "$@" "-Wp,-MDupdate,$tmpdepfile"
+ else
+ "$@" -MDupdate "$tmpdepfile"
+ fi
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+
+ if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
+ echo "$object : \\" > "$depfile"
+ # Clip off the initial element (the dependent). Don't try to be
+ # clever and replace this with sed code, as IRIX sed won't handle
+ # lines with more than a fixed number of characters (4096 in
+ # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
+ # the IRIX cc adds comments like '#:fec' to the end of the
+ # dependency line.
+ tr ' ' "$nl" < "$tmpdepfile" \
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
+ | tr "$nl" ' ' >> "$depfile"
+ echo >> "$depfile"
+ # The second pass generates a dummy entry for each header file.
+ tr ' ' "$nl" < "$tmpdepfile" \
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+ >> "$depfile"
+ else
+ make_dummy_depfile
+ fi
+ rm -f "$tmpdepfile"
+ ;;
+
+xlc)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+aix)
+ # The C for AIX Compiler uses -M and outputs the dependencies
+ # in a .u file. In older versions, this file always lives in the
+ # current directory. Also, the AIX compiler puts '$object:' at the
+ # start of each line; $object doesn't have directory information.
+ # Version 6 uses the directory in both cases.
+ set_dir_from "$object"
+ set_base_from "$object"
+ if test "$libtool" = yes; then
+ tmpdepfile1=$dir$base.u
+ tmpdepfile2=$base.u
+ tmpdepfile3=$dir.libs/$base.u
+ "$@" -Wc,-M
+ else
+ tmpdepfile1=$dir$base.u
+ tmpdepfile2=$dir$base.u
+ tmpdepfile3=$dir$base.u
+ "$@" -M
+ fi
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+ exit $stat
+ fi
+
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+ do
+ test -f "$tmpdepfile" && break
+ done
+ aix_post_process_depfile
+ ;;
+
+tcc)
+ # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
+ # FIXME: That version is still under development at the moment of writing.
+ # Make sure that this statement remains true also for stable, released
+ # versions.
+ # It will wrap lines (doesn't matter whether long or short) with a
+ # trailing '\', as in:
+ #
+ # foo.o : \
+ # foo.c \
+ # foo.h \
+ #
+ # It will put a trailing '\' even on the last line, and will use leading
+ # spaces rather than leading tabs (at least since its commit 0394caf7
+ # "Emit spaces for -MD").
+ "$@" -MD -MF "$tmpdepfile"
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
+ # We have to change lines of the first kind to '$object: \'.
+ sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
+ # And for each line of the second kind, we have to emit a 'dep.h:'
+ # dummy dependency, to avoid the deleted-header problem.
+ sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+## The order of this option in the case statement is important, since the
+## shell code in configure will try each of these formats in the order
+## listed in this file. A plain '-MD' option would be understood by many
+## compilers, so we must ensure this comes after the gcc and icc options.
+pgcc)
+ # Portland's C compiler understands '-MD'.
+ # Will always output deps to 'file.d' where file is the root name of the
+ # source file under compilation, even if file resides in a subdirectory.
+ # The object file name does not affect the name of the '.d' file.
+ # pgcc 10.2 will output
+ # foo.o: sub/foo.c sub/foo.h
+ # and will wrap long lines using '\' :
+ # foo.o: sub/foo.c ... \
+ # sub/foo.h ... \
+ # ...
+ set_dir_from "$object"
+ # Use the source, not the object, to determine the base name, since
+ # that's sadly what pgcc will do too.
+ set_base_from "$source"
+ tmpdepfile=$base.d
+
+ # For projects that build the same source file twice into different object
+ # files, the pgcc approach of using the *source* file root name can cause
+ # problems in parallel builds. Use a locking strategy to avoid stomping on
+ # the same $tmpdepfile.
+ lockdir=$base.d-lock
+ trap "
+ echo '$0: caught signal, cleaning up...' >&2
+ rmdir '$lockdir'
+ exit 1
+ " 1 2 13 15
+ numtries=100
+ i=$numtries
+ while test $i -gt 0; do
+ # mkdir is a portable test-and-set.
+ if mkdir "$lockdir" 2>/dev/null; then
+ # This process acquired the lock.
+ "$@" -MD
+ stat=$?
+ # Release the lock.
+ rmdir "$lockdir"
+ break
+ else
+ # If the lock is being held by a different process, wait
+ # until the winning process is done or we timeout.
+ while test -d "$lockdir" && test $i -gt 0; do
+ sleep 1
+ i=`expr $i - 1`
+ done
+ fi
+ i=`expr $i - 1`
+ done
+ trap - 1 2 13 15
+ if test $i -le 0; then
+ echo "$0: failed to acquire lock after $numtries attempts" >&2
+ echo "$0: check lockdir '$lockdir'" >&2
+ exit 1
+ fi
+
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ # Each line is of the form `foo.o: dependent.h',
+ # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+ # Do two passes, one to just change these to
+ # `$object: dependent.h' and one to simply `dependent.h:'.
+ sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+ # Some versions of the HPUX 10.20 sed can't process this invocation
+ # correctly. Breaking it into two sed invocations is a workaround.
+ sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
+ | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+hp2)
+ # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+ # compilers, which have integrated preprocessors. The correct option
+ # to use with these is +Maked; it writes dependencies to a file named
+ # 'foo.d', which lands next to the object file, wherever that
+ # happens to be.
+ # Much of this is similar to the tru64 case; see comments there.
+ set_dir_from "$object"
+ set_base_from "$object"
+ if test "$libtool" = yes; then
+ tmpdepfile1=$dir$base.d
+ tmpdepfile2=$dir.libs/$base.d
+ "$@" -Wc,+Maked
+ else
+ tmpdepfile1=$dir$base.d
+ tmpdepfile2=$dir$base.d
+ "$@" +Maked
+ fi
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile1" "$tmpdepfile2"
+ exit $stat
+ fi
+
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+ do
+ test -f "$tmpdepfile" && break
+ done
+ if test -f "$tmpdepfile"; then
+ sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
+ # Add 'dependent.h:' lines.
+ sed -ne '2,${
+ s/^ *//
+ s/ \\*$//
+ s/$/:/
+ p
+ }' "$tmpdepfile" >> "$depfile"
+ else
+ make_dummy_depfile
+ fi
+ rm -f "$tmpdepfile" "$tmpdepfile2"
+ ;;
+
+tru64)
+ # The Tru64 compiler uses -MD to generate dependencies as a side
+ # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
+ # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+ # dependencies in 'foo.d' instead, so we check for that too.
+ # Subdirectories are respected.
+ set_dir_from "$object"
+ set_base_from "$object"
+
+ if test "$libtool" = yes; then
+ # Libtool generates 2 separate objects for the 2 libraries. These
+ # two compilations output dependencies in $dir.libs/$base.o.d and
+ # in $dir$base.o.d. We have to check for both files, because
+ # one of the two compilations can be disabled. We should prefer
+ # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+ # automatically cleaned when .libs/ is deleted, while ignoring
+ # the former would cause a distcleancheck panic.
+ tmpdepfile1=$dir$base.o.d # libtool 1.5
+ tmpdepfile2=$dir.libs/$base.o.d # Likewise.
+ tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
+ "$@" -Wc,-MD
+ else
+ tmpdepfile1=$dir$base.d
+ tmpdepfile2=$dir$base.d
+ tmpdepfile3=$dir$base.d
+ "$@" -MD
+ fi
+
+ stat=$?
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+ exit $stat
+ fi
+
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+ do
+ test -f "$tmpdepfile" && break
+ done
+ # Same post-processing that is required for AIX mode.
+ aix_post_process_depfile
+ ;;
+
+msvc7)
+ if test "$libtool" = yes; then
+ showIncludes=-Wc,-showIncludes
+ else
+ showIncludes=-showIncludes
+ fi
+ "$@" $showIncludes > "$tmpdepfile"
+ stat=$?
+ grep -v '^Note: including file: ' "$tmpdepfile"
+ if test $stat -ne 0; then
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ # The first sed program below extracts the file names and escapes
+ # backslashes for cygpath. The second sed program outputs the file
+ # name when reading, but also accumulates all include files in the
+ # hold buffer in order to output them again at the end. This only
+ # works with sed implementations that can handle large buffers.
+ sed < "$tmpdepfile" -n '
+/^Note: including file: *\(.*\)/ {
+ s//\1/
+ s/\\/\\\\/g
+ p
+}' | $cygpath_u | sort -u | sed -n '
+s/ /\\ /g
+s/\(.*\)/'"$tab"'\1 \\/p
+s/.\(.*\) \\/\1:/
+H
+$ {
+ s/.*/'"$tab"'/
+ G
+ p
+}' >> "$depfile"
+ echo >> "$depfile" # make sure the fragment doesn't end with a backslash
+ rm -f "$tmpdepfile"
+ ;;
+
+msvc7msys)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+#nosideeffect)
+ # This comment above is used by automake to tell side-effect
+ # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout, regardless of -o.
+ "$@" || exit $?
+
+ # Remove the call to Libtool.
+ if test "$libtool" = yes; then
+ while test "X$1" != 'X--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+
+ # Remove '-o $object'.
+ IFS=" "
+ for arg
+ do
+ case $arg in
+ -o)
+ shift
+ ;;
+ $object)
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # $arg
+ ;;
+ esac
+ done
+
+ test -z "$dashmflag" && dashmflag=-M
+ # Require at least two characters before searching for ':'
+ # in the target name. This is to cope with DOS-style filenames:
+ # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
+ "$@" $dashmflag |
+ sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ # Some versions of the HPUX 10.20 sed can't process this sed invocation
+ # correctly. Breaking it into two sed invocations is a workaround.
+ tr ' ' "$nl" < "$tmpdepfile" \
+ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+ | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+dashXmstdout)
+ # This case only exists to satisfy depend.m4. It is never actually
+ # run, as this mode is specially recognized in the preamble.
+ exit 1
+ ;;
+
+makedepend)
+ "$@" || exit $?
+ # Remove any Libtool call
+ if test "$libtool" = yes; then
+ while test "X$1" != 'X--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+ # X makedepend
+ shift
+ cleared=no eat=no
+ for arg
+ do
+ case $cleared in
+ no)
+ set ""; shift
+ cleared=yes ;;
+ esac
+ if test $eat = yes; then
+ eat=no
+ continue
+ fi
+ case "$arg" in
+ -D*|-I*)
+ set fnord "$@" "$arg"; shift ;;
+ # Strip any option that makedepend may not understand. Remove
+ # the object too, otherwise makedepend will parse it as a source file.
+ -arch)
+ eat=yes ;;
+ -*|$object)
+ ;;
+ *)
+ set fnord "$@" "$arg"; shift ;;
+ esac
+ done
+ obj_suffix=`echo "$object" | sed 's/^.*\././'`
+ touch "$tmpdepfile"
+ ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+ rm -f "$depfile"
+ # makedepend may prepend the VPATH from the source file name to the object.
+ # No need to regex-escape $object, excess matching of '.' is harmless.
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
+ # Some versions of the HPUX 10.20 sed can't process the last invocation
+ # correctly. Breaking it into two sed invocations is a workaround.
+ sed '1,2d' "$tmpdepfile" \
+ | tr ' ' "$nl" \
+ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
+ | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile" "$tmpdepfile".bak
+ ;;
+
+cpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout.
+ "$@" || exit $?
+
+ # Remove the call to Libtool.
+ if test "$libtool" = yes; then
+ while test "X$1" != 'X--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+
+ # Remove '-o $object'.
+ IFS=" "
+ for arg
+ do
+ case $arg in
+ -o)
+ shift
+ ;;
+ $object)
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # $arg
+ ;;
+ esac
+ done
+
+ "$@" -E \
+ | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+ | sed '$ s: \\$::' > "$tmpdepfile"
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ cat < "$tmpdepfile" >> "$depfile"
+ sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+msvisualcpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout.
+ "$@" || exit $?
+
+ # Remove the call to Libtool.
+ if test "$libtool" = yes; then
+ while test "X$1" != 'X--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+
+ IFS=" "
+ for arg
+ do
+ case "$arg" in
+ -o)
+ shift
+ ;;
+ $object)
+ shift
+ ;;
+ "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+ set fnord "$@"
+ shift
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift
+ shift
+ ;;
+ esac
+ done
+ "$@" -E 2>/dev/null |
+ sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
+ echo "$tab" >> "$depfile"
+ sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+msvcmsys)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+none)
+ exec "$@"
+ ;;
+
+*)
+ echo "Unknown depmode $depmode" 1>&2
+ exit 1
+ ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/examples/gf_example_2.c b/examples/gf_example_2.c
index e98774a..576d9a5 100644
--- a/examples/gf_example_2.c
+++ b/examples/gf_example_2.c
@@ -28,8 +28,8 @@ int main(int argc, char **argv)
{
uint32_t a, b, c;
uint8_t *r1, *r2;
- uint16_t *r16;
- uint32_t *r32;
+ uint16_t *r16 = NULL;
+ uint32_t *r32 = NULL;
int w, i;
gf_t gf;
diff --git a/examples/gf_example_5.c b/examples/gf_example_5.c
index 8e7dd4e..da6e9ca 100644
--- a/examples/gf_example_5.c
+++ b/examples/gf_example_5.c
@@ -74,4 +74,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 30*2, i+15),
gf.extract_word.w32(&gf, b, 30*2, i+15));
}
+ return 0;
}
diff --git a/examples/gf_example_6.c b/examples/gf_example_6.c
index 54cdf83..800a35f 100644
--- a/examples/gf_example_6.c
+++ b/examples/gf_example_6.c
@@ -80,4 +80,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 30*4, i+15),
gf.extract_word.w32(&gf, b, 30*4, i+15));
}
+ return 0;
}
diff --git a/examples/gf_example_7.c b/examples/gf_example_7.c
index cd5c44b..ee07d53 100644
--- a/examples/gf_example_7.c
+++ b/examples/gf_example_7.c
@@ -71,4 +71,5 @@ int main(int argc, char **argv)
gf.extract_word.w32(&gf, a, 3, i),
gf.extract_word.w32(&gf, b, 3, i));
}
+ return 0;
}
diff --git a/include/gf_complete.h b/include/gf_complete.h
index 57b439e..0469b77 100644
--- a/include/gf_complete.h
+++ b/include/gf_complete.h
@@ -33,17 +33,18 @@
Not all are implemented for all values of w.
See the paper for an explanation of how they work. */
-typedef enum {GF_MULT_DEFAULT,
- GF_MULT_SHIFT,
- GF_MULT_CARRY_FREE,
- GF_MULT_GROUP,
+typedef enum {GF_MULT_DEFAULT,
+ GF_MULT_SHIFT,
+ GF_MULT_CARRY_FREE,
+ GF_MULT_CARRY_FREE_GK, //ADAM
+ GF_MULT_GROUP,
GF_MULT_BYTWO_p,
GF_MULT_BYTWO_b,
- GF_MULT_TABLE,
- GF_MULT_LOG_TABLE,
+ GF_MULT_TABLE,
+ GF_MULT_LOG_TABLE,
GF_MULT_LOG_ZERO,
GF_MULT_LOG_ZERO_EXT,
- GF_MULT_SPLIT_TABLE,
+ GF_MULT_SPLIT_TABLE,
GF_MULT_COMPOSITE } gf_mult_type_t;
/* These are the different ways to optimize region
diff --git a/include/gf_int.h b/include/gf_int.h
index 9221569..98294cc 100644
--- a/include/gf_int.h
+++ b/include/gf_int.h
@@ -154,8 +154,8 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
- GF_E_SP128_A, /* Mult == SPLIT, w=128, SSE only with 4/128 */
- GF_E_SP128_S, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
+ GF_E_SP128_A, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
+ GF_E_SP128_S, /* Mult == SPLIT, w=128, SSE only with 4/128 */
GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */
GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4
index 17cfd51..5d9acd8 100644
--- a/m4/ltoptions.m4
+++ b/m4/ltoptions.m4
@@ -326,9 +326,24 @@ dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
- [AS_HELP_STRING([--with-pic],
+ [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
- [pic_mode="$withval"],
+ [lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
diff --git a/src/gf.c b/src/gf.c
index a443f17..1955559 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -85,8 +85,8 @@ void gf_error()
case GF_E_SP128AR: s = "With -m SPLIT, w=128, bad arg1/arg2."; break;
case GF_E_SP128AL: s = "With -m SPLIT, w=128, -r SSE requires -r ALTMAP."; break;
case GF_E_SP128AS: s = "With -m SPLIT, w=128, ALTMAP needs SSSE3 supported."; break;
- case GF_E_SP128_A: s = "With -m SPLIT, w=128, -r SSE|NOSSE only with arg1/arg2 = 4/128."; break;
- case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r ALTMAP only with arg1/arg2 = 4/128."; break;
+ case GF_E_SP128_A: s = "With -m SPLIT, w=128, -r ALTMAP only with arg1/arg2 = 4/128."; break;
+ case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r SSE|NOSSE only with arg1/arg2 = 4/128."; break;
case GF_E_SPLIT_W: s = "With -m SPLIT, w must be in {8, 16, 32, 64, 128}."; break;
case GF_E_SP_16AR: s = "With -m SPLIT, w=16, Bad arg1/arg2."; break;
case GF_E_SP_16_A: s = "With -m SPLIT, w=16, -r ALTMAP only with arg1/arg2 = 4/16."; break;
@@ -179,13 +179,11 @@ uint64_t gf_composite_get_default_poly(gf_t *base)
int gf_error_check(int w, int mult_type, int region_type, int divide_type,
int arg1, int arg2, uint64_t poly, gf_t *base)
{
- int sse4 = 0;
int sse3 = 0;
int sse2 = 0;
int pclmul = 0;
int rdouble, rquad, rlazy, rsse, rnosse, raltmap, rcauchy, tmp;
- uint64_t pp;
- gf_internal_t *sub, *subsub, *subsubsub;
+ gf_internal_t *sub;
rdouble = (region_type & GF_REGION_DOUBLE_TABLE);
rquad = (region_type & GF_REGION_QUAD_TABLE);
@@ -214,10 +212,6 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
sse3 = 1;
#endif
-#ifdef INTEL_SSE4
- sse4 = 1;
-#endif
-
#ifdef INTEL_SSE4_PCLMUL
pclmul = 1;
#endif
@@ -292,6 +286,16 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
return 1;
}
+ //ADAM
+ if (mult_type == GF_MULT_CARRY_FREE_GK) {
+ if (w != 4 && w != 8 && w != 16 &&
+ w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
+ if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; }
+ if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; }
+ if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; }
+ return 1;
+ }
+
if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; }
if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; }
@@ -344,11 +348,12 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
if (raltmap) { _gf_errno = GF_E_SP_8__A; return 0; }
} else if (w == 16) {
- if (arg1 == 4 && arg2 == 16) {
- if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
- } else if (arg1 == 8 && (arg2 == 16 || arg2 == 8)) {
+ if ((arg1 == 8 && arg2 == 8) ||
+ (arg1 == 8 && arg2 == 16)) {
if (rsse || rnosse) { _gf_errno = GF_E_SP_16_S; return 0; }
if (raltmap) { _gf_errno = GF_E_SP_16_A; return 0; }
+ } else if (arg1 == 4 && arg2 == 16) {
+ if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
} else { _gf_errno = GF_E_SP_16AR; return 0; }
} else if (w == 32) {
if ((arg1 == 8 && arg2 == 8) ||
@@ -356,10 +361,8 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
(arg1 == 16 && arg2 == 32)) {
if (rsse || rnosse) { _gf_errno = GF_E_SP_32_S; return 0; }
if (raltmap) { _gf_errno = GF_E_SP_32_A; return 0; }
- } else if ((arg1 == 4 && arg2 == 32) ||
- (arg1 == 4 && arg2 == 32)) {
+ } else if (arg1 == 4 && arg2 == 32) {
if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
- if (raltmap && arg1 != 4) { _gf_errno = GF_E_SP_32_A; return 0; }
if (raltmap && !sse3) { _gf_errno = GF_E_SP_32AS; return 0; }
if (raltmap && rnosse) { _gf_errno = GF_E_SP_32AS; return 0; }
} else { _gf_errno = GF_E_SP_32AR; return 0; }
@@ -488,7 +491,7 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
h->arg2 = arg2;
h->base_gf = base_gf;
h->private = (void *) gf->scratch;
- h->private += (sizeof(gf_internal_t));
+ h->private = (uint8_t *)h->private + (sizeof(gf_internal_t));
gf->extract_word.w32 = NULL;
switch(w) {
@@ -525,7 +528,7 @@ void gf_alignment_error(char *s, int a)
static
void gf_invert_binary_matrix(uint32_t *mat, uint32_t *inv, int rows) {
- int cols, i, j, k;
+ int cols, i, j;
uint32_t tmp;
cols = rows;
@@ -594,7 +597,7 @@ uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp)
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
{
uint64_t a, prod;
- int j, xor;
+ int xor;
uint64_t *s64, *d64, *top;
s64 = rd->s_start;
@@ -693,8 +696,8 @@ static void gf_slow_multiply_region(gf_region_data *rd, void *src, void *dest, v
fprintf(stderr, "Error: gf_slow_multiply_region: w=%d not implemented.\n", h->w);
exit(1);
}
- src += wb;
- dest += wb;
+ src = (uint8_t *)src + wb;
+ dest = (uint8_t *)dest + wb;
}
}
@@ -773,8 +776,7 @@ void gf_set_region_data(gf_region_data *rd,
int xor,
int align)
{
- uint8_t *s8, *d8;
- gf_internal_t *h;
+ gf_internal_t *h = NULL;
int wb;
uint32_t a;
unsigned long uls, uld;
@@ -802,7 +804,7 @@ void gf_set_region_data(gf_region_data *rd,
if (align == -1) { /* JSP: This is cauchy. Error check bytes, then set up the pointers
so that there are no alignment regions. */
- if (bytes % h->w != 0) {
+ if (h != NULL && bytes % h->w != 0) {
fprintf(stderr, "Error in region multiply operation.\n");
fprintf(stderr, "The size must be a multiple of %d bytes.\n", h->w);
exit(1);
@@ -810,8 +812,8 @@ void gf_set_region_data(gf_region_data *rd,
rd->s_start = src;
rd->d_start = dest;
- rd->s_top = src + bytes;
- rd->d_top = src + bytes;
+ rd->s_top = (uint8_t *)src + bytes;
+ rd->d_top = (uint8_t *)src + bytes;
return;
}
@@ -840,12 +842,12 @@ void gf_set_region_data(gf_region_data *rd,
uls %= a;
if (uls != 0) uls = (a-uls);
- rd->s_start = rd->src + uls;
- rd->d_start = rd->dest + uls;
+ rd->s_start = (uint8_t *)rd->src + uls;
+ rd->d_start = (uint8_t *)rd->dest + uls;
bytes -= uls;
bytes -= (bytes % align);
- rd->s_top = rd->s_start + bytes;
- rd->d_top = rd->d_start + bytes;
+ rd->s_top = (uint8_t *)rd->s_start + bytes;
+ rd->d_top = (uint8_t *)rd->d_start + bytes;
}
@@ -856,7 +858,7 @@ void gf_do_initial_region_alignment(gf_region_data *rd)
void gf_do_final_region_alignment(gf_region_data *rd)
{
- gf_slow_multiply_region(rd, rd->s_top, rd->d_top, rd->src+rd->bytes);
+ gf_slow_multiply_region(rd, rd->s_top, rd->d_top, (uint8_t *)rd->src+rd->bytes);
}
void gf_multby_zero(void *dest, int bytes, int xor)
@@ -897,9 +899,8 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
__m128i ms, md;
#endif
unsigned long uls, uld;
- uint8_t *s8, *d8, *dtop8;
+ uint8_t *s8, *d8;
uint64_t *s64, *d64, *dtop64;
- int abytes;
gf_region_data rd;
if (!xor) {
@@ -910,6 +911,7 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
uld = (unsigned long) dest;
#ifdef INTEL_SSE2
+ int abytes;
s8 = (uint8_t *) src;
d8 = (uint8_t *) dest;
if (uls % 16 == uld % 16) {
@@ -1025,7 +1027,7 @@ static void gf_unaligned_xor(void *src, void *dest, int bytes)
}
d8 = (uint8_t *) d64;
- while (d8 < (uint8_t *) (dest+bytes)) {
+ while (d8 < (uint8_t *) ((uint8_t *)dest+bytes)) {
*d8 ^= *s8;
d8++;
s8++;
diff --git a/src/gf_general.c b/src/gf_general.c
index d9d1700..8fcc737 100644
--- a/src/gf_general.c
+++ b/src/gf_general.c
@@ -240,7 +240,7 @@ int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w)
return (v1->w64 == v2->w64);
} else {
return (v1->w128[0] == v2->w128[0] &&
- v1->w128[0] == v2->w128[0]);
+ v1->w128[1] == v2->w128[1]);
}
}
@@ -267,7 +267,6 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o
int w, words, i;
gf_general_t oa, ot, ft, sb;
char sa[50], soa[50], sot[50], sft[50], ssb[50];
- uint8_t *p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -327,7 +326,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
uint64_t *r64;
int i;
- top = rb+size;
+ top = (uint8_t *)rb+size;
/* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
However, don't allow for zeros in rb, because that will screw up
@@ -366,7 +365,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
r64[1] = g.w128[1];
break;
}
- rb += (w/8);
+ rb = (uint8_t *)rb + (w/8);
}
} else if (w == 4) {
r8a = (uint8_t *) ra;
@@ -408,7 +407,7 @@ int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, cha
h = (gf_internal_t *) gf->scratch;
w = h->w;
- top = ra + size;
+ top = (uint8_t *)ra + size;
if (w == 8 || w == 4) {
r8a = (uint8_t *) ra;
diff --git a/src/gf_general.h b/src/gf_general.h
deleted file mode 100644
index 9a5de52..0000000
--- a/src/gf_general.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_general.h
- *
- * This file has helper routines for doing basic GF operations with any
- * legal value of w. The problem is that w <= 32, w=64 and w=128 all have
- * different data types, which is a pain. The procedures in this file try
- * to alleviate that pain. They are used in gf_unit and gf_time.
- */
-
-#pragma once
-
-#include <stdio.h>
-#include <getopt.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "gf_complete.h"
-
-typedef union {
- uint32_t w32;
- uint64_t w64;
- uint64_t w128[2];
-} gf_general_t;
-
-void gf_general_set_zero(gf_general_t *v, int w);
-void gf_general_set_one(gf_general_t *v, int w);
-void gf_general_set_two(gf_general_t *v, int w);
-
-int gf_general_is_zero(gf_general_t *v, int w);
-int gf_general_is_one(gf_general_t *v, int w);
-int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
-
-void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex);
-int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex);
-
-void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
-
-void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
-void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);
-
-void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a,
- void *ra, void *rb,
- int bytes, int xor);
-
-void gf_general_do_region_check(gf_t *gf, gf_general_t *a,
- void *orig_a, void *orig_target, void *final_target,
- int bytes, int xor);
-
-
-/* Which is M, D or I for multiply, divide or inverse. */
-
-void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size);
-int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char which);
diff --git a/src/gf_int.h b/src/gf_int.h
deleted file mode 100644
index 9221569..0000000
--- a/src/gf_int.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_int.h
- *
- * Internal code for Galois field routines. This is not meant for
- * users to include, but for the internal GF files to use.
- */
-
-#pragma once
-
-#include "gf_complete.h"
-
-#include <string.h>
-
-extern void timer_start (double *t);
-extern double timer_split (const double *t);
-extern void galois_fill_random (void *buf, int len, unsigned int seed);
-
-typedef struct {
- int mult_type;
- int region_type;
- int divide_type;
- int w;
- uint64_t prim_poly;
- int free_me;
- int arg1;
- int arg2;
- gf_t *base_gf;
- void *private;
-} gf_internal_t;
-
-extern int gf_w4_init (gf_t *gf);
-extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w8_init (gf_t *gf);
-extern int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w16_init (gf_t *gf);
-extern int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w32_init (gf_t *gf);
-extern int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w64_init (gf_t *gf);
-extern int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_w128_init (gf_t *gf);
-extern int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-extern int gf_wgen_init (gf_t *gf);
-extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2);
-
-void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
-gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
-
-extern void gf_alignment_error(char *s, int a);
-
-extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
-
-/* This returns the correct default for prim_poly when base is used as the base
- field for COMPOSITE. It returns 0 if we don't have a default prim_poly. */
-
-extern uint64_t gf_composite_get_default_poly(gf_t *base);
-
-/* This structure lets you define a region multiply. It helps because you can handle
- unaligned portions of the data with the procedures below, which really cleans
- up the code. */
-
-typedef struct {
- gf_t *gf;
- void *src;
- void *dest;
- int bytes;
- uint64_t val;
- int xor;
- int align; /* The number of bytes to which to align. */
- void *s_start; /* The start and the top of the aligned region. */
- void *d_start;
- void *s_top;
- void *d_top;
-} gf_region_data;
-
-/* This lets you set up one of these in one call. It also sets the start/top pointers. */
-
-void gf_set_region_data(gf_region_data *rd,
- gf_t *gf,
- void *src,
- void *dest,
- int bytes,
- uint64_t val,
- int xor,
- int align);
-
-/* This performs gf->multiply.32() on all of the unaligned bytes in the beginning of the region */
-
-extern void gf_do_initial_region_alignment(gf_region_data *rd);
-
-/* This performs gf->multiply.32() on all of the unaligned bytes in the end of the region */
-
-extern void gf_do_final_region_alignment(gf_region_data *rd);
-
-extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
-
-extern void gf_multby_zero(void *dest, int bytes, int xor);
-extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
-
-typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
- GF_E_MDEFREG, /* Reg != Default && Mult == Default */
- GF_E_MDEFARG, /* Args != Default && Mult == Default */
- GF_E_DIVCOMP, /* Mult == Composite && Div != Default */
- GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */
- GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */
- GF_E_SSE__NO, /* Reg == SSE && Reg == NOSSE */
- GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */
- GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/
- GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */
- GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */
- GF_E_MATRIXW, /* Div == MATRIX && w > 32 */
- GF_E_BAD___W, /* Illegal w */
- GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */
- GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */
- GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */
- GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */
- GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */
- GF_E_QUAD__W, /* Reg == QUAD && w != 4 */
- GF_E_QUAD__J, /* Reg == QUAD && other Reg */
- GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/
- GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */
- GF_E_SSESHIF, /* Mult == Shift && Reg == SSE|NOSSE */
- GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */
- GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SSE|NOSSE */
- GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */
- GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */
- GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */
- GF_E_LOGBADW, /* Mult == LOGx, w too big*/
- GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */
- GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */
- GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */
- GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */
- GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */
- GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
- GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
- GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
- GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
- GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
- GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
- GF_E_TABLE_W, /* Mult == TABLE, w too big */
- GF_E_TAB_SSE, /* Mult == TABLE, SSE|NOSSE only apply to w == 4 */
- GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */
- GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */
- GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
- GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
- GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
- GF_E_SP128_A, /* Mult == SPLIT, w=128, SSE only with 4/128 */
- GF_E_SP128_S, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
- GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */
- GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
- GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
- GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */
- GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */
- GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */
- GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */
- GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */
- GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */
- GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */
- GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */
- GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */
- GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */
- GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */
- GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */
- GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */
- GF_E_COMP_SS, /* Mult == COMP, SSE|NOSSE */
- GF_E_COMP__W, /* Mult == COMP, Bad w. */
- GF_E_UNKFLAG, /* Unknown flag in create_from.... */
- GF_E_UNKNOWN, /* Unknown mult_type. */
- GF_E_UNK_REG, /* Unknown region_type. */
- GF_E_UNK_DIV, /* Unknown divide_type. */
- GF_E_CFM___W, /* Mult == CFM, Bad w. */
- GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */
- GF_E_FEWARGS, /* Too few args in argc/argv. */
- GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */
- GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */
- GF_E_COMPXPP, /* Can't derive a default pp for composite field. */
- GF_E_BASE__W, /* Composite -- Base field is the wrong size. */
- GF_E_TWOMULT, /* In create_from... two -m's. */
- GF_E_TWO_DIV, /* In create_from... two -d's. */
- GF_E_POLYSPC, /* Bad numbera after -p. */
- GF_E_SPLITAR, /* Ran out of arguments in SPLIT */
- GF_E_SPLITNU, /* Arguments not integers in SPLIT. */
- GF_E_GROUPAR, /* Ran out of arguments in GROUP */
- GF_E_GROUPNU, /* Arguments not integers in GROUP. */
- GF_E_DEFAULT } gf_error_type_t;
-
diff --git a/src/gf_method.c b/src/gf_method.c
index 36ec3c4..90d62af 100644
--- a/src/gf_method.c
+++ b/src/gf_method.c
@@ -21,10 +21,9 @@
int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
{
int mult_type, divide_type, region_type;
- int arg1, arg2, subrg_size;
+ int arg1, arg2;
uint64_t prim_poly;
gf_t *base;
- char *crt, *x, *y;
mult_type = GF_MULT_DEFAULT;
region_type = GF_REGION_DEFAULT;
@@ -48,6 +47,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
} else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
mult_type = GF_MULT_CARRY_FREE;
starting++;
+ //ADAM
+ } else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) {
+ mult_type = GF_MULT_CARRY_FREE_GK;
+ starting++;
} else if (strcmp(argv[starting], "GROUP") == 0) {
mult_type = GF_MULT_GROUP;
if (argc < starting + 3) {
diff --git a/src/gf_rand.h b/src/gf_rand.h
deleted file mode 100644
index 24294ad..0000000
--- a/src/gf_rand.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
- * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
- * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
- *
- * gf_rand.h
- *
- * Random number generation, using the "Mother of All" random number generator. */
-
-#pragma once
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-/* These are all pretty self-explanatory */
-uint32_t MOA_Random_32();
-uint64_t MOA_Random_64();
-void MOA_Random_128(uint64_t *x);
-uint32_t MOA_Random_W(int w, int zero_ok);
-void MOA_Fill_Random_Region (void *reg, int size); /* reg should be aligned to 4 bytes, but
- size can be anything. */
-void MOA_Seed(uint32_t seed);
diff --git a/src/gf_w128.c b/src/gf_w128.c
index fae9f5c..61cf3d7 100644
--- a/src/gf_w128.c
+++ b/src/gf_w128.c
@@ -81,6 +81,7 @@ int xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w128_clm_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes,
@@ -89,9 +90,7 @@ int xor)
int i;
gf_val_128_t s128;
gf_val_128_t d128;
- uint64_t c128[2];
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
__m128i prim_poly;
@@ -106,8 +105,6 @@ int xor)
if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
}
- set_zero(c128, 0);
-
s128 = (gf_val_128_t) src;
d128 = (gf_val_128_t) dest;
@@ -184,8 +181,8 @@ int xor)
d128[i+1] = (uint64_t)_mm_extract_epi64(result1,0);
}
}
-#endif
}
+#endif
/*
* Some w128 notes:
@@ -384,7 +381,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
{
#if defined(INTEL_SSE4)
int i;
- __m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
+ __m128i a, b, pp, prod, amask, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
uint32_t topbit, middlebit, pmask; /* this is used as a boolean value */
gf_internal_t *h;
@@ -400,7 +397,6 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
pmask = 0x80000000;
amask = _mm_insert_epi32(prod, 0x80000000, 0x3);
u_middle_one = _mm_insert_epi32(prod, 1, 0x2);
- l_middle_one = _mm_insert_epi32(prod, 1 << 31, 0x1);
for (i = 0; i < 64; i++) {
topbit = (_mm_extract_epi32(prod, 0x3) & pmask);
@@ -599,13 +595,13 @@ gf_w128_split_4_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
}
}
+#if defined(INTEL_SSSE3) && defined(INTEL_SSE4)
static
void
gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
+ int i, j, k;
uint64_t pp, v[2], s, *s64, *d64, *top;
__m128i p, tables[32][16];
struct gf_w128_split_4_128_data *ld;
@@ -624,7 +620,7 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
/* Doing this instead of gf_do_initial_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((uint8_t *)rd.s_start-(uint8_t *)src), xor);
s64 = (uint64_t *) rd.s_start;
d64 = (uint64_t *) rd.d_start;
@@ -694,18 +690,18 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
/* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
-#endif
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((uint8_t *)src+bytes)-(uint8_t *)rd.s_top, xor);
}
+#endif
+#if defined(INTEL_SSSE3) && defined(INTEL_SSE4)
static
void
gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v[2], s, *s64, *d64, *top;
+ int i, j, k;
+ uint64_t pp, v[2], *s64, *d64, *top;
__m128i si, tables[32][16], p[16], v0, mask1;
struct gf_w128_split_4_128_data *ld;
uint8_t btable[16];
@@ -724,7 +720,7 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
/* Doing this instead of gf_do_initial_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((uint8_t *)rd.s_start-(uint8_t *)src), xor);
s64 = (uint64_t *) rd.s_start;
d64 = (uint64_t *) rd.d_start;
@@ -804,9 +800,9 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
}
/* Doing this instead of gf_do_final_region_alignment() because that doesn't hold 128-bit vals */
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
-#endif
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((uint8_t *)src+bytes)-(uint8_t *)rd.s_top, xor);
}
+#endif
static
void
@@ -886,7 +882,7 @@ gf_w128_split_8_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
void
gf_w128_bytwo_b_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
- uint64_t bmask, pp, vmask;
+ uint64_t bmask, pp;
gf_internal_t *h;
uint64_t a[2], c[2], b[2], *s64, *d64, *top;
gf_region_data rd;
@@ -987,7 +983,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
void
gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
- int i,j;
+ int i;
/* index_r, index_m, total_m (if g_r > g_m) */
int i_r, i_m, t_m;
int mask_m, mask_r;
@@ -1162,11 +1158,12 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
uint64_t c_i[2];
uint64_t *b;
uint64_t one = 1;
- uint64_t buf, buf1;
/* This needs to return some sort of error (in b128?) */
if (a128[0] == 0 && a128[1] == 0) return;
+ b = (uint64_t *) b128;
+
e_im1[0] = 0;
e_im1[1] = ((gf_internal_t *) (gf->scratch))->prim_poly;
e_i[0] = a128[0];
@@ -1240,7 +1237,6 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
d_i = d_ip1;
}
- b = (uint64_t *) b128;
b[0] = y_i[0];
b[1] = y_i[1];
return;
@@ -1326,7 +1322,6 @@ static
void
gf_w128_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
uint64_t b0 = val[1];
@@ -1381,14 +1376,13 @@ gf_w128_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_12
gf_internal_t *h = (gf_internal_t *) gf->scratch; gf_t *base_gf = h->base_gf;
gf_val_64_t val0 = val[1];
gf_val_64_t val1 = val[0];
- uint64_t *l, *hi;
uint8_t *slow, *shigh;
uint8_t *dlow, *dhigh, *top;
int sub_reg_size;
gf_region_data rd;
gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 64);
- gf_w128_multiply_region_from_single(gf, src, dest, val, (rd.s_start-src), xor);
+ gf_w128_multiply_region_from_single(gf, src, dest, val, ((uint8_t *)rd.s_start-(uint8_t *)src), xor);
slow = (uint8_t *) rd.s_start;
dlow = (uint8_t *) rd.d_start;
@@ -1404,7 +1398,7 @@ gf_w128_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_12
base_gf->multiply_region.w64(base_gf, shigh, dhigh, base_gf->multiply.w64(base_gf, h->prim_poly, val1
), sub_reg_size, 1);
- gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, (src+bytes)-rd.s_top, xor);
+ gf_w128_multiply_region_from_single(gf, rd.s_top, rd.d_top, val, ((uint8_t *)src+bytes)-(uint8_t *)rd.s_top, xor);
}
@@ -1419,8 +1413,6 @@ int gf_w128_composite_init(gf_t *gf)
gf->multiply_region.w128 = gf_w128_composite_multiply_region;
}
- gf_internal_t *base_h = (gf_internal_t *) h->base_gf->scratch;
-
gf->multiply.w128 = gf_w128_composite_multiply;
gf->divide.w128 = gf_w128_divide_from_inverse;
gf->inverse.w128 = gf_w128_composite_inverse;
@@ -1444,8 +1436,6 @@ int gf_w128_cfm_init(gf_t *gf)
static
int gf_w128_shift_init(gf_t *gf)
{
- gf_internal_t *h;
- h = (gf_internal_t*) gf->scratch;
gf->multiply.w128 = gf_w128_shift_multiply;
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_multiply_region_from_single;
@@ -1501,10 +1491,10 @@ void gf_w128_group_r_init(gf_t *gf)
return;
}
+#if 0 // defined(INTEL_SSE4)
static
void gf_w128_group_r_sse_init(gf_t *gf)
{
-#if defined(INTEL_SSE4)
int i, j;
int g_r;
uint64_t pp;
@@ -1526,8 +1516,8 @@ void gf_w128_group_r_sse_init(gf_t *gf)
}
}
return;
-#endif
}
+#endif
static
int gf_w128_split_init(gf_t *gf)
@@ -1587,16 +1577,14 @@ int gf_w128_group_init(gf_t *gf)
{
gf_internal_t *scratch;
gf_group_tables_t *gt;
- int g_m, g_r, size_r;
- long tmp;
+ int g_r, size_r;
scratch = (gf_internal_t *) gf->scratch;
gt = scratch->private;
- g_m = scratch->arg1;
g_r = scratch->arg2;
size_r = (1 << g_r);
- gt->r_table = scratch->private + (2 * sizeof(uint64_t *));
+ gt->r_table = (gf_val_128_t)((uint8_t *)scratch->private + (2 * sizeof(uint64_t *)));
gt->m_table = gt->r_table + size_r;
gt->m_table[2] = 0;
gt->m_table[3] = 0;
@@ -1690,7 +1678,6 @@ void gf_w128_composite_extract_word(gf_t *gf, void *start, int bytes, int index,
int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int size_m, size_r;
- int w = 128;
if (divide_type==GF_DIVIDE_MATRIX) return 0;
switch(mult_type)
@@ -1739,7 +1726,7 @@ int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int ar
int gf_w128_init(gf_t *gf)
{
- gf_internal_t *h, *h_base, *h_base_base, *h_base_base_base;
+ gf_internal_t *h;
int no_default_flag = 0;
h = (gf_internal_t *) gf->scratch;
diff --git a/src/gf_w16.c b/src/gf_w16.c
index 454c6cc..272a95b 100644
--- a/src/gf_w16.c
+++ b/src/gf_w16.c
@@ -125,6 +125,7 @@ gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t
gf_do_final_region_alignment(&rd);
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -132,8 +133,6 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
gf_region_data rd;
uint16_t *s16;
uint16_t *d16;
-
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -186,9 +185,10 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -197,8 +197,6 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -255,9 +253,10 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
@@ -266,8 +265,6 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -328,8 +325,8 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
inline
@@ -453,7 +450,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -500,7 +497,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -540,7 +537,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a16, 0);
@@ -605,13 +602,13 @@ int gf_w16_shift_init(gf_t *gf)
static
int gf_w16_cfm_init(gf_t *gf)
{
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
/*Ben: Determining how many reductions to do */
-#if defined(INTEL_SSE4_PCLMUL)
if ((0xfe00 & h->prim_poly) == 0) {
gf->multiply.w32 = gf_w16_clm_multiply_2;
gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
@@ -774,9 +771,8 @@ static
void
gf_w16_split_4_16_lazy_nosse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t i, j, a, b, c, prod;
+ uint64_t i, j, c, prod;
uint8_t *s8, *d8, *top;
- gf_internal_t *h;
uint16_t table[4][16];
gf_region_data rd;
@@ -786,8 +782,6 @@ gf_w16_split_4_16_lazy_nosse_altmap_multiply_region(gf_t *gf, void *src, void *d
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32);
gf_do_initial_region_alignment(&rd);
- h = (gf_internal_t *) gf->scratch;
-
/*Ben: Constructs lazy multiplication table*/
for (j = 0; j < 16; j++) {
@@ -840,7 +834,6 @@ gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
{
uint64_t i, j, a, c, prod;
uint16_t *s16, *d16, *top;
- gf_internal_t *h;
uint16_t table[4][16];
gf_region_data rd;
@@ -850,8 +843,6 @@ gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2);
gf_do_initial_region_alignment(&rd);
- h = (gf_internal_t *) gf->scratch;
-
for (j = 0; j < 16; j++) {
for (i = 0; i < 4; i++) {
c = (j << (i*4));
@@ -880,7 +871,7 @@ static
void
gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t j, k, v, a, c, prod, *s64, *d64, *top64;
+ uint64_t j, k, v, a, prod, *s64, *d64, *top64;
gf_internal_t *h;
uint64_t htable[256], ltable[256];
gf_region_data rd;
@@ -966,7 +957,7 @@ gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
static void
gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- uint64_t j, a, c, pp;
+ uint64_t c;
gf_internal_t *h;
struct gf_w16_lazytable_data *ltd;
gf_region_data rd;
@@ -1010,12 +1001,12 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v
{
#ifdef INTEL_SSSE3
uint64_t i, j, *s64, *d64, *top64;;
- uint64_t a, c, prod;
+ uint64_t c, prod;
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
- __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, shuffler, unshuffler, lmask;
+ __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, lmask;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
@@ -1147,7 +1138,6 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
uint8_t low[4][16];
uint8_t high[4][16];
gf_region_data rd;
- struct gf_single_table_data *std;
__m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4];
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
@@ -1358,11 +1348,8 @@ issse3 = 0;
static
int gf_w16_table_init(gf_t *gf)
{
- gf_internal_t *h;
gf_w16_log_init(gf);
- h = (gf_internal_t *) gf->scratch;
-
gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region;
return 1;
}
@@ -1557,15 +1544,14 @@ gf_w16_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint32_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w16_bytwo_data *btd;
gf_region_data rd;
@@ -1618,17 +1604,16 @@ gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1644,16 +1629,15 @@ gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1672,15 +1656,15 @@ gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1728,14 +1712,13 @@ gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w16_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w16_bytwo_data *btd;
gf_region_data rd;
@@ -1834,6 +1817,7 @@ gf_w16_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
s64++;
}
}
+ break;
default:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1988,7 +1972,6 @@ gf_val_32_t
gf_w16_composite_multiply_inline(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint8_t b0 = b & 0x00ff;
uint8_t b1 = (b & 0xff00) >> 8;
uint8_t a0 = a & 0x00ff;
@@ -2072,7 +2055,6 @@ static
void
gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
uint8_t b0 = val & 0x00ff;
@@ -2080,7 +2062,6 @@ gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t va
uint16_t *s16, *d16, *top;
uint8_t a0, a1, a1b1, *mt;
gf_region_data rd;
- struct gf_w16_logtable_data *ltd;
struct gf_w16_composite_data *cd;
cd = (struct gf_w16_composite_data *) h->private;
@@ -2237,7 +2218,6 @@ inline
gf_val_32_t
gf_w16_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
uint16_t p, l, ind, r, a16;
struct gf_w16_group_4_4_data *d44;
@@ -2270,7 +2250,6 @@ gf_w16_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
static
void gf_w16_group_4_4_region_multiply(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint16_t p, l, ind, r, a16, p16;
struct gf_w16_group_4_4_data *d44;
gf_region_data rd;
@@ -2475,10 +2454,8 @@ int gf_w16_init(gf_t *gf)
uint16_t *gf_w16_get_log_table(gf_t *gf)
{
- gf_internal_t *h;
struct gf_w16_logtable_data *ltd;
- h = (gf_internal_t *) gf->scratch;
if (gf->multiply.w32 == gf_w16_log_multiply) {
ltd = (struct gf_w16_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
return (uint16_t *) ltd->log_tbl;
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 03f285f..c90c7fb 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -120,13 +120,13 @@ xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
+
static
void
gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
-
int i;
uint32_t *s32;
uint32_t *d32;
@@ -167,16 +167,16 @@ gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
+
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
-
int i;
uint32_t *s32;
uint32_t *d32;
@@ -222,14 +222,14 @@ gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
uint32_t *d32;
@@ -279,8 +279,8 @@ gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32
d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
}
}
-#endif
}
+#endif
static
inline
@@ -399,7 +399,94 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
extra memory.
*/
+//ADAM
+/*
+ * Single-word multiply for the w=32 "cfmgk" method.
+ *
+ * Reads two 64-bit constants from h->private (q+ at index 0, g* at
+ * index 1), forms the carry-less product of a32 and b32, then folds the
+ * part of the product above bit 31 back down with two further carry-less
+ * multiplies (first by q+, then by g*) and returns the low 32 bits.
+ *
+ * When INTEL_SSE4_PCLMUL is not defined no arithmetic is done and the
+ * function returns 0.
+ */
+static
+inline
+gf_val_32_t
+gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
+{
+  gf_val_32_t product = 0;
+
+#if defined(INTEL_SSE4_PCLMUL)
+
+  gf_internal_t *scratch = gf->scratch;
+  uint64_t *consts = (uint64_t *) scratch->private;
+  uint64_t q_plus = consts[0];
+  uint64_t g_star = consts[1];
+  __m128i va, vb, vq, vg;
+  __m128i prod, fold;
+
+  /* Operands live in the low lanes; everything above them is zero. */
+  va = _mm_insert_epi32 (_mm_setzero_si128(), a32, 0);
+  vb = _mm_insert_epi32 (va, b32, 0);
+  vq = _mm_insert_epi64 (va, q_plus, 0);
+  vg = _mm_insert_epi64 (va, g_star, 0);
+
+  /* Raw carry-less product, then the two folding multiplies. */
+  prod = _mm_clmulepi64_si128 (va, vb, 0);
+  fold = _mm_clmulepi64_si128 (vq, _mm_srli_si128 (prod, 4), 0);
+  fold = _mm_clmulepi64_si128 (vg, _mm_srli_si128 (fold, 4), 0);
+  prod = _mm_xor_si128 (prod, fold);
+
+  /* The answer is the low 32-bit lane. */
+  product = ((gf_val_32_t)_mm_extract_epi32(prod, 0));
+#endif
+  return product;
+}
+
+//ADAM
+#if defined(INTEL_SSE4_PCLMUL)
+/*
+ * Region multiply for the w=32 "cfmgk" method.
+ *
+ * Multiplies each 32-bit word of src by val.  With xor == 0 the product
+ * overwrites the matching word of dest; otherwise it is XORed into it.
+ * bytes/sizeof(uint32_t) words are processed; any trailing bytes beyond
+ * a multiple of four are not touched.
+ *
+ * val == 0 and val == 1 are delegated to gf_multby_zero() and
+ * gf_multby_one().  The reduction constants q+ and g* are read from
+ * h->private (index 0 and 1).
+ */
+static
+void
+gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
+{
+
+  int i;
+  uint32_t *s32;
+  uint32_t *d32;
+
+  __m128i a, b;
+  __m128i result;
+  __m128i w;
+  __m128i g, q;
+  gf_internal_t * h = gf->scratch;
+  uint64_t g_star, q_plus;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
+
+  q_plus = *(uint64_t *) h->private;
+  g_star = *((uint64_t *) h->private + 1);
+
+  /*
+   * a must be initialized before g and q are derived from it: inserting
+   * into an uninitialized __m128i reads an indeterminate value and leaves
+   * garbage in the upper 64 bits.
+   */
+  a = _mm_insert_epi32 (_mm_setzero_si128(), val, 0);
+  g = _mm_insert_epi64 (a, g_star, 0);
+  q = _mm_insert_epi64 (a, q_plus, 0);
+  s32 = (uint32_t *) src;
+  d32 = (uint32_t *) dest;
+
+  if (xor) {
+    for (i = 0; i < bytes/sizeof(uint32_t); i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      /* carry-less product, then fold the high part back with q+ and g* */
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] ^= ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  } else {
+    for (i = 0; i < bytes/sizeof(uint32_t); i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      /* carry-less product, then fold the high part back with q+ and g* */
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  }
+}
+#endif
static
@@ -414,7 +501,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -446,6 +533,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
#endif
return rv;
}
+
static
inline
gf_val_32_t
@@ -458,7 +546,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -497,7 +585,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
@@ -552,13 +640,48 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
return product;
}
+//ADAM: init for the w=32 cfmgk multiply -- always sets inverse (gf_w32_euclid) and a default multiply_region; only when INTEL_SSE4_PCLMUL is defined does it install the cfmgk routines, fill h->private with q+ and g*, and return 1; otherwise it returns 0.
static
-int gf_w32_cfm_init(gf_t *gf)
+int gf_w32_cfmgk_init(gf_t *gf)
{
+ gf->inverse.w32 = gf_w32_euclid;
+ gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
+
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
+ gf->multiply.w32 = gf_w32_cfmgk_multiply;
+ gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;
+
+ //ADAM: h->private is used as two uint64_t slots -- q+ at index 0, g* at index 1
+ uint64_t *q_plus = (uint64_t *) h->private;
+ uint64_t *g_star = (uint64_t *) h->private + 1;
+ //q+: starts as bit 32; for each bit i (63 down to 32) set in tmp the loop sets bit (i-32) of q+ and cancels it with prim_poly << (i-32). NOTE(review): looks like long division by prim_poly and presumably relies on bit 32 of prim_poly being set -- confirm.
+ uint64_t tmp = h->prim_poly << 32;
+ *q_plus = 1ULL << 32;
+
+ int i;
+ for(i = 63; i >= 32; i--)
+ if((1ULL << i) & tmp)
+ {
+ *q_plus |= 1ULL << (i-32);
+ tmp ^= h->prim_poly << (i-32);
+ }
+
+ //g*: the low 32 bits of prim_poly
+ *g_star = h->prim_poly & ((1ULL << 32) - 1);
+
+ return 1;
+#endif
+
+ return 0;
+}
+
+ static
+int gf_w32_cfm_init(gf_t *gf)
+{
gf->inverse.w32 = gf_w32_euclid;
gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
@@ -566,6 +689,10 @@ int gf_w32_cfm_init(gf_t *gf)
/*Ben: Check to see how many reduction steps it will take*/
#if defined(INTEL_SSE4_PCLMUL)
+ gf_internal_t *h;
+
+ h = (gf_internal_t *) gf->scratch;
+
if ((0xfffe0000 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w32_clm_multiply_2;
gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
@@ -616,9 +743,8 @@ gf_w32_group_set_shift_tables(uint32_t *shift, uint32_t val, gf_internal_t *h)
static
void gf_w32_group_s_equals_r_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
gf_region_data rd;
@@ -741,9 +867,8 @@ inline
gf_val_32_t
gf_w32_group_s_equals_r_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
@@ -781,8 +906,7 @@ inline
gf_val_32_t
gf_w32_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
struct gf_w32_group_data *d44;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
@@ -832,7 +956,7 @@ gf_w32_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
int i;
int leftover;
- uint64_t p, l, r, mask;
+ uint64_t p, l, r;
uint32_t a32, ind;
int g_s, g_r;
struct gf_w32_group_data *gd;
@@ -986,15 +1110,14 @@ gf_w32_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint32_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
@@ -1039,14 +1162,13 @@ gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w32_bytwo_data *btd;
gf_region_data rd;
@@ -1101,6 +1223,7 @@ gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
s64++;
}
}
+ break;
case 4:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1144,6 +1267,7 @@ gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
s64++;
}
}
+ break;
default:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1181,14 +1305,13 @@ gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1204,16 +1327,15 @@ gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1232,15 +1354,15 @@ gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint32_t itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1288,8 +1410,8 @@ gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
int gf_w32_bytwo_init(gf_t *gf)
@@ -1556,14 +1678,14 @@ gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSSE3
static
void
gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, tindex;
- uint32_t pp, v, v2, s, *s32, *d32, *top;
+ int i, tindex;
+ uint32_t pp, v, v2, *s32, *d32, *top;
__m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2;
gf_region_data rd;
@@ -1635,8 +1757,8 @@ gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
@@ -1699,8 +1821,8 @@ gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint32_t pp, v, s, *s32, *d32, *top, *realtop;
+ int i, j, k;
+ uint32_t pp, v, *s32, *d32, *top;
__m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3;
struct gf_split_4_32_lazy_data *ld;
uint8_t btable[16];
@@ -1891,9 +2013,9 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
{
#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint32_t pp, v, s, *s32, *d32, *top, tmp_table[16];
- __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
+ int i, j, k;
+ uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
+ __m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8;
__m128i tv1, tv2, tv3, tv0;
uint8_t btable[16];
gf_region_data rd;
@@ -2378,7 +2500,6 @@ uint32_t
gf_w32_composite_multiply_inline(gf_t *gf, uint32_t a, uint32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint32_t b0 = b & 0x0000ffff;
uint32_t b1 = b >> 16;
uint32_t a0 = a & 0x0000ffff;
@@ -2620,11 +2741,8 @@ int gf_w32_composite_init(gf_t *gf)
int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int ss;
int issse3 = 0;
- ss = (GF_REGION_SSE | GF_REGION_NOSSE);
-
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
@@ -2665,6 +2783,10 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
case GF_MULT_CARRY_FREE:
return sizeof(gf_internal_t);
break;
+ //ADAM
+ case GF_MULT_CARRY_FREE_GK:
+ return sizeof(gf_internal_t) + sizeof(uint64_t)*2;
+ break;
case GF_MULT_SHIFT:
return sizeof(gf_internal_t);
break;
@@ -2712,14 +2834,15 @@ int gf_w32_init(gf_t *gf)
gf->multiply_region.w32 = NULL;
switch(h->mult_type) {
- case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
- case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
- case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
+ case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
+ case GF_MULT_CARRY_FREE_GK: if (gf_w32_cfmgk_init(gf) == 0) return 0; break; //ADAM
+ case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
+ case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
case GF_MULT_DEFAULT:
- case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
- case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
+ case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
+ case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_p:
- case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
+ case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
default: return 0;
}
if (h->divide_type == GF_DIVIDE_EUCLID) {
diff --git a/src/gf_w4.c b/src/gf_w4.c
index 2504ec6..6bc79d0 100644
--- a/src/gf_w4.c
+++ b/src/gf_w4.c
@@ -61,7 +61,8 @@ struct gf_bytwo_data {
t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \
b = (t1 ^ (t2 & ip));}
-#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\
+// ToDo(KMG/JSP): Why is 0x88 hard-coded?
+#define SSE_AB2(pp, m1, va, t1, t2) {\
t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \
t2 = _mm_and_si128(va, _mm_set1_epi8(0x88)); \
t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \
@@ -414,14 +415,14 @@ gf_w4_single_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
#define MM_PRINT(s, r) { uint8_t blah[16]; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (i = 0; i < 16; i++) printf(" %02x", blah[i]); printf("\n"); }
+#ifdef INTEL_SSSE3
static
void
gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_region_data rd;
uint8_t *base, *sptr, *dptr, *top;
- __m128i tl, loset, h4, r, va, th;
+ __m128i tl, loset, r, va, th;
struct gf_single_table_data *std;
@@ -460,15 +461,15 @@ gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_3
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
int gf_w4_single_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_single_table_data *std;
- int a, b, prod, loga, logb;
+ int a, b, prod;
h = (gf_internal_t *) gf->scratch;
@@ -531,7 +532,6 @@ static
void
gf_w4_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
int i;
uint8_t *s8, *d8, *base;
gf_region_data rd;
@@ -560,7 +560,7 @@ int gf_w4_double_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_double_table_data *std;
- int a, b, c, prod, loga, logb, ab;
+ int a, b, c, prod, ab;
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
@@ -687,7 +687,7 @@ int gf_w4_quad_table_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_quad_table_data *std;
- int prod, loga, logb, ab, val, a, b, c, d, va, vb, vc, vd;
+ int prod, val, a, b, c, d, va, vb, vc, vd;
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
@@ -731,10 +731,9 @@ int gf_w4_quad_table_lazy_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_quad_table_lazy_data *std;
- int a, b, c, prod, loga, logb, ab;
+ int a, b, prod, loga, logb;
uint8_t log_tbl[GF_FIELD_SIZE];
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
- uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
std = (struct gf_quad_table_lazy_data *)h->private;
@@ -794,7 +793,6 @@ int gf_w4_table_init(gf_t *gf)
} else {
return gf_w4_quad_table_init(gf);
}
- return gf_w4_double_table_init(gf);
} else {
return gf_w4_single_table_init(gf);
}
@@ -911,23 +909,22 @@ gf_w4_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
}
#define BYTWO_P_ONESTEP {\
- SSE_AB2(pp, m1 ,m2, prod, t1, t2); \
+ SSE_AB2(pp, m1, prod, t1, t2); \
t1 = _mm_and_si128(v, one); \
t1 = _mm_sub_epi8(t1, one); \
t1 = _mm_and_si128(t1, ta); \
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint8_t vrev;
- uint64_t amask;
- __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
+ __m128i pp, m1, ta, prod, t1, t2, tp, one, v;
struct gf_bytwo_data *btd;
gf_region_data rd;
@@ -950,7 +947,6 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
one = _mm_set1_epi8(1);
while (d8 < (uint8_t *) rd.d_top) {
@@ -967,8 +963,8 @@ gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/*
static
@@ -1036,354 +1032,330 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
*/
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_load_si128 ((__m128i *)(d8));
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_load_si128 ((__m128i *)(d8));
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(va, vb);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = va;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
va = _mm_xor_si128(va, vb);
_mm_store_si128((__m128i *)d8, va);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
pp = _mm_set1_epi8(btd->prim_poly&0xff);
m1 = _mm_set1_epi8((btd->mask1)&0xff);
- m2 = _mm_set1_epi8((btd->mask2)&0xff);
while (d8 < (uint8_t *) rd->d_top) {
va = _mm_load_si128 ((__m128i *)(s8));
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va);
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
vb = _mm_xor_si128(vb, va);
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
struct gf_bytwo_data *btd;
@@ -1464,7 +1436,7 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
if (tb & 1) vb = _mm_xor_si128(vb, va);
tb >>= 1;
if (tb == 0) break;
- SSE_AB2(pp, m1, m2, va, t1, t2);
+ SSE_AB2(pp, m1, va, t1, t2);
}
_mm_store_si128((__m128i *)d8, vb);
d8 += 16;
@@ -1491,16 +1463,13 @@ gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w4_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
- int i;
- uint8_t *s8, *d8, *top;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_bytwo_data *btd;
gf_region_data rd;
@@ -1614,6 +1583,7 @@ gf_w4_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s64++;
}
}
+ break;
case 6:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1636,6 +1606,7 @@ gf_w4_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s64++;
}
}
+ break;
case 7:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1963,10 +1934,6 @@ int gf_w4_bytwo_init(gf_t *gf)
static
int gf_w4_cfm_init(gf_t *gf)
{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
#if defined(INTEL_SSE4_PCLMUL)
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
@@ -1986,8 +1953,6 @@ int gf_w4_shift_init(gf_t *gf)
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int region_tbl_size;
- int ss;
int issse3 = 0;
#ifdef INTEL_SSSE3
diff --git a/src/gf_w64.c b/src/gf_w64.c
index 73bf164..f04daf0 100644
--- a/src/gf_w64.c
+++ b/src/gf_w64.c
@@ -87,20 +87,19 @@ xor)
}
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w64_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
- int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
__m128i m1, m2, m3, m4;
gf_internal_t * h = gf->scratch;
@@ -121,7 +120,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
- size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@@ -175,19 +173,18 @@ xor)
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w64_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int
xor)
{
- int i, size;
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
-#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@@ -210,7 +207,6 @@ xor)
s64 = (gf_val_64_t *) rd.s_start;
d64 = (gf_val_64_t *) rd.d_start;
top = (gf_val_64_t *) rd.d_top;
- size = bytes/sizeof(gf_val_64_t);
if (xor) {
while (d64 != top) {
@@ -263,8 +259,8 @@ xor)
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
inline
@@ -321,7 +317,7 @@ inline
gf_val_64_t
gf_w64_shift_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
- uint64_t pl, pr, ppl, ppr, i, pp, a, bl, br, one, lbit;
+ uint64_t pl, pr, ppl, ppr, i, a, bl, br, one, lbit;
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
@@ -468,9 +464,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
{
#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
- int i, j, k;
uint8_t *s8, *d8, *dtop;
- uint64_t *s64, *d64;
gf_region_data rd;
__m128i v, b, m, prim_poly, c, fr, w, result;
@@ -492,7 +486,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
if (xor) {
while (d8 != dtop) {
- s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@@ -521,7 +514,6 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
}
} else {
while (d8 < dtop) {
- s64 = (uint64_t *) s8;
b = _mm_load_si128((__m128i *) s8);
result = _mm_clmulepi64_si128 (b, v, 0);
c = _mm_insert_epi32 (_mm_srli_si128 (result, 8), 0, 0);
@@ -741,8 +733,6 @@ gf_w64_split_16_64_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint64_
static
int gf_w64_shift_init(gf_t *gf)
{
- gf_internal_t *h;
-
gf->multiply.w64 = gf_w64_shift_multiply;
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
@@ -752,14 +742,14 @@ int gf_w64_shift_init(gf_t *gf)
static
int gf_w64_cfm_init(gf_t *gf)
{
- gf_internal_t *h;
-
- h = (gf_internal_t *) gf->scratch;
-
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
#if defined(INTEL_SSE4_PCLMUL)
+ gf_internal_t *h;
+
+ h = (gf_internal_t *) gf->scratch;
+
if ((0xfffffffe00000000ULL & h->prim_poly) == 0){
gf->multiply.w64 = gf_w64_clm_multiply_2;
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2;
@@ -803,7 +793,6 @@ inline
gf_val_64_t
gf_w64_group_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
{
- int i;
uint64_t top, bot, mask, tp;
int g_s, g_r, lshift, rshift;
struct gf_w64_group_data *gd;
@@ -854,7 +843,7 @@ static
void gf_w64_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
int i, fzb;
- uint64_t a64, smask, rmask, top, bot, tp, one;
+ uint64_t a64, smask, rmask, top, bot, tp;
int lshift, rshift, g_s, g_r;
gf_region_data rd;
uint64_t *s64, *d64, *dtop;
@@ -936,9 +925,8 @@ inline
gf_val_64_t
gf_w64_group_s_equals_r_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
{
- int i;
int leftover, rs;
- uint64_t p, l, ind, r, a64;
+ uint64_t p, l, ind, a64;
int bits_left;
int g_s;
@@ -974,9 +962,8 @@ gf_w64_group_s_equals_r_multiply(gf_t *gf, gf_val_64_t a, gf_val_64_t b)
static
void gf_w64_group_s_equals_r_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- int i;
int leftover, rs;
- uint64_t p, l, ind, r, a64;
+ uint64_t p, l, ind, a64;
int bits_left;
int g_s;
gf_region_data rd;
@@ -1189,7 +1176,7 @@ static
void
gf_w64_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- uint64_t *s64, *d64, t1, t2, ta, prod, amask, pmask, pp;
+ uint64_t *s64, *d64, ta, prod, amask, pmask, pp;
gf_region_data rd;
gf_internal_t *h;
@@ -1243,7 +1230,7 @@ static
void
gf_w64_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- uint64_t *s64, *d64, t1, t2, ta, tb, prod, amask, bmask, pp;
+ uint64_t *s64, *d64, ta, tb, prod, bmask, pp;
gf_region_data rd;
gf_internal_t *h;
@@ -1374,14 +1361,13 @@ void gf_w64_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
#endif
}
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
{
-#ifdef INTEL_SSE2
- int i;
uint64_t one64, amask;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
gf_internal_t *h;
@@ -1405,17 +1391,16 @@ gf_w64_bytwo_b_sse_region_2_xor(gf_region_data *rd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
{
-#ifdef INTEL_SSE2
- int i;
uint64_t one64, amask;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va;
gf_internal_t *h;
@@ -1437,18 +1422,17 @@ gf_w64_bytwo_b_sse_region_2_noxor(gf_region_data *rd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
uint64_t itb, amask, one64;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
- struct gf_w32_bytwo_data *btd;
gf_region_data rd;
gf_internal_t *h;
@@ -1495,8 +1479,8 @@ gf_w64_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
@@ -1620,17 +1604,13 @@ static
void
gf_w64_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int xor)
{
- unsigned long uls, uld;
gf_internal_t *h = (gf_internal_t *) gf->scratch;
gf_t *base_gf = h->base_gf;
- int i=0;
uint32_t b0 = val & 0x00000000ffffffff;
uint32_t b1 = (val & 0xffffffff00000000) >> 32;
uint64_t *s64, *d64;
uint64_t *top;
uint64_t a0, a1, a1b1;
- int num_syms = bytes / 8;
- int sym_divisible = bytes % 4;
gf_region_data rd;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
@@ -1721,14 +1701,14 @@ int gf_w64_composite_init(gf_t *gf)
return 1;
}
+#ifdef INTEL_SSSE3
static
void
gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v, s, *s64, *d64, *top;
+ int i, j, k;
+ uint64_t pp, v, *s64, *d64, *top;
__m128i si, tables[16][8], p[8], v0, mask1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
@@ -1802,18 +1782,18 @@ gf_w64_split_4_64_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE4
static
void
gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE4
gf_internal_t *h;
- int i, m, j, k, tindex;
- uint64_t pp, v, s, *s64, *d64, *top;
- __m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1, t2;
+ int i, j, k;
+ uint64_t pp, v, *s64, *d64, *top;
+ __m128i si, tables[16][8], p[8], st[8], mask1, mask8, mask16, t1;
struct gf_split_4_64_lazy_data *ld;
uint8_t btable[16];
gf_region_data rd;
@@ -2006,8 +1986,8 @@ gf_w64_split_4_64_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1);
@@ -2141,8 +2121,6 @@ int gf_w64_split_init(gf_t *gf)
int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
- int issse4;
-
switch(mult_type)
{
case GF_MULT_SHIFT:
@@ -2162,11 +2140,9 @@ int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg
* then fall through to split table scratch size code. */
#ifdef INTEL_SSE4
- issse4 = 1;
arg1 = 64;
arg2 = 4;
#else
- issse4 = 0;
arg1 = 64;
arg2 = 8;
#endif
@@ -2202,7 +2178,7 @@ int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg
int gf_w64_init(gf_t *gf)
{
- gf_internal_t *h, *h_base, *h_base_base, *h_base_base_base;
+ gf_internal_t *h;
int no_default_flag = 0;
h = (gf_internal_t *) gf->scratch;
diff --git a/src/gf_w8.c b/src/gf_w8.c
index 7661aad..a2a8600 100644
--- a/src/gf_w8.c
+++ b/src/gf_w8.c
@@ -216,7 +216,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -262,7 +262,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -301,7 +301,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
a = _mm_insert_epi32 (_mm_setzero_si128(), a8, 0);
@@ -364,6 +364,7 @@ gf_w8_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t v
gf_do_final_region_alignment(&rd);
}
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -373,12 +374,10 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -420,9 +419,10 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -432,12 +432,10 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -483,9 +481,10 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
void
gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
@@ -495,12 +494,10 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
- __m128i v, w;
+ __m128i w;
gf_internal_t * h = gf->scratch;
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1ffULL));
@@ -550,8 +547,8 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
}
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/* ------------------------------------------------------------
IMPLEMENTATION: SHIFT:
@@ -588,11 +585,11 @@ gf_w8_shift_multiply (gf_t *gf, uint32_t a8, uint32_t b8)
static
int gf_w8_cfm_init(gf_t *gf)
{
+#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
-#if defined(INTEL_SSE4_PCLMUL)
if ((0xe0 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w8_clm_multiply_2;
gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;
@@ -731,7 +728,7 @@ static
gf_w8_log_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
+ uint8_t lv;
uint8_t *s8, *d8;
struct gf_w8_logtable_data *ltd;
@@ -760,7 +757,7 @@ static
gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
+ uint8_t lv;
uint8_t *s8, *d8;
struct gf_w8_logzero_table_data *ltd;
struct gf_w8_logzero_small_table_data *std;
@@ -802,9 +799,9 @@ gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int
int gf_w8_log_init(gf_t *gf)
{
gf_internal_t *h;
- struct gf_w8_logtable_data *ltd;
- struct gf_w8_logzero_table_data *ztd;
- struct gf_w8_logzero_small_table_data *std;
+ struct gf_w8_logtable_data *ltd = NULL;
+ struct gf_w8_logzero_table_data *ztd = NULL;
+ struct gf_w8_logzero_small_table_data *std = NULL;
uint8_t *alt;
uint8_t *inv;
int i, b;
@@ -941,6 +938,7 @@ gf_w8_default_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
return (ftd->multtable[a][b]);
}
+#ifdef INTEL_SSSE3
static
gf_val_32_t
gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
@@ -950,6 +948,7 @@ gf_w8_default_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
ftd = (struct gf_w8_default_data *) ((gf_internal_t *) gf->scratch)->private;
return (ftd->divtable[a][b]);
}
+#endif
static
gf_val_32_t
@@ -976,7 +975,7 @@ static
gf_w8_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
uint16_t *base;
- uint32_t b, c, prod, vc, vb;
+ uint32_t b, c, vc, vb;
gf_internal_t *h;
struct gf_w8_double_table_data *dtd;
struct gf_w8_double_table_lazy_data *ltd;
@@ -1033,7 +1032,6 @@ static
gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
int i;
- uint8_t lv, b, c;
uint8_t *s8, *d8;
struct gf_w8_single_table_data *ftd;
@@ -1055,14 +1053,13 @@ gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, in
}
}
+#ifdef INTEL_SSSE3
static
void
gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
- uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top;
- __m128i tbl, loset, t1, r, va, mth, mtl;
- uint64_t altable[4];
+ uint8_t *bh, *bl, *sptr, *dptr;
+ __m128i loset, t1, r, va, mth, mtl;
struct gf_w8_half_table_data *htd;
gf_region_data rd;
@@ -1115,8 +1112,8 @@ gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
/* ------------------------------------------------------------
@@ -1137,9 +1134,7 @@ static
void
gf_w8_split_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- unsigned long uls, uld;
int i;
- uint8_t lv, b, c;
uint8_t *s8, *d8;
struct gf_w8_half_table_data *htd;
@@ -1167,11 +1162,10 @@ int gf_w8_split_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_w8_half_table_data *htd;
- int a, b, pp;
+ int a, b;
h = (gf_internal_t *) gf->scratch;
htd = (struct gf_w8_half_table_data *)h->private;
- pp = h->prim_poly;
bzero(htd->high, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE);
bzero(htd->low, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE);
@@ -1325,13 +1319,13 @@ gf_w8_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32);
gf_do_initial_region_alignment(&rd);
- sub_reg_size = (rd.d_top - rd.d_start) / 2;
+ sub_reg_size = ((uint8_t *)rd.d_top - (uint8_t *)rd.d_start) / 2;
base_gf->multiply_region.w32(base_gf, rd.s_start, rd.d_start, val0, sub_reg_size, xor);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start, val1, sub_reg_size, 1);
- base_gf->multiply_region.w32(base_gf, rd.s_start, rd.d_start+sub_reg_size, val1, sub_reg_size, xor);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start+sub_reg_size, val0, sub_reg_size, 1);
- base_gf->multiply_region.w32(base_gf, rd.s_start+sub_reg_size, rd.d_start+sub_reg_size, base_gf->multiply.w32(base_gf, h->prim_poly, val1), sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, (uint8_t *)rd.s_start+sub_reg_size, rd.d_start, val1, sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, rd.s_start, (uint8_t *)rd.d_start+sub_reg_size, val1, sub_reg_size, xor);
+ base_gf->multiply_region.w32(base_gf, (uint8_t *)rd.s_start+sub_reg_size, (uint8_t *)rd.d_start+sub_reg_size, val0, sub_reg_size, 1);
+ base_gf->multiply_region.w32(base_gf, (uint8_t *)rd.s_start+sub_reg_size, (uint8_t *)rd.d_start+sub_reg_size, base_gf->multiply.w32(base_gf, h->prim_poly, val1), sub_reg_size, 1);
gf_do_final_region_alignment(&rd);
}
@@ -1361,7 +1355,6 @@ gf_val_32_t
gf_w8_composite_multiply_inline(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
- gf_t *base_gf = h->base_gf;
uint8_t b0 = b & 0x0f;
uint8_t b1 = (b & 0xf0) >> 4;
uint8_t a0 = a & 0x0f;
@@ -1674,15 +1667,14 @@ gf_w8_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
prod = _mm_xor_si128(prod, t1); \
v = _mm_srli_epi64(v, 1); }
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint8_t vrev;
- uint64_t amask;
__m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v;
struct gf_w8_bytwo_data *btd;
gf_region_data rd;
@@ -1727,17 +1719,16 @@ gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
s8 += 16;
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
- __m128i pp, m1, m2, t1, t2, va, vb;
+ uint8_t *d8, *s8;
+ __m128i pp, m1, m2, t1, t2, va;
s8 = (uint8_t *) rd->s_start;
d8 = (uint8_t *) rd->d_start;
@@ -1753,16 +1744,15 @@ gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *bt
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
{
-#ifdef INTEL_SSE2
- int i;
- uint8_t *d8, *s8, tb;
+ uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
s8 = (uint8_t *) rd->s_start;
@@ -1781,15 +1771,15 @@ gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd)
d8 += 16;
s8 += 16;
}
-#endif
}
+#endif
+#ifdef INTEL_SSE2
static
void
gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSE2
int itb;
uint8_t *d8, *s8;
__m128i pp, m1, m2, t1, t2, va, vb;
@@ -1837,15 +1827,13 @@ gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
static
void
gf_w8_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
- int i;
- uint8_t *s8, *d8, *top;
uint64_t *s64, *d64, t1, t2, ta, tb, prod;
struct gf_w8_bytwo_data *btd;
gf_region_data rd;
@@ -1944,6 +1932,7 @@ gf_w8_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s64++;
}
}
+ break;
case 6:
if (xor) {
while (d64 < (uint64_t *) rd.d_top) {
@@ -1966,6 +1955,7 @@ gf_w8_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t
s64++;
}
}
+ break;
/*
case 7:
if (xor) {
@@ -2362,7 +2352,7 @@ int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1
int gf_w8_init(gf_t *gf)
{
- gf_internal_t *h, *h_base;
+ gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
@@ -2454,11 +2444,9 @@ uint8_t *gf_w8_get_mult_table(gf_t *gf)
uint8_t *gf_w8_get_div_table(gf_t *gf)
{
- gf_internal_t *h;
struct gf_w8_default_data *ftd;
struct gf_w8_single_table_data *std;
- h = (gf_internal_t *) gf->scratch;
if (gf->multiply.w32 == gf_w8_default_multiply) {
ftd = (struct gf_w8_default_data *) ((gf_internal_t *) gf->scratch)->private;
return (uint8_t *) ftd->divtable;
diff --git a/src/gf_wgen.c b/src/gf_wgen.c
index f5e22e0..06f7993 100644
--- a/src/gf_wgen.c
+++ b/src/gf_wgen.c
@@ -284,9 +284,8 @@ inline
gf_val_32_t
gf_wgen_group_s_equals_r_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
- int i;
int leftover, rs;
- uint32_t p, l, ind, r, a32;
+ uint32_t p, l, ind, a32;
int bits_left;
int g_s;
int w;
@@ -362,7 +361,7 @@ gf_wgen_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
int i;
int leftover;
- uint64_t p, l, r, mask;
+ uint64_t p, l, r;
uint32_t a32, ind;
int g_s, g_r;
struct gf_wgen_group_data *gd;
@@ -496,7 +495,7 @@ int gf_wgen_table_8_init(gf_t *gf)
gf_internal_t *h;
int w;
struct gf_wgen_table_w8_data *std;
- uint32_t a, b, p, pp;
+ uint32_t a, b, p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -557,7 +556,7 @@ int gf_wgen_table_16_init(gf_t *gf)
gf_internal_t *h;
int w;
struct gf_wgen_table_w16_data *std;
- uint32_t a, b, p, pp;
+ uint32_t a, b, p;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@@ -917,11 +916,11 @@ gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int byte
for (i = 0; i < h->w; i++) {
for (j = 0; j < h->w; j++) {
if (val & (1 << j)) {
- gf_multby_one(src, dest + j*rs, rs, (written & (1 << j)));
+ gf_multby_one(src, ((uint8_t *)dest) + j*rs, rs, (written & (1 << j)));
written |= (1 << j);
}
}
- src += rs;
+ src = (uint8_t *)src + rs;
val = gf->multiply.w32(gf, val, 2);
}
}
diff --git a/test-driver b/test-driver
new file mode 100755
index 0000000..d306056
--- /dev/null
+++ b/test-driver
@@ -0,0 +1,139 @@
+#! /bin/sh
+# test-driver - basic testsuite driver script.
+
+scriptversion=2013-07-13.22; # UTC
+
+# Copyright (C) 2011-2013 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+# Make unconditional expansion of undefined variables an error. This
+# helps a lot in preventing typo-related bugs.
+set -u
+
+usage_error () # Report a command-line error given as arguments; all output goes to stderr.
+{
+ echo "$0: $*" >&2
+ print_usage >&2
+ exit 2 # Terminate the driver with status 2 after printing the message and usage.
+}
+
+print_usage () # Write the option summary to stdout; callers redirect it when needed.
+{
+ cat <<END
+Usage:
+ test-driver --test-name=NAME --log-file=PATH --trs-file=PATH
+ [--expect-failure={yes|no}] [--color-tests={yes|no}]
+ [--enable-hard-errors={yes|no}] [--]
+ TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS]
+The '--test-name', '--log-file' and '--trs-file' options are mandatory.
+END
+}
+
+test_name= # Used for reporting.
+log_file= # Where to save the output of the test script.
+trs_file= # Where to save the metadata of the test run.
+expect_failure=no
+color_tests=no
+enable_hard_errors=yes
+while test $# -gt 0; do
+ case $1 in
+ --help) print_usage; exit $?;;
+ --version) echo "test-driver $scriptversion"; exit $?;;
+ --test-name) test_name=$2; shift;;
+ --log-file) log_file=$2; shift;;
+ --trs-file) trs_file=$2; shift;;
+ --color-tests) color_tests=$2; shift;;
+ --expect-failure) expect_failure=$2; shift;;
+ --enable-hard-errors) enable_hard_errors=$2; shift;;
+ --) shift; break;;
+ -*) usage_error "invalid option: '$1'";;
+ *) break;;
+ esac
+ shift
+done
+
+missing_opts=
+test x"$test_name" = x && missing_opts="$missing_opts --test-name"
+test x"$log_file" = x && missing_opts="$missing_opts --log-file"
+test x"$trs_file" = x && missing_opts="$missing_opts --trs-file"
+if test x"$missing_opts" != x; then
+ usage_error "the following mandatory options are missing:$missing_opts"
+fi
+
+if test $# -eq 0; then
+ usage_error "missing argument"
+fi
+
+if test $color_tests = yes; then
+ # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
+ red='' # Red.
+ grn='' # Green.
+ lgn='' # Light green.
+ blu='' # Blue.
+ mgn='' # Magenta.
+ std='' # No color.
+else
+ red= grn= lgn= blu= mgn= std=
+fi
+
+do_exit='rm -f $log_file $trs_file; (exit $st); exit $st'
+trap "st=129; $do_exit" 1
+trap "st=130; $do_exit" 2
+trap "st=141; $do_exit" 13
+trap "st=143; $do_exit" 15
+
+# Test script is run here.
+"$@" >$log_file 2>&1
+estatus=$?
+if test $enable_hard_errors = no && test $estatus -eq 99; then
+ estatus=1
+fi
+
+case $estatus:$expect_failure in
+ 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
+ 0:*) col=$grn res=PASS recheck=no gcopy=no;;
+ 77:*) col=$blu res=SKIP recheck=no gcopy=yes;;
+ 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;;
+ *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;;
+ *:*) col=$red res=FAIL recheck=yes gcopy=yes;;
+esac
+
+# Report outcome to console.
+echo "${col}${res}${std}: $test_name"
+
+# Register the test result, and other relevant metadata.
+echo ":test-result: $res" > $trs_file
+echo ":global-test-result: $res" >> $trs_file
+echo ":recheck: $recheck" >> $trs_file
+echo ":copy-in-global-log: $gcopy" >> $trs_file
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/test/gf_unit.c b/test/gf_unit.c
index cf466fe..deaaced 100644
--- a/test/gf_unit.c
+++ b/test/gf_unit.c
@@ -70,16 +70,16 @@ int main(int argc, char **argv)
{
signal(SIGSEGV, SigHandler);
- int w, i, verbose, single, region, tested, top;
+ int w, i, verbose, single, region, top;
int s_start, d_start, bytes, xor, alignment_test;
gf_t gf, gf_def;
time_t t0;
gf_internal_t *h;
- gf_general_t *a, *b, *c, *d, *ai, *bi;
- uint8_t a8, b8, c8, *mult4, *div4, *mult8, *div8;
- uint16_t a16, b16, c16, d16, *log16, *alog16;
- char as[50], bs[50], cs[50], ds[50], ais[50], bis[50];
- uint32_t mask;
+ gf_general_t *a, *b, *c, *d;
+ uint8_t a8, b8, c8, *mult4 = NULL, *mult8 = NULL;
+ uint16_t a16, b16, c16, *log16 = NULL, *alog16 = NULL;
+ char as[50], bs[50], cs[50], ds[50];
+ uint32_t mask = 0;
char *ra, *rb, *rc, *rd, *target;
int align;
@@ -115,8 +115,6 @@ int main(int argc, char **argv)
b = (gf_general_t *) malloc(sizeof(gf_general_t));
c = (gf_general_t *) malloc(sizeof(gf_general_t));
d = (gf_general_t *) malloc(sizeof(gf_general_t));
- ai = (gf_general_t *) malloc(sizeof(gf_general_t));
- bi = (gf_general_t *) malloc(sizeof(gf_general_t));
//15 bytes extra to make sure it's 16byte aligned
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
@@ -145,12 +143,10 @@ int main(int argc, char **argv)
problem("No default for this value of w");
if (w == 4) {
mult4 = gf_w4_get_mult_table(&gf);
- div4 = gf_w4_get_div_table(&gf);
}
if (w == 8) {
mult8 = gf_w8_get_mult_table(&gf);
- div8 = gf_w8_get_div_table(&gf);
}
if (w == 16) {
@@ -240,7 +236,6 @@ int main(int argc, char **argv)
}
}
- tested = 0;
gf_general_multiply(&gf, a, b, c);
/* If w is 4, 8 or 16, then there are inline multiplication/division methods.
@@ -285,7 +280,6 @@ int main(int argc, char **argv)
/* If this is not composite, then first test against the default: */
if (h->mult_type != GF_MULT_COMPOSITE) {
- tested = 1;
gf_general_multiply(&gf_def, a, b, d);
if (!gf_general_are_equal(c, d, w)) {
@@ -306,7 +300,6 @@ int main(int argc, char **argv)
if (gf_general_is_zero(a, w) || gf_general_is_zero(b, w) ||
gf_general_is_one(a, w) || gf_general_is_one(b, w)) {
- tested = 1;
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {
@@ -429,4 +422,5 @@ int main(int argc, char **argv)
gf_general_do_region_check(&gf, a, rc+s_start, rd+d_start, target+d_start, bytes, xor);
}
}
+ return 0;
}
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 31dffae..9e7c564 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -3,6 +3,8 @@
INCLUDES=-I./ -I../include
AM_CFLAGS = -O3 $(SIMD_FLAGS) -fPIC $(INCLUDES)
+TESTS=run-tests.sh
+
bin_PROGRAMS = gf_mult gf_div gf_add gf_time gf_methods gf_poly gf_inline_time
gf_mult_SOURCES = gf_mult.c
diff --git a/tools/gf_add.c b/tools/gf_add.c
index b900e69..28cc12c 100644
--- a/tools/gf_add.c
+++ b/tools/gf_add.c
@@ -62,7 +62,7 @@ void print_128(uint64_t *v)
int main(int argc, char **argv)
{
- int hex, al, bl, w;
+ int hex, w;
uint32_t a, b, c, top;
uint64_t a64, b64, c64;
uint64_t a128[2], b128[2], c128[2];
diff --git a/tools/gf_inline_time.c b/tools/gf_inline_time.c
index e64f0b3..c81e8a9 100644
--- a/tools/gf_inline_time.c
+++ b/tools/gf_inline_time.c
@@ -62,8 +62,8 @@ int main(int argc, char **argv)
int w, j, i, size, iterations;
gf_t gf;
double timer, elapsed, dnum, num;
- uint8_t *ra, *rb, *mult4, *mult8;
- uint16_t *ra16, *rb16, *log16, *alog16;
+ uint8_t *ra = NULL, *rb = NULL, *mult4, *mult8;
+ uint16_t *ra16 = NULL, *rb16 = NULL, *log16, *alog16;
time_t t0;
if (argc != 5) usage(NULL);
@@ -164,4 +164,5 @@ int main(int argc, char **argv)
printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
}
+ return 0;
}
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 3afb438..921febf 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -20,8 +20,9 @@
#define BNMULTS (8)
static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
"TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
-#define NMULTS (16)
-static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
+//ADAM: NMULTS must equal the number of names listed in MULTS (17 with CARRY_FREE_GK added).
+#define NMULTS (17)
+static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
@@ -76,7 +77,7 @@ int main(int argc, char *argv[])
int listing;
char *gf_argv[50], *x;
gf_t gf;
- char divs[200], ks[10], ls[10];
+ char ls[10];
char * w_str;
if (argc != 4) usage(NULL);
diff --git a/tools/gf_poly.c b/tools/gf_poly.c
index e19706c..44a24ac 100644
--- a/tools/gf_poly.c
+++ b/tools/gf_poly.c
@@ -84,7 +84,6 @@ int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
{
gf_general_t *a, *b, zero, factor, p;
int i, j, da, db;
- char buf[30];
gf_general_set_zero(&zero, w);
@@ -123,7 +122,6 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
gf_general_t *product;
gf_general_t p, zero, factor;
int j, k, lq;
- char buf[20];
gf_general_set_zero(&zero, w);
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
@@ -181,9 +179,9 @@ void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, i
free(x_to_q);
}
-main(int argc, char **argv)
+int main(int argc, char **argv)
{
- int w, i, power, n, ap, success, j;
+ int w, i, power, n, ap, success;
gf_t gf;
gf_general_t *poly, *prod;
char *string, *ptr;
diff --git a/tools/gf_time.c b/tools/gf_time.c
index 2bd2d04..4becc8d 100644
--- a/tools/gf_time.c
+++ b/tools/gf_time.c
@@ -119,7 +119,7 @@ int main(int argc, char **argv)
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage(BM);
strcpy(tests, "");
- for (i = 0; i < argv[2][i] != '\0'; i++) {
+ for (i = 0; argv[2][i] != '\0'; i++) {
switch(argv[2][i]) {
case 'A': strcat(tests, single_tests);
strcat(tests, region_tests);
@@ -163,8 +163,8 @@ int main(int argc, char **argv)
for (i = 0; i < 3; i++) {
test = single_tests[i];
if (strchr(tests, test) != NULL) {
- if (tmethods[test] == NULL) {
- printf("No %s method.\n", tstrings[test]);
+ if (tmethods[(int)test] == NULL) {
+ printf("No %s method.\n", tstrings[(int)test]);
} else {
elapsed = 0;
dnum = 0;
@@ -176,7 +176,7 @@ int main(int argc, char **argv)
elapsed += timer_split(&timer);
}
printf("%14s: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
- tstrings[test], elapsed,
+ tstrings[(int)test], elapsed,
dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
}
}
@@ -185,8 +185,8 @@ int main(int argc, char **argv)
for (i = 0; i < 4; i++) {
test = region_tests[i];
if (strchr(tests, test) != NULL) {
- if (tmethods[test] == NULL) {
- printf("No %s method.\n", tstrings[test]);
+ if (tmethods[(int)test] == NULL) {
+ printf("No %s method.\n", tstrings[(int)test]);
} else {
elapsed = 0;
@@ -204,10 +204,11 @@ int main(int argc, char **argv)
elapsed += timer_split(&timer);
}
printf("%14s: XOR: %d %10.6lf s MB: %10.3lf %10.3lf MB/s\n",
- tstrings[test], xor, elapsed,
+ tstrings[(int)test], xor, elapsed,
ds*di/1024.0/1024.0, ds*di/1024.0/1024.0/elapsed);
}
}
}
}
+ return 0;
}
diff --git a/tools/run-tests.sh b/tools/run-tests.sh
new file mode 100755
index 0000000..bd3cc60
--- /dev/null
+++ b/tools/run-tests.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# For each w, pipe the commands printed by "gf_methods $w -A -U" into "sh -e"; stop at the first failure.
+for w in 4 8 16 32 64 128 ; do
+ ./gf_methods $w -A -U | sh -e
+ if [ $? != "0" ] ; then
+ echo "Failed unit tests for w=$w"
+ exit 1 # "break" would leave the script's exit status at 0, which the test driver records as PASS
+ fi
+done