summaryrefslogtreecommitdiff
path: root/include/gf_complete.h
blob: 5806625a6d798c8affb5897539179d3fd9b2bf89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/* 
 * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 *
 * gf_complete.h
 *
 * The main include file for gf_complete. 
 */

#ifndef _GF_COMPLETE_H_
#define _GF_COMPLETE_H_
#include <stdint.h>

#ifdef INTEL_SSE4
  #ifdef __SSE4_2__
    #include <nmmintrin.h>
  #endif
  #ifdef __SSE4_1__
    #include <smmintrin.h>
  #endif
#endif

#ifdef INTEL_SSSE3
  #include <tmmintrin.h>
#endif

#ifdef INTEL_SSE2
  #include <emmintrin.h>
#endif

#ifdef INTEL_SSE4_PCLMUL
  #include <wmmintrin.h>
#endif


/* These are the different ways to perform multiplication.
   Not all are implemented for all values of w.
   See the paper for an explanation of how they work. */

typedef enum {GF_MULT_DEFAULT,
              GF_MULT_SHIFT,
              GF_MULT_CARRY_FREE,
              GF_MULT_CARRY_FREE_GK,
              GF_MULT_GROUP,
              GF_MULT_BYTWO_p,
              GF_MULT_BYTWO_b,
              GF_MULT_TABLE,
              GF_MULT_LOG_TABLE,
              GF_MULT_LOG_ZERO,
              GF_MULT_LOG_ZERO_EXT,
              GF_MULT_SPLIT_TABLE,
              GF_MULT_COMPOSITE } gf_mult_type_t;

/* These are the different ways to optimize region 
   operations.  They are bits because you can compose them.
   Certain optimizations only apply to certain gf_mult_type_t's.  
   Again, please see documentation for how to use these */
   
#define GF_REGION_DEFAULT      (0x0)
#define GF_REGION_DOUBLE_TABLE (0x1)
#define GF_REGION_QUAD_TABLE   (0x2)
#define GF_REGION_LAZY         (0x4)
#define GF_REGION_SSE          (0x8)
#define GF_REGION_NOSSE        (0x10)
#define GF_REGION_ALTMAP       (0x20)
#define GF_REGION_CAUCHY       (0x40)

typedef uint32_t gf_region_type_t;

/* These are different ways to implement division.
   Once again, it's best to use "DEFAULT".  However,
   there are times when you may want to experiment
   with the others. */

typedef enum { GF_DIVIDE_DEFAULT,
               GF_DIVIDE_MATRIX,
               GF_DIVIDE_EUCLID } gf_division_type_t;

/* We support w=4,8,16,32,64 and 128 with their own data types and
   operations for multiplication, division, etc.  We also support
   a "gen" type so that you can do general gf arithmetic for any 
   value of w from 1 to 32.  You can perform a "region" operation
   on these if you use "CAUCHY" as the mapping. 
 */

typedef uint32_t    gf_val_32_t;
typedef uint64_t    gf_val_64_t;
typedef uint64_t   *gf_val_128_t;

extern int _gf_errno;
extern void gf_error();

typedef struct gf *GFP;

typedef union gf_func_a_b {
    gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a,  gf_val_32_t b);
    gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a,  gf_val_64_t b);
    void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b, gf_val_128_t c);
} gf_func_a_b;
  
typedef union {
  gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a);
  gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a);
  void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b);
} gf_func_a;
  
typedef union {
  void  (*w32) (GFP gf, void *src, void *dest, gf_val_32_t val,  int bytes, int add);
  void  (*w64) (GFP gf, void *src, void *dest, gf_val_64_t val,  int bytes, int add);
  void  (*w128)(GFP gf, void *src, void *dest, gf_val_128_t val, int bytes, int add);
} gf_region;

typedef union {
  gf_val_32_t  (*w32) (GFP gf, void *start, int bytes, int index);
  gf_val_64_t  (*w64) (GFP gf, void *start, int bytes, int index);
  void         (*w128)(GFP gf, void *start, int bytes, int index, gf_val_128_t rv);
} gf_extract;

typedef struct gf {
  gf_func_a_b    multiply;
  gf_func_a_b    divide;
  gf_func_a      inverse;
  gf_region      multiply_region;
  gf_extract     extract_word;
  void           *scratch;
} gf_t;
    
/* Initializes the GF to defaults.  Pass it a pointer to a gf_t.
   Returns 0 on failure, 1 on success. */

extern int gf_init_easy(GFP gf, int w);

/* Initializes the GF changing the defaults.
   Returns 0 on failure, 1 on success.
   Pass it a pointer to a gf_t.
   For mult_type and divide_type, use one of gf_mult_type_t gf_divide_type_t .  
   For region_type, OR together the GF_REGION_xxx's defined above.  
   Use 0 as prim_poly for defaults.  Otherwise, the leading 1 is optional.
   Use NULL for scratch_memory to have init_hard allocate memory.  Otherwise,
   use gf_scratch_size() to determine how big scratch_memory has to be.
 */

extern int gf_init_hard(GFP gf, 
                        int w, 
                        int mult_type, 
                        int region_type, 
                        int divide_type, 
                        uint64_t prim_poly,
                        int arg1, 
                        int arg2,
                        GFP base_gf,
                        void *scratch_memory);

/* Determines the size for scratch_memory.  
   Returns 0 on failure and non-zero on success. */

extern int gf_scratch_size(int w, 
                           int mult_type, 
                           int region_type, 
                           int divide_type, 
                           int arg1, 
                           int arg2);

/* This reports the gf_scratch_size of a gf_t that has already been created */

extern int gf_size(GFP gf);

/* Frees scratch memory if gf_init_easy/gf_init_hard called malloc.
   If recursive = 1, then it calls itself recursively on base_gf. */

extern int gf_free(GFP gf, int recursive);

/* This is support for inline single multiplications and divisions.
   I know it's yucky, but if you've got to be fast, you've got to be fast.
   We support inlining for w=4, w=8 and w=16.  

   To use inline multiplication and division with w=4 or 8, you should use the 
   default gf_t, or one with a single table.  Otherwise, gf_w4/8_get_mult_table()
   will return NULL. Similarly, with w=16, the gf_t must be LOG */

uint8_t *gf_w4_get_mult_table(GFP gf);
uint8_t *gf_w4_get_div_table(GFP gf);

#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|(b)])

uint8_t *gf_w8_get_mult_table(GFP gf);
uint8_t *gf_w8_get_div_table(GFP gf);

#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) (a))<<8)|(b)])

uint16_t *gf_w16_get_log_table(GFP gf);
uint16_t *gf_w16_get_mult_alog_table(GFP gf);
uint16_t *gf_w16_get_div_alog_table(GFP gf);

#define GF_W16_INLINE_MULT(log, alog, a, b) ((a) == 0 || (b) == 0) ? 0 : (alog[(uint32_t)log[a]+(uint32_t)log[b]])
#define GF_W16_INLINE_DIV(log, alog, a, b) ((a) == 0 || (b) == 0) ? 0 : (alog[(int)log[a]-(int)log[b]])
#endif