summaryrefslogtreecommitdiff
path: root/gcc/config/arm/neon.ml
diff options
context:
space:
mode:
authorktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>2013-04-25 10:54:39 +0000
committerktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>2013-04-25 10:54:39 +0000
commit8518cadae3234d27bfe8993183b24badc81c82d3 (patch)
tree27825bda688a67445e26f1c90cffc22c096d39c7 /gcc/config/arm/neon.ml
parentb04165c4f58840b546699f33b8e65ebeedef8883 (diff)
downloadgcc-8518cadae3234d27bfe8993183b24badc81c82d3.tar.gz
2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Julian Brown <julian@codesourcery.com> * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF. (TB_DREG): Add T_V4HF. (v4hf_UP): New macro. (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW. (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW. Handle initialisation of V4HF. Adjust initialisation of reinterpret built-ins. (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW. (arm_vector_mode_supported_p): Handle V4HF. (arm_mangle_map): Handle V4HFmode. * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF. * config/arm/arm_neon_builtins.def: Add entries for vcvtv4hfv4sf, vcvtv4sfv4hf. * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern. (neon_vcvtv4hfv4sf): Likewise. * config/arm/neon-gen.ml: Handle half-precision floating point features. * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature. * config/arm/arm_neon.h: Regenerate. * config/arm/neon.ml (type elts): Add F16. (type vectype): Add T_float16x4, T_floatHF. (type vecmode): Add V4HF. (type features): Add Requires_FP_bit feature. (elt_width): Handle F16. (elt_class): Likewise. (elt_of_class_width): Likewise. (mode_of_elt): Refactor. (type_for_elt): Handle F16, fix error messages. (vectype_size): Handle T_float16x4. (vcvt_sh): New function. (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16. (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4. (string_of_mode): Handle V4HF. * doc/arm-neon-intrinsics.texi: Regenerate. 2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com> * lib/target-supports.exp (check_effective_target_arm_neon_fp16_ok_nocache): New procedure. (check_effective_target_arm_neon_fp16_ok): Likewise. (add_options_for_arm_neon_fp16): Likewise. * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated. * gcc.target/arm/neon/vcvtf32_f16.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@198298 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/neon.ml')
-rw-r--r--gcc/config/arm/neon.ml66
1 files changed, 47 insertions, 19 deletions
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 34090c9655c..ca9a4c06aa6 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -21,7 +21,7 @@
<http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements. *)
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
| Cast of elts * elts | NoElts
@@ -37,6 +37,7 @@ type vectype = T_int8x8 | T_int8x16
| T_uint16x4 | T_uint16x8
| T_uint32x2 | T_uint32x4
| T_uint64x1 | T_uint64x2
+ | T_float16x4
| T_float32x2 | T_float32x4
| T_poly8x8 | T_poly8x16
| T_poly16x4 | T_poly16x8
@@ -46,11 +47,13 @@ type vectype = T_int8x8 | T_int8x16
| T_uint8 | T_uint16
| T_uint32 | T_uint64
| T_poly8 | T_poly16
- | T_float32 | T_arrayof of int * vectype
+ | T_float16 | T_float32
+ | T_arrayof of int * vectype
| T_ptrto of vectype | T_const of vectype
| T_void | T_intQI
| T_intHI | T_intSI
- | T_intDI | T_floatSF
+ | T_intDI | T_floatHF
+ | T_floatSF
(* The meanings of the following are:
TImode : "Tetra", two registers (four words).
@@ -92,7 +95,7 @@ type arity = Arity0 of vectype
| Arity3 of vectype * vectype * vectype * vectype
| Arity4 of vectype * vectype * vectype * vectype * vectype
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
| V16QI | V8HI | V4SI | V4SF | V2DI
| QI | HI | SI | SF
@@ -284,18 +287,22 @@ type features =
| Fixed_core_reg
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
| Requires_feature of string
+ (* Mark that the intrinsic requires a particular architecture version. *)
| Requires_arch of int
+ (* Mark that the intrinsic requires a particular bit in __ARM_FP to
+ be set. *)
+ | Requires_FP_bit of int
exception MixedMode of elts * elts
let rec elt_width = function
S8 | U8 | P8 | I8 | B8 -> 8
- | S16 | U16 | P16 | I16 | B16 -> 16
+ | S16 | U16 | P16 | I16 | B16 | F16 -> 16
| S32 | F32 | U32 | I32 | B32 -> 32
| S64 | U64 | I64 | B64 -> 64
| Conv (a, b) ->
let wa = elt_width a and wb = elt_width b in
- if wa = wb then wa else failwith "element width?"
+ if wa = wb then wa else raise (MixedMode (a, b))
| Cast (a, b) -> raise (MixedMode (a, b))
| NoElts -> failwith "No elts"
@@ -303,7 +310,7 @@ let rec elt_class = function
S8 | S16 | S32 | S64 -> Signed
| U8 | U16 | U32 | U64 -> Unsigned
| P8 | P16 -> Poly
- | F32 -> Float
+ | F16 | F32 -> Float
| I8 | I16 | I32 | I64 -> Int
| B8 | B16 | B32 | B64 -> Bits
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
@@ -315,6 +322,7 @@ let elt_of_class_width c w =
| Signed, 16 -> S16
| Signed, 32 -> S32
| Signed, 64 -> S64
+ | Float, 16 -> F16
| Float, 32 -> F32
| Unsigned, 8 -> U8
| Unsigned, 16 -> U16
@@ -384,7 +392,12 @@ let find_key_operand operands =
in
scan ((Array.length operands) - 1)
-let rec mode_of_elt elt shape =
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
+ SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
+ for the given argument position, else determine which argument to return a
+ mode for automatically. *)
+
+let rec mode_of_elt ?argpos elt shape =
let flt = match elt_class elt with
Float | ConvClass(_, Float) -> true | _ -> false in
let idx =
@@ -394,7 +407,10 @@ let rec mode_of_elt elt shape =
in match shape with
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
- [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
+ if flt then
+ [| V8QI; V4HF; V2SF; DI |].(idx)
+ else
+ [| V8QI; V4HI; V2SI; DI |].(idx)
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
@@ -404,7 +420,11 @@ let rec mode_of_elt elt shape =
| Long_imm ->
[| V8QI; V4HI; V2SI; DI |].(idx)
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
- | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
+ | Use_operands ops ->
+ begin match argpos with
+ None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
+ | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
+ end
| _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
@@ -454,10 +474,11 @@ let type_for_elt shape elt no =
| U16 -> T_uint16x4
| U32 -> T_uint32x2
| U64 -> T_uint64x1
+ | F16 -> T_float16x4
| F32 -> T_float32x2
| P8 -> T_poly8x8
| P16 -> T_poly16x4
- | _ -> failwith "Bad elt type"
+ | _ -> failwith "Bad elt type for Dreg"
end
| Qreg ->
begin match elt with
@@ -472,7 +493,7 @@ let type_for_elt shape elt no =
| F32 -> T_float32x4
| P8 -> T_poly8x16
| P16 -> T_poly16x8
- | _ -> failwith "Bad elt type"
+ | _ -> failwith "Bad elt type for Qreg"
end
| Corereg ->
begin match elt with
@@ -487,7 +508,7 @@ let type_for_elt shape elt no =
| P8 -> T_poly8
| P16 -> T_poly16
| F32 -> T_float32
- | _ -> failwith "Bad elt type"
+ | _ -> failwith "Bad elt type for Corereg"
end
| Immed ->
T_immediate (0, 0)
@@ -506,7 +527,7 @@ let type_for_elt shape elt no =
let vectype_size = function
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
- | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
+ | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
@@ -1217,6 +1238,10 @@ let ops =
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+ Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
+ Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+ Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
@@ -1782,7 +1807,7 @@ let rec string_of_elt = function
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
- | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
+ | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
| NoElts -> failwith "No elts"
@@ -1809,6 +1834,7 @@ let string_of_vectype vt =
| T_uint32x4 -> affix "uint32x4"
| T_uint64x1 -> affix "uint64x1"
| T_uint64x2 -> affix "uint64x2"
+ | T_float16x4 -> affix "float16x4"
| T_float32x2 -> affix "float32x2"
| T_float32x4 -> affix "float32x4"
| T_poly8x8 -> affix "poly8x8"
@@ -1825,6 +1851,7 @@ let string_of_vectype vt =
| T_uint64 -> affix "uint64"
| T_poly8 -> affix "poly8"
| T_poly16 -> affix "poly16"
+ | T_float16 -> affix "float16"
| T_float32 -> affix "float32"
| T_immediate _ -> "const int"
| T_void -> "void"
@@ -1832,6 +1859,7 @@ let string_of_vectype vt =
| T_intHI -> "__builtin_neon_hi"
| T_intSI -> "__builtin_neon_si"
| T_intDI -> "__builtin_neon_di"
+ | T_floatHF -> "__builtin_neon_hf"
| T_floatSF -> "__builtin_neon_sf"
| T_arrayof (num, base) ->
let basename = name (fun x -> x) base in
@@ -1853,10 +1881,10 @@ let string_of_inttype = function
| B_XImode -> "__builtin_neon_xi"
let string_of_mode = function
- V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
- | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
- | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
- | SF -> "sf"
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
+ | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
+ | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
+ | HI -> "hi" | SI -> "si" | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
should be omitted from the builtin name (the info is passed in an extra