summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gas/ChangeLog32
-rw-r--r--gas/config/tc-bpf.c1521
-rw-r--r--gas/config/tc-bpf.h2
3 files changed, 1551 insertions, 4 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index 80548d24ba6..e1cfcec4abf 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,35 @@
+2023-04-20 Guillermo E. Martinez <guillermo.e.martinez@oracle.com>
+
+ PR gas/29728
+ * config/tc-bpf.h (TC_EQUAL_IN_INSN): Define.
+ * config/tc-bpf.c (LEX_IS_SYMBOL_COMPONENT): Define.
+ (LEX_IS_WHITESPACE): Likewise.
+ (LEX_IS_NEWLINE): Likewise.
+ (LEX_IS_ARITHM_OP): Likewise.
+ (LEX_IS_STAR): Likewise.
+ (LEX_IS_CLSE_BR): Likewise.
+ (LEX_IS_OPEN_BR): Likewise.
+ (LEX_IS_EQUAL): Likewise.
+ (LEX_IS_EXCLA): Likewise.
+ (ST_EOI): Likewise.
+ (MAX_TOKEN_SZ): Likewise.
+ (init_pseudoc_lex): New function.
+ (md_begin): Call init_pseudoc_lex.
+ (valid_expr): New function.
+ (build_bpf_non_generic_load): Likewise.
+ (build_bpf_atomic_insn): Likewise.
+ (build_bpf_jmp_insn): Likewise.
+ (build_bpf_arithm_insn): Likewise.
+ (build_bpf_endianness): Likewise.
+ (build_bpf_load_store_insn): Likewise.
+ (look_for_reserved_word): Likewise.
+ (is_register): Likewise.
+ (is_cast): Likewise.
+ (get_token): Likewise.
+ (bpf_pseudoc_to_normal_syntax): Likewise.
+ (md_assemble): Try pseudo-C syntax if an instruction cannot be
+ parsed.
+
2023-04-18 mengqinggang <mengqinggang@loongson.cn>
* config/tc-loongarch.c (loongarch_fix_adjustable): Symbols with
diff --git a/gas/config/tc-bpf.c b/gas/config/tc-bpf.c
index 1f8b0cc2ede..171fc682806 100644
--- a/gas/config/tc-bpf.c
+++ b/gas/config/tc-bpf.c
@@ -28,13 +28,36 @@
#include "elf/common.h"
#include "elf/bpf.h"
#include "dwarf2dbg.h"
+#include <ctype.h>
+#include <stdlib.h>
const char comment_chars[] = ";";
-const char line_comment_chars[] = "#";
+const char line_comment_chars[] = "#";
const char line_separator_chars[] = "`";
const char EXP_CHARS[] = "eE";
const char FLT_CHARS[] = "fFdD";
+/* Set by md_operand and cleared by valid_expr: non-NULL after an
+   expression parse means the expression was rejected.  */
+static const char *invalid_expression;
+/* Character class table for the pseudo-C scanner, indexed by unsigned
+   char and filled in by init_pseudoc_lex.  Unlisted characters keep
+   class 0.  */
+static char pseudoc_lex[256];
+/* Characters classified as LEX_IS_SYMBOL_COMPONENT (digits included).  */
+static const char symbol_chars[] =
+"_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+/* Characters classified as LEX_IS_ARITHM_OP.  '*' is not listed here;
+   it has its own class, LEX_IS_STAR.  */
+static const char arithm_op[] = "+-/<>%&|^";
+
+static void init_pseudoc_lex (void);
+
+/* Character classes stored in pseudoc_lex.  The value 5 is unused.  */
+#define LEX_IS_SYMBOL_COMPONENT 1
+#define LEX_IS_WHITESPACE 2
+#define LEX_IS_NEWLINE 3
+#define LEX_IS_ARITHM_OP 4
+#define LEX_IS_STAR 6
+#define LEX_IS_CLSE_BR 7
+#define LEX_IS_OPEN_BR 8
+#define LEX_IS_EQUAL 9
+#define LEX_IS_EXCLA 10
+
+/* Parser state meaning "End Of Instruction": no further token is
+   expected.  */
+#define ST_EOI 100
+/* Size in bytes of the token buffers used by the pseudo-C parser.  */
+#define MAX_TOKEN_SZ 100
+
/* Like s_lcomm_internal in gas/read.c but the alignment string
is allowed to be optional. */
@@ -158,6 +181,32 @@ md_show_usage (FILE * stream)
}
+
+/* Populate pseudoc_lex, the character class table consulted by the
+   pseudo-C scanner.  Characters that are not assigned a class here keep
+   the value 0.  */
+
+static void
+init_pseudoc_lex (void)
+{
+  static const struct
+  {
+    unsigned char ch;
+    char lex_class;
+  } single_chars[] =
+    {
+      { ' ',  LEX_IS_WHITESPACE },
+      { '\t', LEX_IS_WHITESPACE },
+      { '\r', LEX_IS_WHITESPACE },
+      { '\n', LEX_IS_NEWLINE },
+      { '*',  LEX_IS_STAR },
+      { ')',  LEX_IS_CLSE_BR },
+      { '(',  LEX_IS_OPEN_BR },
+      { ']',  LEX_IS_CLSE_BR },
+      { '[',  LEX_IS_OPEN_BR },
+      { '=',  LEX_IS_EQUAL },
+      { '!',  LEX_IS_EXCLA },
+    };
+  size_t i;
+
+  /* The three character sets are disjoint, so the fill order does not
+     affect the resulting table.  */
+  for (i = 0; symbol_chars[i] != '\0'; i++)
+    pseudoc_lex[(unsigned char) symbol_chars[i]] = LEX_IS_SYMBOL_COMPONENT;
+
+  for (i = 0; arithm_op[i] != '\0'; i++)
+    pseudoc_lex[(unsigned char) arithm_op[i]] = LEX_IS_ARITHM_OP;
+
+  for (i = 0; i < sizeof single_chars / sizeof single_chars[0]; i++)
+    pseudoc_lex[single_chars[i].ch] = single_chars[i].lex_class;
+}
+
void
md_begin (void)
{
@@ -196,6 +245,9 @@ md_begin (void)
cgen_bitset_set (bpf_isa, ISA_EBPFLE);
}
+ /* Ensure that lines can begin with '*' in BPF store pseudoc instruction. */
+ lex_type['*'] |= LEX_BEGIN_NAME;
+
/* Set the machine number and endian. */
gas_cgen_cpu_desc = bpf_cgen_cpu_open (CGEN_CPU_OPEN_ENDIAN,
target_big_endian ?
@@ -212,6 +264,7 @@ md_begin (void)
/* Set the machine type. */
bfd_default_set_arch_mach (stdoutput, bfd_arch_bpf, bfd_mach_bpf);
+ init_pseudoc_lex();
}
valueT
@@ -362,12 +415,1456 @@ md_apply_fix (fixS *fixP, valueT *valP, segT seg)
gas_cgen_md_apply_fix (fixP, valP, seg);
}
+/*
+ The BPF pseudo grammar:
+
+ instruction : bpf_alu_insn
+ | bpf_alu32_insn
+ | bpf_jump_insn
+ | bpf_load_store_insn
+ | bpf_load_store32_insn
+ | bpf_non_generic_load
+ | bpf_endianness_conv_insn
+ | bpf_64_imm_load_insn
+ | bpf_atomic_insn
+ ;
+
+ bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32
+ ;
+
+ bpf_alu32_insn : BPF_REG32 bpf_alu_operator register32_or_imm32
+ ;
+
+ bpf_jump_insn : BPF_JA offset
+ | IF BPF_REG bpf_jump_operator register_or_imm32 BPF_JA offset
+ | IF BPF_REG32 bpf_jump_operator register_or_imm32 BPF_JA offset
+ | BPF_CALL offset
+ | BPF_EXIT
+ ;
+
+ bpf_load_store_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \
+ register_and_offset BPF_CHR_CLSE_BR
+ | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG
+ ;
+
+ bpf_load_store32_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \
+ register32_and_offset BPF_CHR_CLSE_BR
+ | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG32
+ ;
+
+ bpf_non_generic_load : BPF_REG_R0 CHR_EQUAL bpf_size_cast BPF_LD BPF_CHR_OPEN_BR \
+ imm32 BPF_CHR_CLSE_BR
+ ;
+
+ bpf_endianness_conv_insn : BPF_REG_N bpf_endianness_mnem BPF_REG_N
+ ;
+
+ bpf_64_imm_load_insn : BPF_REG imm64 BPF_LL
+ ;
+
+ bpf_atomic_insn : BPF_LOCK bpf_size_cast_32_64 register_and_offset BPF_ADD BPF_REG
+ ;
+
+ register_and_offset : BPF_CHR_OPEN_BR BPF_REG offset BPF_CHR_CLSE_BR
+ ;
+
+ register32_and_offset : BPF_CHR_OPEN_BR BPF_REG32 offset BPF_CHR_CLSE_BR
+ ;
+
+ bpf_size_cast : CHR_STAR BPF_CHR_OPEN_BR bpf_size CHR_STAR BPF_CHR_CLSE_BR
+ ;
+
+ bpf_size_cast_32_64 : CHR_STAR BPF_CHR_OPEN_BR bpf_size_32_64 CHR_STAR BPF_CHR_CLSE_BR
+ ;
+
+ bpf_size_32_64 : BPF_CAST_U32
+ | BPF_CAST_U64
+ ;
+
+ bpf_size : BPF_CAST_U8
+ | BPF_CAST_U16
+ | BPF_CAST_U32
+ | BPF_CAST_U64
+ ;
+
+ bpf_jump_operator : BPF_JEQ
+ | BPF_JGT
+ | BPF_JGE
+ | BPF_JNE
+ | BPF_JSGT
+ | BPF_JSGE
+ | BPF_JLT
+ | BPF_JLE
+ | BPF_JSLT
+ | BPF_JSLE
+ | BPF_JSET
+ ;
+
+ bpf_alu_operator : BPF_ADD
+ | BPF_SUB
+ | BPF_MUL
+ | BPF_DIV
+ | BPF_OR
+ | BPF_AND
+ | BPF_LSH
+ | BPF_RSH
+ | BPF_NEG
+ | BPF_MOD
+ | BPF_XOR
+ | BPF_ARSH
+ | CHR_EQUAL
+ ;
+
+ bpf_endianness_mnem : BPF_LE16
+ | BPF_LE32
+ | BPF_LE64
+ | BPF_BE16
+ | BPF_BE32
+ | BPF_BE64
+ ;
+
+ offset : BPF_EXPR
+ | BPF_SYMBOL
+ ;
+
+ register_or_imm32 : BPF_REG
+ | expression
+ ;
+
+ register32_or_imm32 : BPF_REG32
+ | expression
+ ;
+
+ imm32 : BPF_EXPR
+ | BPF_SYMBOL
+ ;
+
+ imm64 : BPF_EXPR
+ | BPF_SYMBOL
+ ;
+
+ register_or_expression : BPF_EXPR
+ | BPF_REG
+ ;
+
+ BPF_EXPR : GAS_EXPR
+
+*/
+
+/* Token types produced by get_token and consumed by
+   bpf_pseudoc_to_normal_syntax.  The order of the enumerators inside
+   each group matters: the build_bpf_* helpers index their mnemonic and
+   size tables with `value - first_of_group'.  */
+enum bpf_token_type
+ {
+ /* ALU operators.  Keep grouped and in this order: they index mnem[] in
+    build_bpf_arithm_insn as `type - BPF_ADD'.  */
+ BPF_ADD,
+ BPF_SUB,
+ BPF_MUL,
+ BPF_DIV,
+ BPF_OR,
+ BPF_AND,
+ BPF_LSH,
+ BPF_RSH,
+ BPF_MOD,
+ BPF_XOR,
+ BPF_MOV,
+ BPF_ARSH,
+ BPF_NEG,
+
+ /* A register name: rN or wN.  */
+ BPF_REG,
+
+ /* The reserved words `if' and `goto'.  */
+ BPF_IF,
+ BPF_GOTO,
+
+ /* Jump operators.  Keep grouped and in this order: they index mnem[]
+    in build_bpf_jmp_insn as `op - BPF_JEQ'.  */
+ BPF_JEQ,
+ BPF_JGT,
+ BPF_JGE,
+ BPF_JLT,
+ BPF_JLE,
+ BPF_JSET,
+ BPF_JNE,
+ BPF_JSGT,
+ BPF_JSGE,
+ BPF_JSLT,
+ BPF_JSLE,
+
+ BPF_SYMBOL,
+ BPF_CHR_CLSE_BR,
+ BPF_CHR_OPEN_BR,
+
+ /* Size casts.  Keep grouped and in this order: they index the cast_rw[]
+    tables as `cast - BPF_CAST_U8' (or `cast - BPF_CAST_U32' for atomic
+    instructions).  */
+ BPF_CAST_U8,
+ BPF_CAST_U16,
+ BPF_CAST_U32,
+ BPF_CAST_U64,
+
+ /* Endianness conversions.  Keep grouped and in this order: they index
+    size[] in build_bpf_endianness as `endianness - BPF_LE16' or
+    `endianness - BPF_BE16'.  */
+ BPF_LE16,
+ BPF_LE32,
+ BPF_LE64,
+ BPF_BE16,
+ BPF_BE32,
+ BPF_BE64,
+
+ /* The reserved word `lock'.  */
+ BPF_LOCK,
+
+ /* The reserved words `callx', `skb' and `ll' respectively.  */
+ BPF_IND_CALL,
+ BPF_LD,
+ BPF_LL,
+ /* Text accepted as a GAS expression.  */
+ BPF_EXPR,
+ /* No valid token could be read.  */
+ BPF_UNKNOWN,
+ };
+
+/* Check whether the text starting at E parses as a GAS expression.
+   The end of the parsed text is stored in *END_EXPR.  The global
+   input_line_pointer is borrowed for the parse and restored before
+   returning.  Return nonzero if md_operand was not invoked during the
+   parse, i.e. invalid_expression is still NULL.  */
+
+static int
+valid_expr (const char *e, const char **end_expr)
+{
+  expressionS parsed;
+  char *saved_input = input_line_pointer;
+
+  /* md_operand sets this again if the expression gets rejected.  */
+  invalid_expression = NULL;
+
+  input_line_pointer = (char *) e;
+  deferred_expression (&parsed);
+  *end_expr = input_line_pointer;
+
+  input_line_pointer = saved_input;
+
+  if (invalid_expression != NULL)
+    return 0;
+  return 1;
+}
+
+/* Build the normal-syntax text of a non-generic load: `ldind<sz>' when a
+   source register SRC is given, `ldabs<sz>' when SRC is NULL.  CAST must
+   be one of BPF_CAST_U8 .. BPF_CAST_U64 and selects the size suffix.
+   The result is allocated with xasprintf.  */
+
+static char *
+build_bpf_non_generic_load (char *src, enum bpf_token_type cast,
+                            const char *imm32)
+{
+  static const char *cast_rw[] = {"b", "h", "w", "dw"};
+  const char *size_suffix = cast_rw[cast - BPF_CAST_U8];
+
+  if (src == NULL)
+    return xasprintf ("%s%s%s %s%s%s%s",
+                      "ld", "abs", size_suffix,
+                      "", "", "", imm32);
+
+  return xasprintf ("%s%s%s %s%s%s%s",
+                    "ld", "ind", size_suffix,
+                    "%", src, ",", imm32);
+}
+
+/* Build the normal-syntax text of an atomic instruction:
+   `xadd<sz> [%DST+OFFSET],%SRC'.
+
+   ATOMIC_INSN must be BPF_ADD, the only atomic operation with a mnemonic
+   here, and CAST must be BPF_CAST_U32 or BPF_CAST_U64.  Any other value
+   would index past the end of the lookup tables, so NULL is returned
+   instead of an instruction.  A `+' is inserted between the register and
+   OFFSET unless OFFSET already starts with one.  The result is allocated
+   with xasprintf.  */
+
+static char *
+build_bpf_atomic_insn (char *dst, char *src,
+                       enum bpf_token_type atomic_insn,
+                       enum bpf_token_type cast,
+                       const char *offset)
+{
+  static const char *cast_rw[] = {"w", "dw"};
+  static const char *mnem[] = {"xadd"};
+
+  if (atomic_insn != BPF_ADD
+      || (cast != BPF_CAST_U32 && cast != BPF_CAST_U64))
+    return NULL;
+
+  return xasprintf ("%s%s [%%%s%s%s],%%%s", mnem[atomic_insn - BPF_ADD],
+                    cast_rw[cast - BPF_CAST_U32], dst,
+                    *offset != '+' ? "+" : "",
+                    offset, src);
+}
+
+/* Build the normal-syntax text of a conditional jump.  OP must be one of
+   BPF_JEQ .. BPF_JSLE.  The second operand is the register SRC when it
+   is non-NULL, otherwise the immediate IMM32.  The target is OFFSET when
+   it is non-NULL, otherwise SYM.
+
+   A DST starting with `w' selects the 32-bit form of the mnemonic.  Note
+   that the first character of DST (and of SRC, if given) is overwritten
+   with `r'.  The result is allocated with xasprintf.  */
+
+static char *
+build_bpf_jmp_insn (char *dst, char *src,
+                    char *imm32, enum bpf_token_type op,
+                    const char *sym, const char *offset)
+{
+  static const char *mnem[] =
+    {
+      "jeq", "jgt", "jge", "jlt",
+      "jle", "jset", "jne", "jsgt",
+      "jsge", "jslt", "jsle"
+    };
+  const char *width_suffix = "";
+  const char *operand_prefix = "";
+  const char *operand = imm32;
+  const char *target = sym;
+
+  /* Must be looked at before DST is rewritten below.  */
+  if (*dst == 'w')
+    width_suffix = "32";
+
+  *dst = 'r';
+  if (src != NULL)
+    {
+      *src = 'r';
+      operand_prefix = "%";
+      operand = src;
+    }
+
+  if (offset != NULL)
+    target = offset;
+
+  return xasprintf ("%s%s %%%s,%s%s,%s",
+                    mnem[op - BPF_JEQ], width_suffix, dst,
+                    operand_prefix, operand, target);
+}
+
+/* Build the normal-syntax text of an ALU instruction.  TYPE must be one
+   of BPF_ADD .. BPF_NEG.
+
+   - BPF_NEG yields the one-operand form `neg[32] %DST'.
+   - A nonzero LOAD64 yields `lddw %DST,IMM32'.
+   - Otherwise the second operand is the register SRC when it is
+     non-NULL, or the immediate IMM32.
+
+   A DST starting with `w' selects the 32-bit form of the mnemonic.  Note
+   that the first character of DST (and of SRC, if given) is overwritten
+   with `r'.  The result is allocated with xasprintf.  */
+
+static char *
+build_bpf_arithm_insn (char *dst, char *src,
+                       int load64, const char *imm32,
+                       enum bpf_token_type type)
+{
+  static const char *mnem[] =
+    {
+      "add", "sub", "mul", "div",
+      "or", "and", "lsh", "rsh",
+      "mod", "xor", "mov", "arsh",
+      "neg",
+    };
+  const char *width_suffix = "";
+
+  /* Must be looked at before DST is rewritten below.  */
+  if (*dst == 'w')
+    width_suffix = "32";
+
+  *dst = 'r';
+  if (src != NULL)
+    *src = 'r';
+
+  if (type == BPF_NEG)
+    return xasprintf ("%s%s %%%s", mnem[type - BPF_ADD], width_suffix, dst);
+
+  if (load64)
+    return xasprintf ("%s %%%s,%s", "lddw", dst, imm32);
+
+  if (src != NULL)
+    return xasprintf ("%s%s %%%s,%s%s", mnem[type - BPF_ADD],
+                      width_suffix, dst, "%", src);
+
+  return xasprintf ("%s%s %%%s,%s%s", mnem[type - BPF_ADD],
+                    width_suffix, dst, "", imm32);
+}
+
+/* Build the normal-syntax text of an endianness conversion:
+   `endle %DST,<bits>' for BPF_LE16 .. BPF_LE64, `endbe %DST,<bits>' for
+   any other value, which the caller guarantees to be BPF_BE16 ..
+   BPF_BE64.  The result is allocated with xasprintf.  */
+
+static char *
+build_bpf_endianness (char *dst, enum bpf_token_type endianness)
+{
+  static const char *size[] = {"16", "32", "64"};
+
+  switch (endianness)
+    {
+    case BPF_LE16:
+    case BPF_LE32:
+    case BPF_LE64:
+      return xasprintf ("%s %%%s,%s", "endle",
+                        dst, size[endianness - BPF_LE16]);
+
+    default:
+      return xasprintf ("%s %%%s,%s", "endbe",
+                        dst, size[endianness - BPF_BE16]);
+    }
+}
+
+/* Build the normal-syntax text of a generic load or store.  CAST must be
+   one of BPF_CAST_U8 .. BPF_CAST_U64 and selects the size suffix.
+
+   With a nonzero ISLOAD the result is `ldx<sz> %DST,[%SRC+OFFSET]',
+   otherwise it is `stx<sz> [%DST+OFFSET],%SRC'.  A `+' is inserted before
+   OFFSET unless OFFSET already starts with one.  Note that the first
+   character of both DST and SRC is overwritten with `r'.  The result is
+   allocated with xasprintf.  */
+
+static char *
+build_bpf_load_store_insn (char *dst, char *src,
+                           enum bpf_token_type cast,
+                           const char *offset, int isload)
+{
+  static const char *cast_rw[] = {"b", "h", "w", "dw"};
+  const char *size_suffix = cast_rw[cast - BPF_CAST_U8];
+  const char *plus = "";
+
+  if (*offset != '+')
+    plus = "+";
+
+  *src = 'r';
+  *dst = 'r';
+
+  if (!isload)
+    return xasprintf ("%s%s [%%%s%s%s],%%%s", "stx",
+                      size_suffix, dst, plus, offset, src);
+
+  return xasprintf ("%s%s %%%s,[%%%s%s%s]", "ldx",
+                    size_suffix, dst, src, plus, offset);
+}
+
+/* Look TOKEN up in the table of pseudo-C reserved words.  On a match
+   store the corresponding token type in *TYPE and return 1.  Otherwise
+   return 0 and leave *TYPE untouched.  The comparison is case
+   sensitive.  */
+
+static int
+look_for_reserved_word (const char *token, enum bpf_token_type *type)
+{
+  static const struct reserved_word
+  {
+    const char *name;
+    enum bpf_token_type type;
+  } reserved_words[] =
+    {
+      { "if",    BPF_IF },
+      { "goto",  BPF_GOTO },
+      { "le16",  BPF_LE16 },
+      { "le32",  BPF_LE32 },
+      { "le64",  BPF_LE64 },
+      { "be16",  BPF_BE16 },
+      { "be32",  BPF_BE32 },
+      { "be64",  BPF_BE64 },
+      { "lock",  BPF_LOCK },
+      { "callx", BPF_IND_CALL },
+      { "skb",   BPF_LD },
+      { "ll",    BPF_LL },
+      /* End marker.  */
+      { NULL,    BPF_ADD },
+    };
+  const struct reserved_word *word;
+
+  for (word = reserved_words; word->name != NULL; word++)
+    {
+      if (strcmp (word->name, token) != 0)
+        continue;
+
+      *type = word->type;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Return 1 if the first LEN characters of TOKEN name a BPF register,
+   that is `r0' .. `r10' or `w0' .. `w10', and 0 otherwise.  */
+
+static int
+is_register (const char *token, int len)
+{
+  if (token[0] != 'r' && token[0] != 'w')
+    return 0;
+
+  /* The <ctype.h> functions are only defined for EOF and values
+     representable as unsigned char, so a plain char must be cast before
+     it is passed to isdigit.  */
+  if (len == 2 && isdigit ((unsigned char) token[1]))
+    return 1;
+
+  if (len == 3 && token[1] == '1' && token[2] == '0')
+    return 1;
+
+  return 0;
+}
+
+/* Map the size name TOKEN (`u8', `u16', `u32' or `u64') to the matching
+   BPF_CAST_* token type.  Return BPF_UNKNOWN for any other string.  */
+
+static enum bpf_token_type
+is_cast (const char *token)
+{
+  /* Same order as BPF_CAST_U8 .. BPF_CAST_U64.  */
+  static const char *cast_rw[] = {"u8", "u16", "u32", "u64"};
+  unsigned int idx = 0;
+
+  while (idx < ARRAY_SIZE (cast_rw))
+    {
+      if (strcmp (token, cast_rw[idx]) == 0)
+        return BPF_CAST_U8 + idx;
+      ++idx;
+    }
+
+  return BPF_UNKNOWN;
+}
+
+/* Copy the expression text [EXPR, END_EXPR) into TOKEN as a
+   NUL-terminated string and store its length in *LEN.  Return 1 on
+   success, or 0 (leaving TOKEN and *LEN untouched) when the text does
+   not fit in a buffer of MAX_TOKEN_SZ bytes.  */
+
+static int
+store_expr_token (char *token, size_t *len, const char *expr,
+                  const char *end_expr)
+{
+  size_t n = (size_t) (end_expr - expr);
+
+  if (n >= MAX_TOKEN_SZ)
+    return 0;
+
+  memcpy (token, expr, n);
+  token[n] = '\0';
+  *len = n;
+  return 1;
+}
+
+/* Copy the text at which the last expression parse failed (recorded in
+   invalid_expression) into TOKEN, truncated to fit MAX_TOKEN_SZ bytes,
+   so that the caller can quote it in a diagnostic.  */
+
+static void
+store_invalid_expr (char *token, size_t *len)
+{
+  size_t n = 0;
+
+  while (*invalid_expression != '\0' && n < MAX_TOKEN_SZ - 1)
+    token[n++] = *invalid_expression++;
+
+  token[n] = '\0';
+  *len = n;
+}
+
+/* Scan the next pseudo-C token from *INSN.
+
+   The text of the token is written into TOKEN, a caller supplied buffer
+   of MAX_TOKEN_SZ bytes that must be zero-filled on entry, and its
+   length into *TLEN.  When a token is recognized *INSN is advanced past
+   it and the token type is returned.
+
+   BPF_UNKNOWN is returned when no token can be recognized: the input
+   ends in the middle of a token, an expression is rejected, or the
+   token does not fit in TOKEN.  In that case *INSN is left unchanged,
+   except when an over-long expression is rejected.  Every other return
+   value consumes at least one character, so a caller that loops until
+   the end of the string always terminates.
+
+   The scanner is a small state machine; the comment next to each state
+   number shows the characters read so far, with the most recent one in
+   quotes.  */
+
+static enum bpf_token_type
+get_token (const char **insn, char *token, size_t *tlen)
+{
+#define GET() \
+  (*str == '\0' \
+   ? EOF \
+   : *(const unsigned char *)(str++))
+
+#define UNGET() (--str)
+
+#define START_EXPR() \
+  do \
+    { \
+      if (expr == NULL) \
+        expr = str - 1; \
+    } while (0)
+
+#define SCANNER_SKIP_WHITESPACE() \
+  do \
+    { \
+      do \
+        ch = GET (); \
+      while (ch != EOF \
+             && ((ch) == ' ' || (ch) == '\t')); \
+      if (ch != EOF) \
+        UNGET (); \
+    } while (0)
+
+  const char *str = *insn;
+  /* These hold the result of GET, so they must be able to represent EOF
+     in addition to every unsigned char value.  A plain char cannot: where
+     char is unsigned the comparison with EOF is never true.  */
+  int ch, ch2 = 0;
+  enum bpf_token_type ttype = BPF_UNKNOWN;
+  size_t len = 0;
+  const char *expr = NULL;
+  const char *end_expr = NULL;
+  int state = 0;
+  int return_token = 0;
+
+  while (1)
+    {
+      ch = GET ();
+
+      /* Always leave room for one more character plus the final NUL.  */
+      if (ch == EOF || len >= MAX_TOKEN_SZ - 1)
+        break;
+
+      switch (pseudoc_lex[(unsigned char) ch])
+        {
+        case LEX_IS_WHITESPACE:
+          SCANNER_SKIP_WHITESPACE ();
+          return_token = 1;
+
+          switch (state)
+            {
+            case 12: /* >' ' */
+              ttype = BPF_JGT;
+              break;
+
+            case 17: /* ==' ' */
+              ttype = BPF_JEQ;
+              break;
+
+            case 18: /* <' ' */
+              ttype = BPF_JLT;
+              break;
+
+            case 20: /* &' ' */
+              ttype = BPF_JSET;
+              break;
+
+            case 22: /* s<' ' */
+              ttype = BPF_JSLT;
+              break;
+
+            case 14: /* s>' ' */
+              ttype = BPF_JSGT;
+              break;
+
+            case 16: /* =' ' */
+              ttype = BPF_MOV;
+              break;
+
+            default:
+              return_token = 0;
+            }
+          break;
+
+        case LEX_IS_EXCLA:
+          token[len++] = (char) ch;
+          state = 21;
+          break;
+
+        case LEX_IS_ARITHM_OP:
+          if (state == 16)
+            {
+              /* ='-' is handled as '='.  */
+              UNGET ();
+              ttype = BPF_MOV;
+              return_token = 1;
+              break;
+            }
+
+          START_EXPR ();
+          token[len++] = (char) ch;
+          switch (ch)
+            {
+#define BPF_ARITHM_OP(op, type) \
+            case (op): \
+              state = 6; \
+              ttype = (type); \
+              break;
+
+              BPF_ARITHM_OP('+', BPF_ADD);
+              BPF_ARITHM_OP('-', BPF_SUB);
+              /* '*' is classified as LEX_IS_STAR, so it never gets
+                 here.  */
+              BPF_ARITHM_OP('*', BPF_MUL);
+              BPF_ARITHM_OP('/', BPF_DIV);
+              BPF_ARITHM_OP('|', BPF_OR);
+              BPF_ARITHM_OP('%', BPF_MOD);
+              BPF_ARITHM_OP('^', BPF_XOR);
+
+            case '&':
+              state = 20; /* '&' */
+              break;
+
+            case '<':
+              switch (state)
+                {
+                case 0:
+                  state = 18; /* '<' */
+                  break;
+
+                case 18:
+                  state = 19; /* <'<' */
+                  break;
+
+                case 8:
+                  state = 22; /* s'<' */
+                  break;
+                }
+              break;
+
+            case '>':
+              switch (state)
+                {
+                case 0:
+                  state = 12; /* '>' */
+                  break;
+
+                case 12:
+                  state = 13; /* >'>' */
+                  break;
+
+                case 8:
+                  state = 14; /* s'>' */
+                  break;
+
+                case 14:
+                  state = 15; /* s>'>' */
+                  break;
+                }
+              break;
+            }
+          break;
+
+        case LEX_IS_STAR:
+          switch (state)
+            {
+            case 0:
+              token[len++] = (char) ch;
+              START_EXPR ();
+              state = 2; /* '*', It could be the first cast char.  */
+              break;
+
+            case 16: /* ='*' Not valid token.  */
+              ttype = BPF_MOV;
+              return_token = 1;
+              UNGET ();
+              break;
+
+            case 4: /* *(uXX'*' */
+              token[len++] = (char) ch;
+              state = 5;
+              break;
+            }
+          break;
+
+        case LEX_IS_OPEN_BR:
+          START_EXPR ();
+          token[len++] = (char) ch;
+          return_token = 1;
+
+          switch (state)
+            {
+            case 2:
+              state = 3; /* *'(' second char of a cast or expr.  */
+              return_token = 0;
+              break;
+
+            case 6:
+              if (! valid_expr (expr, &end_expr))
+                {
+                  store_invalid_expr (token, &len);
+                  ttype = BPF_UNKNOWN;
+                }
+              else if (store_expr_token (token, &len, expr, end_expr))
+                {
+                  ttype = BPF_EXPR;
+                  str = end_expr;
+                }
+              else
+                ttype = BPF_UNKNOWN; /* Expression too long.  */
+              break;
+
+            default:
+              ttype = BPF_CHR_OPEN_BR;
+              SCANNER_SKIP_WHITESPACE ();
+              ch2 = GET ();
+
+              if ((isdigit (ch2) || ch2 == '(')
+                  && valid_expr (expr, &end_expr))
+                {
+                  if (store_expr_token (token, &len, expr, end_expr))
+                    {
+                      ttype = BPF_EXPR;
+                      str = end_expr;
+                    }
+                  else
+                    ttype = BPF_UNKNOWN; /* Expression too long.  */
+                }
+              else if (ch2 != EOF)
+                /* GET consumes nothing at the end of the input, so
+                   rewinding there would step back onto the bracket
+                   itself and the caller would read it forever.  */
+                UNGET ();
+            }
+          break;
+
+        case LEX_IS_CLSE_BR:
+          token[len++] = (char) ch;
+
+          if (state == 0)
+            {
+              ttype = BPF_CHR_CLSE_BR;
+              return_token = 1;
+            }
+          else if (state == 5) /* *(uXX*')' */
+            return_token = 1;
+          break;
+
+        case LEX_IS_EQUAL:
+          token[len++] = (char) ch;
+          return_token = 1;
+
+          switch (state)
+            {
+            case 0:
+              state = 16; /* '=' */
+              return_token = 0;
+              break;
+
+            case 16:
+              state = 17; /* ='=' */
+              return_token = 0;
+              break;
+
+            case 2: /* *'=' */
+              ttype = BPF_MUL;
+              break;
+
+            case 10: /* s>>'=' */
+              ttype = BPF_ARSH;
+              break;
+
+            case 12: /* >'=' */
+              ttype = BPF_JGE;
+              break;
+
+            case 13: /* >>'=' */
+              ttype = BPF_RSH;
+              break;
+
+            case 14: /* s>'=' */
+              ttype = BPF_JSGE;
+              break;
+
+            case 15: /* s>>'=' */
+              ttype = BPF_ARSH;
+              break;
+
+            case 18: /* <'=' */
+              ttype = BPF_JLE;
+              break;
+
+            case 19: /* <<'=' */
+              ttype = BPF_LSH;
+              break;
+
+            case 20: /* &'=' */
+              ttype = BPF_AND;
+              break;
+
+            case 21: /* !'=' */
+              ttype = BPF_JNE;
+              break;
+
+            case 22: /* s<'=' */
+              ttype = BPF_JSLE;
+              break;
+            }
+          break;
+
+        case LEX_IS_SYMBOL_COMPONENT:
+          return_token = 1;
+
+          switch (state)
+            {
+            case 17: /* =='sym' */
+              ttype = BPF_JEQ;
+              break;
+
+            case 12: /* >'sym' */
+              ttype = BPF_JGT;
+              break;
+
+            case 18: /* <'sym' */
+              ttype = BPF_JLT;
+              break;
+
+            case 20: /* &'sym' */
+              ttype = BPF_JSET;
+              break;
+
+            case 14: /* s>'sym' */
+              ttype = BPF_JSGT;
+              break;
+
+            case 22: /* s<'sym' */
+              ttype = BPF_JSLT;
+              break;
+
+            case 16: /* ='sym' */
+              ttype = BPF_MOV;
+              break;
+
+            default:
+              return_token = 0;
+            }
+
+          if (return_token)
+            {
+              /* The symbol starts the next token; give it back.  */
+              UNGET ();
+              break;
+            }
+
+          START_EXPR ();
+          token[len++] = (char) ch;
+
+          while ((ch2 = GET ()) != EOF)
+            {
+              if (pseudoc_lex[(unsigned char) ch2] != LEX_IS_SYMBOL_COMPONENT)
+                break;
+
+              if (len >= MAX_TOKEN_SZ - 1)
+                {
+                  /* The symbol does not fit in TOKEN.  */
+                  *tlen = len;
+                  return BPF_UNKNOWN;
+                }
+              token[len++] = (char) ch2;
+            }
+
+          if (ch2 != EOF)
+            UNGET ();
+
+          if (state == 0)
+            {
+              if (len == 1 && ch == 's')
+                state = 8; /* signed instructions: 's' */
+              else
+                {
+                  ttype = BPF_SYMBOL;
+                  if (is_register (token, len))
+                    ttype = BPF_REG;
+                  else if (look_for_reserved_word (token, &ttype))
+                    ;
+                  else if ((pseudoc_lex[(unsigned char) *token] == LEX_IS_ARITHM_OP
+                            || *token == '('
+                            || isdigit ((unsigned char) *token))
+                           && valid_expr (expr, &end_expr))
+                    {
+                      /* The expression may extend past the characters
+                         gathered above (e.g. `123+4'), so take the whole
+                         text rather than just its leading number.  */
+                      if (store_expr_token (token, &len, expr, end_expr))
+                        {
+                          /* Drop the blanks the expression parser
+                             skipped after the last operand.  */
+                          while (len > 0
+                                 && (token[len - 1] == ' '
+                                     || token[len - 1] == '\t'))
+                            token[--len] = '\0';
+
+                          ttype = BPF_EXPR;
+                          str = end_expr;
+                        }
+                      else
+                        ttype = BPF_UNKNOWN; /* Expression too long.  */
+                    }
+
+                  return_token = 1;
+                }
+            }
+          else if (state == 3) /* *('sym' */
+            {
+              if ((ttype = is_cast (&token[2])) != BPF_UNKNOWN)
+                state = 4; /* *('uXX' */
+              else
+                {
+                  ttype = BPF_EXPR;
+                  return_token = 1;
+                }
+            }
+          else if (state == 6)
+            {
+              if (ttype == BPF_SUB) /* neg */
+                {
+                  if (is_register (&token[1], len - 1))
+                    ttype = BPF_NEG;
+                  else if (! valid_expr (expr, &end_expr))
+                    {
+                      store_invalid_expr (token, &len);
+                      ttype = BPF_UNKNOWN;
+                    }
+                  else if (store_expr_token (token, &len, expr, end_expr))
+                    {
+                      ttype = BPF_EXPR;
+                      str = end_expr;
+                    }
+                  else
+                    ttype = BPF_UNKNOWN; /* Expression too long.  */
+                }
+              else if (valid_expr (expr, &end_expr)
+                       && store_expr_token (token, &len, expr, end_expr))
+                {
+                  ttype = BPF_EXPR;
+                  str = end_expr;
+                }
+              else
+                ttype = BPF_UNKNOWN;
+
+              return_token = 1;
+            }
+          break;
+        }
+
+      if (return_token)
+        {
+          *tlen = len;
+          *insn = str;
+          break;
+        }
+    }
+
+  if (! return_token)
+    {
+      /* The input ended, or the token grew too long, before a complete
+         token was read.  Nothing has been consumed, so reporting
+         anything other than BPF_UNKNOWN would make the caller see the
+         same partial token over and over again.  */
+      *tlen = len;
+      ttype = BPF_UNKNOWN;
+    }
+
+  return ttype;
+
+#undef GET
+#undef UNGET
+#undef START_EXPR
+#undef SCANNER_SKIP_WHITESPACE
+#undef BPF_ARITHM_OP
+}
+
+/*
+ The parser implements an FSM for the grammar described above.  So, for
+ example, the following rule:
+
+ ` bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32'
+
+ is parsed as follows:
+
+ 1. It starts in state 0.
+
+ 2. It consumes the next token, e.g. `BPF_REG', and sets the `state'
+ variable to a value that identifies what has been read so far, in this
+ case a register token.  Next to each new `state' value there is a
+ comment showing the pseudo-c tokens read so far, with the most recent
+ one surrounded by single quotes, e.g.:
+
+ state = 6; // dst_reg = str_cast ( 'src_reg'
+
+ So, in `state 6' the scanner has consumed: a destination register
+ (BPF_REG), an equal character (BPF_MOV), a cast token (BPF_CAST), an
+ open parenthesis (BPF_CHR_OPEN_BR) and the source register (BPF_REG).
+
+ 3. If the accumulated tokens represent a complete BPF pseudo-c syntax
+ instruction, the terms are validated, for example: whether the
+ registers have the same size (32/64 bits), whether a specific
+ destination register must be used, etc.  After that, a builder:
+ build_bpf_{non_generic_load,atomic_insn,jmp_insn,arithm_insn,endianness,load_store_insn}
+ is invoked.  Internally, it translates the BPF pseudo-c instruction to
+ a BPF GAS instruction using the terms previously collected by the
+ scanner.
+
+ 4. If a BPF GAS instruction was built successfully, the state is set
+ to the final state `ST_EOI' (End Of Instruction), meaning that no more
+ tokens are expected in that instruction.  Otherwise, if the conditions
+ for calling a builder are not satisfied, an error is emitted and
+ `parse_err' is set.
+*/
+
+/* Translate the pseudo-C instruction STR into the normal BPF assembly
+   syntax.
+
+   On success the translated instruction is returned in a buffer
+   allocated with xasprintf, which the caller must free, and *ERRMSG is
+   set to NULL.  On failure NULL is returned and *ERRMSG points to an
+   allocated description of the first syntax error, which the caller
+   must free as well.  An instruction is never returned together with an
+   error message.  *ERRMSG can also be NULL on failure, when a builder
+   rejects its operands without a more specific diagnostic.
+
+   The function is a state machine driven by the tokens returned by
+   get_token; the comment next to each state number shows the tokens
+   read so far, with the most recent one in quotes.  */
+
+static char *
+bpf_pseudoc_to_normal_syntax (const char *str, char **errmsg)
+{
+#define syntax_err(format, ...) \
+  do \
+    { \
+      if (! parse_err) \
+        { \
+          parse_err = 1; \
+          errbuf = xasprintf (format, ##__VA_ARGS__); \
+        } \
+    } while (0)
+
+  enum bpf_token_type ttype;
+  enum bpf_token_type bpf_endianness = BPF_LE16; /* Arbitrary.  */
+  enum bpf_token_type bpf_atomic_insn = BPF_ADD; /* Arbitrary.  */
+  enum bpf_token_type bpf_jmp_op = BPF_JEQ; /* Arbitrary.  */
+  enum bpf_token_type bpf_cast = BPF_CAST_U8; /* Arbitrary.  */
+  enum bpf_token_type bpf_arithm_op = BPF_ADD; /* Arbitrary.  */
+  char *bpf_insn = NULL;
+  char *errbuf = NULL;
+  /* Register names are at most three characters long (`r10'), plus the
+     terminating NUL.  */
+  char src_reg[4] = {0};
+  char dst_reg[4] = {0};
+  /* Every buffer that receives a copy of a token is as large as the
+     token buffer itself.  */
+  char str_imm32[MAX_TOKEN_SZ] = {0};
+  char str_offset[MAX_TOKEN_SZ] = {0};
+  char str_symbol[MAX_TOKEN_SZ] = {0};
+  char token[MAX_TOKEN_SZ] = {0};
+  int state = 0;
+  int parse_err = 0;
+  /* get_token does not necessarily set this when it fails.  */
+  size_t tlen = 0;
+
+  while (*str)
+    {
+      ttype = get_token (&str, token, &tlen);
+      if (ttype == BPF_UNKNOWN || state == ST_EOI)
+        {
+          syntax_err ("unexpected token: '%s'", token);
+          break;
+        }
+
+      /* The copies below move TLEN bytes into buffers of MAX_TOKEN_SZ
+         bytes and rely on the last byte staying NUL.  */
+      if (tlen >= MAX_TOKEN_SZ)
+        {
+          syntax_err ("token too long");
+          break;
+        }
+
+      switch (ttype)
+        {
+        case BPF_UNKNOWN:
+        case BPF_LL:
+          break;
+
+        case BPF_REG:
+          switch (state)
+            {
+            case 0:
+              memcpy (dst_reg, token, tlen);
+              state = 1; /* 'dst_reg' */
+              break;
+
+            case 3:
+              /* dst_reg bpf_op 'src_reg' */
+              memcpy (src_reg, token, tlen);
+              if (*dst_reg == *src_reg)
+                bpf_insn = build_bpf_arithm_insn (dst_reg, src_reg, 0,
+                                                  NULL, bpf_arithm_op);
+              else
+                {
+                  syntax_err ("different register sizes: '%s', '%s'",
+                              dst_reg, src_reg);
+                  break;
+                }
+              state = ST_EOI;
+              break;
+
+            case 5:
+              memcpy (src_reg, token, tlen);
+              state = 6; /* dst_reg = str_cast ( 'src_reg' */
+              break;
+
+            case 9:
+              memcpy (dst_reg, token, tlen);
+              state = 10; /* str_cast ( 'dst_reg' */
+              break;
+
+            case 11:
+              /* str_cast ( dst_reg offset ) = 'src_reg' */
+              memcpy (src_reg, token, tlen);
+              bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg,
+                                                    bpf_cast, str_offset, 0);
+              state = ST_EOI;
+              break;
+
+            case 14:
+              memcpy (dst_reg, token, tlen);
+              state = 15; /* if 'dst_reg' */
+              break;
+
+            case 16:
+              memcpy (src_reg, token, tlen);
+              state = 17; /* if dst_reg jmp_op 'src_reg' */
+              break;
+
+            case 24:
+              /* dst_reg = endianness src_reg */
+              memcpy (src_reg, token, tlen);
+              if (*dst_reg == 'r' && !strcmp (dst_reg, src_reg))
+                bpf_insn = build_bpf_endianness (dst_reg, bpf_endianness);
+              else
+                syntax_err ("invalid operand for instruction: '%s'", token);
+
+              state = ST_EOI;
+              break;
+
+            case 28:
+              memcpy (dst_reg, token, tlen);
+              state = 29; /* lock str_cast ( 'dst_reg' */
+              break;
+
+            case 32:
+              {
+                /* lock str_cast ( dst_reg offset ) atomic_insn 'src_reg' */
+                int with_offset = *str_offset != '\0';
+
+                memcpy (src_reg, token, tlen);
+                if ((bpf_cast != BPF_CAST_U32
+                     && bpf_cast != BPF_CAST_U64)
+                    || *dst_reg != 'r'
+                    || *src_reg != 'r')
+                  syntax_err ("invalid wide atomic instruction");
+                else
+                  bpf_insn = build_bpf_atomic_insn (dst_reg, src_reg,
+                                                    bpf_atomic_insn,
+                                                    bpf_cast,
+                                                    with_offset
+                                                    ? str_offset
+                                                    : str_symbol);
+              }
+
+              state = ST_EOI;
+              break;
+
+            case 33:
+              /* callx 'dst_reg' */
+              bpf_insn = xasprintf ("%s %%%s", "call", token);
+              state = ST_EOI;
+              break;
+
+            case 35:
+              memcpy (src_reg, token, tlen);
+              state = 36; /* dst_reg = str_cast skb [ 'src_reg' */
+              break;
+            }
+          break;
+
+        case BPF_MOV:
+        case BPF_ADD:
+        case BPF_SUB:
+        case BPF_MUL:
+        case BPF_DIV:
+        case BPF_OR:
+        case BPF_AND:
+        case BPF_LSH:
+        case BPF_RSH:
+        case BPF_MOD:
+        case BPF_XOR:
+        case BPF_ARSH:
+        case BPF_NEG:
+          switch (state)
+            {
+            case 1:
+              state = 3; /* dst_reg 'arith_op' */
+              bpf_arithm_op = ttype;
+              break;
+
+            case 3:
+              if (ttype == BPF_NEG)
+                {
+                  /* reg = -reg */
+                  bpf_arithm_op = ttype;
+                  /* The token is `-' followed by a register name.  */
+                  memcpy (src_reg, token + 1, tlen - 1);
+                  if (strcmp (dst_reg, src_reg))
+                    {
+                      syntax_err ("found: '%s', expected: -%s", token, dst_reg);
+                      break;
+                    }
+
+                  bpf_insn = build_bpf_arithm_insn (dst_reg, src_reg, 0,
+                                                    NULL, bpf_arithm_op);
+                  state = ST_EOI;
+                }
+              break;
+
+            case 23:
+              /* A store is a plain assignment.  Any other operator here
+                 would otherwise be assembled as if it were `='.  */
+              if (ttype != BPF_MOV)
+                {
+                  syntax_err ("unexpected '%s'", token);
+                  state = ST_EOI;
+                  break;
+                }
+              state = 11; /* str_cast ( dst_reg offset ) '=' */
+              break;
+
+            case 12:
+              if (ttype == BPF_MOV)
+                state = 13; /* str_cast ( dst_reg offset ) '=' */
+              break;
+
+            case 31:
+              /* `+=' is the only atomic operation that
+                 build_bpf_atomic_insn has a mnemonic for.  */
+              if (ttype != BPF_ADD)
+                {
+                  syntax_err ("unexpected '%s'", token);
+                  state = ST_EOI;
+                  break;
+                }
+              bpf_atomic_insn = ttype;
+              state = 32; /* lock str_cast ( dst_reg offset ) 'atomic_insn' */
+              break;
+
+            default:
+              syntax_err ("unexpected '%s'", token);
+              state = ST_EOI;
+            }
+          break;
+
+        case BPF_CAST_U8:
+        case BPF_CAST_U16:
+        case BPF_CAST_U32:
+        case BPF_CAST_U64:
+          bpf_cast = ttype;
+          switch (state)
+            {
+            case 3:
+              state = 4; /* dst_reg = 'str_cast' */
+              break;
+
+            case 0:
+              state = 8; /* 'str_cast' */
+              break;
+
+            case 26:
+              state = 27; /* lock 'str_cast' */
+              break;
+            }
+          break;
+
+        case BPF_CHR_OPEN_BR:
+          switch (state)
+            {
+            case 4:
+              state = 5; /* dst_reg = str_cast '(' */
+              break;
+
+            case 8:
+              state = 9; /* str_cast '(' */
+              break;
+
+            case 27:
+              state = 28; /* lock str_cast '(' */
+              break;
+
+            case 34:
+              state = 35; /* dst_reg = str_cast skb '[' */
+              break;
+            }
+          break;
+
+        case BPF_CHR_CLSE_BR:
+          switch (state)
+            {
+            case 7:
+              /* dst_reg = str_cast ( imm32 ')' */
+              bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg,
+                                                    bpf_cast, str_imm32, 1);
+              state = ST_EOI;
+              break;
+
+            case 11:
+              state = 12; /* str_cast ( dst_reg imm32 ')' */
+              break;
+
+            case 21:
+              /* dst_reg = str_cast ( src_reg offset ')' */
+              bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg,
+                                                    bpf_cast, str_offset, 1);
+              state = ST_EOI;
+              break;
+
+            case 22:
+              state = 23; /* str_cast ( dst_reg offset ')' */
+              break;
+
+            case 30:
+              state = 31; /* lock str_cast ( dst_reg offset ')' */
+              break;
+
+            case 37:
+              /* dst_reg = str_cast skb [ src_reg imm32 ']' */
+              if (*dst_reg != 'w' && !strcmp ("r0", dst_reg))
+                bpf_insn = build_bpf_non_generic_load (*src_reg != '\0'
+                                                       ? src_reg : NULL,
+                                                       bpf_cast, str_imm32);
+              else
+                syntax_err ("invalid register operand: '%s'", dst_reg);
+
+              state = ST_EOI;
+              break;
+            }
+          break;
+
+        case BPF_EXPR:
+          switch (state)
+            {
+            case 3:
+              {
+                /* dst_reg bpf_arithm_op 'imm32' */
+                int load64 = 0;
+
+                memcpy (str_imm32, token, tlen);
+                memset (token, 0, sizeof token);
+
+                /* Look one token ahead for the `ll' suffix of a 64-bit
+                   immediate load.  */
+                if ((ttype = get_token (&str, token, &tlen)) == BPF_LL
+                    && bpf_arithm_op == BPF_MOV)
+                  load64 = 1;
+                else if (ttype != BPF_UNKNOWN)
+                  syntax_err ("unexpected token: '%s'", token);
+
+                if (load64 && *dst_reg == 'w')
+                  syntax_err ("unexpected register size: '%s'", dst_reg);
+
+                if (! parse_err)
+                  bpf_insn = build_bpf_arithm_insn (dst_reg, NULL, load64,
+                                                    str_imm32, bpf_arithm_op);
+                state = ST_EOI;
+              }
+              break;
+
+            case 18:
+              {
+                /* if dst_reg jmp_op src_reg goto 'offset' */
+                int with_src = *src_reg != '\0';
+
+                memcpy (str_offset, token, tlen);
+                if (with_src && *dst_reg != *src_reg)
+                  syntax_err ("different register size: '%s', '%s'",
+                              dst_reg, src_reg);
+                else
+                  bpf_insn = build_bpf_jmp_insn (dst_reg,
+                                                 with_src ? src_reg : NULL,
+                                                 with_src ? NULL: str_imm32,
+                                                 bpf_jmp_op, NULL, str_offset);
+                state = ST_EOI;
+              }
+              break;
+
+            case 19:
+              /* goto 'offset' */
+              memcpy (str_offset, token, tlen);
+              bpf_insn = xasprintf ("%s %s", "ja", str_offset);
+              state = ST_EOI;
+              break;
+
+            case 6:
+              memcpy (str_offset, token, tlen);
+              state = 21; /* dst_reg = str_cast ( src_reg 'offset' */
+              break;
+
+            case 10:
+              memcpy (str_offset, token, tlen);
+              state = 22; /* str_cast ( dst_reg 'offset' */
+              break;
+
+            case 16:
+              memcpy (str_imm32, token, tlen);
+              state = 25; /* if dst_reg jmp_op 'imm32' */
+              break;
+
+            case 29:
+              memcpy (str_offset, token, tlen);
+              state = 30; /* lock str_cast ( dst_reg 'offset' */
+              break;
+
+            case 34:
+              /* dst_reg = str_cast skb 'imm32' */
+              if (*dst_reg != 'w' && !strcmp ("r0", dst_reg))
+                {
+                  memcpy (str_imm32, token, tlen);
+                  bpf_insn = build_bpf_non_generic_load (*src_reg != '\0'
+                                                         ? src_reg : NULL,
+                                                         bpf_cast, str_imm32);
+                }
+              else
+                syntax_err ("invalid register operand: '%s'", dst_reg);
+
+              state = ST_EOI;
+              break;
+
+            case 36:
+              memcpy (str_imm32, token, tlen);
+              state = 37; /* dst_reg = str_cast skb [ src_reg 'imm32' */
+              break;
+            }
+          break;
+
+        case BPF_IF:
+          if (state == 0)
+            state = 14;
+          break;
+
+        case BPF_JSGT:
+        case BPF_JSLT:
+        case BPF_JSLE:
+        case BPF_JSGE:
+        case BPF_JGT:
+        case BPF_JGE:
+        case BPF_JLE:
+        case BPF_JSET:
+        case BPF_JNE:
+        case BPF_JLT:
+        case BPF_JEQ:
+          if (state == 15)
+            {
+              bpf_jmp_op = ttype;
+              state = 16; /* if dst_reg 'jmp_op' */
+            }
+          break;
+
+        case BPF_GOTO:
+          switch (state)
+            {
+            case 17:
+            case 25:
+              state = 18; /* if dst_reg jmp_op src_reg|imm32 'goto' */
+              break;
+
+            case 0:
+              state = 19;
+              break;
+            }
+          break;
+
+        case BPF_SYMBOL:
+          switch (state)
+            {
+            case 18:
+              {
+                /* if dst_reg jmp_op src_reg goto 'sym' */
+                int with_src = *src_reg != '\0';
+
+                memcpy (str_symbol, token, tlen);
+                if (with_src && *dst_reg != *src_reg)
+                  syntax_err ("different register size: '%s', '%s'",
+                              dst_reg, src_reg);
+                else
+                  bpf_insn = build_bpf_jmp_insn (dst_reg,
+                                                 with_src ? src_reg : NULL,
+                                                 with_src ? NULL: str_imm32,
+                                                 bpf_jmp_op, str_symbol, NULL);
+                state = ST_EOI;
+              }
+              break;
+
+            case 19:
+              /* goto 'sym' */
+              memcpy (str_symbol, token, tlen);
+              bpf_insn = xasprintf ("%s %s", "ja", str_symbol);
+              state = ST_EOI;
+              break;
+
+            case 0:
+              state = ST_EOI;
+              break;
+
+            case 3:
+              {
+                /* dst_reg arithm_op 'sym' */
+                int load64 = 0;
+
+                memcpy (str_symbol, token, tlen);
+                memset (token, 0, sizeof token);
+
+                /* Look one token ahead for the `ll' suffix of a 64-bit
+                   immediate load.  */
+                if ((ttype = get_token (&str, token, &tlen)) == BPF_LL
+                    && bpf_arithm_op == BPF_MOV)
+                  load64 = 1;
+                else if (ttype != BPF_UNKNOWN)
+                  syntax_err ("unexpected token: '%s'", token);
+
+                if (load64 && *dst_reg == 'w')
+                  syntax_err ("unexpected register size: '%s'", dst_reg);
+
+                if (! parse_err)
+                  bpf_insn = build_bpf_arithm_insn (dst_reg, NULL, load64,
+                                                    str_symbol, bpf_arithm_op);
+                state = ST_EOI;
+              }
+              break;
+            }
+          break;
+
+        case BPF_LE16:
+        case BPF_LE32:
+        case BPF_LE64:
+        case BPF_BE16:
+        case BPF_BE32:
+        case BPF_BE64:
+          bpf_endianness = ttype;
+          state = 24; /* dst_reg = 'endianness' */
+          break;
+
+        case BPF_LOCK:
+          state = 26;
+          break;
+
+        case BPF_IND_CALL:
+          state = 33;
+          break;
+
+        case BPF_LD:
+          state = 34; /* dst_reg = str_cast 'skb' */
+          break;
+        }
+
+      /* get_token expects a zero-filled buffer.  */
+      memset (token, 0, sizeof token);
+    }
+
+  if (state != ST_EOI)
+    syntax_err ("incomplete instruction");
+
+  if (parse_err && bpf_insn != NULL)
+    {
+      /* An instruction may have been built before the error was found,
+         e.g. when garbage follows a complete instruction.  Do not hand
+         out an instruction for input that has been rejected.  */
+      free (bpf_insn);
+      bpf_insn = NULL;
+    }
+
+  *errmsg = errbuf;
+  return bpf_insn;
+
+#undef syntax_err
+}
+
void
md_assemble (char *str)
{
const CGEN_INSN *insn;
char *errmsg;
+ char *a_errmsg;
CGEN_FIELDS fields;
+ char *normal;
#if CGEN_INT_INSN_P
CGEN_INSN_INT buffer[CGEN_MAX_INSN_SIZE / sizeof (CGEN_INT_INSN_P)];
@@ -378,11 +1875,26 @@ md_assemble (char *str)
gas_cgen_init_parse ();
insn = bpf_cgen_assemble_insn (gas_cgen_cpu_desc, str, &fields,
buffer, &errmsg);
-
if (insn == NULL)
{
- as_bad ("%s", errmsg);
- return;
+ normal = bpf_pseudoc_to_normal_syntax (str, &a_errmsg);
+ if (normal)
+ {
+ insn = bpf_cgen_assemble_insn (gas_cgen_cpu_desc, normal, &fields,
+ buffer, &a_errmsg);
+ xfree (normal);
+ }
+
+ if (insn == NULL)
+ {
+ as_bad ("%s", errmsg);
+ if (a_errmsg)
+ {
+ as_bad ("%s", a_errmsg);
+ xfree (a_errmsg);
+ }
+ return;
+ }
}
gas_cgen_finish_insn (insn, buffer, CGEN_FIELDS_BITSIZE (&fields),
@@ -393,6 +1905,7 @@ md_assemble (char *str)
+/* Hook for operands that need target-specific parsing.  Besides
+   deferring to gas_cgen_md_operand, it records a position in
+   invalid_expression: valid_expr clears that variable before parsing an
+   expression and treats a non-NULL value afterwards as "expression
+   rejected".  */
void
md_operand (expressionS *expressionP)
{
+ /* NOTE(review): presumably input_line_pointer - 1 is the character the
+    generic expression parser could not handle -- confirm against the
+    caller in expr.c.  */
+ invalid_expression = input_line_pointer - 1;
gas_cgen_md_operand (expressionP);
}
diff --git a/gas/config/tc-bpf.h b/gas/config/tc-bpf.h
index 1f7d76762f6..db604dbe8bc 100644
--- a/gas/config/tc-bpf.h
+++ b/gas/config/tc-bpf.h
@@ -51,3 +51,5 @@
/* The Linux kernel verifier expects NOPs to be encoded in this way;
a jump to offset 0 means jump to the next instruction. */
#define md_single_noop_insn "ja 0"
+
+/* Always nonzero: per the GAS internals documentation this tells the
+   generic reader that an instruction may contain an `=' character,
+   which the pseudo-C syntax (e.g. `r1 = r2') relies on.  */
+#define TC_EQUAL_IN_INSN(c, s) 1