diff options
author | Peter Johnson <peter@tortall.net> | 2001-10-28 23:39:14 +0000 |
---|---|---|
committer | Peter Johnson <peter@tortall.net> | 2001-10-28 23:39:14 +0000 |
commit | e7bab50cf222a9f017043f566a77d09e11b8121f (patch) | |
tree | 2059dad66fd90305f3ab65cc34398d161bbde94e /libyasm | |
parent | 121f93b0a0005e522204b17fc790289ed6bfe57f (diff) | |
download | yasm-e7bab50cf222a9f017043f566a77d09e11b8121f.tar.gz |
Parse NOSPLIT.
Move some work to subfunctions in expr so they can be used for both 16-bit
and 32-bit checking.
svn path=/trunk/yasm/; revision=295
Diffstat (limited to 'libyasm')
-rw-r--r-- | libyasm/bytecode.c | 21 | ||||
-rw-r--r-- | libyasm/bytecode.h | 1 | ||||
-rw-r--r-- | libyasm/expr.c | 308 | ||||
-rw-r--r-- | libyasm/expr.h | 8 |
4 files changed, 208 insertions, 130 deletions
diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index 124472c8..ee8125ab 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -66,6 +66,8 @@ struct effaddr { unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, 0xff if unknown */ + unsigned char nosplit; /* 1 if reg*2 should not be split into + reg+reg. (0 if not) */ }; struct immval { @@ -197,6 +199,7 @@ effaddr_new_reg(unsigned long reg) ea->need_modrm = 1; ea->valid_sib = 0; ea->need_sib = 0; + ea->nosplit = 0; return ea; } @@ -215,6 +218,7 @@ effaddr_new_expr(expr *expr_ptr) ea->valid_sib = 0; ea->need_sib = 0xff; /* we won't know until we know more about expr and the BITS/address override setting */ + ea->nosplit = 0; return ea; } @@ -232,6 +236,7 @@ effaddr_new_imm(immval *im_ptr, unsigned char im_len) ea->need_modrm = 0; ea->valid_sib = 0; ea->need_sib = 0; + ea->nosplit = 0; return ea; } @@ -292,6 +297,15 @@ SetEALen(effaddr *ptr, unsigned char len) ptr->len = len; } +void +SetEANosplit(effaddr *ptr, unsigned char nosplit) +{ + if (!ptr) + return; + + ptr->nosplit = nosplit; +} + effaddr * GetInsnEA(bytecode *bc) { @@ -556,9 +570,10 @@ bytecode_print(const bytecode *bc) else printf("(nil)"); printf("\n"); - printf(" Len=%u SegmentOv=%02x\n", + printf(" Len=%u SegmentOv=%02x NoSplit=%u\n", (unsigned int)bc->data.insn.ea->len, - (unsigned int)bc->data.insn.ea->segment); + (unsigned int)bc->data.insn.ea->segment, + (unsigned int)bc->data.insn.ea->nosplit); printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n", (unsigned int)bc->data.insn.ea->modrm, (unsigned int)bc->data.insn.ea->valid_modrm, @@ -686,7 +701,7 @@ bytecode_parser_finalize_insn(bytecode *bc) * displacement. */ if (!expr_checkea(&ea->disp, &bc->data.insn.addrsize, - bc->mode_bits, &ea->len, &ea->modrm, + bc->mode_bits, ea->nosplit, &ea->len, &ea->modrm, &ea->valid_modrm, &ea->need_modrm, &ea->sib, &ea->valid_sib, &ea->need_sib)) return; /* failed, don't bother checking rest of insn */ diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index 5f495838..fc164653 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -71,6 +71,7 @@ immval *immval_new_expr(expr *expr_ptr); void SetEASegment(effaddr *ptr, unsigned char segment); void SetEALen(effaddr *ptr, unsigned char len); +void SetEANosplit(effaddr *ptr, unsigned char nosplit); effaddr *GetInsnEA(bytecode *bc); diff --git a/libyasm/expr.c b/libyasm/expr.c index 6256f591..dc862135 100644 --- a/libyasm/expr.c +++ b/libyasm/expr.c @@ -421,7 +421,7 @@ expr_simplify_identity(expr *e, int numterms, int int_term) * Returns a possibly reallocated e. */ static expr * -expr_level_op(expr *e, int fold_const) +expr_level_op(expr *e, int fold_const, int simplify_ident) { int i, j, o, fold_numterms, level_numterms, level_fold_numterms; int first_int_term = -1; @@ -486,9 +486,12 @@ expr_level_op(expr *e, int fold_const) } } - /* Simplify identities and make IDENT if possible. */ - fold_numterms = expr_simplify_identity(e, fold_numterms, - first_int_term); + if (simplify_ident) + /* Simplify identities and make IDENT if possible. */ + fold_numterms = expr_simplify_identity(e, fold_numterms, + first_int_term); + else if (fold_numterms == 1) + e->op = EXPR_IDENT; } /* Only level operators that allow more than two operand terms. @@ -561,7 +564,7 @@ expr_level_op(expr *e, int fold_const) } /* Simplify identities, make IDENT if possible, and save to e->numterms. */ - if (first_int_term != -1) { + if (simplify_ident && first_int_term != -1) { e->numterms = expr_simplify_identity(e, level_numterms, first_int_term); } else { @@ -575,7 +578,7 @@ expr_level_op(expr *e, int fold_const) /* Level an entire expn tree */ static expr * -expr_level_tree(expr *e, int fold_const) +expr_level_tree(expr *e, int fold_const, int simplify_ident) { int i; @@ -586,11 +589,12 @@ expr_level_tree(expr *e, int fold_const) for (i=0; i<e->numterms; i++) { if (e->terms[i].type == EXPR_EXPR) e->terms[i].data.expn = expr_level_tree(e->terms[i].data.expn, - fold_const); + fold_const, + simplify_ident); } /* do callback */ - return expr_level_op(e, fold_const); + return expr_level_op(e, fold_const, simplify_ident); } /* Comparison function for expr_order_terms(). @@ -730,6 +734,29 @@ expr_contains(expr *e, ExprType t) return expr_traverse_leaves_in(e, &t, expr_contains_callback); } +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). + */ +static int * +expr_checkea_get_reg32(ExprItem *ei, void *d) +{ + int *data = d; + int *ret; + + /* don't allow 16-bit registers */ + if (ei->data.reg.size != 32) + return 0; + + ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */ + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return ret; +} + typedef struct checkea_invalid16_data { int bx, si, di, bp; /* total multiplier for each reg */ } checkea_invalid16_data; @@ -738,8 +765,9 @@ typedef struct checkea_invalid16_data { * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). */ static int * -expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data) +expr_checkea_get_reg16(ExprItem *ei, void *d) { + checkea_invalid16_data *data = d; /* in order: ax,cx,dx,bx,sp,bp,si,di */ static int *reg16[8] = {0,0,0,0,0,0,0,0}; int *ret; @@ -874,6 +902,116 @@ expr_checkea_distcheck_reg(expr **ep) return retval; } +/* Simplify and determine if expression is superficially valid: + * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...] + * where the [...] parts are optional. + * + * Don't simplify out constant identities if we're looking for an indexreg: we + * need the multiplier for determining what the indexreg is! + * + * Returns 0 if invalid register usage, 1 if unable to determine all values, + * and 2 if all values successfully determined and saved in data. + */ +static int +expr_checkea_getregusage(expr **ep, int *indexreg, void *data, + int *(*get_reg)(ExprItem *ei, void *d)) +{ + int i; + int *reg; + expr *e; + + *ep = expr_xform_neg_tree(*ep); + *ep = expr_level_tree(*ep, 1, indexreg == 0); + e = *ep; + switch (expr_checkea_distcheck_reg(ep)) { + case 0: + ErrorAt(e->filename, e->line, _("invalid effective address")); + return 0; + case 2: + /* Need to simplify again */ + *ep = expr_xform_neg_tree(*ep); + *ep = expr_level_tree(*ep, 1, indexreg == 0); + e = *ep; + break; + default: + break; + } + + switch (e->op) { + case EXPR_ADD: + /* Prescan for non-int multipliers. + * This is because if any of the terms is a more complex + * expr (eg, undetermined value), we don't want to try to + * figure out *any* of the expression, because each register + * lookup overwrites the register with a 0 value! And storing + * the state of this routine from one excution to the next + * would be a major chore. + */ + for (i=0; i<e->numterms; i++) + if (e->terms[i].type == EXPR_EXPR) { + if (e->terms[i].data.expn->numterms > 2) + return 1; + expr_order_terms(e->terms[i].data.expn); + if (e->terms[i].data.expn->terms[1].type != EXPR_INT) + return 1; + } + + /* FALLTHROUGH */ + case EXPR_IDENT: + /* Check each term for register (and possible multiplier). */ + for (i=0; i<e->numterms; i++) { + if (e->terms[i].type == EXPR_REG) { + reg = get_reg(&e->terms[i], data); + if (!reg) + return 0; + (*reg)++; + } else if (e->terms[i].type == EXPR_EXPR) { + /* Already ordered from ADD above, just grab the value. + * Sanity check for EXPR_INT. + */ + if (e->terms[i].data.expn->terms[0].type != EXPR_REG) + InternalError(__LINE__, __FILE__, + _("Register not found in reg expn")); + if (e->terms[i].data.expn->terms[1].type != EXPR_INT) + InternalError(__LINE__, __FILE__, + _("Non-integer value in reg expn")); + reg = get_reg(&e->terms[i].data.expn->terms[0], data); + if (!reg) + return 0; + (*reg) += + intnum_get_int(e->terms[i].data.expn->terms[1].data.intn); + if (indexreg) + *indexreg = + e->terms[i].data.expn->terms[0].data.reg.num; + } + } + break; + case EXPR_MUL: + /* Here, too, check for non-int multipliers. */ + if (e->numterms > 2) + return 1; + expr_order_terms(e); + if (e->terms[1].type != EXPR_INT) + return 1; + reg = get_reg(&e->terms[0], data); + if (!reg) + return 0; + (*reg) += intnum_get_int(e->terms[1].data.intn); + break; + default: + /* Should never get here! */ + break; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + *ep = expr_simplify(*ep); + /* e = *ep; */ + + return 2; +} + static int expr_checkea_getregsize_callback(ExprItem *ei, void *d) { @@ -888,15 +1026,14 @@ expr_checkea_getregsize_callback(ExprItem *ei, void *d) int expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, - unsigned char *displen, unsigned char *modrm, - unsigned char *v_modrm, unsigned char *n_modrm, - unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) + unsigned char nosplit, unsigned char *displen, + unsigned char *modrm, unsigned char *v_modrm, + unsigned char *n_modrm, unsigned char *sib, unsigned char *v_sib, + unsigned char *n_sib) { expr *e = *ep; const intnum *intn; long dispval; - int i; - int *reg; if (*addrsize == 0) { /* we need to figure out the address size from what we know about: @@ -927,8 +1064,25 @@ expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, } } - if (*addrsize == 32 && (*n_modrm || *n_sib)) { - } else if (*addrsize == 16 && *n_modrm) { + if (*addrsize == 32 && ((*n_modrm && !*v_modrm) || (*n_sib && !*v_sib))) { + int reg32mult[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + /*int basereg = 0;*/ /* "base" register (for SIB) */ + int indexreg = 0; /* "index" register (for SIB) */ + + switch (expr_checkea_getregusage(ep, &indexreg, reg32mult, + expr_checkea_get_reg32)) { + case 0: + e = *ep; + ErrorAt(e->filename, e->line, _("invalid effective address")); + return 0; + case 1: + return 1; + default: + e = *ep; + break; + } + + } else if (*addrsize == 16 && *n_modrm && !*v_modrm) { static const unsigned char modrm16[16] = { 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, @@ -937,7 +1091,7 @@ expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, 0377 /* invalid */ }; - checkea_invalid16_data data; + checkea_invalid16_data reg16mult = {0, 0, 0, 0}; enum { HAVE_NONE = 0, HAVE_BX = 1<<0, @@ -946,133 +1100,41 @@ expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, HAVE_BP = 1<<3 } havereg = HAVE_NONE; - data.bx = 0; - data.si = 0; - data.di = 0; - data.bp = 0; - /* 16-bit cannot have SIB */ *sib = 0; *v_sib = 0; *n_sib = 0; - /* Determine if expression is superficially valid: - * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...] - * where the [...] parts are optional. - * To check this, first look at top expn operator.. if it's not ADD or - * MUL, then no registers are valid for use. - */ - *ep = expr_simplify(*ep); - e = *ep; - switch (expr_checkea_distcheck_reg(ep)) { + switch (expr_checkea_getregusage(ep, (int *)NULL, ®16mult, + expr_checkea_get_reg16)) { case 0: + e = *ep; ErrorAt(e->filename, e->line, _("invalid effective address")); return 0; - case 2: - /* Need to simplify again */ - *ep = expr_simplify(*ep); - e = *ep; - break; - default: - break; - } - - switch (e->op) { - case EXPR_ADD: - /* Prescan for non-int multipliers. - * This is because if any of the terms is a more complex - * expr (eg, undetermined value), we don't want to try to - * figure out *any* of the expression, because each register - * lookup overwrites the register with a 0 value! And storing - * the state of this routine from one excution to the next - * would be a major chore. - */ - for (i=0; i<e->numterms; i++) - if (e->terms[i].type == EXPR_EXPR) { - if (e->terms[i].data.expn->numterms > 2) - return 1; - expr_order_terms(e->terms[i].data.expn); - if (e->terms[i].data.expn->terms[1].type != EXPR_INT) - return 1; - } - - /* FALLTHROUGH */ - case EXPR_IDENT: - /* Check each term for register (and possible multiplier). */ - for (i=0; i<e->numterms; i++) { - if (e->terms[i].type == EXPR_REG) { - reg = expr_checkea_get_reg16(&e->terms[i], &data); - if (!reg) { - ErrorAt(e->filename, e->line, - _("invalid effective address")); - return 0; - } - (*reg)++; - } else if (e->terms[i].type == EXPR_EXPR) { - /* Already ordered from ADD above, just grab the value. - * Sanity check for EXPR_INT. - */ - if (e->terms[i].data.expn->terms[0].type != EXPR_REG) - InternalError(__LINE__, __FILE__, - _("Register not found in reg expn")); - if (e->terms[i].data.expn->terms[1].type != EXPR_INT) - InternalError(__LINE__, __FILE__, - _("Non-integer value in reg expn")); - reg = - expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0], - &data); - if (!reg) { - ErrorAt(e->filename, e->line, - _("invalid effective address")); - return 0; - } - (*reg) += - intnum_get_int(e->terms[i].data.expn->terms[1].data.intn); - } - } - break; - case EXPR_MUL: - /* Here, too, check for non-int multipliers. */ - if (e->numterms > 2) - return 1; - expr_order_terms(e); - if (e->terms[1].type != EXPR_INT) - return 1; - reg = expr_checkea_get_reg16(&e->terms[0], &data); - if (!reg) { - ErrorAt(e->filename, e->line, - _("invalid effective address")); - return 0; - } - (*reg) += intnum_get_int(e->terms[1].data.intn); - break; + case 1: + return 1; default: - /* Should never get here! */ + e = *ep; break; } - /* negative reg multipliers are illegal. */ - if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) { + /* reg multipliers not 0 or 1 are illegal. */ + if (reg16mult.bx & ~1 || reg16mult.si & ~1 || reg16mult.di & ~1 || + reg16mult.bp & ~1) { ErrorAt(e->filename, e->line, _("invalid effective address")); return 0; } /* Set havereg appropriately */ - if (data.bx > 0) + if (reg16mult.bx > 0) havereg |= HAVE_BX; - if (data.si > 0) + if (reg16mult.si > 0) havereg |= HAVE_SI; - if (data.di > 0) + if (reg16mult.di > 0) havereg |= HAVE_DI; - if (data.bp > 0) + if (reg16mult.bp > 0) havereg |= HAVE_BP; - /* Simplify expr, which is now really just the displacement. This - * should get rid of the 0's we put in for registers in the callback. - */ - *ep = expr_simplify(*ep); - e = *ep; - /* Check the modrm value for invalid combinations. */ if (modrm16[havereg] & 0070) { ErrorAt(e->filename, e->line, _("invalid effective address")); @@ -1260,7 +1322,7 @@ expr * expr_simplify(expr *e) { e = expr_xform_neg_tree(e); - e = expr_level_tree(e, 1); + e = expr_level_tree(e, 1, 1); return e; } diff --git a/libyasm/expr.h b/libyasm/expr.h index 9a9694f5..25266d59 100644 --- a/libyasm/expr.h +++ b/libyasm/expr.h @@ -95,10 +95,10 @@ expr *expr_copy(const expr *e); void expr_delete(expr *e); int expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, - unsigned char *displen, unsigned char *modrm, - unsigned char *v_modrm, unsigned char *n_modrm, - unsigned char *sib, unsigned char *v_sib, - unsigned char *n_sib); + unsigned char nosplit, unsigned char *displen, + unsigned char *modrm, unsigned char *v_modrm, + unsigned char *n_modrm, unsigned char *sib, + unsigned char *v_sib, unsigned char *n_sib); /* Expands all (symrec) equ's in the expression into full expression * instances. |