{
    Copyright (c) 2008 by Florian Klaempfl
    Member of the Free Pascal development team

    This unit implements the code generator for the AVR

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit cgcpu;

{$i fpcdefs.inc}

  interface

    uses
       globtype,symtype,symdef,
       cgbase,cgutils,cgobj,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       parabase,
       cpubase,cpuinfo,node,cg64f32,rgcpu;

    type

      { tcgavr }

      { AVR specialisation of the generic code generator class tcg.
        Multi-byte values are handled one 8 bit register at a time; the
        *_internal methods additionally take the registers holding the upper
        half of a 64 bit value (srchi/dsthi/reghi), or NR_NO if there is none. }
      tcgavr = class(tcg)
        { true, if the next arithmetic operation should modify the flags }
        cgsetflags : boolean;

        { register allocator set-up: the integer allocator is offered R18..R25
          on CPUs with CPUAVR_16_REGS and R2..R25 otherwise }
        procedure init_register_allocators;override;
        { frees the integer register allocator, then calls the inherited method }
        procedure done_register_allocators;override;

        { address registers are plain integer registers of size OS_ADDR }
        function getaddressregister(list:TAsmList):TRegister;override;
        { returns GetNextReg(r), i.e. the register following r }
        function GetHigh(const r : TRegister) : TRegister;inline;
        { returns the register ofs positions after r }
        function GetOffsetReg(const r: TRegister;ofs : shortint): TRegister;override;
        { like GetOffsetReg, but offsets above 3 are taken relative to rhi
          (as rhi+ofs-4), so a register pair r/rhi can be indexed by byte }
        function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;override;

        { parameter passing }
        procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
        { loads the address of r into a temporary address register and passes
          that register on through a_load_reg_cgpara }
        procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
        procedure a_load_reg_cgpara(list : TAsmList; size : tcgsize;r : tregister; const cgpara : tcgpara);override;

        { calls: a_call_name emits CALL if the CPU has CPUAVR_HAS_JMP_CALL and
          RCALL otherwise; a_call_reg copies reg and GetHigh(reg) into
          NR_ZLO/NR_ZHI and emits ICALL. Both set pi_do_call on the current
          procedure. }
        procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
        procedure a_call_reg(list : TAsmList;reg: tregister);override;

        { arithmetic/logic operations; a_op_const_reg and a_op_reg_reg accept
          only 8, 16 and 32 bit sizes and raise an internalerror otherwise }
        procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
        procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister); override;
        procedure a_op_const_reg_reg(list : TAsmList;op : TOpCg;size : tcgsize; a : tcgint;src,dst : tregister); override;
        { for OP_MUL/OP_IMUL with setflags these go through gen_multiply (the
          constant variant first loads the constant into a temporary register);
          every other case calls the inherited method and reports
          ovloc.loc=LOC_FLAGS }
        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister; setflags: boolean; var ovloc: tlocation); override;
        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation); override;

        { move instructions }
        procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
        procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
        procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
        procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;

        { fpu move instructions }
        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

        { comparison operations }
        procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister; l : tasmlabel);override;
        procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;

        { jumps and flag handling }
        procedure a_jmp_name(list : TAsmList;const s : string); override;
        procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
        procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;

        procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;

        { procedure entry/exit code }
        procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
        procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;

        procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

        { block copies }
        procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;
        procedure g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);

        { overflow checking }
        procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef); override;
        procedure g_overflowCheck_loc(List: TAsmList; const Loc: TLocation; def: TDef; ovloc: tlocation); override;

        procedure g_save_registers(list : TAsmList);override;
        procedure g_restore_registers(list : TAsmList);override;

        { AVR specific helpers (not overrides of tcg methods) }
        procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
        procedure fixref(list : TAsmList;var ref : treference);
        { returns a copy of ref reduced to a single base register (tmpreg is
          used when an address has to be computed), without symbol or offset
          in the cases that needed a computation }
        function normalize_ref(list : TAsmList;ref : treference; tmpreg : tregister) : treference;
        procedure emit_mov(list: TAsmList;reg2: tregister; reg1: tregister);
        procedure a_adjust_sp(list: TAsmList; value: longint);
        { select the load/store opcode for a reference: LDS/STS if it has no
          base and no index register, LDD/STD if it has a base register and a
          non-zero offset, LD/ST otherwise }
        function GetLoad(const ref : treference) : tasmop;
        function GetStore(const ref: treference): tasmop;
        { emits a multiplication for op; check_overflow requests overflow
          detection and ovloc receives where the overflow state can be found }
        procedure gen_multiply(list: TAsmList; op: topcg; size: TCgSize; src2, src1, dst: tregister; check_overflow: boolean; var ovloc: tlocation);
      private
        procedure a_op_const_reg_reg_internal(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, srchi, dst, dsthi: tregister);
      protected
        procedure a_op_reg_reg_internal(list: TAsmList; Op: TOpCG; size: TCGSize; src, srchi, dst, dsthi: TRegister);
        procedure a_op_const_reg_internal(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg, reghi: TRegister);
        procedure maybegetcpuregister(list : tasmlist; reg : tregister);
      end;
tcg64favr = class(tcg64f32)
        { 64 bit operations, overriding the generic tcg64f32 versions }
        procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
        procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
        procedure a_op64_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; value: int64;src,dst: tregister64);override;
      end;

    procedure create_codegen;

    const
      { AVR opcode for every generic operation, indexed by topcg;
        A_NONE marks the operations that have no entry in this table }
      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,
                            A_NONE,A_MULS,A_MUL,A_NEG,A_COM,A_OR,
                            A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_ROL,A_ROR);

  implementation

    uses
       globals,verbose,systems,cutils,
       fmodule,
       symconst,symsym,symtable,
       tgobj,rgobj,
       procinfo,cpupi,
       paramgr;


    { Creates the integer register allocator. CPUs flagged CPUAVR_16_REGS are
      only offered R18..R25; all other CPUs additionally get R2..R17. }
    procedure tcgavr.init_register_allocators;
      begin
        inherited init_register_allocators;
        if not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
              [RS_R18,RS_R19,RS_R20,RS_R21,RS_R22,RS_R23,RS_R24,RS_R25,
               RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,
               RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15,RS_R16,RS_R17],first_int_imreg,[])
        else
          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
              [RS_R18,RS_R19,RS_R20,RS_R21,RS_R22,RS_R23,RS_R24,RS_R25],first_int_imreg,[]);
      end;


    { Releases the integer register allocator before the inherited clean-up. }
    procedure tcgavr.done_register_allocators;
      begin
        rg[R_INTREGISTER].free;
        // rg[R_ADDRESSREGISTER].free;
        inherited done_register_allocators;
      end;


    { An address register is simply an integer register of size OS_ADDR. }
    function tcgavr.getaddressregister(list: TAsmList): TRegister;
      begin
        Result:=getintregister(list,OS_ADDR);
      end;


    { The "high" register of r is the register following it. }
    function tcgavr.GetHigh(const r : TRegister) : TRegister;
      begin
        Result:=GetNextReg(r);
      end;


    { Register located ofs positions after r. }
    function tcgavr.GetOffsetReg(const r: TRegister;ofs : shortint): TRegister;
      begin
        Result:=TRegister(longint(r)+ofs);
      end;


    { Byte-wise indexing into the register pair r/rhi: offsets up to 3 are
      counted from r, larger offsets are counted from rhi (minus 4). }
    function tcgavr.GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
      begin
        if ofs<=3 then
          Result:=TRegister(longint(r)+ofs)
        else
          Result:=TRegister(longint(rhi)+ofs-4);
      end;


    procedure tcgavr.a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);

      procedure
load_para_loc(r : TRegister;paraloc : PCGParaLocation); var ref : treference; begin paramanager.allocparaloc(list,paraloc); case paraloc^.loc of LOC_REGISTER,LOC_CREGISTER: a_load_reg_reg(list,paraloc^.size,paraloc^.size,r,paraloc^.register); LOC_REFERENCE,LOC_CREFERENCE: begin reference_reset_base(ref,paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,2,[]); if ref.base<>NR_STACK_POINTER_REG then Internalerror(2020011801); { as AVR allows no stack indirect addressing, everything else than a push makes no sense } list.concat(taicpu.op_reg(A_PUSH,r)); end; else internalerror(2002071004); end; end; var i, i2 : longint; hp : PCGParaLocation; begin if not(tcgsize2size[cgpara.Size] in [1..4]) then internalerror(2014011101); hp:=cgpara.location; i:=0; while i1) or (hp^.shiftval<>0) then internalerror(2015041101); a_load_const_reg(list,hp^.size,(a shr (8*(i-1))) and $ff,hp^.register); inc(i,tcgsize2size[hp^.size]); hp:=hp^.Next; end; LOC_REFERENCE,LOC_CREFERENCE: begin for j:=1 to tcgsize2size[hp^.size] do begin tmpreg:=getintregister(list,OS_8); a_load_const_reg(list,OS_8,(a shr (8*(i-1+j-1))) and $ff,tmpreg); { as AVR allows no stack indirect addressing, everything else than a push makes no sense } list.concat(taicpu.op_reg(A_PUSH,tmpreg)); end; inc(i,tcgsize2size[hp^.size]); hp:=hp^.Next; end; else internalerror(2002071004); end; end; end; procedure tcgavr.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara); var tmpref, ref: treference; location: pcgparalocation; sizeleft: tcgint; i: Integer; tmpreg: TRegister; begin location := paraloc.location; tmpref := r; sizeleft := paraloc.intsize; while assigned(location) do begin paramanager.allocparaloc(list,location); case location^.loc of LOC_REGISTER,LOC_CREGISTER: a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register); LOC_REFERENCE: begin ref:=tmpref; for i:=1 to sizeleft do begin tmpreg:=getintregister(list,OS_8); 
a_load_ref_reg(list,OS_8,OS_8,tmpref,tmpreg);
                    { as AVR allows no stack indirect addressing, everything else than a push makes no sense }
                    list.concat(taicpu.op_reg(A_PUSH,tmpreg));
                    inc(tmpref.offset);
                  end;
              end;
            LOC_VOID:
              begin
                // nothing to do
              end;
            else
              internalerror(2002081103);
          end;
          inc(tmpref.offset,tcgsize2size[location^.size]);
          dec(sizeleft,tcgsize2size[location^.size]);
          location := location^.next;
        end;
      end;


    { Passes the address of r as a parameter: the address is first computed
      into a fresh address register, which is then handed to
      a_load_reg_cgpara with size OS_ADDR. }
    procedure tcgavr.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
      var
        addrreg: tregister;
      begin
        addrreg:=getaddressregister(list);
        a_loadaddr_ref_reg(list,r,addrreg);
        a_load_reg_cgpara(list,OS_ADDR,addrreg,paraloc);
      end;


    { Emits a call to the symbol named s. A weak reference to the symbol is
      used when weak is set. The opcode is CALL on CPUs with
      CPUAVR_HAS_JMP_CALL and RCALL on all others. The current procedure is
      flagged with pi_do_call. }
    procedure tcgavr.a_call_name(list : TAsmList;const s : string; weak: boolean);
      var
        callee: TAsmSymbol;
        callop: tasmop;
      begin
        if not weak then
          callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION)
        else
          callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION);

        if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
          callop:=A_CALL
        else
          callop:=A_RCALL;
        list.concat(taicpu.op_sym(callop,callee));

        include(current_procinfo.flags,pi_do_call);
      end;


    { Emits an indirect call through reg: reg and GetHigh(reg) are copied to
      NR_ZLO/NR_ZHI, followed by ICALL. Both Z halves are allocated for the
      duration of the sequence and the current procedure is flagged with
      pi_do_call. }
    procedure tcgavr.a_call_reg(list : TAsmList;reg: tregister);
      begin
        { low byte of the target address }
        a_reg_alloc(list,NR_ZLO);
        emit_mov(list,NR_ZLO,reg);
        { high byte of the target address }
        a_reg_alloc(list,NR_ZHI);
        emit_mov(list,NR_ZHI,GetHigh(reg));

        list.concat(taicpu.op_none(A_ICALL));

        a_reg_dealloc(list,NR_ZHI);
        a_reg_dealloc(list,NR_ZLO);

        include(current_procinfo.flags,pi_do_call);
      end;


    { reg := reg Op a, for 8, 16 and 32 bit operands only; any other size is
      an internal error. No high register is passed on (NR_NO). }
    procedure tcgavr.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
      begin
        if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
          internalerror(2012102403);
        a_op_const_reg_internal(list,Op,size,a,reg,NR_NO);
      end;


    { dst := dst Op src, for 8, 16 and 32 bit operands only; any other size
      is an internal error. No high registers are passed on (NR_NO). }
    procedure tcgavr.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister);
      begin
        if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
          internalerror(2012102401);
        a_op_reg_reg_internal(list,Op,size,src,NR_NO,dst,NR_NO);
      end;


    procedure tcgavr.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size:
tcgsize; a: tcgint; src, dst: tregister); begin a_op_const_reg_reg_internal(list,op,size,a,src,NR_NO,dst,NR_NO); end; procedure tcgavr.a_op_const_reg_reg_internal(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src,srchi,dst,dsthi: tregister); var tmpSrc, tmpDst, countreg: TRegister; b, b2, i, j: byte; s1, s2, t1: integer; l1: TAsmLabel; oldexecutionweight: LongInt; begin if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and (a in [2,4,8]) then begin emit_mov(list,dst,src); emit_mov(list,GetNextReg(dst),GetNextReg(src)); a:=a shr 1; while a>0 do begin list.concat(taicpu.op_reg(A_LSL,dst)); list.concat(taicpu.op_reg(A_ROL,GetNextReg(dst))); a:=a shr 1; end; end else if (op in [OP_SHL,OP_SHR]) and { a=0 get eliminated later by tcg.optimize_op_const } (a>0) then begin { number of bytes to shift } b:=a div 8; { Ensure that b is never larger than base type } if b>tcgsize2size[size] then begin b:=tcgsize2size[size]; b2:=0; end else b2:=a mod 8; if b < tcgsize2size[size] then { copy from src to dst accounting for shift offset } for i:=0 to (tcgsize2size[size]-b-1) do if op=OP_SHL then a_load_reg_reg(list,OS_8,OS_8, GetOffsetReg64(src,srchi,i), GetOffsetReg64(dst,dsthi,i+b)) else a_load_reg_reg(list,OS_8,OS_8, GetOffsetReg64(src,srchi,i+b), GetOffsetReg64(dst,dsthi,i)); { remaining bit shifts } if b2 > 0 then begin { Cost of loop } s1:=3+tcgsize2size[size]-b; t1:=b2*(tcgsize2size[size]-b+3); { Cost of loop unrolling,t2=s2 } s2:=b2*(tcgsize2size[size]-b); if ((cs_opt_size in current_settings.optimizerswitches) and (s10) then begin { Shift non-moved bytes in loop } current_asmdata.getjumplabel(l1); countreg:=getintregister(list,OS_8); a_load_const_reg(list,OS_8,b2,countreg); cg.a_label(list,l1); oldexecutionweight:=executionweight; executionweight:=executionweight*b2; if op=OP_SHL then list.concat(taicpu.op_reg(A_LSL,GetOffsetReg64(dst,dsthi,b))) else list.concat(taicpu.op_reg(A_LSR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1-b))); if size in 
[OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                  begin
                    for i:=2+b to tcgsize2size[size] do
                      if op=OP_SHL then
                        list.concat(taicpu.op_reg(A_ROL,GetOffsetReg64(dst,dsthi,i-1)))
                      else
                        list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
                  end;
                list.concat(taicpu.op_reg(A_DEC,countreg));
                a_jmp_flags(list,F_NE,l1);
                executionweight:=oldexecutionweight;
                { keep registers alive }
                a_reg_sync(list,countreg);
              end
            else
              begin
                { Unroll shift loop over non-moved bytes }
                for j:=1 to b2 do
                  begin
                    if op=OP_SHL then
                      list.concat(taicpu.op_reg(A_LSL,
                        GetOffsetReg64(dst,dsthi,b)))
                    else
                      list.concat(taicpu.op_reg(A_LSR,
                        GetOffsetReg64(dst,dsthi,tcgsize2size[size]-b-1)));

                    if not(size in [OS_8,OS_S8]) then
                      for i:=2 to tcgsize2size[size]-b do
                        if op=OP_SHL then
                          list.concat(taicpu.op_reg(A_ROL,
                            GetOffsetReg64(dst,dsthi,b+i-1)))
                        else
                          list.concat(taicpu.op_reg(A_ROR,
                            GetOffsetReg64(dst,dsthi,tcgsize2size[size]-b-i)));
                  end;
              end;
          end;

          { fill skipped destination registers with 0
            Do last, then the optimizer can optimize register moves }
          for i:=1 to b do
            if op=OP_SHL then
              emit_mov(list,GetOffsetReg64(dst,dsthi,i-1),GetDefaultZeroReg)
            else
              emit_mov(list,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i),GetDefaultZeroReg);
        end
      else
        inherited a_op_const_reg_reg(list,op,size,a,src,dst);
      end;


    { dst := src op a with overflow information in ovloc.
      Multiplications (OP_MUL/OP_IMUL) that must set the flags are routed
      through the register/register variant after loading the constant into
      a temporary register. Everything else uses the inherited implementation
      and reports the overflow state as being in the flags. }
    procedure tcgavr.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister; setflags: boolean; var ovloc: tlocation);
      var
        constreg: TRegister;
      begin
        if not((op in [OP_MUL,OP_IMUL]) and setflags) then
          begin
            inherited a_op_const_reg_reg_checkoverflow(list, op, size, a, src, dst, setflags, ovloc);
            ovloc.loc:=LOC_FLAGS;
            exit;
          end;

        { multiplication with overflow check: materialise the constant first }
        constreg:=getintregister(list,size);
        a_load_const_reg(list,size,a,constreg);
        a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
      end;


    procedure tcgavr.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation);
      begin
        if (op in
[OP_MUL,OP_IMUL]) and setflags then gen_multiply(list,op,size,src1,src2,dst,setflags,ovloc) else begin inherited a_op_reg_reg_reg_checkoverflow(list, op, size, src1, src2, dst, setflags, ovloc); ovloc.loc:=LOC_FLAGS; end; end; procedure tcgavr.a_op_reg_reg_internal(list : TAsmList; Op: TOpCG; size: TCGSize; src, srchi, dst, dsthi: TRegister); var countreg, tmpreg: tregister; i : integer; instr : taicpu; paraloc1,paraloc2 : TCGPara; l1,l2 : tasmlabel; pd : tprocdef; hovloc: tlocation; { NextRegDst* is sometimes called before the register usage and sometimes afterwards } procedure NextSrcDstPreInc; begin if i=5 then begin dst:=dsthi; src:=srchi; end else begin dst:=GetNextReg(dst); src:=GetNextReg(src); end; end; procedure NextSrcDstPostInc; begin if i=4 then begin dst:=dsthi; src:=srchi; end else begin dst:=GetNextReg(dst); src:=GetNextReg(src); end; end; { iterates TmpReg through all registers of dst } procedure NextTmp; begin if i=4 then tmpreg:=dsthi else tmpreg:=GetNextReg(tmpreg); end; begin case op of OP_ADD: begin list.concat(taicpu.op_reg_reg(A_ADD,dst,src)); for i:=2 to tcgsize2size[size] do begin NextSrcDstPreInc; list.concat(taicpu.op_reg_reg(A_ADC,dst,src)); end; end; OP_SUB: begin list.concat(taicpu.op_reg_reg(A_SUB,dst,src)); for i:=2 to tcgsize2size[size] do begin NextSrcDstPreInc; list.concat(taicpu.op_reg_reg(A_SBC,dst,src)); end; end; OP_NEG: begin if src<>dst then begin if size in [OS_S64,OS_64] then begin a_load_reg_reg(list,OS_32,OS_32,src,dst); a_load_reg_reg(list,OS_32,OS_32,srchi,dsthi); end else a_load_reg_reg(list,size,size,src,dst); end; if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then begin tmpreg:=GetNextReg(dst); for i:=2 to tcgsize2size[size] do begin list.concat(taicpu.op_reg(A_COM,tmpreg)); { check if we are not in the last iteration to avoid an internalerror in GetNextReg } if idst then a_load_reg_reg(list,OS_8,OS_8,src,dst); list.concat(taicpu.op_reg(A_COM,dst)); { check if we are not in the last iteration to avoid an 
internalerror in GetNextReg } if i0 then list.concat(taicpu.op_reg_const(A_ORI,reg,(qword(a) and mask) shr shift)); { check if we are not in the last iteration to avoid an internalerror in GetNextReg } if i$ff then begin getcpuregister(list,NR_R26); list.concat(taicpu.op_reg_const(A_LDI,NR_R26,(qword(a) and mask) shr shift)); list.concat(taicpu.op_reg_reg(A_AND,reg,NR_R26)); ungetcpuregister(list,NR_R26); end; { check if we are not in the last iteration to avoid an internalerror in GetNextReg } if i=(tcgsize2size[size]*8-1)) then begin current_asmdata.getjumplabel(l1); list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1)); a_jmp_flags(list,F_PL,l1); list.concat(taicpu.op_reg(A_DEC,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); cg.a_label(list,l1); for i:=2 to tcgsize2size[size] do a_load_reg_reg(list,OS_8,OS_8,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1),GetOffsetReg64(reg,reghi,tcgsize2size[size]-i)); end else if (op=OP_SHR) and (a=(tcgsize2size[size]*8-1)) then begin current_asmdata.getjumplabel(l1); list.concat(taicpu.op_reg(A_TST,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,0)); a_jmp_flags(list,F_PL,l1); list.concat(taicpu.op_reg(A_INC,GetOffsetReg64(reg,reghi,0))); cg.a_label(list,l1); for i:=1 to tcgsize2size[size]-1 do a_load_const_reg(list,OS_8,0,GetOffsetReg64(reg,reghi,i)); end else if a*tcgsize2size[size]<=8 then begin for j:=1 to a do begin case op of OP_SHR: list.concat(taicpu.op_reg(A_LSR,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); OP_SHL: list.concat(taicpu.op_reg(A_LSL,reg)); OP_SAR: list.concat(taicpu.op_reg(A_ASR,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); OP_ROR: begin { load carry? 
} if not(size in [OS_8,OS_S8]) then begin list.concat(taicpu.op_none(A_CLC)); list.concat(taicpu.op_reg_const(A_SBRC,reg,0)); list.concat(taicpu.op_none(A_SEC)); end; list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1))); end; OP_ROL: begin { load carry? } if not(size in [OS_8,OS_S8]) then begin list.concat(taicpu.op_none(A_CLC)); list.concat(taicpu.op_reg_const(A_SBRC,GetOffsetReg64(reg,reghi,tcgsize2size[size]-1),7)); list.concat(taicpu.op_none(A_SEC)); end; list.concat(taicpu.op_reg(A_ROL,reg)) end; else internalerror(2011030901); end; if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then begin for i:=2 to tcgsize2size[size] do begin case op of OP_ROR, OP_SHR: list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(reg,reghi,tcgsize2size[size]-i))); OP_ROL, OP_SHL: list.concat(taicpu.op_reg(A_ROL,GetOffsetReg64(reg,reghi,i-1))); OP_SAR: list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(reg,reghi,tcgsize2size[size]-i))); else internalerror(2011030902); end; end; end; end; end else begin tmpreg:=getintregister(list,size); a_load_const_reg(list,size,a,tmpreg); a_op_reg_reg(list,op,size,tmpreg,reg); end; end; OP_ADD: begin curvalue:=a and mask; if curvalue=0 then list.concat(taicpu.op_reg_reg(A_ADD,reg,GetDefaultZeroReg)) else if (curvalue=1) and (tcgsize2size[size]=1) then list.concat(taicpu.op_reg(A_INC,reg)) else begin tmpreg:=getintregister(list,OS_8); a_load_const_reg(list,OS_8,curvalue,tmpreg); list.concat(taicpu.op_reg_reg(A_ADD,reg,tmpreg)); end; if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then begin for i:=2 to tcgsize2size[size] do begin NextRegPreInc; mask:=mask shl 8; inc(shift,8); curvalue:=(qword(a) and mask) shr shift; { decrease pressure on upper half of registers by using ADC ...,R1 instead of ADD ...,0 } if curvalue=0 then list.concat(taicpu.op_reg_reg(A_ADC,reg,GetDefaultZeroReg)) else begin tmpreg:=getintregister(list,OS_8); a_load_const_reg(list,OS_8,curvalue,tmpreg); list.concat(taicpu.op_reg_reg(A_ADC,reg,tmpreg)); 
end; end; end; end; else begin if size in [OS_64,OS_S64] then begin tmpreg64.reglo:=getintregister(list,OS_32); tmpreg64.reghi:=getintregister(list,OS_32); cg64.a_load64_const_reg(list,a,tmpreg64); cg64.a_op64_reg_reg(list,op,size,tmpreg64,joinreg64(reg,reghi)); end else begin {$if 0} { code not working yet } if (op=OP_SAR) and (a=31) and (size in [OS_32,OS_S32]) then begin tmpreg:=reg; for i:=1 to 4 do begin list.concat(taicpu.op_reg_reg(A_MOV,tmpreg,GetDefaultZeroReg)); tmpreg:=GetNextReg(tmpreg); end; end else {$endif} begin tmpreg:=getintregister(list,size); a_load_const_reg(list,size,a,tmpreg); a_op_reg_reg(list,op,size,tmpreg,reg); end; end; end; end; end; procedure tcgavr.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister); var mask : qword; shift : byte; i : byte; begin mask:=$ff; shift:=0; for i:=1 to tcgsize2size[size] do begin if ((qword(a) and mask) shr shift)=0 then emit_mov(list,reg,GetDefaultZeroReg) else begin getcpuregister(list,NR_R26); list.concat(taicpu.op_reg_const(A_LDI,NR_R26,(qword(a) and mask) shr shift)); a_load_reg_reg(list,OS_8,OS_8,NR_R26,reg); ungetcpuregister(list,NR_R26); end; mask:=mask shl 8; inc(shift,8); { check if we are not in the last iteration to avoid an internalerror in GetNextReg } if iAM_UNCHANGED then internalerror(2011021701); { Be sure to have a base register } if (ref.base=NR_NO) then begin ref.base:=ref.index; ref.index:=NR_NO; end; { can we take advantage of adiw/sbiw? 
} if (current_settings.cputype>=cpu_avr2) and not(assigned(ref.symbol)) and (ref.offset<>0) and (ref.offset>=-63) and (ref.offset<=63) and ((tmpreg=NR_R24) or (tmpreg=NR_R26) or (tmpreg=NR_R28) or (tmpreg=NR_R30)) and (ref.base<>NR_NO) then begin maybegetcpuregister(list,tmpreg); emit_mov(list,tmpreg,ref.base); maybegetcpuregister(list,GetNextReg(tmpreg)); emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base)); if ref.index<>NR_NO then begin list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index)); list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index))); end; if ref.offset>0 then list.concat(taicpu.op_reg_const(A_ADIW,tmpreg,ref.offset)) else list.concat(taicpu.op_reg_const(A_SBIW,tmpreg,-ref.offset)); ref.offset:=0; ref.base:=tmpreg; ref.index:=NR_NO; end else if assigned(ref.symbol) or (ref.offset<>0) then begin reference_reset(tmpref,0,[]); tmpref.symbol:=ref.symbol; tmpref.offset:=ref.offset; if assigned(ref.symbol) and (ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) then tmpref.refaddr:=addr_lo8_gs else tmpref.refaddr:=addr_lo8; maybegetcpuregister(list,tmpreg); list.concat(taicpu.op_reg_ref(A_LDI,tmpreg,tmpref)); if assigned(ref.symbol) and (ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) then tmpref.refaddr:=addr_hi8_gs else tmpref.refaddr:=addr_hi8; maybegetcpuregister(list,GetNextReg(tmpreg)); list.concat(taicpu.op_reg_ref(A_LDI,GetNextReg(tmpreg),tmpref)); if (ref.base<>NR_NO) then begin list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.base)); list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.base))); end; if (ref.index<>NR_NO) then begin list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index)); list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index))); end; ref.symbol:=nil; ref.offset:=0; ref.base:=tmpreg; ref.index:=NR_NO; end else if (ref.base<>NR_NO) and (ref.index<>NR_NO) then begin maybegetcpuregister(list,tmpreg); emit_mov(list,tmpreg,ref.base); maybegetcpuregister(list,GetNextReg(tmpreg)); 
emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base)); list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index)); list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index))); ref.base:=tmpreg; ref.index:=NR_NO; end else if (ref.base<>NR_NO) then begin maybegetcpuregister(list,tmpreg); emit_mov(list,tmpreg,ref.base); maybegetcpuregister(list,GetNextReg(tmpreg)); emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base)); ref.base:=tmpreg; ref.index:=NR_NO; end else if (ref.index<>NR_NO) then begin maybegetcpuregister(list,tmpreg); emit_mov(list,tmpreg,ref.index); maybegetcpuregister(list,GetNextReg(tmpreg)); emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.index)); ref.base:=tmpreg; ref.index:=NR_NO; end else Internalerror(2020011901); Result:=ref; end; procedure tcgavr.a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference); var href : treference; conv_done: boolean; tmpreg : tregister; i : integer; QuickRef,ungetcpuregister_z: Boolean; begin QuickRef:=false; ungetcpuregister_z:=false; href:=Ref; { ensure, href.base contains a valid register if there is any register used } if href.base=NR_NO then begin href.base:=href.index; href.index:=NR_NO; end; { try to use std/sts } if not((href.Base=NR_NO) and (href.Index=NR_NO)) then begin if not((href.addressmode=AM_UNCHANGED) and (href.symbol=nil) and (href.Index=NR_NO) and (href.Offset in [0..64-tcgsize2size[fromsize]])) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then begin href:=normalize_ref(list,href,NR_R30); getcpuregister(list,NR_R30); getcpuregister(list,NR_R31); ungetcpuregister_z:=true; end else begin if (href.base<>NR_R28) and (href.base<>NR_R30) then begin getcpuregister(list,NR_R30); emit_mov(list,NR_R30,href.base); getcpuregister(list,NR_R31); emit_mov(list,NR_R31,GetNextReg(href.base)); href.base:=NR_R30; ungetcpuregister_z:=true; end; QuickRef:=true; end; end else QuickRef:=true; if (tcgsize2size[fromsize]>32) or 
(tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then internalerror(2011021307); conv_done:=false; if tosize<>fromsize then begin conv_done:=true; if tcgsize2size[tosize]<=tcgsize2size[fromsize] then fromsize:=tosize; case fromsize of OS_8: begin if not(QuickRef) and (tcgsize2size[tosize]>1) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); for i:=2 to tcgsize2size[tosize] do begin if QuickRef then inc(href.offset); if not(QuickRef) and (i1) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); if tcgsize2size[tosize]>1 then begin tmpreg:=getintregister(list,OS_8); emit_mov(list,tmpreg,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,reg,7)); list.concat(taicpu.op_reg(A_COM,tmpreg)); for i:=2 to tcgsize2size[tosize] do begin if QuickRef then inc(href.offset); if not(QuickRef) and (i1) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); if QuickRef then inc(href.offset) else if not(QuickRef) and (tcgsize2size[fromsize]>2) then href.addressmode:=AM_POSTINCREMENT else href.addressmode:=AM_UNCHANGED; reg:=GetNextReg(reg); list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); for i:=3 to tcgsize2size[tosize] do begin if QuickRef then inc(href.offset); if not(QuickRef) and (i1) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); if QuickRef then inc(href.offset) else if not(QuickRef) and (tcgsize2size[fromsize]>2) then href.addressmode:=AM_POSTINCREMENT else href.addressmode:=AM_UNCHANGED; reg:=GetNextReg(reg); list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); if tcgsize2size[tosize]>2 then begin tmpreg:=getintregister(list,OS_8); emit_mov(list,tmpreg,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,reg,7)); list.concat(taicpu.op_reg(A_COM,tmpreg)); for i:=3 to tcgsize2size[tosize] do begin if QuickRef then inc(href.offset); if not(QuickRef) and 
(i cpu_avrxmega3) and (fromsize in [OS_16, OS_S16]) and QuickRef and (href.offset > 31) and (href.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then begin tmpreg:=GetNextReg(reg); href.addressmode:=AM_UNCHANGED; inc(href.offset); list.concat(taicpu.op_ref_reg(GetStore(href),href,tmpreg)); dec(href.offset); list.concat(taicpu.op_ref_reg(GetStore(href),href,reg)); end else begin for i:=1 to tcgsize2size[fromsize] do begin if not(QuickRef) and (iNR_R28) and (href.base<>NR_R30) then begin getcpuregister(list,NR_R30); emit_mov(list,NR_R30,href.base); getcpuregister(list,NR_R31); emit_mov(list,NR_R31,GetNextReg(href.base)); href.base:=NR_R30; ungetcpuregister_z:=true; end; QuickRef:=true; end; end else QuickRef:=true; if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then internalerror(2011021307); conv_done:=false; if tosize<>fromsize then begin conv_done:=true; if tcgsize2size[tosize]<=tcgsize2size[fromsize] then fromsize:=tosize; case fromsize of OS_8: begin list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); for i:=2 to tcgsize2size[tosize] do begin reg:=GetNextReg(reg); emit_mov(list,reg,GetDefaultZeroReg); end; end; OS_S8: begin list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); tmpreg:=reg; if tcgsize2size[tosize]>1 then begin reg:=GetNextReg(reg); emit_mov(list,reg,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,tmpreg,7)); list.concat(taicpu.op_reg(A_COM,reg)); tmpreg:=reg; for i:=3 to tcgsize2size[tosize] do begin reg:=GetNextReg(reg); emit_mov(list,reg,tmpreg); end; end; end; OS_16: begin if not(QuickRef) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); if QuickRef then inc(href.offset); href.addressmode:=AM_UNCHANGED; reg:=GetNextReg(reg); list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); for i:=3 to tcgsize2size[tosize] do begin reg:=GetNextReg(reg); emit_mov(list,reg,GetDefaultZeroReg); end; end; 
OS_S16: begin if not(QuickRef) then href.addressmode:=AM_POSTINCREMENT; list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); if QuickRef then inc(href.offset); href.addressmode:=AM_UNCHANGED; reg:=GetNextReg(reg); list.concat(taicpu.op_reg_ref(GetLoad(href),reg,href)); tmpreg:=reg; reg:=GetNextReg(reg); emit_mov(list,reg,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,tmpreg,7)); list.concat(taicpu.op_reg(A_COM,reg)); tmpreg:=reg; for i:=4 to tcgsize2size[tosize] do begin reg:=GetNextReg(reg); emit_mov(list,reg,tmpreg); end; end; else conv_done:=false; end; end; if not conv_done then begin for i:=1 to tcgsize2size[fromsize] do begin if not(QuickRef) and (i32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then internalerror(2011021310); conv_done:=false; if tosize<>fromsize then begin conv_done:=true; if tcgsize2size[tosize]<=tcgsize2size[fromsize] then fromsize:=tosize; case fromsize of OS_8: begin emit_mov(list,reg2,reg1); for i:=2 to tcgsize2size[tosize] do begin reg2:=GetNextReg(reg2); emit_mov(list,reg2,GetDefaultZeroReg); end; end; OS_S8: begin emit_mov(list,reg2,reg1); if tcgsize2size[tosize]>1 then begin reg2:=GetNextReg(reg2); emit_mov(list,reg2,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,reg1,7)); list.concat(taicpu.op_reg(A_COM,reg2)); tmpreg:=reg2; for i:=3 to tcgsize2size[tosize] do begin reg2:=GetNextReg(reg2); emit_mov(list,reg2,tmpreg); end; end; end; OS_16: begin emit_mov(list,reg2,reg1); reg1:=GetNextReg(reg1); reg2:=GetNextReg(reg2); emit_mov(list,reg2,reg1); for i:=3 to tcgsize2size[tosize] do begin reg2:=GetNextReg(reg2); emit_mov(list,reg2,GetDefaultZeroReg); end; end; OS_S16: begin emit_mov(list,reg2,reg1); reg1:=GetNextReg(reg1); reg2:=GetNextReg(reg2); emit_mov(list,reg2,reg1); if tcgsize2size[tosize]>2 then begin reg2:=GetNextReg(reg2); emit_mov(list,reg2,GetDefaultZeroReg); list.concat(taicpu.op_reg_const(A_SBRC,reg1,7)); list.concat(taicpu.op_reg(A_COM,reg2)); tmpreg:=reg2; for i:=4 to 
tcgsize2size[tosize] do
                      begin
                        reg2:=GetNextReg(reg2);
                        emit_mov(list,reg2,tmpreg);
                      end;
                  end;
              end;
            else
              conv_done:=false;
          end;
        end;
      if not conv_done and (reg1<>reg2) then
        begin
          for i:=1 to tcgsize2size[fromsize] do
            begin
              emit_mov(list,reg2,reg1);
              { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
              if i0 then
                list.concat(taicpu.op_reg(A_PUSH,GetDefaultTmpReg));
              for i:=1 to (-value) div 2 do
                list.concat(taicpu.op_const(A_RCALL,0));
            end;
          1..7:
            begin
              for i:=1 to value do
                list.concat(taicpu.op_reg(A_POP,GetDefaultTmpReg));
            end;}
          else
            begin
              list.concat(taicpu.op_reg_const(A_SUBI,NR_R28,lo(word(-value))));
              list.concat(taicpu.op_reg_const(A_SBCI,NR_R29,hi(word(-value))));
              // get SREG
              list.concat(taicpu.op_reg_const(A_IN,GetDefaultTmpReg,NIO_SREG));
              // block interrupts
              list.concat(taicpu.op_none(A_CLI));
              // write high SP
              list.concat(taicpu.op_const_reg(A_OUT,NIO_SP_HI,NR_R29));
              // release interrupts
              list.concat(taicpu.op_const_reg(A_OUT,NIO_SREG,GetDefaultTmpReg));
              // write low SP
              list.concat(taicpu.op_const_reg(A_OUT,NIO_SP_LO,NR_R28));
            end;
        end;
      end;


    { Selects the load opcode matching the shape of ref:
        - A_LDS when ref has neither a base nor an index register,
        - A_LDD when ref has a base register and a non-zero offset,
        - A_LD  in every other case.
      Only ref.base, ref.index and ref.offset are inspected. }
    function tcgavr.GetLoad(const ref: treference) : tasmop;
      begin
        if (ref.base=NR_NO) and (ref.index=NR_NO) then
          result:=A_LDS
        else if (ref.base<>NR_NO) and (ref.offset<>0) then
          result:=A_LDD
        else
          result:=A_LD;
      end;


    { Store counterpart of GetLoad; the selection rules are the same:
        - A_STS when ref has neither a base nor an index register,
        - A_STD when ref has a base register and a non-zero offset,
        - A_ST  in every other case. }
    function tcgavr.GetStore(const ref: treference) : tasmop;
      begin
        if (ref.base=NR_NO) and (ref.index=NR_NO) then
          result:=A_STS
        else if (ref.base<>NR_NO) and (ref.offset<>0) then
          result:=A_STD
        else
          result:=A_ST;
      end;


    { Emits code that multiplies src1 by src2 and leaves the result in dst.

      list           - instruction list the code is appended to
      op             - OP_MUL or OP_IMUL; selects the instruction for 8 bit
                       operands and the flag (carry or V) used to report a
                       16 bit overflow
      size           - OS_8/OS_S8 or OS_16/OS_S16; any other size raises
                       internalerror(2011022002)
      src2, src1     - operand registers (note the order in the signature)
      dst            - result register; for 16 bit sizes GetNextReg(dst)
                       receives the second byte
      check_overflow - when true, overflow detection code is emitted as well
      ovloc          - set to LOC_VOID on entry; changed to LOC_FLAGS by the
                       inline paths that report overflow through a status flag

      When CPUAVR_HAS_MUL is available the hardware multiplier is used (for
      16 bit only if no overflow check is requested or the size is OS_16),
      otherwise one of the fpc_mul_* helpers is called. }
    procedure tcgavr.gen_multiply(list: TAsmList; op: topcg; size: TCgSize; src2, src1, dst: tregister; check_overflow: boolean; var ovloc: tlocation);

      { If overflow checking is on: ORs R1 with other_reg (R1 by default, which
        leaves R1 unchanged) and emits a branch to overflow_label taken when the
        result is non-zero (condition C_NE). Emits nothing otherwise. }
      procedure perform_r1_check(overflow_label: TAsmLabel; other_reg: TRegister=NR_R1);
        var
          ai: taicpu;
        begin
          if check_overflow then
            begin
              list.concat(taicpu.op_reg_reg(A_OR,NR_R1,other_reg));
              ai:=Taicpu.Op_Sym(A_BRxx,overflow_label);
              ai.SetCondition(C_NE);
              ai.is_jmp:=true;
              list.concat(ai);
            end;
        end;

      { If overflow checking is on: emits a branch to overflow_label taken when
        the carry flag is set (condition C_CS). Emits nothing otherwise. }
      procedure perform_ovf_check(overflow_label: TAsmLabel);
        var
          ai: taicpu;
        begin
          if check_overflow then
            begin
              ai:=Taicpu.Op_Sym(A_BRxx,overflow_label);
              ai.SetCondition(C_CS);
              ai.is_jmp:=true;
              list.concat(ai);
            end;
        end;

      var
        pd: tprocdef;
        paraloc1, paraloc2: tcgpara;
        ai: taicpu;
        hl, no_overflow: TAsmLabel;
        name: String;
      begin
        ovloc.loc:=LOC_VOID;
        if size in [OS_8,OS_S8] then
          begin
            if (CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) and (op=OP_MUL) then
              begin
                { 8 bit unsigned multiply in hardware; the code below reads the
                  product from R1:R0 }
                cg.a_reg_alloc(list,NR_R0);
                cg.a_reg_alloc(list,NR_R1);
                list.concat(taicpu.op_reg_reg(topcg2asmop[op],src1,src2));
                // Check overflow
                if check_overflow then
                  begin
                    current_asmdata.getjumplabel(hl);
                    { AND R1,R1 only tests R1: a zero high byte means no overflow }
                    list.concat(taicpu.op_reg_reg(A_AND,NR_R1,NR_R1));
                    { Clear carry as it's not affected by any of the instructions }
                    list.concat(taicpu.op_none(A_CLC));
                    ai:=Taicpu.Op_Sym(A_BRxx,hl);
                    ai.SetCondition(C_EQ);
                    ai.is_jmp:=true;
                    list.concat(ai);
                    { overflow: clear R1 again and report it through the carry flag }
                    list.concat(taicpu.op_reg(A_CLR,NR_R1));
                    list.concat(taicpu.op_none(A_SEC));
                    a_label(list,hl);
                    ovloc.loc:=LOC_FLAGS;
                  end
                else
                  list.concat(taicpu.op_reg(A_CLR,NR_R1));
                cg.a_reg_dealloc(list,NR_R1);
                list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
                cg.a_reg_dealloc(list,NR_R0);
              end
            else if (CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) and (op=OP_IMUL) then
              begin
                { 8 bit signed multiply in hardware (MULS) }
                cg.a_reg_alloc(list,NR_R0);
                cg.a_reg_alloc(list,NR_R1);
                list.concat(taicpu.op_reg_reg(A_MULS,src1,src2));
                list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
                // Check overflow
                if check_overflow then
                  begin
                    current_asmdata.getjumplabel(no_overflow);
                    { SBRC skips the INC when bit 7 of R0 is clear, so R1 is
                      incremented only for a negative low byte; afterwards R1
                      must be zero, otherwise FPC_OVERFLOW is called }
                    list.concat(taicpu.op_reg_const(A_SBRC,NR_R0,7));
                    list.concat(taicpu.op_reg(A_INC,NR_R1));
                    list.concat(taicpu.op_reg(A_TST,NR_R1));
                    ai:=Taicpu.Op_Sym(A_BRxx,no_overflow);
                    ai.SetCondition(C_EQ);
                    ai.is_jmp:=true;
                    list.concat(ai);
                    list.concat(taicpu.op_reg(A_CLR,NR_R1));
                    a_call_name(list,'FPC_OVERFLOW',false);
                    a_label(list,no_overflow);
                    { the overflow was handled by the call above, nothing is
                      left for the caller to test }
                    ovloc.loc:=LOC_VOID;
                  end
                else
                  list.concat(taicpu.op_reg(A_CLR,NR_R1));
                cg.a_reg_dealloc(list,NR_R1);
                cg.a_reg_dealloc(list,NR_R0);
              end
            else
              begin
                { no usable hardware multiply: call the 8 bit helper, whose name
                  gets the suffix _checkoverflow when overflow checking is on }
                if size=OS_8 then
                  name:='fpc_mul_byte'
                else
                  name:='fpc_mul_shortint';

                if check_overflow then
                  name:=name+'_checkoverflow';

                pd:=search_system_proc(name);
                paraloc1.init;
                paraloc2.init;
                paramanager.getcgtempparaloc(list,pd,1,paraloc1);
                paramanager.getcgtempparaloc(list,pd,2,paraloc2);
                { src1 goes into the second parameter, src2 into the first }
                a_load_reg_cgpara(list,OS_8,src1,paraloc2);
                a_load_reg_cgpara(list,OS_8,src2,paraloc1);
                paramanager.freecgpara(list,paraloc2);
                paramanager.freecgpara(list,paraloc1);
                alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                a_call_name(list,upper(name),false);
                dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                { the result is copied out of R24 }
                cg.a_reg_alloc(list,NR_R24);
                cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
                cg.a_reg_dealloc(list,NR_R24);
                paraloc2.done;
                paraloc1.done;
              end;
          end
        else if size in [OS_16,OS_S16] then
          begin
            if (CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) and
               ((not check_overflow) or (size=OS_16)) then
              begin
                { 16 bit multiply built from 8 bit MULs; the register returned
                  by GetNextReg is used as the second byte of each operand }
                if check_overflow then
                  begin
                    current_asmdata.getjumplabel(hl);
                    current_asmdata.getjumplabel(no_overflow);
                  end;
                cg.a_reg_alloc(list,NR_R0);
                cg.a_reg_alloc(list,NR_R1);
                { first bytes of both operands: product goes to dst and its next register }
                list.concat(taicpu.op_reg_reg(A_MUL,src2,src1));
                emit_mov(list,dst,NR_R0);
                emit_mov(list,GetNextReg(dst),NR_R1);
                { first cross product: R0 is added to the second result byte;
                  with overflow checking a non-zero R1 or a carry out of the
                  addition branches to hl }
                list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(src1),src2));
                perform_r1_check(hl);
                list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
                perform_ovf_check(hl);
                { second cross product, handled the same way }
                list.concat(taicpu.op_reg_reg(A_MUL,src1,GetNextReg(src2)));
                perform_r1_check(hl);
                list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
                perform_ovf_check(hl);
                if check_overflow then
                  begin
                    { product of the two second bytes: only needed for the
                      check, any non-zero bit in R1 or R0 branches to hl }
                    list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(src1),GetNextReg(src2)));
                    perform_r1_check(hl,NR_R0);
                  end;
                cg.a_reg_dealloc(list,NR_R0);
                list.concat(taicpu.op_reg(A_CLR,NR_R1));
                if check_overflow then
                  begin
                    {
                      CLV/CLC
                      JMP no_overflow
                      .hl:
                      CLR R1
                      SEV/SEC
                      .no_overflow:
                    }
                    { OP_MUL reports through the carry flag, everything else through V }
                    if op=OP_MUL then
                      list.concat(taicpu.op_none(A_CLC))
                    else
                      list.concat(taicpu.op_none(A_CLV));
                    a_jmp_always(list,no_overflow);
                    a_label(list,hl);
                    list.concat(taicpu.op_reg(A_CLR,NR_R1));
                    if op=OP_MUL then
                      list.concat(taicpu.op_none(A_SEC))
                    else
                      list.concat(taicpu.op_none(A_SEV));
                    a_label(list,no_overflow);
                    ovloc.loc:=LOC_FLAGS;
                  end;
                cg.a_reg_dealloc(list,NR_R1);
              end
            else
              begin
                { no usable hardware multiply for this case: call the 16 bit
                  helper, suffixed with _checkoverflow when checking is on }
                if size=OS_16 then
                  name:='fpc_mul_word'
                else
                  name:='fpc_mul_integer';

                if check_overflow then
                  name:=name+'_checkoverflow';

                pd:=search_system_proc(name);
                paraloc1.init;
                paraloc2.init;
                paramanager.getcgtempparaloc(list,pd,1,paraloc1);
                paramanager.getcgtempparaloc(list,pd,2,paraloc2);
                { src1 goes into the second parameter, src2 into the first }
                a_load_reg_cgpara(list,OS_16,src1,paraloc2);
                a_load_reg_cgpara(list,OS_16,src2,paraloc1);
                paramanager.freecgpara(list,paraloc2);
                paramanager.freecgpara(list,paraloc1);
                alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                a_call_name(list,upper(name),false);
                dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                { the result is copied byte by byte out of R24 and R25 }
                cg.a_reg_alloc(list,NR_R24);
                cg.a_reg_alloc(list,NR_R25);
                cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
                cg.a_reg_dealloc(list,NR_R24);
                cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
                cg.a_reg_dealloc(list,NR_R25);
                paraloc2.done;
                paraloc1.done;
              end;
          end
        else
          internalerror(2011022002);
      end;


    { Emits the entry code of the current procedure.

      list         - instruction list the code is appended to
      localsize    - size of the locals; passed negated to a_adjust_sp when a
                     frame pointer is set up
      nostackframe - when true no entry code is emitted at all

      Nothing is emitted for an empty procedure. Interrupt procedures
      additionally save the default zero and temp registers and SREG, and
      clear the zero register; normal procedures only save the used
      registers that are not volatile for pocall_stdcall. }
    procedure tcgavr.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
      var
        regs : tcpuregisterset;
        reg : tsuperregister;
      begin
        if current_procinfo.procdef.isempty then
          exit;

        if (po_interrupt in current_procinfo.procdef.procoptions) and
           (not nostackframe) then
          begin
            { check if the framepointer is actually used, this is done here because
              we have to know the size of the locals (must be 0), avr does not know
              an sp based stack }
            if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
               (localsize=0) then
              current_procinfo.framepointer:=NR_NO;

            { save int registers,
              but only if the procedure returns }
            if not(po_noreturn in current_procinfo.procdef.procoptions) then
              regs:=rg[R_INTREGISTER].used_in_proc
            else
              regs:=[];
            { if the framepointer is potentially used, save it always because we need a proper stack frame,
              even if the procedure never returns, the procedure could be e.g. a nested one accessing
              an outer stackframe }
            if current_procinfo.framepointer<>NR_NO then
              regs:=regs+[RS_R28,RS_R29];

            { we clear r1 }
            include(regs,getsupreg(GetDefaultZeroReg));

            { the temp register is used below to save SREG, so it is saved too }
            regs:=regs+[getsupreg(GetDefaultTmpReg)];

            if current_settings.cputype=cpu_avr1 then
              { for cpu_avr1 no registers are pushed, only this message is issued }
              message1(cg_w_interrupt_does_not_save_registers,current_procinfo.procdef.fullprocname(false))
            else
              begin
                { push from R31 down to R0; g_proc_exit pops in the opposite order }
                for reg:=RS_R31 downto RS_R0 do
                  if reg in regs then
                    list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));

                { Save SREG }
                cg.getcpuregister(list,GetDefaultTmpReg);
                list.concat(taicpu.op_reg_const(A_IN, GetDefaultTmpReg, $3F));
                list.concat(taicpu.op_reg(A_PUSH, GetDefaultTmpReg));
                cg.ungetcpuregister(list,GetDefaultTmpReg);
              end;

            list.concat(taicpu.op_reg(A_CLR,GetDefaultZeroReg));

            if current_procinfo.framepointer<>NR_NO then
              begin
                { load the stack pointer into R29:R28, then reserve the locals }
                cg.getcpuregister(list,NR_R28);
                list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
                cg.getcpuregister(list,NR_R29);
                list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
                a_adjust_sp(list,-localsize);
              end;
          end
        else if not(nostackframe) then
          begin
            { check if the framepointer is actually used, this is done here because
              we have to know the size of the locals (must be 0), avr does not know
              an sp based stack }
            if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
               (localsize=0) then
              current_procinfo.framepointer:=NR_NO;

            { save int registers,
              but only if the procedure returns }
            if not(po_noreturn in current_procinfo.procdef.procoptions) then
              regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall)
            else
              regs:=[];
            { if the framepointer is potentially used, save it always because we need a proper stack frame,
              even if the procedure never returns, the procedure could be e.g. a nested one accessing
              an outer stackframe }
            if current_procinfo.framepointer<>NR_NO then
              regs:=regs+[RS_R28,RS_R29];

            { push from R31 down to R0; g_proc_exit pops in the opposite order }
            for reg:=RS_R31 downto RS_R0 do
              if reg in regs then
                list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));

            if current_procinfo.framepointer<>NR_NO then
              begin
                { load the stack pointer into R29:R28, then reserve the locals }
                cg.getcpuregister(list,NR_R28);
                list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
                cg.getcpuregister(list,NR_R29);
                list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
                a_adjust_sp(list,-localsize);
              end;
          end;
      end;


    { Emits the exit code of the current procedure, mirroring g_proc_entry.

      list         - instruction list the code is appended to
      parasize     - not used by this implementation
      nostackframe - when true only the return instruction is emitted

      Procedures marked po_noreturn get no exit code at all. Interrupt
      procedures end with RETI, all others with RET. }
    procedure tcgavr.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
      var
        regs : tcpuregisterset;
        reg : TSuperRegister;
        LocalSize : longint;
      begin
        { every byte counts for avr, so if a subroutine is marked as non-returning, we do
          not generate any exit code, so we really trust the noreturn directive
        }
        if po_noreturn in current_procinfo.procdef.procoptions then
          exit;
        if po_interrupt in current_procinfo.procdef.procoptions then
          begin
            if not(current_procinfo.procdef.isempty) and
               (not nostackframe) then
              begin
                regs:=rg[R_INTREGISTER].used_in_proc;

                if current_procinfo.framepointer<>NR_NO then
                  begin
                    regs:=regs+[RS_R28,RS_R29];
                    { release the locals again }
                    LocalSize:=current_procinfo.calc_stackframe_size;
                    a_adjust_sp(list,LocalSize);
                  end;

                { we clear r1 }
                include(regs,getsupreg(GetDefaultZeroReg));

                { for cpu_avr1 the entry code pushed nothing, so nothing is popped }
                if current_settings.cputype<>cpu_avr1 then
                  begin
                    { Reload SREG }
                    regs:=regs+[getsupreg(GetDefaultTmpReg)];

                    cg.getcpuregister(list,GetDefaultTmpReg);
                    list.concat(taicpu.op_reg(A_POP, GetDefaultTmpReg));
                    list.concat(taicpu.op_const_reg(A_OUT, $3F, GetDefaultTmpReg));
                    cg.ungetcpuregister(list,GetDefaultTmpReg);

                    { pop from R0 up to R31, the reverse of the push order in g_proc_entry }
                    for reg:=RS_R0 to RS_R31 do
                      if reg in regs then
                        list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
                  end;
              end;
            list.concat(taicpu.op_none(A_RETI));
          end
        else if not(nostackframe) and not(current_procinfo.procdef.isempty) then
          begin
            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
            if current_procinfo.framepointer<>NR_NO then
              begin
                regs:=regs+[RS_R28,RS_R29];
                { release the locals again }
                LocalSize:=current_procinfo.calc_stackframe_size;
                a_adjust_sp(list,LocalSize);
              end;
            { pop from R0 up to R31, the reverse of the push order in g_proc_entry }
            for reg:=RS_R0 to RS_R31 do
              if reg in regs then
                list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
            list.concat(taicpu.op_none(A_RET));
          end
        else
          list.concat(taicpu.op_none(A_RET));
      end;


    { Loads the address described by ref into the register pair r and
      GetNextReg(r).

      list - instruction list the code is appended to
      ref  - reference whose address is computed; its addressmode must be
             AM_UNCHANGED, otherwise internalerror(2011021701) is raised
      r    - first register of the destination pair

      With a symbol or a non-zero offset the constant part is loaded with two
      LDI instructions and base and index are added with ADD/ADC. Without a
      constant part the base (or, lacking a base, the index) is moved into the
      pair instead. If ref has none of symbol, offset, base or index, nothing
      is emitted. }
    procedure tcgavr.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
      var
        tmpref : treference;
      begin
        if ref.addressmode<>AM_UNCHANGED then
          internalerror(2011021701);

        if assigned(ref.symbol) or (ref.offset<>0) then
          begin
            reference_reset(tmpref,0,[]);
            tmpref.symbol:=ref.symbol;
            tmpref.offset:=ref.offset;

            { symbols of type AT_FUNCTION or AT_LABEL use the _gs variants of
              the lo8/hi8 address kinds }
            if assigned(ref.symbol) and (ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) then
              tmpref.refaddr:=addr_lo8_gs
            else
              tmpref.refaddr:=addr_lo8;
            list.concat(taicpu.op_reg_ref(A_LDI,r,tmpref));

            if assigned(ref.symbol) and (ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) then
              tmpref.refaddr:=addr_hi8_gs
            else
              tmpref.refaddr:=addr_hi8;
            list.concat(taicpu.op_reg_ref(A_LDI,GetNextReg(r),tmpref));

            { NOTE(review): r is written before ref.base and ref.index are
              read, so this looks like it requires r to differ from both -
              confirm against the callers }
            if (ref.base<>NR_NO) then
              begin
                list.concat(taicpu.op_reg_reg(A_ADD,r,ref.base));
                list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.base)));
              end;
            if (ref.index<>NR_NO) then
              begin
                list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
                list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
              end;
          end
        else if (ref.base<>NR_NO)then
          begin
            { no constant part: start from the base pair and add the index, if any }
            emit_mov(list,r,ref.base);
            emit_mov(list,GetNextReg(r),GetNextReg(ref.base));
            if (ref.index<>NR_NO) then
              begin
                list.concat(taicpu.op_reg_reg(A_ADD,r,ref.index));
                list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(r),GetNextReg(ref.index)));
              end;
          end
        else if (ref.index<>NR_NO) then
          begin
            { only an index register: copy it }
            emit_mov(list,r,ref.index);
            emit_mov(list,GetNextReg(r),GetNextReg(ref.index));
          end;
      end;


    { Not implemented for this target: every call raises
      internalerror(2011021320). }
    procedure tcgavr.fixref(list : TAsmList;var ref : treference);
      begin
        internalerror(2011021320);
      end;


    { Copies len bytes from source to dest by emitting a call to FPC_MOVE.

      list   - instruction list the code is appended to
      source - reference whose address is passed as first parameter
      dest   - reference whose address is passed as second parameter
      len    - byte count, passed as third parameter with size OS_SINT

      The parameter locations are taken from the system procedure MOVE. }
    procedure tcgavr.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
      var
        paraloc1,paraloc2,paraloc3 : TCGPara;
        pd : tprocdef;
      begin
        pd:=search_system_proc('MOVE');
        paraloc1.init;
        paraloc2.init;
        paraloc3.init;
        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
        { the parameters are loaded in the order 3, 2, 1 }
        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
        a_loadaddr_ref_cgpara(list,dest,paraloc2);
        a_loadaddr_ref_cgpara(list,source,paraloc1);
        paramanager.freecgpara(list,paraloc3);
        paramanager.freecgpara(list,paraloc2);
        paramanager.freecgpara(list,paraloc1);
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        a_call_name_static(list,'FPC_MOVE');
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        paraloc3.done;
        paraloc2.done;
        paraloc1.done;
      end;


    procedure tcgavr.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
      var
        countreg,tmpreg,tmpreg2: tregister;
        srcref,dstref : treference;
        copysize,countregsize : tcgsize;
        l : TAsmLabel;
        i : longint;
        SrcQuickRef, DestQuickRef : Boolean;
      begin
        if len>16 then
          begin
            current_asmdata.getjumplabel(l);

            reference_reset(srcref,source.alignment,source.volatility);
            { NOTE(review): dstref is reset with source.volatility, not
              dest.volatility - confirm this is intended }
            reference_reset(dstref,dest.alignment,source.volatility);
            srcref.base:=NR_R30;
            srcref.addressmode:=AM_POSTINCREMENT;
            dstref.base:=NR_R26;
            dstref.addressmode:=AM_POSTINCREMENT;

            copysize:=OS_8;
            if len<256 then
              countregsize:=OS_8
            else if len<65536 then
              countregsize:=OS_16
            else
              internalerror(2011022007);
            countreg:=getintregister(list,countregsize);
            a_load_const_reg(list,countregsize,len,countreg);
            cg.getcpuregister(list,NR_R30);
            cg.getcpuregister(list,NR_R31);
            a_loadaddr_ref_reg(list,source,NR_R30);

            { only base or index register in dest?
} if ((dest.addressmode=AM_UNCHANGED) and (dest.offset=0) and not(assigned(dest.symbol))) and ((dest.base<>NR_NO) xor (dest.index<>NR_NO)) then begin if dest.base<>NR_NO then tmpreg:=dest.base else if dest.index<>NR_NO then tmpreg:=dest.index else internalerror(2016112001); end else begin tmpreg:=getaddressregister(list); a_loadaddr_ref_reg(list,dest,tmpreg); end; { X is used for spilling code so we can load it only by a push/pop sequence, this can be optimized later on by the peephole optimizer } list.concat(taicpu.op_reg(A_PUSH,tmpreg)); list.concat(taicpu.op_reg(A_PUSH,GetNextReg(tmpreg))); cg.getcpuregister(list,NR_R27); list.concat(taicpu.op_reg(A_POP,NR_R27)); cg.getcpuregister(list,NR_R26); list.concat(taicpu.op_reg(A_POP,NR_R26)); cg.a_label(list,l); cg.getcpuregister(list,GetDefaultTmpReg); list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref)); list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg)); cg.ungetcpuregister(list,GetDefaultTmpReg); if tcgsize2size[countregsize] = 1 then list.concat(taicpu.op_reg(A_DEC,countreg)) else begin list.concat(taicpu.op_reg_const(A_SUBI,countreg,1)); list.concat(taicpu.op_reg_reg(A_SBC,GetNextReg(countreg),GetDefaultZeroReg)); end; a_jmp_flags(list,F_NE,l); cg.ungetcpuregister(list,NR_R26); cg.ungetcpuregister(list,NR_R27); cg.ungetcpuregister(list,NR_R30); cg.ungetcpuregister(list,NR_R31); { keep registers alive } a_reg_sync(list,countreg); end else begin SrcQuickRef:=false; DestQuickRef:=false; if ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and not((source.Base=NR_NO) and (source.Index=NR_NO) and (source.symbol=nil) and (source.Offset in [0..192-len]))) or ( not((source.addressmode=AM_UNCHANGED) and (source.symbol=nil) and ((source.base=NR_R28) or (source.base=NR_R30)) and (source.Index=NR_NO) and (source.Offset in [0..64-len])) and not((source.Base=NR_NO) and (source.Index=NR_NO)) ) then begin cg.getcpuregister(list,NR_R30); cg.getcpuregister(list,NR_R31); 
srcref:=normalize_ref(list,source,NR_R30); end else begin SrcQuickRef:=true; srcref:=source; end; if ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and not((dest.Base=NR_NO) and (dest.Index=NR_NO) and (dest.symbol=nil) and (dest.Offset in [0..192-len]))) or ( not((dest.addressmode=AM_UNCHANGED) and (dest.symbol=nil) and ((dest.base=NR_R28) or (dest.base=NR_R30)) and (dest.Index=NR_No) and (dest.Offset in [0..64-len])) and not((dest.Base=NR_NO) and (dest.Index=NR_NO)) ) then begin if not(SrcQuickRef) then begin { only base or index register in dest? } if ((dest.addressmode=AM_UNCHANGED) and (dest.offset=0) and not(assigned(dest.symbol))) and ((dest.base<>NR_NO) xor (dest.index<>NR_NO)) then begin if dest.base<>NR_NO then tmpreg:=dest.base else if dest.index<>NR_NO then tmpreg:=dest.index else internalerror(2016112002); end else tmpreg:=getaddressregister(list); dstref:=normalize_ref(list,dest,tmpreg); { X is used for spilling code so we can load it only by a push/pop sequence, this can be optimized later on by the peephole optimizer } list.concat(taicpu.op_reg(A_PUSH,tmpreg)); list.concat(taicpu.op_reg(A_PUSH,GetNextReg(tmpreg))); cg.getcpuregister(list,NR_R27); list.concat(taicpu.op_reg(A_POP,NR_R27)); cg.getcpuregister(list,NR_R26); list.concat(taicpu.op_reg(A_POP,NR_R26)); dstref.base:=NR_R26; end else begin cg.getcpuregister(list,NR_R30); cg.getcpuregister(list,NR_R31); dstref:=normalize_ref(list,dest,NR_R30); end; end else begin DestQuickRef:=true; dstref:=dest; end; // CC // If dest is an ioreg (31 < offset < srambase) and size = 16 bit then // write high byte first, then low byte // but not for avrxmega3 if (len = 2) and DestQuickRef and (current_settings.cputype <> cpu_avrxmega3) and (dest.offset > 31) and (dest.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then begin // If src is also a 16 bit ioreg then read low byte then high byte if SrcQuickRef and (srcref.offset > 31) and (srcref.offset < 
cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then begin // First read source into temp registers tmpreg:=getintregister(list, OS_16); list.concat(taicpu.op_reg_ref(GetLoad(srcref),tmpreg,srcref)); inc(srcref.offset); tmpreg2:=GetNextReg(tmpreg); list.concat(taicpu.op_reg_ref(GetLoad(srcref),tmpreg2,srcref)); // then move temp registers to dest in reverse order inc(dstref.offset); list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg2)); dec(dstref.offset); list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg)); end else begin srcref.addressmode:=AM_UNCHANGED; inc(srcref.offset); dstref.addressmode:=AM_UNCHANGED; inc(dstref.offset); cg.getcpuregister(list,GetDefaultTmpReg); list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref)); list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg)); cg.ungetcpuregister(list,GetDefaultTmpReg); if not(SrcQuickRef) then srcref.addressmode:=AM_POSTINCREMENT else srcref.addressmode:=AM_UNCHANGED; dec(srcref.offset); dec(dstref.offset); cg.getcpuregister(list,GetDefaultTmpReg); list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref)); list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg)); cg.ungetcpuregister(list,GetDefaultTmpReg); end; end else for i:=1 to len do begin if not(SrcQuickRef) and (i