Diffstat (limited to 'lib/hipe/x86')
26 files changed, 0 insertions, 9055 deletions
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile deleted file mode 100644 index 84edeaebe7..0000000000 --- a/lib/hipe/x86/Makefile +++ /dev/null @@ -1,140 +0,0 @@ -# -# %CopyrightBegin% -# -# Copyright Ericsson AB 2001-2016. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# %CopyrightEnd% -# - -ifndef EBIN -EBIN = ../ebin -endif - -ifndef DOCS -DOCS = ../doc -endif - -include $(ERL_TOP)/make/target.mk -include $(ERL_TOP)/make/$(TARGET)/otp.mk - -# ---------------------------------------------------- -# Application version -# ---------------------------------------------------- -include ../vsn.mk -VSN=$(HIPE_VSN) - -# ---------------------------------------------------- -# Release directory specification -# ---------------------------------------------------- -RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) - -# ---------------------------------------------------- -# Target Specs -# ---------------------------------------------------- -# Please keep this list sorted. -MODULES=hipe_rtl_to_x86 \ - hipe_x86 \ - hipe_x86_assemble \ - hipe_x86_cfg \ - hipe_x86_defuse \ - hipe_x86_encode \ - hipe_x86_frame \ - hipe_x86_liveness \ - hipe_x86_main \ - hipe_x86_postpass \ - hipe_x86_pp \ - hipe_x86_ra \ - hipe_x86_ra_finalise \ - hipe_x86_ra_ls \ - hipe_x86_ra_naive \ - hipe_x86_ra_postconditions \ - hipe_x86_registers \ - hipe_x86_spill_restore \ - hipe_x86_subst \ - hipe_x86_x87 - -HRL_FILES=hipe_x86.hrl -ERL_FILES=$(MODULES:%=%.erl) -TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) -DOC_FILES= $(MODULES:%=$(DOCS)/%.html) - -# APP_FILE= -# APP_SRC=$(APP_FILE).src -# APP_TARGET=$(EBIN)/$(APP_FILE) -# -# APPUP_FILE= -# APPUP_SRC=$(APPUP_FILE).src -# APPUP_TARGET=$(EBIN)/$(APPUP_FILE) - -# ---------------------------------------------------- -# FLAGS -# ---------------------------------------------------- - -include ../native.mk - -ERL_COMPILE_FLAGS += -Werror +warn_export_vars - -# ---------------------------------------------------- -# Targets -# ---------------------------------------------------- - -debug opt: $(TARGET_FILES) - -docs: - -# Moved docs target to edocs so the standard docs rule work properly. -edocs: $(DOC_FILES) - -clean: - rm -f $(TARGET_FILES) - rm -f core - -distclean: clean -realclean: clean - -$(DOCS)/%.html:%.erl - erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop - -# ---------------------------------------------------- -# Special Build Targets -# ---------------------------------------------------- - -# ---------------------------------------------------- -# Release Target -# ---------------------------------------------------- -include $(ERL_TOP)/make/otp_release_targets.mk - -release_spec: opt - $(INSTALL_DIR) "$(RELSYSDIR)/ebin" - $(INSTALL_DATA) $(TARGET_FILES) "$(RELSYSDIR)/ebin" - -release_docs_spec: - -# Please keep this list sorted. 
-$(EBIN)/hipe_rtl_to_x86.beam: ../rtl/hipe_rtl.hrl -$(EBIN)/hipe_x86_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl -$(EBIN)/hipe_x86_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc -$(EBIN)/hipe_x86_frame.beam: ../rtl/hipe_literals.hrl -$(EBIN)/hipe_x86_liveness.beam: ../flow/liveness.inc -$(EBIN)/hipe_x86_main.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl -$(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl -$(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl - -$(TARGET_FILES): hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM deleted file mode 100644 index c518ea3481..0000000000 --- a/lib/hipe/x86/NOTES.OPTIM +++ /dev/null @@ -1,198 +0,0 @@ -Partial x86 code optimisation guide -=================================== -Priority should be given to P6 and P4, then K7, -then P5, and last to K6. - -Rules that are blatantly obvious or irrelevant for HiPE are -generally not listed. These includes things like alignment -of basic data types, store-forwarding rules when alignment -or sizes don't match, and partial register stalls. - -Intel P4 --------- -The P6 4-1-1 insn decode template no longer applies. - -Simple insns (add/sub/cmp/test/and/or/xor/neg/not/mov/sahf) -are twice as fast as in P6. - -Shifts are "movsx" (sign-extend) are slower than in P6. - -Always avoid "inc" and "dec", use "add" and "sub" instead, -due to condition codes dependencies overhead. - -"fxch" is slightly more expensive than in P6, where it was free. - -Use "setcc" or "cmov" to eliminate unpredictable branches. - -For hot code executing out of the trace cache, alignment of -branch targets is less of an issue compared to P6. - -Do use "fxch" to simulate a flat FP register file, but only -for that purpose, not for manual scheduling for parallelism. - -Using "lea" is highly recommended. - -Eliminate redundant loads. Use regs as much as possible. - -Left shifts up to 3 have longer latencies than the equivalent -sequence of adds. - -Do utilise the addressing modes, to save registers and trace -cache bandwidth. - -"xor reg,reg" or "sub reg,reg" preferred over moving zero to reg. - -"test reg,reg" preferred over "cmp" with zero or "and". - -Avoid explicit cmp/test;jcc if the preceeding insn (alu, but not -mov or lea) set the condition codes. - -Load-execute alu insns (mem src) are Ok. - -Add-reg-to-mem slightly better than add-mem-to-reg. - -Add-reg-to-mem is better than load;add;store. - -Intel P6 --------- -4-1-1 instruction decoding template: can decode one semi-complex -(max 4 uops) and two simple (1 uop) insns per clock; follow a -complex insn by two simple ones, otherwise the decoders will stall. - -Load-execute (mem src) alu insns are 2 uops. -Read-modify-write (mem dst) alu insns are 4 uops. - -Insns longer than 7 bytes block parallel decoding. -Avoid insns longer than 7 bytes. - -Lea is useful. - -"movzx" is preferred for zero-extension; the xor;mov alternative -causes a partial register stall. - -Use "test" instead of "cmp" with zero. - -Pull address calculations into load and store insn addressing modes. - -Clear a reg with "xor", not by moving zero to it. - -Many alu insns set the condition codes. Replace "alu;cmp;jcc" -with "alu;jcc". This is not applicable for "mov" or "lea". 
- -For FP code, simulate a flat register file on the x87 stack by -using fxch to reorder it. - -AMD K7 ------- -Select DirectPath insns. Avoid VectorPath insns due to slower decode. - -Alu insns with mem src are very efficient. -Alu insns with mem dst are very efficient. - -Fetches from I-cache are 16-byte aligned. Align functions and frequently -used labels at or near the start of 16-byte aligned blocks. - -"movzx" preferred over "xor;mov" for zero-extension. - -"push mem" preferred over "load;push reg". - -"xor reg,reg" preferred over moving zero to the reg. - -"test" preferred over "cmp". - -"pop" insns are VectorPath. "pop mem" has latency 3, "pop reg" has -latency 4. - -"push reg" and "push imm" are DirectPath, "push mem" is VectorPath. -The latency is 3 clocks. - -Intel P5 --------- -If a loop header is less than 8 bytes away from a 16-byte -boundary, align it to the 16-byte boundary. - -If a return address is less than 8 bytes away from a 16-byte -boundary, align it to the 16-byte boundary. - -Align function entry points to 16-byte boundaries. - -Ensure that doubles are 64-bit aligned. - -Data cache line size is 32 bytes. The whole line is brought -in on a read miss. - -"push mem" is not pairable; loading a temp reg and pushing -the reg pairs better -- this is also faster on the 486. - -No conditional move instruction. - -Insns longer than 7 bytes can't go down the V-pipe or share -the insn FIFO with other insns. -Avoid insns longer than 7 bytes. - -Lea is useful when it replaces several other add/shift insns. -Lea is not a good replacement for a single shl since a scaled -index requires a disp32 (or base), making the insn longer. - -"movzx" is worse than the xor;mov alternative -- the opcode -prefix causes a slowdown and it is not pariable. - -Use "test" instead of "cmp" with zero. - -"test eax,imm" and "test reg,reg" are pairable, other forms are not. - -Pull address calculations into load and store insn addressing modes. - -Clear a reg with "xor", not by moving zero to it. - -Many alu insns set the condition codes. Replace "alu;cmp;jcc" -with "alu;jcc". This is not applicable for "mov" or "lea". - -For FP code, simulate a flat register file on the x87 stack by -using fxch to reorder it. - -"neg" and "not" are not pairable. "test imm,reg" and "test imm,mem" -are not pairable. Shifts by "cl" are not pairable. Shifts by "1" or -"imm" are pairable but only execute in the U-pipe. - -AMD K6 ------- -The insn size predecoder has a 3-byte window. Insns with both prefix -and SIB bytes cannot be short-decoded. - -Use short and simple insns, including mem src alu insns. - -Avoid insns longer than 7 bytes. They cannot be short-decoded. -Short-decode: max 7 bytes, max 2 uops. -Long-decode: max 11 bytes, max 4 uops. -Vector-decode: longer than 11 bytes or more than 4 uops. - -Prefer read-modify-write alu insns (mem dst) over "load;op;store" -sequences, for code density and register pressure reasons. - -Avoid the "(esi)" addressing mode: it forces the insn to be vector-decoded. -Use a different reg or add an explicit zero displacement. - -"add reg,reg" preferred over a shl by 1, it parallelises better. - -"movzx" preferred over "xor;mov" for zero-extension. - -Moving zero to a reg preferred over "xor reg,reg" due to dependencies -and condition codes overhead. - -"push mem" preferred over "load;push reg" due to code density and -register pressure. (Page 64.) -Explicit moves preferred when pushing args for fn calls, due to -%esp dependencies and random access possibility. (Page 58.) 
-[hmm, these two are in conflict] - -There is no penalty for seg reg prefix unless there are multiple prefixes. - -Align function entries and frequent branch targets to 16-byte boundaries. - -Shifts by imm only go down one of the pipes. - -"test reg,reg" preferred over "cmp" with zero. -"test reg,imm" is a long-decode insn. - -No conditional move insn. diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA deleted file mode 100644 index 173eaf229e..0000000000 --- a/lib/hipe/x86/NOTES.RA +++ /dev/null @@ -1,30 +0,0 @@ -Register Allocation -=================== - -These are the rules that HiPE x86 register allocators must abide by. - -- Before RA, every Temp (precoloured or pseudo) is semantically - equivalent to Reg. Any operand may be Temp. - -- Before RA, only FIXED registers may occur in precoloured Temps. - Exception 1 is move: src or dst may be an argument register. - Exception 2 is call: the dst (if any) must be %eax. - -- After RA, an operand (src or dst) may refer to at most one memory cell. - Therefore, a pseudo-Temp MAY NOT occur as base or offset in an - explicit memory operand after RA. - -- After RA, a binary operation (alu, cmp, move) may refer to at most - one memory cell. Therefore, AT MOST ONE of src and dst may be a - pseudo-Temp after RA. If one of the operands (src or dst) is an - explicit memory operand, then the other operand MUST NOT be a - pseudo-Temp after RA. - -- After RA, the index in a jmp_switch must be a register. - -- After RA, the temp in a lea must be a register. - -- After RA, the temp in an imul must be a register. - -- After RA, a function's formal parameters must reside on the stack. - Therefore, the RA MUST NOT map the formals to actual registers. diff --git a/lib/hipe/x86/TODO b/lib/hipe/x86/TODO deleted file mode 100644 index 7c93f7daf3..0000000000 --- a/lib/hipe/x86/TODO +++ /dev/null @@ -1,31 +0,0 @@ -rtl_to_x86: -* recognise alub(X,X,sub,1,lt,L1,L2,P) and turn it into 'dec', - this might improve the reduction test code slightly (X is - the pseudo for FCALLS) -* recognise alu(Z,X,add,Y) and turn it into 'lea'. -* rewrite tailcalls as parallel assignments before regalloc - -x86: -* Use separate constructors for real regs (x86_reg) and pseudos (x86_temp). - -Frame: -* drop tailcall rewrite - -Registers: -* make the 2 regs now reserved for frame's tailcall rewrite available for arg passing - -Optimizations: -* replace jcc cc,L1; jmp L0; L1: with jcc <not cc> L0; L1: (length:len/2) -* Kill move X,X insns, either in frame or finalise -* Instruction scheduling module -* We can now choose to not have HP in %esi. However, this currently loses - performance due to (a) repeated moves to/from P_HP(P), and (b) spills of - the temp that contains a copy of P_HP(P). Both of these problems should be - fixed, and then, if we don't have any noticeable performance degradation, we - should permanently change to a non-reserved HP strategy. - -Loader: - -Assembler: - -Encode: diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl deleted file mode 100644 index 22947da148..0000000000 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ /dev/null @@ -1,936 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. 
-%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% Translate 3-address RTL code to 2-address pseudo-x86 code. - --ifdef(HIPE_AMD64). --define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(ECX, rcx). --define(EAX, rax). --else. --define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(ECX, ecx). --define(EAX, eax). --endif. - --module(?HIPE_RTL_TO_X86). --export([translate/1]). - --include("../rtl/hipe_rtl.hrl"). - -translate(RTL) -> % RTL function -> x86 defun - hipe_gensym:init(x86), - hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()), - hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)), - Map0 = vmap_empty(), - {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), - OldData = hipe_rtl:rtl_data(RTL), - {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), - {RegFormals,_} = split_args(Formals), - Code = - case RegFormals of - [] -> Code0; - _ -> [hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) | - move_formals(RegFormals, Code0)] - end, - IsClosure = hipe_rtl:rtl_is_closure(RTL), - IsLeaf = hipe_rtl:rtl_is_leaf(RTL), - hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL), - Formals, - IsClosure, - IsLeaf, - Code, - NewData, - [], - []). - -conv_insn_list([H|T], Map, Data) -> - {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), - %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), - {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), - {NewH ++ NewT, NewData2}; -conv_insn_list([], _, Data) -> - {[], Data}. - -conv_insn(I, Map, Data) -> - case I of - #alu{} -> - %% dst = src1 binop src2 - BinOp = conv_binop(hipe_rtl:alu_op(I)), - {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), - {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), - {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), - I2 = - case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of - true -> - conv_shift(Dst, Src1, BinOp, Src2); - false -> - conv_alu_nocc(Dst, Src1, BinOp, Src2, []) - end, - {FixSrc1++FixSrc2++I2, Map2, Data}; - #alub{} -> - %% dst = src1 op src2; if COND goto label - BinOp = conv_binop(hipe_rtl:alub_op(I)), - {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), - {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), - Cc = conv_cond(hipe_rtl:alub_cond(I)), - BranchOp = conv_branchop(BinOp), - HasDst = hipe_rtl:alub_has_dst(I), - {I2, Map3} = - case (not HasDst) andalso BranchOp =/= none of - true -> - {conv_branch(Src1, BranchOp, Src2, Cc, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I)), Map1}; - false -> - {Dst, Map2} = - case HasDst of - false -> {new_untagged_temp(), Map1}; - true -> conv_dst(hipe_rtl:alub_dst(I), Map1) - end, - I1 = [hipe_x86:mk_pseudo_jcc(Cc, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I))], - {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2} - end, - {FixSrc1++FixSrc2++I2, Map3, Data}; - #call{} -> - %% push <arg1> - %% ... 
- %% push <argn> - %% eax := call <Fun>; if exn goto <Fail> else goto Next - %% Next: - %% <Dst> := eax - %% goto <Cont> - {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), - {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), - {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), - I2 = conv_call(Dsts, Fun, Args, - hipe_rtl:call_continuation(I), - hipe_rtl:call_fail(I), - hipe_rtl:call_type(I)), - {FixArgs++I2, Map2, Data}; - #comment{} -> - I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], - {I2, Map, Data}; - #enter{} -> - {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), - {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), - I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)), - {FixArgs++I2, Map1, Data}; - #goto{} -> - I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))], - {I2, Map, Data}; - #label{} -> - I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))], - {I2, Map, Data}; - #load{} -> - {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), - {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), - I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of - {byte, signed} -> - [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; - {byte, unsigned} -> - [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; - {int16, signed} -> - [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; - {int16, unsigned} -> - [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; - {LoadSize, LoadSign} -> - mk_load(LoadSize, LoadSign, Src, Off, Dst) - end, - {FixSrc++FixOff++I2, Map2, Data}; - #load_address{} -> - {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), - Addr = hipe_rtl:load_address_addr(I), - Type = hipe_rtl:load_address_type(I), - Src = hipe_x86:mk_imm_from_addr(Addr, Type), - I2 = mk_load_address(Type, Src, Dst), - {I2, Map0, Data}; - #load_atom{} -> - {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), - Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), - I2 = [hipe_x86:mk_move(Src, Dst)], - {I2, Map0, Data}; - #move{src=Dst, dst=Dst} -> {[], Map, Data}; - #move{} -> - {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), - I2 = [hipe_x86:mk_move(Src, Dst)], - {FixSrc++I2, Map1, Data}; - #return{} -> - {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), - %% frame will fill in npop later, hence the "mk_ret(-1)" - I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), - {FixArgs++I2, Map0, Data}; - #store{} -> - {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), - {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), - I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixPtr++FixSrc++FixOff++I2, Map2, Data}; - #switch{} -> % this one also updates Data :-( - %% from hipe_rtl2sparc, but we use a hairy addressing mode - %% instead of doing the arithmetic manually - Labels = hipe_rtl:switch_labels(I), - LMap = [{label,L} || L <- Labels], - {NewData, JTabLab} = - case hipe_rtl:switch_sort_order(I) of - [] -> - hipe_consttab:insert_block(Data, word, LMap); - SortOrder -> - hipe_consttab:insert_sorted_block( - Data, word, LMap, SortOrder) - end, - %% no immediates allowed here - {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), - I2 = mk_jmp_switch(Index, JTabLab, Labels), - {I2, Map1, NewData}; - #fload{} -> - {Dst, Map0} = 
conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), - {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), - I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], - {I2, Map2, Data}; - #fstore{} -> - {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0), - {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1), - I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))], - {I2, Map2, Data}; - #fp{} -> - {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map), - {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0), - {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1), - FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)), - I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2), - {I2, Map2, Data}; - #fp_unop{} -> - {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0), - FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)), - I2 = conv_fp_unary(Dst, Src, FpUnOp), - {I2, Map1, Data}; - #fmove{} -> - {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0), - I2 = [hipe_x86:mk_fmove(Src, Dst)], - {I2, Map1, Data}; - #fconv{} -> - {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0), - I2 = conv_fconv(Dst, Src), - {I2, Map1, Data}; - X -> - %% gctest?? - %% jmp, jmp_link, jsr, esr, multimove, - %% stackneed, pop_frame, restore_frame, save_frame - throw({?MODULE, {"unknown RTL instruction", X}}) - end. - -%%% Finalise the conversion of a 3-address ALU operation, taking -%%% care to not introduce more temps and moves than necessary. - -conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> - case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) - %% We could use orelse instead of xor here to generate lea T1(T2), T3, but - %% they seem to move coalesce so well that move+add is better for them. - andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2)) - of - false -> conv_alu(Dst, Src1, 'add', Src2, Tail); - true -> % Use LEA - Type = typeof_dst(Dst), - Mem = case hipe_x86:is_temp(Src1) of - true -> hipe_x86:mk_mem(Src1, Src2, Type); - false -> hipe_x86:mk_mem(Src2, Src1, Type) - end, - [hipe_x86:mk_lea(Mem, Dst) | Tail] - end; -conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) -> - case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1) - andalso (not hipe_x86:is_temp(Src2)) - of - false -> conv_alu(Dst, Src1, 'sub', Src2, Tail); - true -> % Use LEA - Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)), - Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)), - [hipe_x86:mk_lea(Mem, Dst) | Tail] - end; -conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> - conv_alu(Dst, Src1, BinOp, Src2, Tail). 
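A minimal standalone sketch of the decision conv_alu_nocc/5 above makes for 'add', using plain {temp,N}/{imm,V} tuples in place of the real hipe_x86 records (the 'sub' clause plays the same trick with a negated immediate); the atom in the fallback branch stands for the general two-address lowering done by conv_alu below:

-module(lea_sketch).
-export([lower_add/3]).

is_temp({temp, _}) -> true;
is_temp(_)         -> false.

%% lower_add(Dst, Src1, Src2): three-address "Dst := Src1 + Src2"
lower_add(Dst, Src1, Src2) ->
    case Dst =/= Src1 andalso Dst =/= Src2
         andalso (is_temp(Src1) xor is_temp(Src2)) of
        true ->
            %% exactly one source is a temp, the other an immediate:
            %% a single "lea Disp(Base), Dst" does the whole job
            {Base, Disp} = case is_temp(Src1) of
                               true  -> {Src1, Src2};
                               false -> {Src2, Src1}
                           end,
            [{lea, {mem, Base, Disp}, Dst}];
        false ->
            %% every other shape goes through the move+alu lowering
            general_two_address
    end.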
- -conv_alu(Dst, Src1, 'imul', Src2, Tail) -> - mk_imul(Src1, Src2, Dst, Tail); -conv_alu(Dst, Src1, BinOp, Src2, Tail) -> - case same_opnd(Dst, Src1) of - true -> % x = x op y - [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % x op= y - false -> % z = x op y, where z != x - case same_opnd(Dst, Src2) of - false -> % z = x op y, where z != x && z != y - [hipe_x86:mk_move(Src1, Dst), % z = x - hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % z op= y - true -> % y = x op y, where y != x - case binop_commutes(BinOp) of - true -> % y = y op x - [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x - false -> % y = x op y, where op doesn't commute - Tmp = clone_dst(Dst), - [hipe_x86:mk_move(Src1, Tmp), % t = x - hipe_x86:mk_alu(BinOp, Src2, Tmp), % t op= y - hipe_x86:mk_move(Tmp, Dst) | Tail] % y = t - end - end - end. - -mk_imul(Src1, Src2, Dst, Tail) -> - case hipe_x86:is_imm(Src1) of - true -> - case hipe_x86:is_imm(Src2) of - true -> - mk_imul_iit(Src1, Src2, Dst, Tail); - _ -> - mk_imul_itt(Src1, Src2, Dst, Tail) - end; - _ -> - case hipe_x86:is_imm(Src2) of - true -> - mk_imul_itt(Src2, Src1, Dst, Tail); - _ -> - mk_imul_ttt(Src1, Src2, Dst, Tail) - end - end. - -mk_imul_iit(Src1, Src2, Dst, Tail) -> - io:format("~w: RTL mul with two immediates\n", [?MODULE]), - Tmp2 = new_untagged_temp(), - [hipe_x86:mk_move(Src2, Tmp2) | - mk_imul_itt(Src1, Tmp2, Dst, Tail)]. - -mk_imul_itt(Src1, Src2, Dst, Tail) -> - [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail]. - -mk_imul_ttt(Src1, Src2, Dst, Tail) -> - case same_opnd(Dst, Src1) of - true -> - [hipe_x86:mk_imul([], Src2, Dst) | Tail]; - false -> - case same_opnd(Dst, Src2) of - true -> - [hipe_x86:mk_imul([], Src1, Dst) | Tail]; - false -> - [hipe_x86:mk_move(Src1, Dst), - hipe_x86:mk_imul([], Src2, Dst) | Tail] - end - end. - -conv_shift(Dst, Src1, BinOp, Src2) -> - {NewSrc2,I1} = - case hipe_x86:is_imm(Src2) of - true -> - {Src2, []}; - false -> - NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'), - {NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]} - end, - I2 = case same_opnd(Dst, Src1) of - true -> % x = x op y - [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)]; % x op= y - false -> % z = x op y, where z != x - case same_opnd(Dst, Src2) of - false -> % z = x op y, where z != x && z != y - [hipe_x86:mk_move(Src1, Dst), % z = x - hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y - true -> % y = x op y, no shift op commutes - Tmp = clone_dst(Dst), - [hipe_x86:mk_move(Src1, Tmp), % t = x - hipe_x86:mk_shift(BinOp, NewSrc2, Tmp), % t op= y - hipe_x86:mk_move(Tmp, Dst)] % y = t - end - end, - I1 ++ I2. - -%%% Finalise the conversion of a conditional branch operation, taking -%%% care to not introduce more temps and moves than necessary. - -conv_branchop('sub') -> 'cmp'; -conv_branchop('and') -> 'test'; -conv_branchop(_) -> none. - -branchop_commutes('cmp') -> false; -branchop_commutes('test') -> true. - -conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> - case hipe_x86:is_imm(Src1) of - false -> - mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred); - true -> - case hipe_x86:is_imm(Src2) of - false -> - NewCc = case branchop_commutes(Op) of - true -> Cc; - false -> commute_cc(Cc) - end, - mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred); - true -> - %% two immediates, let the optimiser clean it up - Tmp = new_untagged_temp(), - [hipe_x86:mk_move(Src1, Tmp) | - mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)] - end - end. 
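The operand swap in conv_branch/7 above is worth a small worked example: x86 cmp cannot take an immediate as its destination operand, so an immediate first RTL operand is moved to the other side and the condition code commuted ("5 < x" becomes "x > 5"). A standalone sketch with only a subset of the cc table, using the same simplified tuples as the sketch above:

-module(branch_swap_sketch).
-export([branch/4]).

commute_cc('e')  -> 'e';
commute_cc('ne') -> 'ne';
commute_cc('l')  -> 'g';
commute_cc('g')  -> 'l';
commute_cc('le') -> 'ge';
commute_cc('ge') -> 'le'.

%% branch(Src1, Src2, Cc, Label): jump to Label if "Src1 Cc Src2" holds
branch({imm, _} = I1, {imm, _} = I2, Cc, Label) ->
    %% two immediates: load one into a scratch temp first, as the real
    %% code does via new_untagged_temp/0
    T = {temp, scratch},
    [{move, I1, T} | branch(T, I2, Cc, Label)];
branch({imm, _} = Imm, Src2, Cc, Label) ->
    %% immediate on the left: swap the operands, commute the condition
    [{cmp, Imm, Src2}, {jcc, commute_cc(Cc), Label}];
branch(Src1, Src2, Cc, Label) ->
    %% AT&T order, as in mk_branchtest: "cmp Src2,Src1" sets the flags
    %% for "Src1 Cc Src2"
    [{cmp, Src2, Src1}, {jcc, Cc, Label}].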
- -mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> - %% PRE: not(is_imm(Src1)) - [mk_branchtest(Src1, Op, Src2), - hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. - -mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1); -mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1). - -%%% Convert an RTL ALU or ALUB binary operator. - -conv_binop(BinOp) -> - case BinOp of - 'add' -> 'add'; - 'sub' -> 'sub'; - 'or' -> 'or'; - 'and' -> 'and'; - 'xor' -> 'xor'; - 'sll' -> 'shl'; - 'srl' -> 'shr'; - 'sra' -> 'sar'; - 'mul' -> 'imul'; - %% andnot ??? - _ -> exit({?MODULE, {"unknown binop", BinOp}}) - end. - -binop_commutes(BinOp) -> - case BinOp of - 'add' -> true; - 'or' -> true; - 'and' -> true; - 'xor' -> true; - _ -> false - end. - -%%% Convert an RTL conditional operator. - -conv_cond(Cond) -> - case Cond of - eq -> 'e'; - ne -> 'ne'; - gt -> 'g'; - gtu -> 'a'; - ge -> 'ge'; - geu -> 'ae'; - lt -> 'l'; - ltu -> 'b'; - le -> 'le'; - leu -> 'be'; - overflow -> 'o'; - not_overflow -> 'no'; - _ -> exit({?MODULE, {"unknown rtl cond", Cond}}) - end. - -commute_cc(Cc) -> % if x Cc y, then y commute_cc(Cc) x - case Cc of - 'e' -> 'e'; % ==, == - 'ne' -> 'ne'; % !=, != - 'g' -> 'l'; % >, < - 'a' -> 'b'; % >u, <u - 'ge' -> 'le'; % >=, <= - 'ae' -> 'be'; % >=u, <=u - 'l' -> 'g'; % <, > - 'b' -> 'a'; % <u, >u - 'le' -> 'ge'; % <=, >= - 'be' -> 'ae'; % <=u, >=u - %% overflow/not_overflow: n/a - _ -> exit({?MODULE, {"unknown cc", Cc}}) - end. - -%%% Test if Dst and Src are the same operand. - -same_opnd(Dst, Src) -> Dst =:= Src. - -%%% Finalise the conversion of a tailcall instruction. - -conv_tailcall(Fun, Args, Linkage) -> - Arity = length(Args), - {RegArgs,StkArgs} = split_args(Args), - move_actuals(RegArgs, - [hipe_x86:mk_pseudo_tailcall_prepare(), - hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]). - -split_args(Args) -> - split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []). -split_args(I, N, [Arg|Args], RegArgs) when I < N -> - Reg = ?HIPE_X86_REGISTERS:arg(I), - Temp = hipe_x86:mk_temp(Reg, 'tagged'), - split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); -split_args(_, _, StkArgs, RegArgs) -> - {RegArgs, StkArgs}. - -move_actuals([], Rest) -> Rest; -move_actuals([{Src,Dst}|Actuals], Rest) -> - move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]). - -move_formals([], Rest) -> Rest; -move_formals([{Dst,Src}|Formals], Rest) -> - move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]). - -%%% Finalise the conversion of a call instruction. - -conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> - case hipe_x86:is_prim(Fun) of - true -> - conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); - false -> - conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) - end. - -conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> - case hipe_x86:prim_prim(Prim) of - 'fwait' -> - conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage); - _ -> - conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) - end. - -conv_fwait_call([], [], [], [], not_remote) -> - [hipe_x86:mk_fp_unop('fwait', [])]. - -conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> - %% The backend does not support pseudo_calls without a - %% continuation label, so we make sure each call has one. - {RealContLab, Tail} = - case do_call_results(Dsts) of - [] -> - %% Avoid consing up a dummy basic block if the moves list - %% is empty, as is typical for calls to suspend/0. 
- %% This should be subsumed by a general "optimise the CFG" - %% module, and could probably be removed. - case ContLab of - [] -> - NewContLab = hipe_gensym:get_next_label(x86), - {NewContLab, [hipe_x86:mk_label(NewContLab)]}; - _ -> - {ContLab, []} - end; - Moves -> - %% Change the call to continue at a new basic block. - %% In this block move the result registers to the Dsts, - %% then continue at the call's original continuation. - %% - %% This should be fixed to propagate "fallthrough calls" - %% When the rest of the backend supports them. - NewContLab = hipe_gensym:get_next_label(x86), - case ContLab of - [] -> - %% This is just a fallthrough - %% No jump back after the moves. - {NewContLab, - [hipe_x86:mk_label(NewContLab) | - Moves]}; - _ -> - %% The call has a continuation - %% jump to it. - {NewContLab, - [hipe_x86:mk_label(NewContLab) | - Moves ++ - [hipe_x86:mk_jmp_label(ContLab)]]} - end - end, - SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}), - CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage), - {RegArgs,StkArgs} = split_args(Args), - do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). - -do_push_args([Arg|Args], Tail) -> - [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)]; -do_push_args([], Tail) -> - Tail. - -%%% Move return values from the return value registers. - -do_call_results(DstList) -> - do_call_results(DstList, 0, []). - -do_call_results([Dst|DstList], I, Rest) -> - Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), - Move = hipe_x86:mk_move(Src, Dst), - do_call_results(DstList, I+1, [Move|Rest]); -do_call_results([], _, Insns) -> Insns. - -%%% Move return values to the return value registers. - -move_retvals(SrcLst, Rest) -> - move_retvals(SrcLst, 0, Rest). - -move_retvals([Src|SrcLst], I, Rest) -> - Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), - Move = hipe_x86:mk_move(Src, Dst), - move_retvals(SrcLst, I+1, [Move|Rest]); -move_retvals([], _, Insns) -> Insns. - -%%% Convert a 'fun' operand (MFA, prim, or temp) - -conv_fun(Fun, Map) -> - case hipe_rtl:is_var(Fun) of - true -> - conv_dst(Fun, Map); - false -> - case hipe_rtl:is_reg(Fun) of - true -> - conv_dst(Fun, Map); - false -> - case Fun of - Prim when is_atom(Prim) -> - {hipe_x86:mk_prim(Prim), Map}; - {M,F,A} when is_atom(M), is_atom(F), is_integer(A) -> - {hipe_x86:mk_mfa(M,F,A), Map}; - _ -> - exit({?MODULE,conv_fun,Fun}) - end - end - end. - -conv_src_noimm(Opnd, Map) -> - R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), - case hipe_x86:is_imm(Src) of - false -> R; - true -> - Tmp = new_untagged_temp(), - {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], - Tmp, NewMap} - end. - -%%% Convert an RTL source operand (imm/var/reg). - -conv_src(Opnd, Map) -> - case hipe_rtl:is_imm(Opnd) of - true -> - conv_imm(Opnd, Map); - false -> - {NewOpnd,NewMap} = conv_dst(Opnd, Map), - {[], NewOpnd, NewMap} - end. - --ifdef(HIPE_AMD64). -conv_imm(Opnd, Map) -> - ImmVal = hipe_rtl:imm_value(Opnd), - case is_imm64(ImmVal) of - true -> - Temp = hipe_x86:mk_new_temp('untagged'), - {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map}; - false -> - {[], hipe_x86:mk_imm(ImmVal), Map} - end. - -is_imm64(Value) when is_integer(Value) -> - (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1); -is_imm64({_,atom}) -> false; % Atoms are 32 bits. -is_imm64({_,c_const}) -> true; % c_consts are 64 bits. -is_imm64({_,_}) -> true . % Other relocs are 64 bits. --else. -conv_imm(Opnd, Map) -> - {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}. --endif. 
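For reference, the range test behind is_imm64/1 above, written as a standalone predicate with a few sample values (for integers, fits_in_signed32(V) is simply the negation of is_imm64(V)):

-module(imm_range_sketch).
-export([fits_in_signed32/1, demo/0]).

fits_in_signed32(V) when is_integer(V) ->
    V >= -(1 bsl 31) andalso V =< (1 bsl 31) - 1.

demo() ->
    true  = fits_in_signed32(16#7fffffff),       %% INT32_MAX: plain move
    false = fits_in_signed32(16#80000000),       %% needs a move64
    true  = fits_in_signed32(-(1 bsl 31)),       %% INT32_MIN still fits
    false = fits_in_signed32(-(1 bsl 31) - 1),   %% one below: move64
    ok.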
- -conv_src_list([O|Os], Map) -> - {NewInstr, V, Map1} = conv_src(O, Map), - {Instrs, Vs, Map2} = conv_src_list(Os, Map1), - {Instrs++NewInstr, [V|Vs], Map2}; -conv_src_list([], Map) -> - {[], [], Map}. - -%%% Convert an RTL destination operand (var/reg). - -conv_dst(Opnd, Map) -> - {Name, Type} = - case hipe_rtl:is_var(Opnd) of - true -> - {hipe_rtl:var_index(Opnd), 'tagged'}; - false -> - case hipe_rtl:is_fpreg(Opnd) of - true -> - {hipe_rtl:fpreg_index(Opnd), 'double'}; - false -> - {hipe_rtl:reg_index(Opnd), 'untagged'} - end - end, - case ?HIPE_X86_REGISTERS:is_precoloured(Name) of - true -> - case ?HIPE_X86_REGISTERS:proc_offset(Name) of - false -> - {hipe_x86:mk_temp(Name, Type), Map}; - Offset -> - Preg = ?HIPE_X86_REGISTERS:proc_pointer(), - Pbase = hipe_x86:mk_temp(Preg, 'untagged'), - Poff = hipe_x86:mk_imm(Offset), - {hipe_x86:mk_mem(Pbase, Poff, Type), Map} - end; - false -> - case vmap_lookup(Map, Opnd) of - {value, NewTemp} -> - {NewTemp, Map}; - _ -> - NewTemp = hipe_x86:mk_new_temp(Type), - {NewTemp, vmap_bind(Map, Opnd, NewTemp)} - end - end. - -conv_dst_list([O|Os], Map) -> - {Dst, Map1} = conv_dst(O, Map), - {Dsts, Map2} = conv_dst_list(Os, Map1), - {[Dst|Dsts], Map2}; -conv_dst_list([], Map) -> - {[], Map}. - -conv_formals(Os, Map) -> - conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []). - -conv_formals(N, [O|Os], Map, Res) -> - Type = - case hipe_rtl:is_var(O) of - true -> 'tagged'; - false ->'untagged' - end, - Dst = - if N > 0 -> hipe_x86:mk_new_temp(Type); % allocatable - true -> hipe_x86:mk_new_nonallocatable_temp(Type) - end, - Map1 = vmap_bind(Map, O, Dst), - conv_formals(N-1, Os, Map1, [Dst|Res]); -conv_formals(_, [], Map, Res) -> - {lists:reverse(Res), Map}. - -%%% typeof_src -- what's src's type? - -typeof_src(Src) -> - case hipe_x86:is_imm(Src) of - true -> - 'untagged'; - _ -> - typeof_dst(Src) - end. - -%%% typeof_dst -- what's dst's type? - -typeof_dst(Dst) -> - case hipe_x86:is_temp(Dst) of - true -> - hipe_x86:temp_type(Dst); - _ -> - hipe_x86:mem_type(Dst) - end. - -%%% clone_dst -- conjure up a scratch reg with same type as dst - -clone_dst(Dst) -> - hipe_x86:mk_new_temp(typeof_dst(Dst)). - -%%% new_untagged_temp -- conjure up an untagged scratch reg - -new_untagged_temp() -> - hipe_x86:mk_new_temp('untagged'). - -%%% Map from RTL var/reg operands to x86 temps. - -vmap_empty() -> - gb_trees:empty(). - -vmap_lookup(Map, Key) -> - gb_trees:lookup(Key, Map). - -vmap_bind(Map, Key, Val) -> - gb_trees:insert(Key, Val, Map). - -%%% Finalise the conversion of an Integer-to-Float operation. - -conv_fconv(Dst, Src) -> - case hipe_x86:is_imm(Src) of - false -> - [hipe_x86:mk_fmove(Src, Dst)]; - true -> - %% cvtsi2sd does not allow src to be an immediate - Tmp = new_untagged_temp(), - [hipe_x86:mk_move(Src, Tmp), - hipe_x86:mk_fmove(Tmp, Dst)] - end. - -%%% Finalise the conversion of a 2-address FP operation. - --ifdef(HIPE_AMD64). -conv_fp_unary(Dst, Src, 'fchs') -> - Tmp = new_untagged_temp(), - case same_opnd(Dst, Src) of - true -> - []; - _ -> - [hipe_x86:mk_fmove(Src, Dst)] - end ++ - mk_load_address(c_const, hipe_x86:mk_imm({sse2_fnegate_mask, c_const}), Tmp) ++ - [hipe_x86:mk_fp_binop('xorpd', hipe_x86:mk_mem(Tmp, hipe_x86:mk_imm(0), double), Dst)]. --else. -conv_fp_unary(Dst, Src, FpUnOp) -> - case same_opnd(Dst, Src) of - true -> - [hipe_x86:mk_fp_unop(FpUnOp, Dst)]; - _ -> - [hipe_x86:mk_fmove(Src, Dst), - hipe_x86:mk_fp_unop(FpUnOp, Dst)] - end. --endif. - -conv_fp_unop(RtlFpUnOp) -> - case RtlFpUnOp of - 'fchs' -> 'fchs' - end. 
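The vmap_* helpers above are a thin wrapper around the stdlib gb_trees module; a minimal usage sketch of the same bind-once/look-up-later pattern, with dummy tuples standing in for RTL operands and x86 temps:

-module(vmap_sketch).
-export([demo/0]).

demo() ->
    Map0 = gb_trees:empty(),
    %% first occurrence of an RTL operand: bind it to a fresh temp
    Map1 = gb_trees:insert({rtl_var, 7}, {x86_temp, 42}, Map0),
    %% every later use finds the same temp again
    {value, {x86_temp, 42}} = gb_trees:lookup({rtl_var, 7}, Map1),
    %% unbound operands simply miss, and conv_dst then creates a new temp
    none = gb_trees:lookup({rtl_var, 8}, Map1),
    ok.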
- -%%% Finalise the conversion of a 3-address FP operation. - -conv_fp_binary(Dst, Src1, FpBinOp, Src2) -> - case same_opnd(Dst, Src1) of - true -> % x = x op y - [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % x op= y - false -> % z = x op y, where z != x - case same_opnd(Dst, Src2) of - false -> % z = x op y, where z != x && z != y - [hipe_x86:mk_fmove(Src1, Dst), % z = x - hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % z op= y - true -> % y = x op y, where y != x - case fp_binop_commutes(FpBinOp) of - true -> % y = y op x - [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x - false -> % y = x op y, where op doesn't commute - RevFpBinOp = reverse_fp_binop(FpBinOp), - [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)] - end - end - end. - -%%% Convert an RTL FP binary operator. - -conv_fp_binop(RtlFpBinOp) -> - case RtlFpBinOp of - 'fadd' -> 'fadd'; - 'fdiv' -> 'fdiv'; - 'fmul' -> 'fmul'; - 'fsub' -> 'fsub' - end. - -fp_binop_commutes(FpBinOp) -> - case FpBinOp of - 'fadd' -> true; - 'fmul' -> true; - _ -> false - end. - -reverse_fp_binop(FpBinOp) -> - case FpBinOp of - 'fsub' -> 'fsubr'; - 'fdiv' -> 'fdivr' - end. - -%%% Create a jmp_switch instruction. - --ifdef(HIPE_AMD64). -mk_jmp_switch(Index, JTabLab, Labels) -> - JTabReg = hipe_x86:mk_new_temp('untagged'), - JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant), - [hipe_x86:mk_move64(JTabImm, JTabReg), - hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)]. --else. -mk_jmp_switch(Index, JTabLab, Labels) -> - %% this is equivalent to "jmp *JTabLab(,Index,4)" - %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice) - [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)]. --endif. - -%%% Finalise the translation of a load_address instruction. - --ifdef(HIPE_AMD64). -mk_load_address(_Type, Src, Dst) -> - [hipe_x86:mk_move64(Src, Dst)]. --else. -mk_load_address(_Type, Src, Dst) -> - [hipe_x86:mk_move(Src, Dst)]. --endif. - -%%% Translate 32-bit and larger loads. - --ifdef(HIPE_AMD64). -mk_load(LoadSize, LoadSign, Src, Off, Dst) -> - case {LoadSize, LoadSign} of - {int32, signed} -> - [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; - {int32, unsigned} -> - %% The processor zero-extends for us. No need for 'movzx'. - [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; - {_, _} -> - mk_load_word(Src, Off, Dst) - end. --else. -mk_load(_LoadSize, _LoadSign, Src, Off, Dst) -> - mk_load_word(Src, Off, Dst). --endif. - -mk_load_word(Src, Off, Dst) -> - Type = typeof_dst(Dst), - [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)]. - -%%% Finalise the translation of a store instruction. - --ifdef(HIPE_AMD64). -mk_store(RtlStoreSize, Src, Ptr, Off) -> - Type = case RtlStoreSize of - word -> - typeof_src(Src); - OtherType -> - OtherType - end, - [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]. --else. -mk_store(RtlStoreSize, Src, Ptr, Off) -> - case RtlStoreSize of - word -> - Type = typeof_src(Src), - [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; - int32 -> - Type = typeof_src(Src), - [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; - int16 -> - Type = 'int16', - [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; - byte -> - Type = 'byte', - {NewSrc, I1} = conv_small_store(Src), - I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))] - end. - -conv_small_store(Src) -> - case hipe_x86:is_imm(Src) of - true -> - {Src, []}; - false -> - NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'), - {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]} - end. --endif. 
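As a closing illustration of the argument-passing handled by split_args/4 and move_actuals/2 in hipe_rtl_to_x86.erl above: the first nr_args() call arguments travel in registers and the rest are pushed on the stack. A standalone sketch with the register count passed in explicitly, since its value comes from hipe_x86_registers (or hipe_amd64_registers) and is configuration-dependent:

-module(split_args_sketch).
-export([split/2]).

%% split(NrArgRegs, Args) -> {RegAssignments, StackArgs}
%% e.g. split(2, [a, b, c, d]) ->
%%        {[{a, {arg_reg, 0}}, {b, {arg_reg, 1}}], [c, d]}
split(N, Args) ->
    split(0, N, Args, []).

split(I, N, [Arg | Args], Regs) when I < N ->
    split(I + 1, N, Args, [{Arg, {arg_reg, I}} | Regs]);
split(_, _, StackArgs, Regs) ->
    {lists:reverse(Regs), StackArgs}.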
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl deleted file mode 100644 index f514dd1ded..0000000000 --- a/lib/hipe/x86/hipe_x86.erl +++ /dev/null @@ -1,508 +0,0 @@ -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% representation of 2-address pseudo-amd64 code - --module(hipe_x86). - --include("hipe_x86.hrl"). - -%% Commented out are interface functions which are currently not used. --export([mk_temp/2, - %% mk_nonallocatable_temp/2, - mk_new_temp/1, - mk_new_nonallocatable_temp/1, - is_temp/1, - temp_reg/1, - temp_type/1, - temp_is_allocatable/1, - - mk_imm/1, - mk_imm_from_addr/2, - mk_imm_from_atom/1, - is_imm/1, - imm_value/1, - - mk_mem/3, - %% is_mem/1, - %% mem_base/1, - %% mem_off/1, - mem_type/1, - - mk_fpreg/1, - mk_fpreg/2, - %% is_fpreg/1, - %% fpreg_is_pseudo/1, - %% fpreg_reg/1, - - mk_mfa/3, - %% is_mfa/1, - - mk_prim/1, - is_prim/1, - prim_prim/1, - - mk_sdesc/4, - - %% insn_type/1, - - mk_alu/3, - %% is_alu/1, - alu_op/1, - alu_src/1, - alu_dst/1, - - mk_call/3, - %% is_call/1, - call_fun/1, - call_sdesc/1, - call_linkage/1, - - %% mk_cmovcc/3, - %% is_cmovcc/1, - cmovcc_cc/1, - cmovcc_src/1, - cmovcc_dst/1, - - mk_cmp/2, - %% is_cmp/1, - cmp_src/1, - cmp_dst/1, - - mk_comment/1, - %% is_comment/1, - %% comment_term/1, - - mk_fmove/2, - is_fmove/1, - fmove_src/1, - fmove_dst/1, - - mk_fp_unop/2, - %% is_fp_unop/1, - fp_unop_arg/1, - fp_unop_op/1, - - mk_fp_binop/3, - %% is_fp_binop/1, - fp_binop_src/1, - fp_binop_dst/1, - fp_binop_op/1, - - mk_imul/3, - imul_imm_opt/1, - imul_src/1, - imul_temp/1, - - mk_jcc/2, - %% is_jcc/1, - jcc_cc/1, - jcc_label/1, - - mk_jmp_fun/2, - %% is_jmp_fun/1, - jmp_fun_fun/1, - jmp_fun_linkage/1, - - mk_jmp_label/1, - %% is_jmp_label/1, - jmp_label_label/1, - - mk_jmp_switch/3, - %% is_jmp_switch/1, - jmp_switch_temp/1, - jmp_switch_jtab/1, - %% jmp_switch_labels/1, - - mk_label/1, - is_label/1, - label_label/1, - - mk_lea/2, - %% is_lea/1, - lea_mem/1, - lea_temp/1, - - mk_move/2, - is_move/1, - move_src/1, - move_dst/1, - mk_move64/2, - %% is_move64/1, - move64_src/1, - move64_dst/1, - - mk_movsx/2, - %% is_movsx/1, - movsx_src/1, - movsx_dst/1, - - mk_movzx/2, - %% is_movzx/1, - movzx_src/1, - movzx_dst/1, - - mk_pseudo_call/4, - %% is_pseudo_call/1, - pseudo_call_fun/1, - pseudo_call_sdesc/1, - pseudo_call_contlab/1, - pseudo_call_linkage/1, - - mk_pseudo_jcc/4, - %% is_pseudo_jcc/1, - %% pseudo_jcc_cc/1, - %% pseudo_jcc_true_label/1, - %% pseudo_jcc_false_label/1, - %% pseudo_jcc_pred/1, - - mk_pseudo_spill/1, - - mk_pseudo_spill_fmove/3, - is_pseudo_spill_fmove/1, - - mk_pseudo_spill_move/3, - is_pseudo_spill_move/1, - - mk_pseudo_tailcall/4, - %% is_pseudo_tailcall/1, - pseudo_tailcall_fun/1, - %% pseudo_tailcall_arity/1, - pseudo_tailcall_stkargs/1, - pseudo_tailcall_linkage/1, - - mk_pseudo_tailcall_prepare/0, - %% is_pseudo_tailcall_prepare/1, - - mk_push/1, - %% is_push/1, - push_src/1, - - %% mk_pop/1, - pop_dst/1, - - mk_ret/1, - %% is_ret/1, - ret_npop/1, - - mk_shift/3, - %% is_shift/1, - 
shift_op/1, - shift_src/1, - shift_dst/1, - - mk_test/2, - test_src/1, - test_dst/1, - - mk_defun/8, - defun_mfa/1, - defun_formals/1, - defun_is_closure/1, - defun_is_leaf/1, - defun_code/1, - defun_data/1, - defun_var_range/1 - %% defun_label_range/1, - - %% highest_temp/1 - ]). - -%% Other utilities --export([neg_cc/1 - ]). - -%%% -%%% Low-level accessors. -%%% - -mk_temp(Reg, Type) when is_integer(Reg) -> - #x86_temp{reg=Reg, type=Type, allocatable=true}. -mk_nonallocatable_temp(Reg, Type) when is_integer(Reg) -> - #x86_temp{reg=Reg, type=Type, allocatable=false}. -mk_new_temp(Type) -> - mk_temp(hipe_gensym:get_next_var(x86), Type). -mk_new_nonallocatable_temp(Type) -> - mk_nonallocatable_temp(hipe_gensym:get_next_var(x86), Type). -is_temp(X) -> case X of #x86_temp{} -> true; _ -> false end. -temp_reg(#x86_temp{reg=Reg}) when is_integer(Reg) -> Reg. -temp_type(#x86_temp{type=Type}) -> Type. -temp_is_allocatable(#x86_temp{allocatable=A}) -> A. - -mk_imm(Value) -> #x86_imm{value=Value}. -mk_imm_from_addr(Addr, Type) -> - mk_imm({Addr, Type}). -mk_imm_from_atom(Atom) -> - mk_imm(Atom). -is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. -imm_value(#x86_imm{value=Value}) -> Value. - -mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. -%% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. -%% mem_base(#x86_mem{base=Base}) -> Base. -%% mem_off(#x86_mem{off=Off}) -> Off. -mem_type(#x86_mem{type=Type}) -> Type. - -mk_fpreg(Reg) -> #x86_fpreg{reg=Reg, pseudo=true}. -mk_fpreg(Reg, Pseudo) -> #x86_fpreg{reg=Reg, pseudo=Pseudo}. -%% is_fpreg(F) -> case F of #x86_fpreg{} -> true;_ -> false end. -%% fpreg_is_pseudo(#x86_fpreg{pseudo=Pseudo}) -> Pseudo. -%% fpreg_reg(#x86_fpreg{reg=Reg}) -> Reg. - -mk_mfa(M, F, A) -> #x86_mfa{m=M, f=F, a=A}. -%% is_mfa(X) -> case X of #x86_mfa{} -> true; _ -> false end. - -mk_prim(Prim) -> #x86_prim{prim=Prim}. -is_prim(X) -> case X of #x86_prim{} -> true; _ -> false end. -prim_prim(#x86_prim{prim=Prim}) -> Prim. - -mk_sdesc(ExnLab, FSize, Arity, Live) -> - #x86_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. - -insn_type(Insn) -> - element(1, Insn). - -is_insn_type(Insn, Type) -> - case insn_type(Insn) of - Type -> true; - _ -> false - end. - -mk_alu(Op, Src, Dst) -> #alu{aluop=Op, src=Src, dst=Dst}. -%% is_alu(Insn) -> is_insn_type(Insn, alu). -alu_op(#alu{aluop=Op}) -> Op. -alu_src(#alu{src=Src}) -> Src. -alu_dst(#alu{dst=Dst}) -> Dst. - -mk_call(Fun, SDesc, Linkage) -> - check_linkage(Linkage), - #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. -%% is_call(Insn) -> is_insn_type(Insn, call). -call_fun(#call{'fun'=Fun}) -> Fun. -call_sdesc(#call{sdesc=SDesc}) -> SDesc. -call_linkage(#call{linkage=Linkage}) -> Linkage. - -check_linkage(Linkage) -> - case Linkage of - remote -> []; - not_remote -> [] - end. - -%% mk_cmovcc(Cc, Src, Dst) -> #cmovcc{cc=Cc, src=Src, dst=Dst}. -%% is_cmovcc(Insn) -> is_insn_type(Insn, cmovcc). -cmovcc_cc(#cmovcc{cc=Cc}) -> Cc. -cmovcc_src(#cmovcc{src=Src}) -> Src. -cmovcc_dst(#cmovcc{dst=Dst}) -> Dst. - -mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. -%% is_cmp(Insn) -> is_insn_type(Insn, cmp). -cmp_src(#cmp{src=Src}) -> Src. -cmp_dst(#cmp{dst=Dst}) -> Dst. - -mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. -test_src(#test{src=Src}) -> Src. -test_dst(#test{dst=Dst}) -> Dst. - -mk_comment(Term) -> #comment{term=Term}. -%% is_comment(Insn) -> is_insn_type(Insn, comment). -%% comment_term(#comment{term=Term}) -> Term. - -mk_fmove(Src, Dst) -> #fmove{src=Src, dst=Dst}. 
-is_fmove(F) -> is_insn_type(F, fmove). -fmove_src(#fmove{src=Src}) -> Src. -fmove_dst(#fmove{dst=Dst}) -> Dst. - -mk_fp_unop(Op, Arg) -> #fp_unop{op=Op, arg=Arg}. -%% is_fp_unop(F) -> is_insn_type(F, fp_unop). -fp_unop_arg(#fp_unop{arg=Arg}) -> Arg. -fp_unop_op(#fp_unop{op=Op}) -> Op. - -mk_fp_binop(Op, Src, Dst) -> #fp_binop{op=Op, src=Src, dst=Dst}. -%% is_fp_binop(F) -> is_insn_type(F, fp_binop). -fp_binop_src(#fp_binop{src=Src}) -> Src. -fp_binop_dst(#fp_binop{dst=Dst}) -> Dst. -fp_binop_op(#fp_binop{op=Op}) -> Op. - -mk_imul(ImmOpt, Src, Temp) -> #imul{imm_opt=ImmOpt, src=Src, temp=Temp}. -imul_imm_opt(#imul{imm_opt=ImmOpt}) -> ImmOpt. -imul_src(#imul{src=Src}) -> Src. -imul_temp(#imul{temp=Temp}) -> Temp. - -mk_jcc(Cc, Label) -> #jcc{cc=Cc, label=Label}. -%% is_jcc(Insn) -> is_insn_type(Insn, jcc). -jcc_cc(#jcc{cc=Cc}) -> Cc. -jcc_label(#jcc{label=Label}) -> Label. - -mk_jmp_fun(Fun, Linkage) -> - check_linkage(Linkage), - #jmp_fun{'fun'=Fun, linkage=Linkage}. -%% is_jmp_fun(Insn) -> is_insn_type(Insn, jmp_fun). -jmp_fun_fun(#jmp_fun{'fun'=Fun}) -> Fun. -jmp_fun_linkage(#jmp_fun{linkage=Linkage}) -> Linkage. - -mk_jmp_label(Label) -> #jmp_label{label=Label}. -%% is_jmp_label(Insn) -> is_insn_type(Insn, jmp_label). -jmp_label_label(#jmp_label{label=Label}) -> Label. - -mk_jmp_switch(Temp, JTab, Labels) -> - #jmp_switch{temp=Temp, jtab=JTab, labels=Labels}. -%% is_jmp_switch(Insn) -> is_insn_type(Insn, jmp_switch). -jmp_switch_temp(#jmp_switch{temp=Temp}) -> Temp. -jmp_switch_jtab(#jmp_switch{jtab=JTab}) -> JTab. -%% jmp_switch_labels(#jmp_switch{labels=Labels}) -> Labels. - -mk_label(Label) -> #label{label=Label}. -is_label(Insn) -> is_insn_type(Insn, label). -label_label(#label{label=Label}) -> Label. - -mk_lea(Mem, Temp) -> #lea{mem=Mem, temp=Temp}. -%% is_lea(Insn) -> is_insn_type(Insn, lea). -lea_mem(#lea{mem=Mem}) -> Mem. -lea_temp(#lea{temp=Temp}) -> Temp. - -mk_move(Src, Dst) -> #move{src=Src, dst=Dst}. -is_move(Insn) -> is_insn_type(Insn, move). -move_src(#move{src=Src}) -> Src. -move_dst(#move{dst=Dst}) -> Dst. - -mk_move64(Imm, Dst) -> #move64{imm=Imm, dst=Dst}. -%% is_move64(Insn) -> is_insn_type(Insn, move64). -move64_src(#move64{imm=Imm}) -> Imm. -move64_dst(#move64{dst=Dst}) -> Dst. - -mk_movsx(Src, Dst) -> #movsx{src=Src, dst=Dst}. -%% is_movsx(Insn) -> is_insn_type(Insn, movsx). -movsx_src(#movsx{src=Src}) -> Src. -movsx_dst(#movsx{dst=Dst}) -> Dst. - -mk_movzx(Src, Dst) -> #movzx{src=Src, dst=Dst}. -%% is_movzx(Insn) -> is_insn_type(Insn, movzx). -movzx_src(#movzx{src=Src}) -> Src. -movzx_dst(#movzx{dst=Dst}) -> Dst. - -mk_pseudo_call(Fun, SDesc, ContLab, Linkage) -> - check_linkage(Linkage), - #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. -%% is_pseudo_call(Insn) -> is_insn_type(Insn, pseudo_call). -pseudo_call_fun(#pseudo_call{'fun'=Fun}) -> Fun. -pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. -pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. -pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. - -mk_pseudo_jcc(Cc, TrueLabel, FalseLabel, Pred) -> % 'smart' constructor - if Pred >= 0.5 -> - mk_pseudo_jcc_simple(neg_cc(Cc), FalseLabel, TrueLabel, 1.0-Pred); - true -> - mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) - end. 
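mk_pseudo_jcc/4 above is a 'smart' constructor: when the predicted probability of the true branch is at least 0.5 it negates the condition and swaps the two labels, so the stored pred (the probability of the recorded true label) is always at most 0.5 and the likely successor ends up as the false label. A standalone sketch with a two-entry cc table (the full table follows in neg_cc/1 below):

-module(pseudo_jcc_sketch).
-export([mk/4]).

neg_cc('e')  -> 'ne';
neg_cc('ne') -> 'e'.

mk(Cc, TrueLab, FalseLab, Pred) when Pred >= 0.5 ->
    {pseudo_jcc, neg_cc(Cc), FalseLab, TrueLab, 1.0 - Pred};
mk(Cc, TrueLab, FalseLab, Pred) ->
    {pseudo_jcc, Cc, TrueLab, FalseLab, Pred}.

%% Example: mk('e', taken, fallthrough, 0.75) gives
%% {pseudo_jcc, 'ne', fallthrough, taken, 0.25}.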
-neg_cc(Cc) -> - case Cc of - 'e' -> 'ne'; % ==, != - 'ne' -> 'e'; % !=, == - 'g' -> 'le'; % >, <= - 'a' -> 'be'; % >u, <=u - 'ge' -> 'l'; % >=, < - 'ae' -> 'b'; % >=u, <u - 'l' -> 'ge'; % <, >= - 'b' -> 'ae'; % <u, >=u - 'le' -> 'g'; % <=, > - 'be' -> 'a'; % <=u, >u - 'o' -> 'no'; % overflow, not_overflow - 'no' -> 'o'; % not_overflow, overflow - _ -> exit({?MODULE, {"unknown cc", Cc}}) - end. -mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> - #pseudo_jcc{cc=Cc, true_label=TrueLabel, false_label=FalseLabel, pred=Pred}. -%% is_pseudo_jcc(Insn) -> is_insn_type(Insn, pseudo_jcc). -%% pseudo_jcc_cc(#pseudo_jcc{cc=Cc}) -> Cc. -%% pseudo_jcc_true_label(#pseudo_jcc{true_label=TrueLabel}) -> TrueLabel. -%% pseudo_jcc_false_label(#pseudo_jcc{false_label=FalseLabel}) -> FalseLabel. -%% pseudo_jcc_pred(#pseudo_jcc{pred=Pred}) -> Pred. - -mk_pseudo_spill(List) -> - #pseudo_spill{args=List}. - -mk_pseudo_spill_fmove(Src, Temp, Dst) -> - #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. -is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). - -mk_pseudo_spill_move(Src, Temp, Dst) -> - #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. -is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). - -mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> - check_linkage(Linkage), - #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. -%% is_pseudo_tailcall(Insn) -> is_insn_type(Insn, pseudo_tailcall). -pseudo_tailcall_fun(#pseudo_tailcall{'fun'=Fun}) -> Fun. -%% pseudo_tailcall_arity(#pseudo_tailcall{arity=Arity}) -> Arity. -pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. -pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. - -mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. -%% is_pseudo_tailcall_prepare(Insn) -> is_insn_type(Insn, pseudo_tailcall_prepare). - -mk_push(Src) -> #push{src=Src}. -%% is_push(Insn) -> is_insn_type(Insn, push). -push_src(#push{src=Src}) -> Src. - -%% mk_pop(Dst) -> #pop{dst=Dst}. -%% is_push(Insn) -> is_insn_type(Insn, push). -pop_dst(#pop{dst=Dst}) -> Dst. - -mk_ret(NPop) -> #ret{npop=NPop}. -%% is_ret(Insn) -> is_insn_type(Insn, ret). -ret_npop(#ret{npop=NPop}) -> NPop. - -mk_shift(ShiftOp, Src, Dst) -> - #shift{shiftop=ShiftOp, src=Src, dst=Dst}. -%% is_shift(Insn) -> is_insn_type(Insn, shift). -shift_op(#shift{shiftop=ShiftOp}) -> ShiftOp. -shift_src(#shift{src=Src}) -> Src. -shift_dst(#shift{dst=Dst}) -> Dst. - -mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> - #defun{mfa=MFA, formals=Formals, code=Code, data=Data, - isclosure=IsClosure, isleaf=IsLeaf, - var_range=VarRange, label_range=LabelRange}. -defun_mfa(#defun{mfa=MFA}) -> MFA. -defun_formals(#defun{formals=Formals}) -> Formals. -defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. -defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. -defun_code(#defun{code=Code}) -> Code. -defun_data(#defun{data=Data}) -> Data. -defun_var_range(#defun{var_range=VarRange}) -> VarRange. -%% defun_label_range(#defun{label_range=LabelRange}) -> LabelRange. - -%% highest_temp(Code) -> -%% highest_temp(Code,0). -%% -%% highest_temp([I|Is],Max) -> -%% Defs = hipe_x86_defuse:insn_def(I), -%% Uses = hipe_x86_defuse:insn_use(I), -%% highest_temp(Is,new_max(Defs++Uses,Max)); -%% highest_temp([],Max) -> -%% Max. 
-%% -%% new_max([V|Vs],Max) -> -%% case is_temp(V) of -%% true -> -%% TReg = temp_reg(V), -%% if TReg > Max -> -%% new_max(Vs, TReg); -%% true -> -%% new_max(Vs, Max) -%% end; -%% false -> -%% new_max(Vs, Max) -%% end; -%% new_max([],Max) -> Max. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl deleted file mode 100644 index 6cd69905b2..0000000000 --- a/lib/hipe/x86/hipe_x86.hrl +++ /dev/null @@ -1,112 +0,0 @@ -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% concrete representation of 2-address pseudo-x86 code - -%%%-------------------------------------------------------------------- -%%% x86 operands: -%%% -%%% int32 ::= <a 32-bit integer> -%%% reg ::= <token from hipe_x86_registers module> -%%% type ::= 'tagged' | 'untagged' -%%% label ::= <an integer> -%%% label_type ::= 'label' | 'constant' -%%% aluop ::= <an atom denoting a binary alu op> -%%% term ::= <any Erlang term> -%%% cc ::= <an atom denoting a condition code> -%%% pred ::= <a real number between 0.0 and 1.0 inclusive> -%%% npop ::= <a 32-bit natural number which is a multiple of 4> -%%% -%%% temp ::= {x86_temp, reg, type, allocatable} -%%% allocatable ::= 'true' | 'false' -%%% -%%% imm ::= {x86_imm, value} -%%% value ::= int32 | atom | {label, label_type} -%%% -%%% mem ::= {x86_mem, base, off, mem_type} -%%% base ::= temp | [] (XXX BUG: not quite true before RA) -%%% off ::= imm | temp -%%% mem_type ::= 'byte' | 'int16' (only valid with mov{s,z}x) -%%% | type -%%% -%%% src ::= temp | mem | imm -%%% dst ::= temp | mem -%%% arg ::= src -%%% args ::= <list of arg> -%%% -%%% mfa ::= {x86_mfa, atom, atom, byte} -%%% prim ::= {x86_prim, atom} -%%% fun ::= mfa | prim | temp | mem -%%% -%%% jtab ::= label (equiv. to {x86_imm,{label,'constant'}}) -%%% -%%% sdesc ::= {x86_sdesc, exnlab, fsize, arity, live} -%%% exnlab ::= [] | label -%%% fsize ::= <int32> (frame size in words) -%%% live ::= <tuple of int32> (word offsets) -%%% arity ::= int32 - --record(x86_temp, {reg, type, allocatable}). --record(x86_imm, {value}). --record(x86_mem, {base, off, type}). --record(x86_fpreg, {reg, pseudo}). --record(x86_mfa, {m::atom(), f::atom(), a::arity()}). --record(x86_prim, {prim}). --record(x86_sdesc, {exnlab, fsize, arity::arity(), live::tuple()}). - -%%% Basic instructions. -%%% These follow the AT&T convention, i.e. op src,dst (dst := dst op src) -%%% After register allocation, at most one operand in a binary -%%% instruction (alu, cmp, move) may denote a memory cell. -%%% After frame allocation, every temp must denote a physical register. - --record(alu, {aluop, src, dst}). --record(call, {'fun', sdesc, linkage}). --record(cmovcc, {cc, src, dst}). --record(cmp, {src, dst}). % a 'sub' alu which doesn't update dst --record(comment, {term}). --record(fmove, {src, dst}). --record(fp_binop, {op, src, dst}). --record(fp_unop, {op, arg}). % arg may be [] :-( --record(imul, {imm_opt, src, temp}). % imm_opt:[]|imm, src:temp|mem --record(jcc, {cc, label}). --record(jmp_fun, {'fun', linkage}). 
% tailcall, direct or indirect --record(jmp_label, {label}). % local jmp, direct --record(jmp_switch, {temp, jtab, labels}). % local jmp, indirect --record(label, {label}). --record(lea, {mem, temp}). --record(move, {src, dst}). --record(move64, {imm, dst}). --record(movsx, {src, dst}). --record(movzx, {src, dst}). --record(pseudo_call, {'fun', sdesc, contlab, linkage}). --record(pseudo_jcc, {cc, true_label, false_label, pred}). --record(pseudo_spill, {args=[]}). --record(pseudo_spill_move, {src, temp, dst}). --record(pseudo_spill_fmove, {src, temp, dst}). --record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). --record(pseudo_tailcall_prepare, {}). --record(push, {src}). --record(pop, {dst}). --record(ret, {npop}). % EAX is live-in --record(shift, {shiftop, src, dst}). --record(test, {src, dst}). - -%%% Function definitions. - --include("../misc/hipe_consttab.hrl"). - --record(defun, {mfa :: mfa(), formals, code, - data :: hipe_consttab(), - isclosure :: boolean(), - isleaf :: boolean(), - var_range, label_range}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl deleted file mode 100644 index 9d2586a14d..0000000000 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ /dev/null @@ -1,1004 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% HiPE/x86 assembler -%%% -%%% TODO: -%%% - Simplify combine_label_maps and mk_data_relocs. - --ifdef(HIPE_AMD64). --define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble). --define(HIPE_X86_ENCODE, hipe_amd64_encode). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(HIPE_X86_PP, hipe_amd64_pp). --ifdef(AMD64_SIMULATE_NSP). --define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP). --endif. --define(EAX, rax). --define(REGArch, reg64). --define(RMArch, rm64). --define(EA_DISP32_ABSOLUTE, ea_disp32_sindex). --else. --define(HIPE_X86_ASSEMBLE, hipe_x86_assemble). --define(HIPE_X86_ENCODE, hipe_x86_encode). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(HIPE_X86_PP, hipe_x86_pp). --define(EAX, eax). --define(REGArch, reg32). --define(RMArch, rm32). --define(EA_DISP32_ABSOLUTE, ea_disp32). --endif. - --module(?HIPE_X86_ASSEMBLE). --export([assemble/4]). - --define(DEBUG,true). - --include("../main/hipe.hrl"). --include("../x86/hipe_x86.hrl"). --include("../../kernel/src/hipe_ext_format.hrl"). --include("../rtl/hipe_literals.hrl"). --include("../misc/hipe_sdi.hrl"). --undef(ASSERT). --define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). 
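To make the operand and instruction records of hipe_x86.hrl above concrete, here is a minimal sketch (hypothetical temp number 42 and offsets, and the 'add' aluop atom is assumed; the records would need to be in scope, e.g. via rr("hipe_x86.hrl") in a shell):

%% "addl $8, 16(%t42)" in AT&T operand order, i.e. dst := dst + src:
Base = #x86_temp{reg=42, type=tagged, allocatable=true},
Dst  = #x86_mem{base=Base, off=#x86_imm{value=16}, type=tagged},
I    = #alu{aluop='add', src=#x86_imm{value=8}, dst=Dst}.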
- -assemble(CompiledCode, Closures, Exports, Options) -> - ?when_option(time, Options, ?start_timer("x86 assembler")), - print("****************** Assembling *******************\n", [], Options), - %% - Code = [{MFA, - hipe_x86:defun_code(Defun), - hipe_x86:defun_data(Defun)} - || {MFA, Defun} <- CompiledCode], - %% - {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code), - %% - {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = - encode(translate(Code, ConstMap, Options), Options), - print("Total num bytes=~w\n", [CodeSize], Options), - %% put(code_size, CodeSize), - %% put(const_size, ConstSize), - %% ?when_option(verbose, Options, - %% ?debug_msg("Constants are ~w bytes\n",[ConstSize])), - %% - SC = hipe_pack_constants:slim_constmap(ConstMap), - DataRelocs = hipe_pack_constants:mk_data_relocs(RefsFromConsts, LabelMap), - SSE = hipe_pack_constants:slim_sorted_exportmap(ExportMap,Closures,Exports), - SlimRefs = hipe_pack_constants:slim_refs(AccRefs), - Bin = term_to_binary([{?VERSION_STRING(),?HIPE_ERTS_CHECKSUM}, - ConstAlign, ConstSize, - SC, - DataRelocs, % nee LM, LabelMap - SSE, - CodeSize,CodeBinary,SlimRefs, - 0,[] % ColdCodeSize, SlimColdRefs - ]), - %% - %% ?when_option(time, Options, ?stop_timer("x86 assembler")), - Bin. - -%%% -%%% Assembly Pass 1. -%%% Process initial {MFA,Code,Data} list. -%%% Translate each MFA's body, choosing operand & instruction kinds. -%%% -%%% Assembly Pass 2. -%%% Perform short/long form optimisation for jumps. -%%% Build LabelMap for each MFA. -%%% -%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. -%%% - -translate(Code, ConstMap, Options) -> - translate_mfas(Code, ConstMap, [], Options). - -translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode, Options) -> - {NewInsns,CodeSize,LabelMap} = - translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options), - translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode], Options); -translate_mfas([], _ConstMap, NewCode, _Options) -> - lists:reverse(NewCode). - -translate_insns([I|Insns], Context, SdiPass1, Address, NewInsns, Options) -> - NewIs = translate_insn(I, Context, Options), - add_insns(NewIs, Insns, Context, SdiPass1, Address, NewInsns, Options); -translate_insns([], _Context, SdiPass1, Address, NewInsns, _Options) -> - {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), - {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. - -add_insns([I|Is], Insns, Context, SdiPass1, Address, NewInsns, Options) -> - NewSdiPass1 = - case I of - {'.label',L,_} -> - hipe_sdi:pass1_add_label(SdiPass1, Address, L); - {jcc_sdi,{_,{label,L}},_} -> - SdiInfo = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2}, - hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); - {jmp_sdi,{{label,L}},_} -> - SdiInfo = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2}, - hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); - _ -> - SdiPass1 - end, - Address1 = Address + insn_size(I), - add_insns(Is, Insns, Context, NewSdiPass1, Address1, [I|NewInsns], Options); -add_insns([], Insns, Context, SdiPass1, Address, NewInsns, Options) -> - translate_insns(Insns, Context, SdiPass1, Address, NewInsns, Options). - -insn_size(I) -> - case I of - {'.label',_,_} -> 0; - {'.sdesc',_,_} -> 0; - {jcc_sdi,_,_} -> 2; - {jmp_sdi,_,_} -> 2; - {Op,Arg,_Orig} -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg) - end. 
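A short note on the numbers fed to hipe_sdi above, assuming the usual x86 encodings (the short forms take a rel8, the long forms a rel32):

%% jcc rel8 is 2 bytes, jcc rel32 is 6 bytes  => incr = 6 - 2 = 4
%% jmp rel8 is 2 bytes, jmp rel32 is 5 bytes  => incr = 5 - 2 = 3
%% A short jump's rel8 is relative to the end of the 2-byte instruction,
%% so a label at distance D from the instruction start fits the short form
%% when -128 =< D - 2 =< 127, which is where lb = -128 + 2 and ub = 127 + 2
%% come from.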
- -translate_insn(I, Context, Options) -> - case I of - #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} -> - [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}]; - #alu{} -> - Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), - [{hipe_x86:alu_op(I), Arg, I}]; - #call{} -> - translate_call(I); - #cmovcc{} -> - {Dst,Src} = resolve_move_args( - hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I), - Context), - CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))}, - Arg = {CC,Dst,Src}, - [{cmovcc, Arg, I}]; - #cmp{} -> - Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context), - [{cmp, Arg, I}]; - #comment{} -> - []; - #fmove{} -> - {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I), - hipe_x86:fmove_dst(I)), - [{Op, Arg, I}]; - #fp_binop{} -> - case proplists:get_bool(x87, Options) of - true -> % x87 - Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I), - hipe_x86:fp_binop_dst(I)), - [{hipe_x86:fp_binop_op(I), Arg, I}]; - false -> % sse2 - Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I), - hipe_x86:fp_binop_dst(I)), - [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}] - end; - #fp_unop{} -> - case proplists:get_bool(x87, Options) of - true -> % x87 - Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)), - [{hipe_x86:fp_unop_op(I), Arg, I}]; - false -> % sse2 - case hipe_x86:fp_unop_op(I) of - 'fchs' -> - Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)), - [{'xorpd', Arg, I}]; - 'fwait' -> % no op on sse2, magic on x87 - [] - end - end; - #imul{} -> - translate_imul(I, Context); - #jcc{} -> - Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))}, - Label = translate_label(hipe_x86:jcc_label(I)), - [{jcc_sdi, {Cc,Label}, I}]; - #jmp_fun{} -> - %% call and jmp are patched the same, so no need to distinguish - %% call from tailcall - PatchTypeExt = - case hipe_x86:jmp_fun_linkage(I) of - remote -> ?CALL_REMOTE; - not_remote -> ?CALL_LOCAL - end, - Arg = translate_fun(hipe_x86:jmp_fun_fun(I), PatchTypeExt), - [{jmp, {Arg}, I}]; - #jmp_label{} -> - Arg = translate_label(hipe_x86:jmp_label_label(I)), - [{jmp_sdi, {Arg}, I}]; - #jmp_switch{} -> - RM32 = resolve_jmp_switch_arg(I, Context), - [{jmp, {RM32}, I}]; - #label{} -> - [{'.label', hipe_x86:label_label(I), I}]; - #lea{} -> - Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)), - [{lea, Arg, I}]; - #move{} -> - Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I), - Context), - [{mov, Arg, I}]; - #move64{} -> - translate_move64(I, Context); - #movsx{} -> - Src = resolve_movx_src(hipe_x86:movsx_src(I)), - [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}]; - #movzx{} -> - Src = resolve_movx_src(hipe_x86:movzx_src(I)), - [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}]; - %% pseudo_call: eliminated before assembly - %% pseudo_jcc: eliminated before assembly - %% pseudo_tailcall: eliminated before assembly - %% pseudo_tailcall_prepare: eliminated before assembly - #pop{} -> - Arg = translate_dst(hipe_x86:pop_dst(I)), - [{pop, {Arg}, I}]; - #push{} -> - Arg = translate_src(hipe_x86:push_src(I), Context), - [{push, {Arg}, I}]; - #ret{} -> - translate_ret(I); - #shift{} -> - Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context), - [{hipe_x86:shift_op(I), Arg, I}]; - #test{} -> - Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context), - [{test, Arg, I}] - end. - --ifdef(X86_SIMULATE_NSP). --ifdef(HIPE_AMD64). 
-translate_call(I) -> - WordSize = hipe_amd64_registers:wordsize(), - RegSP = 2#100, % esp/rsp - TempSP = hipe_x86:mk_temp(RegSP, untagged), - FunOrig = hipe_x86:call_fun(I), - Fun = - case FunOrig of - #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> - FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; - _ -> FunOrig - end, - RegRA = - begin - RegTemp0 = hipe_amd64_registers:temp0(), - RegTemp1 = hipe_amd64_registers:temp1(), - case Fun of - #x86_temp{reg=RegTemp0} -> RegTemp1; - #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1; - _ -> RegTemp0 - end - end, - TempRA = hipe_x86:mk_temp(RegRA, untagged), - PatchTypeExt = - case hipe_x86:call_linkage(I) of - remote -> ?CALL_REMOTE; - not_remote -> ?CALL_LOCAL - end, - JmpArg = translate_fun(Fun, PatchTypeExt), - I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, - I3 = {jmp, {JmpArg}, #comment{term=call}}, - Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}), - MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP, - hipe_x86:mk_imm(0), - untagged)), - temp_to_regArch(TempRA)}, - I2 = {mov, MovArgs, #comment{term=call}}, - Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs), - I1 = {lea, {temp_to_regArch(TempRA), - {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}}, - #comment{term=call}}, - I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, - [I0,I1,I2,I3,I4]. --else. -translate_call(I) -> - WordSize = ?HIPE_X86_REGISTERS:wordsize(), - RegSP = 2#100, % esp/rsp - TempSP = hipe_x86:mk_temp(RegSP, untagged), - FunOrig = hipe_x86:call_fun(I), - Fun = - case FunOrig of - #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> - FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; - _ -> FunOrig - end, - PatchTypeExt = - case hipe_x86:call_linkage(I) of - remote -> ?CALL_REMOTE; - not_remote -> ?CALL_LOCAL - end, - JmpArg = translate_fun(Fun, PatchTypeExt), - I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, - I2 = {jmp, {JmpArg}, #comment{term=call}}, - Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}), - I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP, - hipe_x86:mk_imm(0), - untagged)), - {imm32,{?X86ABSPCREL,4+Size2}}}, - #comment{term=call}}, - I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, - [I0,I1,I2,I3]. --endif. - -translate_ret(I) -> - NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(), - RegSP = 2#100, % esp/rsp - TempSP = hipe_x86:mk_temp(RegSP, untagged), - RegRA = 2#011, % ebx/rbx - TempRA = hipe_x86:mk_temp(RegRA, untagged), - [{mov, - {temp_to_regArch(TempRA), - mem_to_rmArch(hipe_x86:mk_mem(TempSP, - hipe_x86:mk_imm(0), - untagged))}, - I}, - {add, - {temp_to_rmArch(TempSP), - case NPOP < 128 of - true -> {imm8,NPOP}; - false -> {imm32,NPOP} - end}, - #comment{term=ret}}, - {jmp, - {temp_to_rmArch(TempRA)}, - #comment{term=ret}}]. - --else. % not X86_SIMULATE_NSP - -translate_call(I) -> - %% call and jmp are patched the same, so no need to distinguish - %% call from tailcall - PatchTypeExt = - case hipe_x86:call_linkage(I) of - remote -> ?CALL_REMOTE; - not_remote -> ?CALL_LOCAL - end, - Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt), - SDesc = hipe_x86:call_sdesc(I), - [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}]. - -translate_ret(I) -> - Arg = - case hipe_x86:ret_npop(I) of - 0 -> {}; - N -> {{imm16,N}} - end, - [{ret, Arg, I}]. - --endif. 
% X86_SIMULATE_NSP - -translate_imul(I, Context) -> - Temp = temp_to_regArch(hipe_x86:imul_temp(I)), - Src = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)), - Args = - case hipe_x86:imul_imm_opt(I) of - [] -> {Temp,Src}; - Imm -> {Temp,Src,translate_imm(Imm, Context, true)} - end, - [{'imul', Args, I}]. - -temp_or_mem_to_rmArch(Src) -> - case Src of - #x86_temp{} -> temp_to_rmArch(Src); - #x86_mem{} -> mem_to_rmArch(Src) - end. - -translate_label(Label) when is_integer(Label) -> - {label,Label}. % symbolic, since offset is not yet computable - -translate_fun(Arg, PatchTypeExt) -> - case Arg of - #x86_temp{} -> - temp_to_rmArch(Arg); - #x86_mem{} -> - mem_to_rmArch(Arg); - #x86_mfa{m=M,f=F,a=A} -> - {rel32,{PatchTypeExt,{M,F,A}}}; - #x86_prim{prim=Prim} -> - {rel32,{PatchTypeExt,Prim}} - end. - -translate_src(Src, Context) -> - case Src of - #x86_imm{} -> - translate_imm(Src, Context, true); - _ -> - translate_dst(Src) - end. - -%%% MayTrunc8 controls whether negative Imm8s should be truncated -%%% to 8 bits or not. Truncation should always be done, except when -%%% the caller will widen the Imm8 to an Imm32 or Imm64. -translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) -> - if is_atom(Imm) -> - {imm32,{?LOAD_ATOM,Imm}}; - is_integer(Imm) -> - case (Imm =< 127) and (Imm >= -128) of - true -> - Imm8 = - case MayTrunc8 of - true -> Imm band 16#FF; - false -> Imm - end, - {imm8,Imm8}; - false -> - {imm32,Imm} - end; - true -> - Val = - case Imm of - {Label,constant} -> - {MFA,ConstMap} = Context, - ConstNo = hipe_pack_constants:find_const({MFA,Label}, ConstMap), - {constant,ConstNo}; - {Label,closure} -> - {closure,Label}; - {Label,c_const} -> - {c_const,Label} - end, - {imm32,{?LOAD_ADDRESS,Val}} - end. - -translate_dst(Dst) -> - case Dst of - #x86_temp{} -> - temp_to_regArch(Dst); - #x86_mem{type='double'} -> - mem_to_rm64fp(Dst); - #x86_mem{} -> - mem_to_rmArch(Dst); - #x86_fpreg{} -> - fpreg_to_stack(Dst) - end. - -%%% -%%% Assembly Pass 3. -%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. -%%% Translate to a single binary code segment. -%%% Collect relocation patches. -%%% Build ExportMap (MFA-to-address mapping). -%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). -%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. -%%% - -encode(Code, Options) -> - CodeSize = compute_code_size(Code, 0), - ExportMap = build_export_map(Code, 0, []), - {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), - CodeBinary = list_to_binary(lists:reverse(AccCode)), - ?ASSERT(CodeSize =:= byte_size(CodeBinary)), - CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), - {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. - -nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead? - -align_entry(Address) -> Address + nr_pad_bytes(Address). - -compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> - compute_code_size(Code, align_entry(Size+CodeSize)); -compute_code_size([], Size) -> Size. - -build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> - build_export_map(Code, align_entry(Address+CodeSize), [{Address,M,F,A}|ExportMap]); -build_export_map([], _Address, ExportMap) -> ExportMap. - -combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> - NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), - combine_label_maps(Code, align_entry(Address+CodeSize), NewCLM); -combine_label_maps([], _Address, CLM) -> CLM. 
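As a rough illustration of translate_imm/3 and the MayTrunc8 flag above (Ctx is a placeholder; the context argument is only consulted for label-valued immediates), plus the per-function padding helpers:

%% translate_imm(#x86_imm{value=5},    Ctx, true)  -> {imm8, 5}
%% translate_imm(#x86_imm{value=-1},   Ctx, true)  -> {imm8, 16#FF}   % truncated
%% translate_imm(#x86_imm{value=-1},   Ctx, false) -> {imm8, -1}      % caller widens to imm32/imm64
%% translate_imm(#x86_imm{value=1000}, Ctx, _)     -> {imm32, 1000}
%% translate_imm(#x86_imm{value=foo},  Ctx, _)     -> {imm32, {?LOAD_ATOM, foo}}
%%
%% nr_pad_bytes(13) = (4 - 13 rem 4) rem 4 = 3, so align_entry(13) = 16:
%% each function body is padded with nops to the next 4-byte boundary.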
- -merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> - NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), - merge_label_map(Rest, MFA, Address, NewCLM); -merge_label_map([], _MFA, _Address, CLM) -> CLM. - -encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> - print("Generating code for:~w\n", [MFA], Options), - print("Offset | Opcode | Instruction\n", [], Options), - {Address1,Relocs1,AccCode1} = - encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), - ExpectedAddress = align_entry(Address + CodeSize), - ?ASSERT(Address1 =:= ExpectedAddress), - print("Finished.\n\n", [], Options), - encode_mfas(Code, Address1, AccCode1, Relocs1, Options); -encode_mfas([], _Address, AccCode, Relocs, _Options) -> - {AccCode, Relocs}. - -encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> - case I of - {'.label',L,_} -> - LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, - ?ASSERT(Address =:= LabelAddress), % sanity check - print_insn(Address, [], I, Options), - encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); - {'.sdesc',SDesc,_} -> - #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, - ExnRA = - case ExnLab of - [] -> []; % don't cons up a new one - ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress - end, - Reloc = {?SDESC, Address, - ?STACK_DESC(ExnRA, FSize, Arity, Live)}, - encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); - _ -> - {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), - {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address), - print_insn(Address, Bytes, I, Options), - Segment = list_to_binary(Bytes), - Size = byte_size(Segment), - NewAccCode = [Segment|AccCode], - encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options) - end; -encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> - case nr_pad_bytes(Address) of - 0 -> - {Address,Relocs,AccCode}; - NrPadBytes -> % triggers at most once per function body - Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}), - encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options) - end. - -fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> - case I of - {jcc_sdi,{CC,{label,L}},OrigI} -> - LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, - ShortOffset = LabelAddress - (InsnAddress + 2), - if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> - {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI}; - true -> - LongOffset = LabelAddress - (InsnAddress + 6), - {jcc,{CC,{rel32,LongOffset}},OrigI} - end; - {jmp_sdi,{{label,L}},OrigI} -> - LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, - ShortOffset = LabelAddress - (InsnAddress + 2), - if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> - {jmp,{{rel8,ShortOffset band 16#FF}},OrigI}; - true -> - LongOffset = LabelAddress - (InsnAddress + 5), - {jmp,{{rel32,LongOffset}},OrigI} - end; - _ -> I - end. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -fpreg_to_stack(#x86_fpreg{reg=Reg}) -> - {fpst, Reg}. - -temp_to_regArch(#x86_temp{reg=Reg}) -> - {?REGArch, Reg}. - --ifdef(HIPE_AMD64). -temp_to_reg64(#x86_temp{reg=Reg}) -> - {reg64, Reg}. --endif. - -temp_to_reg32(#x86_temp{reg=Reg}) -> - {reg32, Reg}. -temp_to_reg16(#x86_temp{reg=Reg}) -> - {reg16, Reg}. -temp_to_reg8(#x86_temp{reg=Reg}) -> - {reg8, Reg}. 
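A worked example of the fix_jumps/4 offset arithmetic above, with made-up addresses (FunAddress = 0, the jcc_sdi at offset 10):

%% label L at offset 64:  ShortOffset = 64 - (10 + 2) = 52       -> {jcc,{CC,{rel8,52}},_}
%% label L at offset 400: ShortOffset = 388 (outside rel8 range),
%%                        LongOffset  = 400 - (10 + 6) = 384     -> {jcc,{CC,{rel32,384}},_}
%% backward branch, L at offset 4: ShortOffset = -8, emitted as -8 band 16#FF = 16#F8.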
- -temp_to_xmm(#x86_temp{reg=Reg}) -> - {xmm, Reg}. - --ifdef(HIPE_AMD64). -temp_to_rm8(#x86_temp{reg=Reg}) -> - {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -temp_to_rm64(#x86_temp{reg=Reg}) -> - {rm64, hipe_amd64_encode:rm_reg(Reg)}. --else. -temp_to_rm8(#x86_temp{reg=Reg}) -> - true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg), - {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -temp_to_rm16(#x86_temp{reg=Reg}) -> - {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}. --endif. - -temp_to_rm32(#x86_temp{reg=Reg}) -> - {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -temp_to_rmArch(#x86_temp{reg=Reg}) -> - {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -temp_to_rm64fp(#x86_temp{reg=Reg}) -> - {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}. - -mem_to_ea(Mem) -> - EA = mem_to_ea_common(Mem), - {ea, EA}. - -mem_to_rm32(Mem) -> - EA = mem_to_ea_common(Mem), - {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}. - -mem_to_rmArch(Mem) -> - EA = mem_to_ea_common(Mem), - {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}. - -mem_to_rm64fp(Mem) -> - EA = mem_to_ea_common(Mem), - {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}. - -%%%%%%%%%%%%%%%%% -mem_to_rm8(Mem) -> - EA = mem_to_ea_common(Mem), - {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}. - -mem_to_rm16(Mem) -> - EA = mem_to_ea_common(Mem), - {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}. -%%%%%%%%%%%%%%%%% - -mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) -> - ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off); -mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) -> - case Base band 2#111 of - 5 -> % ebp/rbp or r13 - case Index band 2#111 of - 5 -> % ebp/rbp or r13 - SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), - SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), - ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB); - _ -> - SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base), - SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX), - ?HIPE_X86_ENCODE:ea_sib(SIB) - end; - _ -> - SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), - SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), - ?HIPE_X86_ENCODE:ea_sib(SIB) - end; -mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) -> - if - Off =:= 0 -> - case Base of - 4 -> %esp, use SIB w/o disp8 - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_sib(SIB); - 5 -> %ebp, use disp8 w/o SIB - ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); - 12 -> %r12, use SIB w/o disp8 - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_sib(SIB); - 13 -> %r13, use disp8 w/o SIB - ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); - _ -> %neither SIB nor disp8 needed - ?HIPE_X86_ENCODE:ea_base(Base) - end; - Off >= -128, Off =< 127 -> - Disp8 = Off band 16#FF, - case Base of - 4 -> %esp, must use SIB - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); - 12 -> %r12, must use SIB - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); - _ -> %use disp8 w/o SIB - ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base) - end; - true -> - case Base of - 4 -> %esp, must use SIB - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); - 12 -> %r12, must use SIB - SIB = ?HIPE_X86_ENCODE:sib(Base), - ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); - _ -> - ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base) - end - end. - -%% jmp_switch --ifdef(HIPE_AMD64). 
-resolve_jmp_switch_arg(I, _Context) -> - Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)), - Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)), - SINDEX = hipe_amd64_encode:sindex(3, Index), - SIB = hipe_amd64_encode:sib(Base, SINDEX), - EA = - if (Base =:= 5) or (Base =:= 13) -> - hipe_amd64_encode:ea_disp8_sib(0, SIB); - true -> - hipe_amd64_encode:ea_sib(SIB) - end, - {rm64,hipe_amd64_encode:rm_mem(EA)}. --else. -resolve_jmp_switch_arg(I, {MFA,ConstMap}) -> - ConstNo = hipe_pack_constants:find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap), - Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}}, - SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))), - EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly - {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}. --endif. - -%% lea reg, mem -resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) -> - {temp_to_regArch(Dst),mem_to_ea(Src)}. - -resolve_sse2_op(Op) -> - case Op of - fadd -> addsd; - fdiv -> divsd; - fmul -> mulsd; - fsub -> subsd; - xorpd -> xorpd; - _ -> exit({?MODULE, unknown_sse2_operator, Op}) - end. - -%% OP xmm, mem -resolve_sse2_binop_args(Src=#x86_mem{type=double}, - Dst=#x86_temp{type=double}) -> - {temp_to_xmm(Dst),mem_to_rm64fp(Src)}; -%% movsd mem, xmm -resolve_sse2_binop_args(Src=#x86_temp{type=double}, - Dst=#x86_mem{type=double}) -> - {mem_to_rm64fp(Dst),temp_to_xmm(Src)}; -%% OP xmm, xmm -resolve_sse2_binop_args(Src=#x86_temp{type=double}, - Dst=#x86_temp{type=double}) -> - {temp_to_xmm(Dst),temp_to_rm64fp(Src)}. - -%%% fmove -> cvtsi2sd or movsd -resolve_sse2_fmove_args(Src, Dst) -> - case {Src,Dst} of - {#x86_temp{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, reg - {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}}; - {#x86_mem{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, mem - {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}}; - _ -> % movsd - {movsd, resolve_sse2_binop_args(Src, Dst)} - end. - -%%% xorpd xmm, mem -resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) -> - {temp_to_xmm(Dst), - {rm64fp, {rm_mem, ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE( - {?LOAD_ADDRESS, - {c_const, sse2_fnegate_mask}})}}}. 
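One remark on the fchs case above, since SSE2 has no direct negate instruction: it is rewritten as an xorpd with the c_const sse2_fnegate_mask, a constant whose only set bit per 64-bit lane is the IEEE sign bit, so the XOR flips the sign of the double without touching exponent or mantissa (the exact mask literal lives in the runtime, not here).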
- -%% mov mem, imm -resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) -> - case Type of % to support byte, int16 and int32 stores - byte -> - ByteImm = ImmSrc band 255, %to ensure that it is a bytesized imm - {mem_to_rm8(Dst),{imm8,ByteImm}}; - int16 -> - {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}}; - int32 -> - {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), - {mem_to_rm32(Dst),{imm32,Imm}}; - _ -> - RMArch = mem_to_rmArch(Dst), - {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), - {RMArch,{imm32,Imm}} - end; - -%% mov reg,mem -resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) -> - case Type of - int32 -> % must be unsigned - {temp_to_reg32(Dst),mem_to_rm32(Src)}; - _ -> - {temp_to_regArch(Dst),mem_to_rmArch(Src)} - end; - -%% mov mem,reg -resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) -> - case Type of % to support byte, int16 and int32 stores - byte -> - {mem_to_rm8(Dst),temp_to_reg8(Src)}; - int16 -> - {mem_to_rm16(Dst),temp_to_reg16(Src)}; - int32 -> - {mem_to_rm32(Dst),temp_to_reg32(Src)}; - tagged -> % tagged, untagged - {mem_to_rmArch(Dst),temp_to_regArch(Src)}; - untagged -> % tagged, untagged - {mem_to_rmArch(Dst),temp_to_regArch(Src)} - end; - -%% mov reg,reg -resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) -> - {temp_to_regArch(Dst),temp_to_rmArch(Src)}; - -%% mov reg,imm -resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) -> - {_,Imm} = translate_imm(Src, Context, false), - imm_move_args(Dst, Imm). - --ifdef(HIPE_AMD64). -imm_move_args(Dst, Imm) -> - if is_number(Imm), Imm >= 0 -> - {temp_to_reg32(Dst),{imm32,Imm}}; - true -> - {temp_to_rm64(Dst),{imm32,Imm}} - end. --else. -imm_move_args(Dst, Imm) -> - {temp_to_reg32(Dst),{imm32,Imm}}. --endif. - --ifdef(HIPE_AMD64). -translate_move64(I, Context) -> - Arg = resolve_move64_args(hipe_x86:move64_src(I), - hipe_x86:move64_dst(I), - Context), - [{mov, Arg, I}]. - -%% mov reg,imm64 -resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) -> - {_,Imm} = translate_imm(Src, Context, false), - {temp_to_reg64(Dst),{imm64,Imm}}. --else. -translate_move64(I, _Context) -> exit({?MODULE, I}). --endif. - -%%% mov{s,z}x -resolve_movx_src(Src=#x86_mem{type=Type}) -> - case Type of - byte -> - mem_to_rm8(Src); - int16 -> - mem_to_rm16(Src); - int32 -> - mem_to_rm32(Src) - end. - -%%% alu/cmp (_not_ test) -resolve_alu_args(Src, Dst, Context) -> - case {Src,Dst} of - {#x86_imm{}, #x86_mem{}} -> - {mem_to_rmArch(Dst), translate_imm(Src, Context, true)}; - {#x86_mem{}, #x86_temp{}} -> - {temp_to_regArch(Dst), mem_to_rmArch(Src)}; - {#x86_temp{}, #x86_mem{}} -> - {mem_to_rmArch(Dst), temp_to_regArch(Src)}; - {#x86_temp{}, #x86_temp{}} -> - {temp_to_regArch(Dst), temp_to_rmArch(Src)}; - {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm - NewSrc = translate_imm(Src, Context, true), - NewDst = - case NewSrc of - {imm8,_} -> temp_to_rmArch(Dst); - {imm32,_} -> ?EAX - end, - {NewDst, NewSrc}; - {#x86_imm{}, #x86_temp{}} -> - {temp_to_rmArch(Dst), translate_imm(Src, Context, true)} - end. - -%%% test -resolve_test_args(Src, Dst, Context) -> - case Src of - %% Since we're using an 8-bit instruction, the immediate is not sign - %% extended. Thus, we can use immediates up to 255. 
- #x86_imm{value=ImmVal} - when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 -> - Imm = {imm8, ImmVal}, - case Dst of - #x86_temp{reg=0} -> {al, Imm}; - #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst); - #x86_mem{} -> {mem_to_rm8(Dst), Imm} - end; - #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 -> - {case Dst of - #x86_temp{reg=0} -> eax; - #x86_temp{} -> temp_to_rm32(Dst); - #x86_mem{} -> mem_to_rm32(Dst) - end, {imm32, ImmVal}}; - #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32 - {_, ImmVal} = translate_imm(Src, Context, false), - {case Dst of - #x86_temp{reg=0} -> ?EAX; - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, {imm32, ImmVal}}; - #x86_temp{} -> - NewDst = - case Dst of - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, - {NewDst, temp_to_regArch(Src)} - end. - --ifdef(HIPE_AMD64). -resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}. --else. -resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) -> - case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of - true -> {temp_to_rm8(Dst), Imm}; - false -> - %% Register does not exist in 8-bit version; use 16-bit instead - {temp_to_rm16(Dst), {imm16, ImmVal}} - end. --endif. - -%%% shifts -resolve_shift_args(Src, Dst, Context) -> - RM32 = - case Dst of - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, - Count = - case Src of - #x86_imm{value=1} -> 1; - #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8 - #x86_temp{reg=1} -> cl % temp must be ecx - end, - {RM32, Count}. - -%% x87_binop mem -resolve_x87_unop_arg(Arg=#x86_mem{type=Type})-> - case Type of - 'double' -> {mem_to_rm64fp(Arg)}; - 'untagged' -> {mem_to_rmArch(Arg)}; - _ -> ?EXIT({fmovArgNotSupported,{Arg}}) - end; -resolve_x87_unop_arg(Arg=#x86_fpreg{}) -> - {fpreg_to_stack(Arg)}; -resolve_x87_unop_arg([]) -> - []. - -%% x87_binop mem, st(i) -resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{})-> - {mem_to_rm64fp(Dst),fpreg_to_stack(Src)}; -%% x87_binop st(0), st(i) -resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{})-> - {fpreg_to_stack(Dst),fpreg_to_stack(Src)}. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%% -%%% Assembly listing support (pp_asm option). -%%% - -print(String, Arglist, Options) -> - ?when_option(pp_asm, Options, io:format(String, Arglist)). - -print_insn(Address, Bytes, I, Options) -> - ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)), - ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)). - -print_code_list_2([H | Tail]) -> - print_byte(H), - io:format(","), - print_code_list_2(Tail); -print_code_list_2([]) -> - io:format(""). - -print_insn_2(Address, Bytes, {_,_,OrigI}) -> - io:format("~8.16b | ", [Address]), - print_code_list(Bytes, 0), - ?HIPE_X86_PP:pp_insn(OrigI). - -print_code_list([Byte|Rest], Len) -> - print_byte(Byte), - print_code_list(Rest, Len+1); -print_code_list([], Len) -> - fill_spaces(24-(Len*2)), - io:format(" | "). - -print_byte(Byte) -> - io:format("~2.16.0b", [Byte band 16#FF]). - -fill_spaces(N) when N > 0 -> - io:format(" "), - fill_spaces(N-1); -fill_spaces(0) -> - []. diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl deleted file mode 100644 index 0a3c0fc9d6..0000000000 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ /dev/null @@ -1,162 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - --module(hipe_x86_cfg). - --export([init/1, - labels/1, start_label/1, - succ/2, pred/2, - bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). --export([postorder/1, reverse_postorder/1]). --export([linearise/1, params/1, arity/1, redirect_jmp/3, branch_preds/1]). - -%%% these tell cfg.inc what to define (ugly as hell) --define(PRED_NEEDED,true). --define(BREADTH_ORDER,true). --define(PARAMS_NEEDED,true). --define(START_LABEL_UPDATE_NEEDED,true). --define(MAP_FOLD_NEEDED,true). - --include("hipe_x86.hrl"). --include("../flow/cfg.hrl"). --include("../flow/cfg.inc"). - -init(Defun) -> - %% XXX: this assumes that the code starts with a label insn. - %% Is that guaranteed? - Code = hipe_x86:defun_code(Defun), - StartLab = hipe_x86:label_label(hd(Code)), - Data = hipe_x86:defun_data(Defun), - IsClosure = hipe_x86:defun_is_closure(Defun), - MFA = hipe_x86:defun_mfa(Defun), - IsLeaf = hipe_x86:defun_is_leaf(Defun), - Formals = hipe_x86:defun_formals(Defun), - CFG0 = mk_empty_cfg(MFA, StartLab, Data, IsClosure, IsLeaf, Formals), - take_bbs(Code, CFG0). - -is_branch(I) -> - case I of - #jmp_fun{} -> true; - #jmp_label{} -> true; - #jmp_switch{} -> true; - #pseudo_call{} -> true; - #pseudo_jcc{} -> true; - #pseudo_tailcall{} -> true; - #ret{} -> true; - _ -> false - end. - -branch_successors(Branch) -> - case Branch of - #jmp_fun{} -> []; - #jmp_label{label=Label} -> [Label]; - #jmp_switch{labels=Labels} -> Labels; - #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> - case ExnLab of - [] -> [ContLab]; - _ -> [ContLab,ExnLab] - end; - #pseudo_jcc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; - #pseudo_tailcall{} -> []; - #ret{} -> [] - end. - -branch_preds(Branch) -> - case Branch of - #jmp_switch{labels=Labels} -> - Prob = 1.0/length(Labels), - [{L, Prob} || L <- Labels]; - #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=[]}} -> - %% A function can still cause an exception, even if we won't catch it - [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; - #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> - CallExnPred = hipe_bb_weights:call_exn_pred(), - [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; - #pseudo_jcc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> - [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; - _ -> - case branch_successors(Branch) of - [] -> []; - [Single] -> [{Single, 1.0}] - end - end. - --ifdef(REMOVE_TRIVIAL_BBS_NEEDED). -fails_to(_Instr) -> []. --endif. - -redirect_jmp(I, Old, New) -> - case I of - #jmp_label{label=Label} -> - if Old =:= Label -> I#jmp_label{label=New}; - true -> I - end; - #pseudo_jcc{true_label=TrueLab, false_label=FalseLab} -> - J0 = if Old =:= TrueLab -> I#pseudo_jcc{true_label=New}; - true -> I - end, - if Old =:= FalseLab -> J0#pseudo_jcc{false_label=New}; - true -> J0 - end; - %% handle pseudo_call too? - _ -> I - end. - -%%% XXX: fix if labels can occur in operands -%% redirect_ops(_Labels, CFG, _Map) -> -%% CFG. - -mk_goto(Label) -> - hipe_x86:mk_jmp_label(Label). - -is_label(I) -> - case I of #label{} -> true; _ -> false end. - -label_name(Label) -> - hipe_x86:label_label(Label). 
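For illustration, the edge probabilities produced by branch_preds/1 above, with hypothetical label numbers:

%% #pseudo_jcc{true_label=7, false_label=8, pred=0.75}
%%     -> [{8, 0.25}, {7, 0.75}]
%% #jmp_switch{labels=[1,2,3,4]}
%%     -> [{1,0.25}, {2,0.25}, {3,0.25}, {4,0.25}]
%% #jmp_label{label=5}
%%     -> [{5, 1.0}]                         % single successor, via branch_successors/1
%% #pseudo_call{contlab=C, sdesc=#x86_sdesc{exnlab=[]}}
%%     -> [{C, 1.0 - hipe_bb_weights:call_exn_pred()}]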
- -mk_label(Name) -> - hipe_x86:mk_label(Name). - -%% is_comment(I) -> -%% hipe_x86:is_comment(I). -%% -%% is_goto(I) -> -%% hipe_x86:is_jmp_label(I). - -linearise(CFG) -> % -> defun, not insn list - MFA = function(CFG), - Formals = params(CFG), - Code = linearize_cfg(CFG), - Data = data(CFG), - VarRange = hipe_gensym:var_range(x86), - LabelRange = hipe_gensym:label_range(x86), - IsClosure = is_closure(CFG), - IsLeaf = is_leaf(CFG), - hipe_x86:mk_defun(MFA, Formals, IsClosure, IsLeaf, - Code, Data, VarRange, LabelRange). - -arity(CFG) -> - {_M,_F,A} = function(CFG), - A. - -%% init_gensym(CFG) -> -%% HighestVar = find_highest_var(CFG), -%% HighestLabel = find_highest_label(CFG), -%% hipe_gensym:init(), -%% hipe_gensym:set_var(x86, HighestVar), -%% hipe_gensym:set_label(x86, HighestLabel). -%% -%% highest_var(Code) -> -%% hipe_x86:highest_temp(Code). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl deleted file mode 100644 index 2731836dc1..0000000000 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ /dev/null @@ -1,170 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% compute def/use sets for x86 insns -%%% -%%% TODO: -%%% - represent EFLAGS (condition codes) use/def by a virtual reg? -%%% - should push use/def %esp? - --ifdef(HIPE_AMD64). --define(HIPE_X86_DEFUSE, hipe_amd64_defuse). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(RV, rax). --else. --define(HIPE_X86_DEFUSE, hipe_x86_defuse). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(RV, eax). --endif. - --module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]). --include("../x86/hipe_x86.hrl"). - -%%% -%%% insn_def(Insn) -- Return set of temps defined by an instruction. -%%% - -insn_def(I) -> - case I of - #alu{dst=Dst} -> dst_def(Dst); - #cmovcc{dst=Dst} -> dst_def(Dst); - #fmove{dst=Dst} -> dst_def(Dst); - #fp_binop{dst=Dst} -> dst_def(Dst); - #fp_unop{arg=Arg} -> dst_def(Arg); - #imul{temp=Temp} -> [Temp]; - #lea{temp=Temp} -> [Temp]; - #move{dst=Dst} -> dst_def(Dst); - #move64{dst=Dst} -> dst_def(Dst); - #movsx{dst=Dst} -> dst_def(Dst); - #movzx{dst=Dst} -> dst_def(Dst); - #pseudo_call{} -> call_clobbered(); - #pseudo_spill{} -> []; - #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; - #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; - #pseudo_tailcall_prepare{} -> tailcall_clobbered(); - #shift{dst=Dst} -> dst_def(Dst); - %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label - %% pseudo_jcc, pseudo_tailcall, push, ret, test - _ -> [] - end. - - -%% @doc Answers whether instruction I defines all allocatable registers. Used by -%% hipe_regalloc_prepass. --spec insn_defs_all(_) -> boolean(). -insn_defs_all(I) -> - case I of - #pseudo_call{} -> true; - _ -> false - end. - -dst_def(Dst) -> - case Dst of - #x86_temp{} -> [Dst]; - #x86_fpreg{} -> [Dst]; - _ -> [] - end. 
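A few concrete cases of insn_def/1 above, writing T for some #x86_temp{}:

%% #move{dst=T}                    -> [T]     % a temp destination is defined
%% #move{dst=#x86_mem{}}           -> []      % a store defines no temp
%% #imul{temp=T}                   -> [T]
%% #pseudo_call{}                  -> the call-clobbered physical registers
%% #cmp{}, #test{}, #jcc{}         -> []      % only the condition codes change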
- -call_clobbered() -> - [hipe_x86:mk_temp(R, T) - || {R,T} <- ?HIPE_X86_REGISTERS:call_clobbered()]. - -tailcall_clobbered() -> - [hipe_x86:mk_temp(R, T) - || {R,T} <- ?HIPE_X86_REGISTERS:tailcall_clobbered()]. - -%%% -%%% insn_use(Insn) -- Return set of temps used by an instruction. -%%% - -insn_use(I) -> - case I of - #alu{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); - #call{'fun'=Fun} -> addtemp(Fun, []); - #cmovcc{src=Src, dst=Dst} -> addtemp(Src, dst_use(Dst)); - #cmp{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); - #fmove{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); - #fp_unop{arg=Arg} -> addtemp(Arg, []); - #fp_binop{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); - #imul{imm_opt=ImmOpt,src=Src,temp=Temp} -> - addtemp(Src, case ImmOpt of [] -> addtemp(Temp, []); _ -> [] end); - #jmp_fun{'fun'=Fun} -> addtemp(Fun, []); - #jmp_switch{temp=Temp, jtab=JTab} -> addtemp(Temp, addtemp(JTab, [])); - #lea{mem=Mem} -> addtemp(Mem, []); - #move{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); - #move64{} -> []; - #movsx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); - #movzx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); - #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> - addtemp(Fun, arity_use(Arity)); - #pseudo_spill{args=Args} -> Args; - #pseudo_spill_fmove{src=Src} -> [Src]; - #pseudo_spill_move{src=Src} -> [Src]; - #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> - addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), - arity_use(Arity)))); - #push{src=Src} -> addtemp(Src, []); - #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; - #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); - #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); - %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare - _ -> [] - end. - -arity_use(Arity) -> - [hipe_x86:mk_temp(R, 'tagged') - || R <- ?HIPE_X86_REGISTERS:args(Arity)]. - -dst_use(Dst) -> - case Dst of - #x86_mem{base=Base,off=Off} -> addbase(Base, addtemp(Off, [])); - _ -> [] - end. - -%%% -%%% src_use(Src) -- Return set of temps used by a source operand. -%%% - -%% src_use(Src) -> -%% addtemp(Src, []). - -%%% -%%% Auxiliary operations on sets of temps -%%% - -addtemps([Arg|Args], Set) -> - addtemps(Args, addtemp(Arg, Set)); -addtemps([], Set) -> - Set. - -addtemp(Arg, Set) -> - case Arg of - #x86_temp{} -> add(Arg, Set); - #x86_mem{base=Base,off=Off} -> addtemp(Off, addbase(Base, Set)); - #x86_fpreg{} -> add(Arg, Set); - _ -> Set - end. - -addbase(Base, Set) -> - case Base of - [] -> Set; - _ -> addtemp(Base, Set) - end. - -add(Arg, Set) -> - case lists:member(Arg, Set) of - false -> [Arg|Set]; - _ -> Set - end. diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl deleted file mode 100644 index 2662f76d0b..0000000000 --- a/lib/hipe/x86/hipe_x86_encode.erl +++ /dev/null @@ -1,1319 +0,0 @@ -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. 
-%%% -%%% Copyright (C) 2000-2005 Mikael Pettersson -%%% -%%% This is the syntax of x86 r/m operands: -%%% -%%% opnd ::= reg mod == 11 -%%% | MEM[ea] mod != 11 -%%% -%%% ea ::= disp32(reg) mod == 10, r/m != ESP -%%% | disp32 sib12 mod == 10, r/m == 100 -%%% | disp8(reg) mod == 01, r/m != ESP -%%% | disp8 sib12 mod == 01, r/m == 100 -%%% | (reg) mod == 00, r/m != ESP and EBP -%%% | sib0 mod == 00, r/m == 100 -%%% | disp32 mod == 00, r/m == 101 [on x86-32] -%%% | disp32(%rip) mod == 00, r/m == 101 [on x86-64] -%%% -%%% // sib0: mod == 00 -%%% sib0 ::= disp32(,index,scale) base == EBP, index != ESP -%%% | disp32 base == EBP, index == 100 -%%% | (base,index,scale) base != EBP, index != ESP -%%% | (base) base != EBP, index == 100 -%%% -%%% // sib12: mod == 01 or 10 -%%% sib12 ::= (base,index,scale) index != ESP -%%% | (base) index == 100 -%%% -%%% scale ::= 00 | 01 | 10 | 11 index << scale -%%% -%%% Notes: -%%% -%%% 1. ESP cannot be used as index register. -%%% 2. Use of ESP as base register requires a SIB byte. -%%% 3. disp(reg), when reg != ESP, can be represented without -%%% [r/m == reg] or with [r/m == 100, base == reg] a SIB byte. -%%% 4. disp32 can be represented without [mod == 00, r/m == 101] -%%% or with [mod == 00, r/m == 100, base == 101, index == 100] -%%% a SIB byte. -%%% 5. x86-32 and x86-64 interpret mod==00b r/m==101b EAs differently: -%%% on x86-32 the disp32 is an absolute address, but on x86-64 the -%%% disp32 is relative to the %rip of the next instruction. -%%% Absolute disp32s need a SIB on x86-64. - --module(hipe_x86_encode). - --export([% condition codes - cc/1, - % 8-bit registers - %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, - reg_has_8bit/1, - % 32-bit registers - %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, - % operands - sindex/2, sib/1, sib/2, - ea_disp32_base/2, ea_disp32_sib/2, - ea_disp8_base/2, ea_disp8_sib/2, - ea_base/1, - %% ea_disp32_sindex/1, % XXX: do not use on x86-32, only on x86-64 - ea_disp32_sindex/2, - ea_sib/1, ea_disp32/1, - rm_reg/1, rm_mem/1, - % instructions - insn_encode/3, insn_sizeof/2]). - -%%-define(DO_HIPE_X86_ENCODE_TEST,true). --ifdef(DO_HIPE_X86_ENCODE_TEST). --export([dotest/0, dotest/1]). % for testing, don't use --endif. - --define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end). -%-define(ASSERT(F,G), []). - -%%% condition codes - --define(CC_O, 2#0000). % overflow --define(CC_NO, 2#0001). % no overflow --define(CC_B, 2#0010). % below, <u --define(CC_AE, 2#0011). % above or equal, >=u --define(CC_E, 2#0100). % equal --define(CC_NE, 2#0101). % not equal --define(CC_BE, 2#0110). % below or equal, <=u --define(CC_A, 2#0111). % above, >u --define(CC_S, 2#1000). % sign, + --define(CC_NS, 2#1001). % not sign, - --define(CC_PE, 2#1010). % parity even --define(CC_PO, 2#1011). % parity odd --define(CC_L, 2#1100). % less than, <s --define(CC_GE, 2#1101). % greater or equal, >=s --define(CC_LE, 2#1110). % less or equal, <=s --define(CC_G, 2#1111). % greater than, >s - -cc(o) -> ?CC_O; -cc(no) -> ?CC_NO; -cc(b) -> ?CC_B; -cc(ae) -> ?CC_AE; -cc(e) -> ?CC_E; -cc(ne) -> ?CC_NE; -cc(be) -> ?CC_BE; -cc(a) -> ?CC_A; -cc(s) -> ?CC_S; -cc(ns) -> ?CC_NS; -cc(pe) -> ?CC_PE; -cc(po) -> ?CC_PO; -cc(l) -> ?CC_L; -cc(ge) -> ?CC_GE; -cc(le) -> ?CC_LE; -cc(g) -> ?CC_G. - -%%% 8-bit registers - --define(AL, 2#000). --define(CL, 2#001). --define(DL, 2#010). --define(BL, 2#011). --define(AH, 2#100). --define(CH, 2#101). --define(DH, 2#110). --define(BH, 2#111). - -%% al() -> ?AL. -%% cl() -> ?CL. -%% dl() -> ?DL. -%% bl() -> ?BL. 
-%% ah() -> ?AH. -%% ch() -> ?CH. -%% dh() -> ?DH. -%% bh() -> ?BH. - -reg_has_8bit(Reg) -> Reg =< ?BL. - -%%% 32-bit registers - --define(EAX, 2#000). --define(ECX, 2#001). --define(EDX, 2#010). --define(EBX, 2#011). --define(ESP, 2#100). --define(EBP, 2#101). --define(ESI, 2#110). --define(EDI, 2#111). - -%% eax() -> ?EAX. -%% ecx() -> ?ECX. -%% edx() -> ?EDX. -%% ebx() -> ?EBX. -%% esp() -> ?ESP. -%% ebp() -> ?EBP. -%% esi() -> ?ESI. -%% edi() -> ?EDI. - -%%% r/m operands - -sindex(Scale, Index) when is_integer(Scale), is_integer(Index) -> - ?ASSERT(sindex, Scale >= 0), - ?ASSERT(sindex, Scale =< 3), - ?ASSERT(sindex, Index =/= ?ESP), - {sindex, Scale, Index}. - --record(sib, {sindex_opt, base :: integer()}). -sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}. -sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}. - -ea_disp32_base(Disp32, Base) when is_integer(Base) -> - ?ASSERT(ea_disp32_base, Base =/= ?ESP), - {ea_disp32_base, Disp32, Base}. -ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}. -ea_disp8_base(Disp8, Base) when is_integer(Base) -> - ?ASSERT(ea_disp8_base, Base =/= ?ESP), - {ea_disp8_base, Disp8, Base}. -ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}. -ea_base(Base) when is_integer(Base) -> - ?ASSERT(ea_base, Base =/= ?ESP), - ?ASSERT(ea_base, Base =/= ?EBP), - {ea_base, Base}. -%% ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}. -ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}. -ea_sib(SIB) -> - ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP), - {ea_sib, SIB}. -ea_disp32(Disp32) -> {ea_disp32, Disp32}. - -rm_reg(Reg) -> {rm_reg, Reg}. -rm_mem(EA) -> {rm_mem, EA}. - -mk_modrm(Mod, RO, RM) -> - (Mod bsl 6) bor (RO bsl 3) bor RM. - -mk_sib(Scale, Index, Base) -> - (Scale bsl 6) bor (Index bsl 3) bor Base. - -le16(Word, Tail) -> - [Word band 16#FF, (Word bsr 8) band 16#FF | Tail]. - -le32(Word, Tail) when is_integer(Word) -> - [Word band 16#FF, (Word bsr 8) band 16#FF, - (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail]; -le32({Tag,Val}, Tail) -> % a relocatable datum - [{le32,Tag,Val} | Tail]. - -enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index}; -enc_sindex_opt(none) -> {2#00, 2#100}. - -enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) -> - {Scale, Index} = enc_sindex_opt(SindexOpt), - mk_sib(Scale, Index, Base). - -enc_ea(EA, RO, Tail) -> - case EA of - {ea_disp32_base, Disp32, Base} -> - [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)]; - {ea_disp32_sib, Disp32, SIB} -> - [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)]; - {ea_disp8_base, Disp8, Base} -> - [mk_modrm(2#01, RO, Base), Disp8 | Tail]; - {ea_disp8_sib, Disp8, SIB} -> - [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail]; - {ea_base, Base} -> - [mk_modrm(2#00, RO, Base) | Tail]; - {ea_disp32_sindex, Disp32, SindexOpt} -> - {Scale, Index} = enc_sindex_opt(SindexOpt), - SIB = mk_sib(Scale, Index, 2#101), - MODRM = mk_modrm(2#00, RO, 2#100), - [MODRM, SIB | le32(Disp32, Tail)]; - {ea_sib, SIB} -> - [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail]; - {ea_disp32, Disp32} -> - [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)] - end. - -encode_rm(RM, RO, Tail) -> - case RM of - {rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail]; - {rm_mem, EA} -> enc_ea(EA, RO, Tail) - end. - -sizeof_ea(EA) -> - case element(1, EA) of - ea_disp32_base -> 5; - ea_disp32_sib -> 6; - ea_disp8_base -> 2; - ea_disp8_sib -> 3; - ea_base -> 1; - ea_disp32_sindex -> 6; - ea_sib -> 2; - ea_disp32 -> 5 - end. 
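A worked trace of the ModRM helpers above, using the register numbers defined earlier (EAX = 2#000, EBX = 2#011) and a reg/opcode field of 2#000:

%% register operand %ebx:
%%   encode_rm(rm_reg(2#011), 2#000, [])
%%     = [mk_modrm(2#11, 2#000, 2#011)]
%%     = [2#11000011] = [16#C3]
%% memory operand 8(%ebx):
%%   encode_rm(rm_mem(ea_disp8_base(8, 2#011)), 2#000, [])
%%     = [mk_modrm(2#01, 2#000, 2#011), 8]
%%     = [16#43, 16#08]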
- -sizeof_rm(RM) -> - case RM of - {rm_reg, _} -> 1; - {rm_mem, EA} -> sizeof_ea(EA) - end. - -%%% Floating point stack positions - --define(ST0, 2#000). --define(ST1, 2#001). --define(ST2, 2#010). --define(ST3, 2#011). --define(ST4, 2#100). --define(ST5, 2#101). --define(ST6, 2#110). --define(ST7, 2#111). - -st(0) -> ?ST0; -st(1) -> ?ST1; -st(2) -> ?ST2; -st(3) -> ?ST3; -st(4) -> ?ST4; -st(5) -> ?ST5; -st(6) -> ?ST6; -st(7) -> ?ST7. - - -%%% Instructions -%%% -%%% Insn ::= {Op,Opnds} -%%% Opnds ::= {Opnd1,...,Opndn} (n >= 0) -%%% Opnd ::= eax | ax | al | 1 | cl -%%% | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} -%%% | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} -%%% | {rel32,Rel32} | {rel8,Rel8} -%%% | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} -%%% | {cc,CC} -%%% | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} -%%% | {ea,EA} - --define(PFX_OPND, 16#66). - -arith_binop_encode(SubOpcode, Opnds) -> - %% add, or, adc, sbb, and, sub, xor, cmp - case Opnds of - {eax, {imm32,Imm32}} -> - [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])]; - {{rm32,RM32}, {imm32,Imm32}} -> - [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))]; - {{rm32,RM32}, {imm8,Imm8}} -> - [16#83 | encode_rm(RM32, SubOpcode, [Imm8])]; - {{rm32,RM32}, {reg32,Reg32}} -> - [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; - {{reg32,Reg32}, {rm32,RM32}} -> - [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])] - end. - -arith_binop_sizeof(Opnds) -> - %% add, or, adc, sbb, and, sub, xor, cmp - case Opnds of - {eax, {imm32,_}} -> - 1 + 4; - {{rm32,RM32}, {imm32,_}} -> - 1 + sizeof_rm(RM32) + 4; - {{rm32,RM32}, {imm8,_}} -> - 1 + sizeof_rm(RM32) + 1; - {{rm32,RM32}, {reg32,_}} -> - 1 + sizeof_rm(RM32); - {{reg32,_}, {rm32,RM32}} -> - 1 + sizeof_rm(RM32) - end. - -bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) -> % bsf, bsr - [16#0F, Opcode | encode_rm(RM32, Reg32, [])]. - -bs_op_sizeof({{reg32,_}, {rm32,RM32}}) -> % bsf, bsr - 2 + sizeof_rm(RM32). - -bswap_encode({{reg32,Reg32}}) -> - [16#0F, 16#C8 bor Reg32]. - -bswap_sizeof({{reg32,_}}) -> - 2. - -bt_op_encode(SubOpcode, Opnds) -> % bt, btc, btr, bts - case Opnds of - {{rm32,RM32}, {reg32,Reg32}} -> - [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; - {{rm32,RM32}, {imm8,Imm8}} -> - [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])] - end. - -bt_op_sizeof(Opnds) -> % bt, btc, btr, bts - case Opnds of - {{rm32,RM32}, {reg32,_}} -> - 2 + sizeof_rm(RM32); - {{rm32,RM32}, {imm8,_}} -> - 2 + sizeof_rm(RM32) + 1 - end. - -call_encode(Opnds) -> - case Opnds of - {{rel32,Rel32}} -> - [16#E8 | le32(Rel32, [])]; - {{rm32,RM32}} -> - [16#FF | encode_rm(RM32, 2#010, [])] - end. - -call_sizeof(Opnds) -> - case Opnds of - {{rel32,_}} -> - 1 + 4; - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32) - end. - -cbw_encode({}) -> - [?PFX_OPND, 16#98]. - -cbw_sizeof({}) -> - 2. - -nullary_op_encode(Opcode, {}) -> - %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std - [Opcode]. - -nullary_op_sizeof({}) -> - %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std - 1. - -cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) -> - [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])]. - -cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) -> - 2 + sizeof_rm(RM32). - -incdec_encode(SubOpcode, Opnds) -> % SubOpcode is either 0 or 1 - case Opnds of - {{rm32,RM32}} -> - [16#FF | encode_rm(RM32, SubOpcode, [])]; - {{reg32,Reg32}} -> - [16#40 bor (SubOpcode bsl 3) bor Reg32] - end. 
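Putting the pieces together for one ALU instruction, assuming ADD's /digit value 2#000 (its standard x86 sub-opcode), "addl $0x10,%ebx" encodes as:

%% arith_binop_encode(2#000, {{rm32, rm_reg(2#011)}, {imm8, 16#10}})
%%   = [16#83 | encode_rm(rm_reg(2#011), 2#000, [16#10])]
%%   = [16#83, 16#C3, 16#10]
%% i.e. the usual "83 /0 ib" encoding of ADD r/m32, imm8.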
- -incdec_sizeof(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32); - {{reg32,_}} -> - 1 - end. - -arith_unop_encode(Opcode, {{rm32,RM32}}) -> % div, idiv, mul, neg, not - [16#F7 | encode_rm(RM32, Opcode, [])]. - -arith_unop_sizeof({{rm32,RM32}}) -> % div, idiv, mul, neg, not - 1 + sizeof_rm(RM32). - -enter_encode({{imm16,Imm16}, {imm8,Imm8}}) -> - [16#C8 | le16(Imm16, [Imm8])]. - -enter_sizeof({{imm16,_}, {imm8,_}}) -> - 1 + 2 + 1. - -imul_encode(Opnds) -> - case Opnds of - {{rm32,RM32}} -> % <edx,eax> *= rm32 - [16#F7 | encode_rm(RM32, 2#101, [])]; - {{reg32,Reg32}, {rm32,RM32}} -> % reg *= rm32 - [16#0F, 16#AF | encode_rm(RM32, Reg32, [])]; - {{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} -> % reg := rm32 * sext(imm8) - [16#6B | encode_rm(RM32, Reg32, [Imm8])]; - {{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} -> % reg := rm32 * imm32 - [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))] - end. - -imul_sizeof(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32); - {{reg32,_}, {rm32,RM32}} -> - 2 + sizeof_rm(RM32); - {{reg32,_}, {rm32,RM32}, {imm8,_}} -> - 1 + sizeof_rm(RM32) + 1; - {{reg32,_}, {rm32,RM32}, {imm32,_}} -> - 1 + sizeof_rm(RM32) + 4 - end. - -jcc_encode(Opnds) -> - case Opnds of - {{cc,CC}, {rel8,Rel8}} -> - [16#70 bor CC, Rel8]; - {{cc,CC}, {rel32,Rel32}} -> - [16#0F, 16#80 bor CC | le32(Rel32, [])] - end. - -jcc_sizeof(Opnds) -> - case Opnds of - {{cc,_}, {rel8,_}} -> - 2; - {{cc,_}, {rel32,_}} -> - 2 + 4 - end. - -jmp8_op_encode(Opcode, {{rel8,Rel8}}) -> % jecxz, loop, loope, loopne - [Opcode, Rel8]. - -jmp8_op_sizeof({{rel8,_}}) -> % jecxz, loop, loope, loopne - 2. - -jmp_encode(Opnds) -> - case Opnds of - {{rel8,Rel8}} -> - [16#EB, Rel8]; - {{rel32,Rel32}} -> - [16#E9 | le32(Rel32, [])]; - {{rm32,RM32}} -> - [16#FF | encode_rm(RM32, 2#100, [])] - end. - -jmp_sizeof(Opnds) -> - case Opnds of - {{rel8,_}} -> - 2; - {{rel32,_}} -> - 1 + 4; - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32) - end. - -lea_encode({{reg32,Reg32}, {ea,EA}}) -> - [16#8D | enc_ea(EA, Reg32, [])]. - -lea_sizeof({{reg32,_}, {ea,EA}}) -> - 1 + sizeof_ea(EA). - -mov_encode(Opnds) -> - case Opnds of - {{rm8,RM8}, {reg8,Reg8}} -> - [16#88 | encode_rm(RM8, Reg8, [])]; - {{rm16,RM16}, {reg16,Reg16}} -> - [?PFX_OPND, 16#89 | encode_rm(RM16, Reg16, [])]; - {{rm32,RM32}, {reg32,Reg32}} -> - [16#89 | encode_rm(RM32, Reg32, [])]; - {{reg8,Reg8}, {rm8,RM8}} -> - [16#8A | encode_rm(RM8, Reg8, [])]; - {{reg16,Reg16}, {rm16,RM16}} -> - [?PFX_OPND, 16#8B | encode_rm(RM16, Reg16, [])]; - {{reg32,Reg32}, {rm32,RM32}} -> - [16#8B | encode_rm(RM32, Reg32, [])]; - {al, {moffs8,Moffs8}} -> - [16#A0 | le32(Moffs8, [])]; - {ax, {moffs16,Moffs16}} -> - [?PFX_OPND, 16#A1 | le32(Moffs16, [])]; - {eax, {moffs32,Moffs32}} -> - [16#A1 | le32(Moffs32, [])]; - {{moffs8,Moffs8}, al} -> - [16#A2 | le32(Moffs8, [])]; - {{moffs16,Moffs16}, ax} -> - [?PFX_OPND, 16#A3 | le32(Moffs16, [])]; - {{moffs32,Moffs32}, eax} -> - [16#A3 | le32(Moffs32, [])]; - {{reg8,Reg8}, {imm8,Imm8}} -> - [16#B0 bor Reg8, Imm8]; - {{reg16,Reg16}, {imm16,Imm16}} -> - [?PFX_OPND, 16#B8 bor Reg16 | le16(Imm16, [])]; - {{reg32,Reg32}, {imm32,Imm32}} -> - [16#B8 bor Reg32 | le32(Imm32, [])]; - {{rm8,RM8}, {imm8,Imm8}} -> - [16#C6 | encode_rm(RM8, 2#000, [Imm8])]; - {{rm16,RM16}, {imm16,Imm16}} -> - [?PFX_OPND, 16#C7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; - {{rm32,RM32}, {imm32,Imm32}} -> - [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))] - end. 
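Two more encoding traces through the cases above, for a register-to-register move and a short conditional jump (cc(e) = 2#0100 per the table at the top of the module):

%% "movl %ebx,%eax":
%%   mov_encode({{reg32,2#000}, {rm32, rm_reg(2#011)}})
%%     = [16#8B | encode_rm(rm_reg(2#011), 2#000, [])] = [16#8B, 16#C3]
%% "je .+0x12" (rel8 = 16#10, counted from the end of the 2-byte instruction):
%%   jcc_encode({{cc, cc(e)}, {rel8, 16#10}})
%%     = [16#70 bor 2#0100, 16#10] = [16#74, 16#10]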
- -mov_sizeof(Opnds) -> - case Opnds of - {{rm8,RM8}, {reg8,_}} -> - 1 + sizeof_rm(RM8); - {{rm16,RM16}, {reg16,_}} -> - 2 + sizeof_rm(RM16); - {{rm32,RM32}, {reg32,_}} -> - 1 + sizeof_rm(RM32); - {{reg8,_}, {rm8,RM8}} -> - 1 + sizeof_rm(RM8); - {{reg16,_}, {rm16,RM16}} -> - 2 + sizeof_rm(RM16); - {{reg32,_}, {rm32,RM32}} -> - 1 + sizeof_rm(RM32); - {al, {moffs8,_}} -> - 1 + 4; - {ax, {moffs16,_}} -> - 2 + 4; - {eax, {moffs32,_}} -> - 1 + 4; - {{moffs8,_}, al} -> - 1 + 4; - {{moffs16,_}, ax} -> - 2 + 4; - {{moffs32,_}, eax} -> - 1 + 4; - {{reg8,_}, {imm8,_}} -> - 2; - {{reg16,_}, {imm16,_}} -> - 2 + 2; - {{reg32,_}, {imm32,_}} -> - 1 + 4; - {{rm8,RM8}, {imm8,_}} -> - 1 + sizeof_rm(RM8) + 1; - {{rm16,RM16}, {imm16,_}} -> - 2 + sizeof_rm(RM16) + 2; - {{rm32,RM32}, {imm32,_}} -> - 1 + sizeof_rm(RM32) + 4 - end. - -movx_op_encode(Opcode, Opnds) -> % movsx, movzx - case Opnds of - {{reg16,Reg16}, {rm8,RM8}} -> - [?PFX_OPND, 16#0F, Opcode | encode_rm(RM8, Reg16, [])]; - {{reg32,Reg32}, {rm8,RM8}} -> - [16#0F, Opcode | encode_rm(RM8, Reg32, [])]; - {{reg32,Reg32}, {rm16,RM16}} -> - [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])] - end. - -movx_op_sizeof(Opnds) -> - case Opnds of - {{reg16,_}, {rm8,RM8}} -> - 3 + sizeof_rm(RM8); - {{reg32,_}, {rm8,RM8}} -> - 2 + sizeof_rm(RM8); - {{reg32,_}, {rm16,RM16}} -> - 2 + sizeof_rm(RM16) - end. - -pop_encode(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - [16#8F | encode_rm(RM32, 2#000, [])]; - {{reg32,Reg32}} -> - [16#58 bor Reg32] - end. - -pop_sizeof(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32); - {{reg32,_}} -> - 1 - end. - -push_encode(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - [16#FF | encode_rm(RM32, 2#110, [])]; - {{reg32,Reg32}} -> - [16#50 bor Reg32]; - {{imm8,Imm8}} -> % sign-extended - [16#6A, Imm8]; - {{imm32,Imm32}} -> - [16#68 | le32(Imm32, [])] - end. - -push_sizeof(Opnds) -> - case Opnds of - {{rm32,RM32}} -> - 1 + sizeof_rm(RM32); - {{reg32,_}} -> - 1; - {{imm8,_}} -> - 2; - {{imm32,_}} -> - 1 + 4 - end. - -shift_op_encode(SubOpcode, Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr - case Opnds of - {{rm32,RM32}, 1} -> - [16#D1 | encode_rm(RM32, SubOpcode, [])]; - {{rm32,RM32}, cl} -> - [16#D3 | encode_rm(RM32, SubOpcode, [])]; - {{rm32,RM32}, {imm8,Imm8}} -> - [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])]; - {{rm16,RM16}, {imm8,Imm8}} -> - [?PFX_OPND, 16#C1 | encode_rm(RM16, SubOpcode, [Imm8])] - end. - -shift_op_sizeof(Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr - case Opnds of - {{rm32,RM32}, 1} -> - 1 + sizeof_rm(RM32); - {{rm32,RM32}, cl} -> - 1 + sizeof_rm(RM32); - {{rm32,RM32}, {imm8,_Imm8}} -> - 1 + sizeof_rm(RM32) + 1; - {{rm16,RM16}, {imm8,_Imm8}} -> - 1 + 1 + sizeof_rm(RM16) + 1 - end. - -ret_encode(Opnds) -> - case Opnds of - {} -> - [16#C3]; - {{imm16,Imm16}} -> - [16#C2 | le16(Imm16, [])] - end. - -ret_sizeof(Opnds) -> - case Opnds of - {} -> - 1; - {{imm16,_}} -> - 1 + 2 - end. - -setcc_encode({{cc,CC}, {rm8,RM8}}) -> - [16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])]. - -setcc_sizeof({{cc,_}, {rm8,RM8}}) -> - 2 + sizeof_rm(RM8). - -shd_op_encode(Opcode, Opnds) -> - case Opnds of - {{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} -> - [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])]; - {{rm32,RM32}, {reg32,Reg32}, cl} -> - [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])] - end. - -shd_op_sizeof(Opnds) -> - case Opnds of - {{rm32,RM32}, {reg32,_}, {imm8,_}} -> - 2 + sizeof_rm(RM32) + 1; - {{rm32,RM32}, {reg32,_}, cl} -> - 2 + sizeof_rm(RM32) - end. 
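Similarly, an added example that exercises only the one-byte register forms and the sign-extended immediate form of push/pop above:

    %% push ecx / pop ecx:  16#50+r and 16#58+r  (ecx is register number 1)
    [16#51] = push_encode({{reg32,2#001}}),
    [16#59] = pop_encode({{reg32,2#001}}),
    %% push 16#7F:  8-bit immediate, sign-extended by the CPU
    [16#6A,16#7F] = push_encode({{imm8,16#7F}}).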
- -test_encode(Opnds) -> - case Opnds of - {al, {imm8,Imm8}} -> - [16#A8, Imm8]; - {ax, {imm16,Imm16}} -> - [?PFX_OPND, 16#A9 | le16(Imm16, [])]; - {eax, {imm32,Imm32}} -> - [16#A9 | le32(Imm32, [])]; - {{rm8,RM8}, {imm8,Imm8}} -> - [16#F6 | encode_rm(RM8, 2#000, [Imm8])]; - {{rm16,RM16}, {imm16,Imm16}} -> - [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; - {{rm32,RM32}, {imm32,Imm32}} -> - [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; - {{rm32,RM32}, {reg32,Reg32}} -> - [16#85 | encode_rm(RM32, Reg32, [])] - end. - -test_sizeof(Opnds) -> - case Opnds of - {al, {imm8,_}} -> - 1 + 1; - {ax, {imm16,_}} -> - 2 + 2; - {eax, {imm32,_}} -> - 1 + 4; - {{rm8,RM8}, {imm8,_}} -> - 1 + sizeof_rm(RM8) + 1; - {{rm16,RM16}, {imm16,_}} -> - 2 + sizeof_rm(RM16) + 2; - {{rm32,RM32}, {imm32,_}} -> - 1 + sizeof_rm(RM32) + 4; - {{rm32,RM32}, {reg32,_}} -> - 1 + sizeof_rm(RM32) - end. - -fild_encode(Opnds) -> - %% The operand cannot be a register! - {{rm32, RM32}} = Opnds, - [16#DB | encode_rm(RM32, 2#000, [])]. - -fild_sizeof(Opnds) -> - {{rm32, RM32}} = Opnds, - 1 + sizeof_rm(RM32). - -fld_encode(Opnds) -> - case Opnds of - {{rm64fp, RM64fp}} -> - [16#DD | encode_rm(RM64fp, 2#000, [])]; - {{fpst, St}} -> - [16#D9, 16#C0 bor st(St)] - end. - -fld_sizeof(Opnds) -> - case Opnds of - {{rm64fp, RM64fp}} -> - 1 + sizeof_rm(RM64fp); - {{fpst, _}} -> - 2 - end. - -fp_comm_arith_encode(OpCode, Opnds) -> - %% fadd, fmul - case Opnds of - {{rm64fp, RM64fp}} -> - [16#DC | encode_rm(RM64fp, OpCode, [])]; - {{fpst,0}, {fpst,St}} -> - [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)]; - {{fpst,St}, {fpst,0}} -> - [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)] - end. - -fp_comm_arith_pop_encode(OpCode, Opnds) -> - %% faddp, fmulp - case Opnds of - [] -> - [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)]; - {{fpst,St},{fpst,0}} -> - [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)] - end. - -fp_arith_encode(OpCode, Opnds) -> - %% fdiv, fsub - case Opnds of - {{rm64fp, RM64fp}} -> - [16#DC | encode_rm(RM64fp, OpCode, [])]; - {{fpst,0}, {fpst,St}} -> - OpCode0 = OpCode band 2#110, - [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; - {{fpst,St}, {fpst,0}} -> - OpCode0 = OpCode bor 1, - [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] - end. - -fp_arith_pop_encode(OpCode, Opnds) -> - %% fdivp, fsubp - OpCode0 = OpCode bor 1, - case Opnds of - [] -> - [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)]; - {{fpst,St}, {fpst,0}} -> - [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)] - end. - -fp_arith_rev_encode(OpCode, Opnds) -> - %% fdivr, fsubr - case Opnds of - {{rm64fp, RM64fp}} -> - [16#DC | encode_rm(RM64fp, OpCode, [])]; - {{fpst,0}, {fpst,St}} -> - OpCode0 = OpCode bor 1, - [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; - {{fpst,St}, {fpst,0}} -> - OpCode0 = OpCode band 2#110, - [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] - end. - -fp_arith_rev_pop_encode(OpCode, Opnds) -> - %% fdivrp, fsubrp - OpCode0 = OpCode band 2#110, - case Opnds of - [] -> - [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)]; - {{fpst,St}, {fpst, 0}} -> - [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)] - end. - -fp_arith_sizeof(Opnds) -> - case Opnds of - {{rm64fp, RM64fp}} -> - 1 + sizeof_rm(RM64fp); - {{fpst,0}, {fpst,_}} -> - 2; - {{fpst,_}, {fpst,0}} -> - 2 - end. - -fst_encode(OpCode, Opnds) -> - case Opnds of - {{rm64fp, RM64fp}} -> - [16#DD | encode_rm(RM64fp, OpCode, [])]; - {{fpst, St}} -> - [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)] - end. 
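For the x87 register-stack forms above, an added illustration: the three low bits of the second byte select the st(i) operand.

    %% fld st(3)   =>  D9 C3
    [16#D9,16#C3] = fld_encode({{fpst,3}}),
    %% fstp st(2)  =>  DD DA   (sub-opcode 2#011 selects the popping store)
    [16#DD,16#DA] = fst_encode(2#011, {{fpst,2}}).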
- -fst_sizeof(Opnds) -> - case Opnds of - {{rm64fp, RM64fp}} -> - 1 + sizeof_rm(RM64fp); - {{fpst, _}} -> - 2 - end. - -fchs_encode() -> - [16#D9, 16#E0]. -fchs_sizeof() -> - 2. - -ffree_encode({{fpst, St}})-> - [16#DD, 16#C0 bor st(St)]. -ffree_sizeof() -> - 2. - -fwait_encode() -> - [16#9B]. -fwait_sizeof() -> - 1. - -fxch_encode(Opnds) -> - case Opnds of - [] -> - [16#D9, 16#C8 bor st(1)]; - {{fpst, St}} -> - [16#D9, 16#C8 bor st(St)] - end. -fxch_sizeof() -> - 2. - -insn_encode(Op, Opnds, Offset) -> - Bytes = insn_encode_internal(Op, Opnds), - case has_relocs(Bytes) of - false -> % the common case - {Bytes, []}; - _ -> - fix_relocs(Bytes, Offset, [], []) - end. - -has_relocs([{le32,_,_}|_]) -> true; -has_relocs([_|Bytes]) -> has_relocs(Bytes); -has_relocs([]) -> false. - -fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) -> - fix_relocs(Bytes, Offset+4, - [16#00, 16#00, 16#00, 16#00 | Code], - [{Tag,Offset,Val}|Relocs]); -fix_relocs([Byte|Bytes], Offset, Code, Relocs) -> - fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs); -fix_relocs([], _Offset, Code, Relocs) -> - {lists:reverse(Code), lists:reverse(Relocs)}. - -insn_encode_internal(Op, Opnds) -> - case Op of - 'adc' -> arith_binop_encode(2#010, Opnds); - 'add' -> arith_binop_encode(2#000, Opnds); - 'and' -> arith_binop_encode(2#100, Opnds); - 'bsf' -> bs_op_encode(16#BC, Opnds); - 'bsr' -> bs_op_encode(16#BD, Opnds); - 'bswap' -> bswap_encode(Opnds); - 'bt' -> bt_op_encode(2#100, Opnds); - 'btc' -> bt_op_encode(2#111, Opnds); - 'btr' -> bt_op_encode(2#110, Opnds); - 'bts' -> bt_op_encode(2#101, Opnds); - 'call' -> call_encode(Opnds); - 'cbw' -> cbw_encode(Opnds); - 'cdq' -> nullary_op_encode(16#99, Opnds); - 'clc' -> nullary_op_encode(16#F8, Opnds); - 'cld' -> nullary_op_encode(16#FC, Opnds); - 'cmc' -> nullary_op_encode(16#F5, Opnds); - 'cmovcc' -> cmovcc_encode(Opnds); - 'cmp' -> arith_binop_encode(2#111, Opnds); - 'cwde' -> nullary_op_encode(16#98, Opnds); - 'dec' -> incdec_encode(2#001, Opnds); - 'div' -> arith_unop_encode(2#110, Opnds); - 'enter' -> enter_encode(Opnds); - 'fadd' -> fp_comm_arith_encode(2#000, Opnds); - 'faddp' -> fp_comm_arith_pop_encode(2#000, Opnds); - 'fchs' -> fchs_encode(); - 'fdiv' -> fp_arith_encode(2#110, Opnds); - 'fdivp' -> fp_arith_pop_encode(2#110, Opnds); - 'fdivr' -> fp_arith_rev_encode(2#111, Opnds); - 'fdivrp' -> fp_arith_rev_pop_encode(2#111, Opnds); - 'ffree' -> ffree_encode(Opnds); - 'fild' -> fild_encode(Opnds); - 'fld' -> fld_encode(Opnds); - 'fmul' -> fp_comm_arith_encode(2#001, Opnds); - 'fmulp' -> fp_comm_arith_pop_encode(2#001, Opnds); - 'fst' -> fst_encode(2#010, Opnds); - 'fstp' -> fst_encode(2#011, Opnds); - 'fsub' -> fp_arith_encode(2#100, Opnds); - 'fsubp' -> fp_arith_pop_encode(2#100, Opnds); - 'fsubr' -> fp_arith_rev_encode(2#101, Opnds); - 'fsubrp' -> fp_arith_rev_pop_encode(2#101, Opnds); - 'fwait' -> fwait_encode(); - 'fxch' -> fxch_encode(Opnds); - 'idiv' -> arith_unop_encode(2#111, Opnds); - 'imul' -> imul_encode(Opnds); - 'inc' -> incdec_encode(2#000, Opnds); - 'into' -> nullary_op_encode(16#CE, Opnds); - 'jcc' -> jcc_encode(Opnds); - 'jecxz' -> jmp8_op_encode(16#E3, Opnds); - 'jmp' -> jmp_encode(Opnds); - 'lea' -> lea_encode(Opnds); - 'leave' -> nullary_op_encode(16#C9, Opnds); - 'loop' -> jmp8_op_encode(16#E2, Opnds); - 'loope' -> jmp8_op_encode(16#E1, Opnds); - 'loopne' -> jmp8_op_encode(16#E0, Opnds); - 'mov' -> mov_encode(Opnds); - 'movsx' -> movx_op_encode(16#BE, Opnds); - 'movzx' -> movx_op_encode(16#B6, Opnds); - 'mul' -> arith_unop_encode(2#100, Opnds); - 
'neg' -> arith_unop_encode(2#011, Opnds); - 'nop' -> nullary_op_encode(16#90, Opnds); - 'not' -> arith_unop_encode(2#010, Opnds); - 'or' -> arith_binop_encode(2#001, Opnds); - 'pop' -> pop_encode(Opnds); - 'prefix_fs' -> nullary_op_encode(16#64, Opnds); - 'push' -> push_encode(Opnds); - 'rcl' -> shift_op_encode(2#010, Opnds); - 'rcr' -> shift_op_encode(2#011, Opnds); - 'ret' -> ret_encode(Opnds); - 'rol' -> shift_op_encode(2#000, Opnds); - 'ror' -> shift_op_encode(2#001, Opnds); - 'sar' -> shift_op_encode(2#111, Opnds); - 'sbb' -> arith_binop_encode(2#011, Opnds); - 'setcc' -> setcc_encode(Opnds); - 'shl' -> shift_op_encode(2#100, Opnds); - 'shld' -> shd_op_encode(16#A4, Opnds); - 'shr' -> shift_op_encode(2#101, Opnds); - 'shrd' -> shd_op_encode(16#AC, Opnds); - 'stc' -> nullary_op_encode(16#F9, Opnds); - 'std' -> nullary_op_encode(16#FD, Opnds); - 'sub' -> arith_binop_encode(2#101, Opnds); - 'test' -> test_encode(Opnds); - 'xor' -> arith_binop_encode(2#110, Opnds); - _ -> exit({?MODULE,insn_encode,Op}) - end. - -insn_sizeof(Op, Opnds) -> - case Op of - 'adc' -> arith_binop_sizeof(Opnds); - 'add' -> arith_binop_sizeof(Opnds); - 'and' -> arith_binop_sizeof(Opnds); - 'bsf' -> bs_op_sizeof(Opnds); - 'bsr' -> bs_op_sizeof(Opnds); - 'bswap' -> bswap_sizeof(Opnds); - 'bt' -> bt_op_sizeof(Opnds); - 'btc' -> bt_op_sizeof(Opnds); - 'btr' -> bt_op_sizeof(Opnds); - 'bts' -> bt_op_sizeof(Opnds); - 'call' -> call_sizeof(Opnds); - 'cbw' -> cbw_sizeof(Opnds); - 'cdq' -> nullary_op_sizeof(Opnds); - 'clc' -> nullary_op_sizeof(Opnds); - 'cld' -> nullary_op_sizeof(Opnds); - 'cmc' -> nullary_op_sizeof(Opnds); - 'cmovcc' -> cmovcc_sizeof(Opnds); - 'cmp' -> arith_binop_sizeof(Opnds); - 'cwde' -> nullary_op_sizeof(Opnds); - 'dec' -> incdec_sizeof(Opnds); - 'div' -> arith_unop_sizeof(Opnds); - 'enter' -> enter_sizeof(Opnds); - 'fadd' -> fp_arith_sizeof(Opnds); - 'faddp' -> fp_arith_sizeof(Opnds); - 'fchs' -> fchs_sizeof(); - 'fdiv' -> fp_arith_sizeof(Opnds); - 'fdivp' -> fp_arith_sizeof(Opnds); - 'fdivr' -> fp_arith_sizeof(Opnds); - 'fdivrp' -> fp_arith_sizeof(Opnds); - 'ffree' -> ffree_sizeof(); - 'fild' -> fild_sizeof(Opnds); - 'fld' -> fld_sizeof(Opnds); - 'fmul' -> fp_arith_sizeof(Opnds); - 'fmulp' -> fp_arith_sizeof(Opnds); - 'fst' -> fst_sizeof(Opnds); - 'fstp' -> fst_sizeof(Opnds); - 'fsub' -> fp_arith_sizeof(Opnds); - 'fsubp' -> fp_arith_sizeof(Opnds); - 'fsubr' -> fp_arith_sizeof(Opnds); - 'fsubrp' -> fp_arith_sizeof(Opnds); - 'fwait' -> fwait_sizeof(); - 'fxch' -> fxch_sizeof(); - 'idiv' -> arith_unop_sizeof(Opnds); - 'imul' -> imul_sizeof(Opnds); - 'inc' -> incdec_sizeof(Opnds); - 'into' -> nullary_op_sizeof(Opnds); - 'jcc' -> jcc_sizeof(Opnds); - 'jecxz' -> jmp8_op_sizeof(Opnds); - 'jmp' -> jmp_sizeof(Opnds); - 'lea' -> lea_sizeof(Opnds); - 'leave' -> nullary_op_sizeof(Opnds); - 'loop' -> jmp8_op_sizeof(Opnds); - 'loope' -> jmp8_op_sizeof(Opnds); - 'loopne' -> jmp8_op_sizeof(Opnds); - 'mov' -> mov_sizeof(Opnds); - 'movsx' -> movx_op_sizeof(Opnds); - 'movzx' -> movx_op_sizeof(Opnds); - 'mul' -> arith_unop_sizeof(Opnds); - 'neg' -> arith_unop_sizeof(Opnds); - 'nop' -> nullary_op_sizeof(Opnds); - 'not' -> arith_unop_sizeof(Opnds); - 'or' -> arith_binop_sizeof(Opnds); - 'pop' -> pop_sizeof(Opnds); - 'prefix_fs' -> nullary_op_sizeof(Opnds); - 'push' -> push_sizeof(Opnds); - 'rcl' -> shift_op_sizeof(Opnds); - 'rcr' -> shift_op_sizeof(Opnds); - 'ret' -> ret_sizeof(Opnds); - 'rol' -> shift_op_sizeof(Opnds); - 'ror' -> shift_op_sizeof(Opnds); - 'sar' -> shift_op_sizeof(Opnds); - 'sbb' -> 
arith_binop_sizeof(Opnds); - 'setcc' -> setcc_sizeof(Opnds); - 'shl' -> shift_op_sizeof(Opnds); - 'shld' -> shd_op_sizeof(Opnds); - 'shr' -> shift_op_sizeof(Opnds); - 'shrd' -> shd_op_sizeof(Opnds); - 'stc' -> nullary_op_sizeof(Opnds); - 'std' -> nullary_op_sizeof(Opnds); - 'sub' -> arith_binop_sizeof(Opnds); - 'test' -> test_sizeof(Opnds); - 'xor' -> arith_binop_sizeof(Opnds); - _ -> exit({?MODULE,insn_sizeof,Op}) - end. - -%%===================================================================== -%% testing interface -%%===================================================================== - --ifdef(DO_HIPE_X86_ENCODE_TEST). - -say(OS, Str) -> - file:write(OS, Str). - -digit16(Dig0) -> - Dig = Dig0 band 16#F, - if Dig >= 16#A -> $A + (Dig - 16#A); - true -> $0 + Dig - end. - -say_byte(OS, Byte) -> - say(OS, "0x"), - say(OS, [digit16(Byte bsr 4)]), - say(OS, [digit16(Byte)]). - -init(OS) -> - say(OS, "\t.text\n"). - -say_bytes(OS, Byte0, Bytes0) -> - say_byte(OS, Byte0), - case Bytes0 of - [] -> - say(OS, "\n"); - [Byte1|Bytes1] -> - say(OS, ","), - say_bytes(OS, Byte1, Bytes1) - end. - -t(OS, Op, Opnds) -> - insn_sizeof(Op, Opnds), - {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0), - say(OS, "\t.byte "), - say_bytes(OS, Byte, Bytes). - -dotest1(OS) -> - init(OS), - % exercise all rm32 types - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32(16#87654321)}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}), - t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}), - t(OS,call,{{rm32,rm_reg(?EAX)}}), - t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}), - t(OS,call,{{rel32,-5}}), - % default parameters for the tests below - Word32 = 16#87654321, - Word16 = 16#F00F, - Word8 = 16#80, - Imm32 = {imm32,Word32}, - Imm16 = {imm16,Word16}, - Imm8 = {imm8,Word8}, - RM32 = {rm32,rm_reg(?EDX)}, - RM16 = {rm16,rm_reg(?EDX)}, - RM8 = {rm8,rm_reg(?EDX)}, - Rel32 = {rel32,Word32}, - Rel8 = {rel8,Word8}, - Moffs32 = {moffs32,Word32}, - Moffs16 = {moffs16,Word32}, - Moffs8 = {moffs8,Word32}, - CC = {cc,?CC_G}, - Reg32 = {reg32,?EAX}, - Reg16 = {reg16,?EAX}, - Reg8 = {reg8,?AH}, - EA = {ea,ea_base(?ECX)}, - % exercise each instruction definition - t(OS,'adc',{eax,Imm32}), - t(OS,'adc',{RM32,Imm32}), - t(OS,'adc',{RM32,Imm8}), - t(OS,'adc',{RM32,Reg32}), - t(OS,'adc',{Reg32,RM32}), - t(OS,'add',{eax,Imm32}), - t(OS,'add',{RM32,Imm32}), - t(OS,'add',{RM32,Imm8}), - t(OS,'add',{RM32,Reg32}), - t(OS,'add',{Reg32,RM32}), - t(OS,'and',{eax,Imm32}), - t(OS,'and',{RM32,Imm32}), - t(OS,'and',{RM32,Imm8}), - t(OS,'and',{RM32,Reg32}), - t(OS,'and',{Reg32,RM32}), - t(OS,'bsf',{Reg32,RM32}), - t(OS,'bsr',{Reg32,RM32}), - t(OS,'bswap',{Reg32}), - t(OS,'bt',{RM32,Reg32}), - t(OS,'bt',{RM32,Imm8}), - t(OS,'btc',{RM32,Reg32}), - t(OS,'btc',{RM32,Imm8}), - t(OS,'btr',{RM32,Reg32}), - t(OS,'btr',{RM32,Imm8}), - t(OS,'bts',{RM32,Reg32}), - t(OS,'bts',{RM32,Imm8}), - t(OS,'call',{Rel32}), - 
t(OS,'call',{RM32}), - t(OS,'cbw',{}), - t(OS,'cdq',{}), - t(OS,'clc',{}), - t(OS,'cld',{}), - t(OS,'cmc',{}), - t(OS,'cmovcc',{CC,Reg32,RM32}), - t(OS,'cmp',{eax,Imm32}), - t(OS,'cmp',{RM32,Imm32}), - t(OS,'cmp',{RM32,Imm8}), - t(OS,'cmp',{RM32,Reg32}), - t(OS,'cmp',{Reg32,RM32}), - t(OS,'cwde',{}), - t(OS,'dec',{RM32}), - t(OS,'dec',{Reg32}), - t(OS,'div',{RM32}), - t(OS,'enter',{Imm16,{imm8,3}}), - t(OS,'idiv',{RM32}), - t(OS,'imul',{RM32}), - t(OS,'imul',{Reg32,RM32}), - t(OS,'imul',{Reg32,RM32,Imm8}), - t(OS,'imul',{Reg32,RM32,Imm32}), - t(OS,'inc',{RM32}), - t(OS,'inc',{Reg32}), - t(OS,'into',{}), - t(OS,'jcc',{CC,Rel8}), - t(OS,'jcc',{CC,Rel32}), - t(OS,'jecxz',{Rel8}), - t(OS,'jmp',{Rel8}), - t(OS,'jmp',{Rel32}), - t(OS,'jmp',{RM32}), - t(OS,'lea',{Reg32,EA}), - t(OS,'leave',{}), - t(OS,'loop',{Rel8}), - t(OS,'loope',{Rel8}), - t(OS,'loopne',{Rel8}), - t(OS,'mov',{RM8,Reg8}), - t(OS,'mov',{RM16,Reg16}), - t(OS,'mov',{RM32,Reg32}), - t(OS,'mov',{Reg8,RM8}), - t(OS,'mov',{Reg16,RM16}), - t(OS,'mov',{Reg32,RM32}), - t(OS,'mov',{al,Moffs8}), - t(OS,'mov',{ax,Moffs16}), - t(OS,'mov',{eax,Moffs32}), - t(OS,'mov',{Moffs8,al}), - t(OS,'mov',{Moffs16,ax}), - t(OS,'mov',{Moffs32,eax}), - t(OS,'mov',{Reg8,Imm8}), - t(OS,'mov',{Reg16,Imm16}), - t(OS,'mov',{Reg32,Imm32}), - t(OS,'mov',{RM8,Imm8}), - t(OS,'mov',{RM16,Imm16}), - t(OS,'mov',{RM32,Imm32}), - t(OS,'movsx',{Reg16,RM8}), - t(OS,'movsx',{Reg32,RM8}), - t(OS,'movsx',{Reg32,RM16}), - t(OS,'movzx',{Reg16,RM8}), - t(OS,'movzx',{Reg32,RM8}), - t(OS,'movzx',{Reg32,RM16}), - t(OS,'mul',{RM32}), - t(OS,'neg',{RM32}), - t(OS,'nop',{}), - t(OS,'not',{RM32}), - t(OS,'or',{eax,Imm32}), - t(OS,'or',{RM32,Imm32}), - t(OS,'or',{RM32,Imm8}), - t(OS,'or',{RM32,Reg32}), - t(OS,'or',{Reg32,RM32}), - t(OS,'pop',{RM32}), - t(OS,'pop',{Reg32}), - t(OS,'push',{RM32}), - t(OS,'push',{Reg32}), - t(OS,'push',{Imm8}), - t(OS,'push',{Imm32}), - t(OS,'rcl',{RM32,1}), - t(OS,'rcl',{RM32,cl}), - t(OS,'rcl',{RM32,Imm8}), - t(OS,'rcl',{RM16,Imm8}), - t(OS,'rcr',{RM32,1}), - t(OS,'rcr',{RM32,cl}), - t(OS,'rcr',{RM32,Imm8}), - t(OS,'rcr',{RM16,Imm8}), - t(OS,'ret',{}), - t(OS,'ret',{Imm16}), - t(OS,'rol',{RM32,1}), - t(OS,'rol',{RM32,cl}), - t(OS,'rol',{RM32,Imm8}), - t(OS,'rol',{RM16,Imm8}), - t(OS,'ror',{RM32,1}), - t(OS,'ror',{RM32,cl}), - t(OS,'ror',{RM32,Imm8}), - t(OS,'ror',{RM16,Imm8}), - t(OS,'sar',{RM32,1}), - t(OS,'sar',{RM32,cl}), - t(OS,'sar',{RM32,Imm8}), - t(OS,'sar',{RM16,Imm8}), - t(OS,'sbb',{eax,Imm32}), - t(OS,'sbb',{RM32,Imm32}), - t(OS,'sbb',{RM32,Imm8}), - t(OS,'sbb',{RM32,Reg32}), - t(OS,'sbb',{Reg32,RM32}), - t(OS,'setcc',{CC,RM8}), - t(OS,'shl',{RM32,1}), - t(OS,'shl',{RM32,cl}), - t(OS,'shl',{RM32,Imm8}), - t(OS,'shl',{RM16,Imm8}), - t(OS,'shld',{RM32,Reg32,Imm8}), - t(OS,'shld',{RM32,Reg32,cl}), - t(OS,'shr',{RM32,1}), - t(OS,'shr',{RM32,cl}), - t(OS,'shr',{RM32,Imm8}), - t(OS,'shr',{RM16,Imm8}), - t(OS,'shrd',{RM32,Reg32,Imm8}), - t(OS,'shrd',{RM32,Reg32,cl}), - t(OS,'stc',{}), - t(OS,'std',{}), - t(OS,'sub',{eax,Imm32}), - t(OS,'sub',{RM32,Imm32}), - t(OS,'sub',{RM32,Imm8}), - t(OS,'sub',{RM32,Reg32}), - t(OS,'sub',{Reg32,RM32}), - t(OS,'test',{al,Imm8}), - t(OS,'test',{ax,Imm16}), - t(OS,'test',{eax,Imm32}), - t(OS,'test',{RM8,Imm8}), - t(OS,'test',{RM16,Imm16}), - t(OS,'test',{RM32,Imm32}), - t(OS,'test',{RM32,Reg32}), - t(OS,'xor',{eax,Imm32}), - t(OS,'xor',{RM32,Imm32}), - t(OS,'xor',{RM32,Imm8}), - t(OS,'xor',{RM32,Reg32}), - t(OS,'xor',{Reg32,RM32}), - t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32(16#20))}}), - 
[]. - -dotest() -> dotest1(group_leader()). % stdout == group_leader - -dotest(File) -> - {ok,OS} = file:open(File, [write]), - dotest1(OS), - file:close(OS). --endif. diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt deleted file mode 100644 index eab732fb2d..0000000000 --- a/lib/hipe/x86/hipe_x86_encode.txt +++ /dev/null @@ -1,211 +0,0 @@ -hipe_x86_encode USAGE GUIDE -Revision 0.4, 2001-10-09 - -This document describes how to use the hipe_x86_encode.erl module. - -Preliminaries -------------- -This is not a tutorial on the x86 architecture. The reader -should be familiar with both the programming model and -the general syntax of instructions and their operands. - -The hipe_x86_encode module follows the conventions in the -"Intel Architecture Software Developer's Manual, Volume 2: -Instruction Set Reference" document. In particular, the -order of source and destination operands in instructions -follows Intel's conventions: "add eax,edx" adds edx to eax. -The GNU Assembler "gas" follows the so-called AT&T syntax -which reverses the order of the source and destination operands. - -Basic Functionality -------------------- -The hipe_x86_encode module implements the mapping from symbolic x86 -instructions to their binary representation, as lists of bytes. - -Instructions and operands have to match actual x86 instructions -and operands exactly. The mapping from "abstract" instructions -to correct x86 instructions has to be done before the instructions -are passed to the hipe_x86_encode module. (In HiPE, this mapping -is done by the hipe_x86_assemble module.) - -The hipe_x86_encode module handles arithmetic operations on 32-bit -integers, data movement of 8, 16, and 32-bit words, and most -control flow operations. A 32-bit address and operand size process -mode is assumed, which is what Unix and Linux systems use. - -Operations and registers related to floating-point, MMX, SIMD, 3dnow!, -or operating system control are not implemented. Segment registers -are supported minimally: a 'prefix_fs' pseudo-instruction can be -used to insert an FS segment register override prefix. - -Instruction Syntax ------------------- -The function hipe_x86_encode:insn_encode/1 takes an instruction in -symbolic form and translates it to its binary representation, -as a list of bytes. 
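For instance (an illustrative pair of encodings added here; the operand
syntax is spelled out in full below):

        {nop, {}}                encodes to [16#90]
        {push, {{imm8, 16#7F}}}  encodes to [16#6A, 16#7F]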
- -Symbolic instructions are Erlang terms in the following syntax: - - Insn ::= {Op,Opnds} - Op ::= (an Erlang atom) - Opnds ::= {Opnd1,...,Opndn} (n >= 0) - Opnd ::= eax | ax | al | 1 | cl - | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} - | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} - | {rel32,Rel32} | {rel8,Rel8} - | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} - | {cc,CC} - | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} - | {ea,EA} - Imm32 ::= (a 32-bit integer; immediate value) - Imm16 ::= (a 16-bit integer; immediate value) - Imm8 ::= (an 8-bit integer; immediate value) - Rel32 ::= (a 32-bit integer; jump offset) - Rel8 ::= (an 8-bit integer; jump offset) - Moffs32 ::= (a 32-bit integer; address of 32-bit word) - Moffs16 ::= (a 32-bit integer; address of 16-bit word) - Moffs8 ::= (a 32-bit integer; address of 8-bit word) - CC ::= (a 4-bit condition code) - Reg32 ::= (a 3-bit register number of a 32-bit register) - Reg16 ::= (same as Reg32, but the register size is 16 bits) - Reg8 ::= (a 3-bit register number of an 8-bit register) - EA ::= (general operand; a memory cell) - RM32 ::= (general operand; a 32-bit register or memory cell) - RM16 ::= (same as RM32, but the operand size is 16 bits) - RM8 ::= (general operand; an 8-bit register or memory cell) - -To construct these terms, the hipe_x86_encode module exports several -helper functions: - -cc/1 - Converts an atom to a 4-bit condition code. - -al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0 - Returns a 3-bit register number for an 8-bit register. - -eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0 - Returns a 3-bit register number for a 32- or 16-bit register. - -A general operand can be a register or a memory operand. -An x86 memory operand is expressed as an "effective address": - - Displacement(Base register,Index register,Scale) -or - [base register] + [(index register) * (scale)] + [displacement] - -where the base register is any of the 8 integer registers, -the index register in any of the 8 integer registers except ESP, -scale is 0, 1, 2, or 3 (multiply index with 1, 2, 4, or 8), -and displacement is an 8- or 32-bit offset. -Most components are optional. - -An effective address is constructed by calling one of the following -nine functions: - -ea_base/1 - ea_base(Reg32), where Reg32 is not ESP or EBP, - constructs the EA "(Reg32)", i.e. Reg32. -ea_disp32/1 - ea_disp32(Disp32) construct the EA "Disp32" -ea_disp32_base/2 - ea_disp32(Disp32, Reg32), where Reg32 is not ESP, - constructs the EA "Disp32(Reg32)", i.e. Reg32+Disp32. -ea_disp8_base/2 - This is like ea_disp32_base/2, except the displacement - is 8 bits instead of 32 bits. The CPU will _sign-extend_ - the 8-bit displacement to 32 bits before using it. -ea_disp32_sindex/1 - ea_disp32_sindex(Disp32) constructs the EA "Disp32", - but uses a longer encoding than ea_disp32/1. - Hint: Don't use this one. - -The last four forms use index registers with or without scaling -factors and base registers, so-called "SIBs". To build these, call: - -sindex/2 - sindex(Scale, Index), where scale is 0, 1, 2, or 3, and - Index is a 32-bit integer register except ESP, constructs - part of a SIB representing "Index * 2^Scale". -sib/1 - sib(Reg32) constructs a SIB containing only a base register - and no scaled index, "(Reg32)", i.e. "Reg32". -sib/2 - sib(Reg32, sindex(Scale, Index)) constructs a SIB - "(Reg32,Index,Scale)", i.e. "Reg32 + (Index * 2^Scale)". - -ea_sib/1 - ea_sib(SIB), where SIB's base register is not EBP, - constructs an EA which is that SIB, i.e. 
"(Base)" or - "(Base,Index,Scale)". -ea_disp32_sib/2 - ea_disp32_sib(Disp32, SIB) constructs the EA "Disp32(SIB)", - i.e. "Base+Disp32" or "Base+(Index*2^Scale)+Disp32". -ea_disp32_sindex/2 - ea_disp32_sindex(Disp32, Sindex) constructs the EA - "Disp32(,Index,Scale)", i.e. "(Index*2^Scale)+Disp32". -ea_disp8_sib/2 - This is just like ea_disp32_sib/2, except the displacement - is 8 bits (with sign-extension). - -To construct a general operand, call one of these two functions: - -rm_reg/1 - rm_reg(Reg) constructs a general operand which is that register. -rm_mem/1 - rm_mem(EA) constucts a general operand which is the memory - cell addressed by EA. - -A symbolic instruction with name "Op" and the n operands "Opnd1" -to "Opndn" is represented as the tuple - - {Op, {Opnd1, ..., Opndn}} - -Usage ------ -Once a symbolic instruction "Insn" has been constructed, it can be -translated to binary by calling - - insn_encode(Insn) - -which returns a list of bytes. - -Since x86 instructions have varying size (as opposed to most -RISC machines), there is also a function - - insn_sizeof(Insn) - -which returns the number of bytes the binary encoding will occupy. -insn_sizeof(Insn) equals length(insn_encode(Insn)), but insn_sizeof -is cheaper to compute. This is useful for two purposes: (1) when -compiling to memory, one needs to know in advance how many bytes of -memory to allocate for a piece of code, and (2) when computing the -relative distance between a jump or call instruction and its target -label. - -Examples --------- -1. nop -is constructed as - {nop, {}} - -2. add eax,edx (eax := eax + edx) -can be constructed as - {add, {eax, {reg32, hipe_x86_encode:edx()}}} -or as - Reg32 = {reg32, hipe_x86_encode:eax()}, - RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:edx())}, - {add, {Reg32, RM32}} - -3. mov edx,(eax) (edx := MEM[eax]) -is constructed as - Reg32 = {reg32, hipe_x86_encode:edx()}, - RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:eax())}, - {mov, {Reg32, RM32}} - -Addendum --------- -The hipe_x86_encode.erl source code is the authoritative reference -for the hipe_x86_encode module. - -Please report errors in either hipe_x86_encode.erl or this guide -to mikpe@it.uu.se. diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl deleted file mode 100644 index 558321d0c3..0000000000 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ /dev/null @@ -1,713 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% x86 stack frame handling -%%% -%%% - map non-register temps to stack slots -%%% - add explicit stack management code to prologue and epilogue, -%%% and at calls and tailcalls -%%% -%%% TODO: -%%% - Compute max stack in a pre-pass? (get rid of ref cell updates) -%%% - Merge all_temps and defun_minframe to a single -%%% pass, for compile-time efficiency reasons. - --ifdef(HIPE_AMD64). --define(HIPE_X86_FRAME, hipe_amd64_frame). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). 
--define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(LEAF_WORDS, ?AMD64_LEAF_WORDS). --else. --define(HIPE_X86_FRAME, hipe_x86_frame). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(LEAF_WORDS, ?X86_LEAF_WORDS). --endif. - --module(?HIPE_X86_FRAME). --export([frame/2]). --include("../x86/hipe_x86.hrl"). --include("../rtl/hipe_literals.hrl"). - -frame(CFG0, _Options) -> - Formals = fix_formals(hipe_x86_cfg:params(CFG0)), - Temps0 = all_temps(CFG0, Formals), - MinFrame = defun_minframe(CFG0), - Temps = ensure_minframe(MinFrame, Temps0), - Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - do_body(CFG0, Liveness, Formals, Temps). - -fix_formals(Formals) -> - fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). - -fix_formals(0, Rest) -> Rest; -fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); -fix_formals(_, []) -> []. - -do_body(CFG0, Liveness, Formals, Temps) -> - Context = mk_context(Liveness, Formals, Temps), - CFG1 = do_blocks(CFG0, Context), - do_prologue(CFG1, Context). - -do_blocks(CFG, Context) -> - hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). - -do_block(Label, Block, Context) -> - Liveness = context_liveness(Context), - LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), - hipe_bb:code_update(Block, NewCode). - -do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> - {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), - do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); -do_block([], _, Context, FPoff, RevCode) -> - FPoff0 = context_framesize(Context), - if FPoff =:= FPoff0 -> []; - true -> exit({?MODULE,do_block,FPoff}) - end, - lists:reverse(RevCode, []). - -do_insn(I, LiveOut, Context, FPoff) -> - case I of - #alu{} -> - {[do_alu(I, Context, FPoff)], FPoff}; - #cmp{} -> - {[do_cmp(I, Context, FPoff)], FPoff}; - #fp_unop{} -> - {do_fp_unop(I, Context, FPoff), FPoff}; - #fp_binop{} -> - {do_fp_binop(I, Context, FPoff), FPoff}; - #fmove{} -> - {[do_fmove(I, Context, FPoff)], FPoff}; - #imul{} -> - {[do_imul(I, Context, FPoff)], FPoff}; - #move{} -> - {do_move(I, Context, FPoff), FPoff}; - #movsx{} -> - {[do_movsx(I, Context, FPoff)], FPoff}; - #movzx{} -> - {[do_movzx(I, Context, FPoff)], FPoff}; - #pseudo_call{} -> - do_pseudo_call(I, LiveOut, Context, FPoff); - #pseudo_spill_fmove{} -> - {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; - #pseudo_spill_move{} -> - {do_pseudo_spill_move(I, Context, FPoff), FPoff}; - #pseudo_tailcall{} -> - {do_pseudo_tailcall(I, Context), context_framesize(Context)}; - #push{} -> - {[do_push(I, Context, FPoff)], FPoff+word_size()}; - #ret{} -> - {do_ret(I, Context, FPoff), context_framesize(Context)}; - #shift{} -> - {[do_shift(I, Context, FPoff)], FPoff}; - #test{} -> - {[do_test(I, Context, FPoff)], FPoff}; - _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare - {[I], FPoff} - end. - -%%% -%%% Convert any pseudo-temp operand in a binary (alu, cmp, move) -%%% or unary (push) instruction to an explicit x86_mem operand. -%%% - -do_alu(I, Context, FPoff) -> - #alu{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#alu{src=Src,dst=Dst}. - -do_cmp(I, Context, FPoff) -> - #cmp{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#cmp{src=Src,dst=Dst}. 
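These helpers, and the ones that follow, all rewrite pseudo-temp operands into explicit %sp-relative memory operands via conv_opnd/3 (defined further down). A small added sketch with hypothetical numbers:

    %% Suppose context_offset(Context, T) =:= -8 for a pseudo temp T and
    %% the running stack adjustment FPoff =:= 12; then conv_opnd(T, 12,
    %% Context) builds the operand 4(%sp), i.e.
    %%   hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(4), hipe_x86:temp_type(T))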
- -do_fp_unop(I, Context, FPoff) -> - #fp_unop{arg=Arg0} = I, - Arg = conv_opnd(Arg0, FPoff, Context), - [I#fp_unop{arg=Arg}]. - -do_fp_binop(I, Context, FPoff) -> - #fp_binop{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - [I#fp_binop{src=Src,dst=Dst}]. - -do_fmove(I0, Context, FPoff) -> - #fmove{src=Src0,dst=Dst0} = I0, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I = I0#fmove{src=Src,dst=Dst}, - case Src =:= Dst of - true -> []; % omit move-to-self - false -> [I] - end. - -do_pseudo_spill_fmove(I0, Context, FPoff) -> - #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I0, - Src = conv_opnd(Src0, FPoff, Context), - Temp = conv_opnd(Temp0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - case Src =:= Dst of - true -> []; % omit move-to-self - false -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}] - end. - -do_imul(I, Context, FPoff) -> - #imul{src=Src0} = I, - Src = conv_opnd(Src0, FPoff, Context), - I#imul{src=Src}. - -do_move(I0, Context, FPoff) -> - #move{src=Src0,dst=Dst0} = I0, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I = I0#move{src=Src,dst=Dst}, - case Src =:= Dst of - true -> []; % omit move-to-self - false -> [I] - end. - -do_pseudo_spill_move(I0, Context, FPoff) -> - #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I0, - Src = conv_opnd(Src0, FPoff, Context), - Temp = conv_opnd(Temp0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - case Src =:= Dst of - true -> []; % omit move-to-self - false -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}] - end. - -do_movsx(I, Context, FPoff) -> - #movsx{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#movsx{src=Src,dst=Dst}. - -do_movzx(I, Context, FPoff) -> - #movzx{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#movzx{src=Src,dst=Dst}. - -do_push(I, Context, FPoff) -> - #push{src=Src0} = I, - Src = conv_opnd(Src0, FPoff, Context), - I#push{src=Src}. - -do_shift(I, Context, FPoff) -> - #shift{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#shift{src=Src,dst=Dst}. - -do_test(I, Context, FPoff) -> - #test{src=Src0,dst=Dst0} = I, - Src = conv_opnd(Src0, FPoff, Context), - Dst = conv_opnd(Dst0, FPoff, Context), - I#test{src=Src,dst=Dst}. - -conv_opnd(Opnd, FPoff, Context) -> - case opnd_is_pseudo(Opnd) of - false -> - Opnd; - true -> - conv_pseudo(Opnd, FPoff, Context) - end. - -conv_pseudo(Temp, FPoff, Context) -> - Off = FPoff + context_offset(Context, Temp), - conv_pseudo(Temp, Off). - -conv_pseudo(Temp, Off) -> - hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(Off), hipe_x86:temp_type(Temp)). - -%%% -%%% Return - deallocate frame and emit 'ret $N' insn. -%%% - -do_ret(_I, Context, FPoff) -> - %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made - adjust_sp(FPoff, [hipe_x86:mk_ret(word_size()*context_arity(Context))]). - -adjust_sp(N, Rest) -> - if N =:= 0 -> - Rest; - true -> - [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest] - end. - -%%% -%%% Recursive calls. 
-%%% - -do_pseudo_call(I, LiveOut, Context, FPoff0) -> - #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I), - Fun0 = hipe_x86:pseudo_call_fun(I), - Fun1 = conv_opnd(Fun0, FPoff0, Context), - LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], - SDesc = mk_sdesc(ExnLab, Context, LiveTemps), - ContLab = hipe_x86:pseudo_call_contlab(I), - Linkage = hipe_x86:pseudo_call_linkage(I), - CallCode = [hipe_x86:mk_pseudo_call(Fun1, SDesc, ContLab, Linkage)], - %% +word_size() for our RA and +word_size() for callee's RA should - %% it need to call inc_stack - StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()), - context_need_stack(Context, stack_need(FPoff0 + 2*word_size(), StkArity, Fun1)), - ArgsBytes = word_size() * StkArity, - {CallCode, FPoff0 - ArgsBytes}. - -stack_need(FPoff, StkArity, Fun) -> - case Fun of - #x86_prim{} -> FPoff; - #x86_mfa{m=M,f=F,a=A} -> - case erlang:is_builtin(M, F, A) of - true -> FPoff; - false -> stack_need_general(FPoff, StkArity) - end; - #x86_temp{} -> stack_need_general(FPoff, StkArity); - #x86_mem{} -> stack_need_general(FPoff, StkArity) - end. - -stack_need_general(FPoff, StkArity) -> - erlang:max(FPoff, FPoff + (?LEAF_WORDS - 2 - StkArity) * word_size()). - -%%% -%%% Create stack descriptors for call sites. -%%% - -mk_sdesc(ExnLab, Context, Temps) -> % for normal calls - Temps0 = only_tagged(Temps), - Live = mk_live(Context, Temps0), - Arity = context_arity(Context), - FSize = context_framesize(Context), - hipe_x86:mk_sdesc(ExnLab, FSize div word_size(), Arity, - list_to_tuple(Live)). - -only_tagged(Temps)-> - [X || X <- Temps, hipe_x86:temp_type(X) =:= 'tagged']. - -mk_live(Context, Temps) -> - lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). - -temp_to_slot(Context, Temp) -> - (context_framesize(Context) + context_offset(Context, Temp)) - div word_size(). - -mk_minimal_sdesc(Context) -> % for inc_stack_0 calls - hipe_x86:mk_sdesc([], 0, context_arity(Context), {}). - -%%% -%%% Tailcalls. -%%% - -do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) - Arity = context_arity(Context), - Args = hipe_x86:pseudo_tailcall_stkargs(I) ++ [context_ra(Context)], - Fun0 = hipe_x86:pseudo_tailcall_fun(I), - {Insns, FPoff1, Fun1} = do_tailcall_args(Args, Context, Fun0), - context_need_stack(Context, FPoff1), - FPoff2 = FPoff1 + word_size()+word_size()*Arity - word_size()*length(Args), - %% +word_size() for callee's inc_stack RA - StkArity = length(hipe_x86:pseudo_tailcall_stkargs(I)), - context_need_stack(Context, stack_need(FPoff2 + word_size(), StkArity, Fun1)), - I2 = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)), - Insns ++ adjust_sp(FPoff2, [I2]). 
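For the stack descriptors built by mk_sdesc/3 above, a worked example with hypothetical 32-bit values:

    %% word_size() =:= 4, context_framesize(Context) =:= 16, and one live
    %% 'tagged' pseudo temp T at context_offset(Context, T) =:= -8:
    %%   temp_to_slot(Context, T) = (16 + (-8)) div 4 = 2
    %% so the call site's sdesc records a 4-word frame with live slot {2}.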
- -do_tailcall_args(Args, Context, Fun0) -> - FPoff0 = context_framesize(Context), - Arity = context_arity(Context), - FrameTop = word_size() + word_size()*Arity, - DangerOff = FrameTop - word_size()*length(Args), - Moves = mk_moves(Args, FrameTop, []), - {Stores, Simple, Conflict} = - split_moves(Moves, Context, DangerOff, [], [], []), - %% sanity check (shouldn't trigger any more) - if DangerOff < -FPoff0 -> - exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); - true -> [] - end, - FPoff1 = FPoff0, - %% - {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FPoff1, [], []), - %% - {PushFun0, FPoff3, LoadFun1, Fun1} = - case opnd_is_pseudo(Fun0) of - false -> - {[], FPoff2, [], Fun0}; - true -> - Type = hipe_x86:temp_type(Fun0), - Temp1 = mk_temp1(Type), - Fun0Off = context_offset(Context, Fun0), - MEM0 = conv_pseudo(Fun0, FPoff2 + Fun0Off), - if Fun0Off >= DangerOff -> - Fun1Off = hipe_x86:mk_imm(0), - MEM1 = hipe_x86:mk_mem(mk_sp(), Fun1Off, Type), - {[hipe_x86:mk_push(MEM0)], - FPoff2 + word_size(), - [hipe_x86:mk_move(MEM1, Temp1)], - Temp1}; - true -> - {[], FPoff2, [hipe_x86:mk_move(MEM0, Temp1)], Temp1} - end - end, - %% - RegTemp0 = ?HIPE_X86_REGISTERS:temp0(), - TempReg = - case hipe_x86:is_temp(Fun1) of - true -> - RegFun1 = hipe_x86:temp_reg(Fun1), - if RegFun1 =/= RegTemp0 -> RegTemp0; - true -> ?HIPE_X86_REGISTERS:temp1() - end; - false -> - RegTemp0 - end, - %% - {Pushes ++ PushFun0 ++ - store_moves(Stores, FPoff3, LoadFun1 ++ - simple_moves(Simple, FPoff3, TempReg, - simple_moves(MoreSimple, FPoff3, TempReg, - []))), - FPoff3, Fun1}. - -mk_moves([Arg|Args], Off, Moves) -> - Off1 = Off - word_size(), - mk_moves(Args, Off1, [{Arg,Off1}|Moves]); -mk_moves([], _, Moves) -> - Moves. - -split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> - {Src,DstOff} = Move, - case src_is_pseudo(Src) of - false -> - split_moves(Moves, Context, DangerOff, [Move|Stores], - Simple, Conflict); - true -> - SrcOff = context_offset(Context, Src), - Type = typeof_src(Src), - if SrcOff =:= DstOff -> - split_moves(Moves, Context, DangerOff, Stores, - Simple, Conflict); - SrcOff >= DangerOff -> - split_moves(Moves, Context, DangerOff, Stores, - Simple, [{SrcOff,DstOff,Type}|Conflict]); - true -> - split_moves(Moves, Context, DangerOff, Stores, - [{SrcOff,DstOff,Type}|Simple], Conflict) - end - end; -split_moves([], _, _, Stores, Simple, Conflict) -> - {Stores, Simple, Conflict}. - -split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Simple) -> - Push = hipe_x86:mk_push( - hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type)), - split_conflict(Conflict, FPoff+word_size(), [Push|Pushes], - [{-(FPoff+word_size()),DstOff,Type}|Simple]); -split_conflict([], FPoff, Pushes, Simple) -> - {lists:reverse(Pushes), Simple, FPoff}. - -simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> - Temp = hipe_x86:mk_temp(TempReg, Type), - SP = mk_sp(), - LoadOff = hipe_x86:mk_imm(FPoff+SrcOff), - LD = hipe_x86:mk_move(hipe_x86:mk_mem(SP, LoadOff, Type), Temp), - StoreOff = hipe_x86:mk_imm(FPoff+DstOff), - ST = hipe_x86:mk_move(Temp, hipe_x86:mk_mem(SP, StoreOff, Type)), - simple_moves(Moves, FPoff, TempReg, [LD, ST | Rest]); -simple_moves([], _, _, Rest) -> - Rest. - -store_moves([{Src,DstOff}|Moves], FPoff, Rest) -> - Type = typeof_src(Src), - SP = mk_sp(), - StoreOff = hipe_x86:mk_imm(FPoff+DstOff), - ST = hipe_x86:mk_move(Src, hipe_x86:mk_mem(SP, StoreOff, Type)), - store_moves(Moves, FPoff, [ST | Rest]); -store_moves([], _, Rest) -> - Rest. 
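A compact added example of the argument-shuffling pipeline above, assuming a 32-bit build where word_size() is 4:

    %% With FrameTop = 12, two stack arguments get descending offsets:
    [{a2,4},{a1,8}] = mk_moves([a1,a2], 12, []),
    %% split_moves/6 then classifies each {Src,DstOff} pair as a plain
    %% store (non-pseudo source), a simple frame-to-frame move, or a
    %% conflicting move that split_conflict/4 routes through a push.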
- -%%% -%%% Contexts -%%% - --record(context, {liveness, framesize, arity, map, ra, ref_maxstack}). - -mk_context(Liveness, Formals, Temps) -> - RA = hipe_x86:mk_new_temp('untagged'), - {Map, MinOff} = mk_temp_map(Formals, RA, Temps), - FrameSize = (-MinOff), - RefMaxStack = hipe_bifs:ref(FrameSize), - Context = #context{liveness=Liveness, - framesize=FrameSize, arity=length(Formals), - map=Map, ra=RA, ref_maxstack=RefMaxStack}, - Context. - -context_need_stack(#context{ref_maxstack=RM}, N) -> - M = hipe_bifs:ref_get(RM), - if N > M -> hipe_bifs:ref_set(RM, N); - true -> [] - end. - -context_maxstack(#context{ref_maxstack=RM}) -> - hipe_bifs:ref_get(RM). - -context_arity(#context{arity=Arity}) -> - Arity. - -context_framesize(#context{framesize=FrameSize}) -> - FrameSize. - -context_liveness(#context{liveness=Liveness}) -> - Liveness. - -context_offset(#context{map=Map}, Temp) -> - tmap_lookup(Map, Temp). - -context_ra(#context{ra=RA}) -> - RA. - -mk_temp_map(Formals, RA, Temps) -> - {Map, _} = enter_vars(Formals, word_size() * (length(Formals)+1), - tmap_bind(tmap_empty(), RA, 0)), - enter_vars(tset_to_list(Temps), 0, Map). - -enter_vars([V|Vs], PrevOff, Map) -> - Off = - case hipe_x86:temp_type(V) of - 'double' -> PrevOff - float_size(); - _ -> PrevOff - word_size() - end, - enter_vars(Vs, Off, tmap_bind(Map, V, Off)); -enter_vars([], Off, Map) -> - {Map, Off}. - -tmap_empty() -> - gb_trees:empty(). - -tmap_bind(Map, Key, Val) -> - gb_trees:insert(Key, Val, Map). - -tmap_lookup(Map, Key) -> - gb_trees:get(Key, Map). - -%%% -%%% do_prologue: prepend stack frame allocation code. -%%% -%%% NewStart: -%%% temp0 = sp - MaxStack -%%% if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame -%%% AllocFrame: -%%% sp -= FrameSize -%%% goto OldStart -%%% OldStart: -%%% ... -%%% IncStack: -%%% call inc_stack -%%% goto NewStart - -do_prologue(CFG, Context) -> - do_check_stack(do_alloc_frame(CFG, Context), Context). - -do_alloc_frame(CFG, Context) -> - case context_framesize(Context) of - 0 -> - CFG; - FrameSize -> - OldStartLab = hipe_x86_cfg:start_label(CFG), - AllocFrameLab = hipe_gensym:get_next_label(x86), - SP = mk_sp(), - AllocFrameCode = - [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), SP), - hipe_x86:mk_jmp_label(OldStartLab)], - CFG1 = hipe_x86_cfg:bb_add(CFG, AllocFrameLab, - hipe_bb:mk_bb(AllocFrameCode)), - hipe_x86_cfg:start_label_update(CFG1, AllocFrameLab) - end. 
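To make the frame layout concrete, an added example with hypothetical 32-bit values (word_size() = 4, float_size() = 8) for the temp map built by mk_temp_map/3 above:

    %% Two formals F1, F2, the return-address temp RA, then (in whatever
    %% order tset_to_list/1 happens to yield) an 'untagged' temp T and a
    %% 'double' temp D:
    %%   F1 -> +8,  F2 -> +4,  RA -> 0,  T -> -4,  D -> -12
    %% giving MinOff = -12 and a framesize of 12 bytes (3 words).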
- -do_check_stack(CFG, Context) -> - MaxStack = context_maxstack(Context), - Arity = context_arity(Context), - Guaranteed = erlang:max(0, (?LEAF_WORDS - 1 - Arity) * word_size()), - if MaxStack =< Guaranteed -> - %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), - CFG; - true -> - %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), - AllocFrameLab = hipe_x86_cfg:start_label(CFG), - NewStartLab = hipe_gensym:get_next_label(x86), - IncStackLab = hipe_gensym:get_next_label(x86), - %% - Type = 'untagged', - Preg = ?HIPE_X86_REGISTERS:proc_pointer(), - Pbase = hipe_x86:mk_temp(Preg, Type), - SP_LIMIT_OFF = hipe_x86:mk_imm( - ?HIPE_X86_REGISTERS:sp_limit_offset()), - Temp0 = mk_temp0(Type), - SP = mk_sp(), - NewStartCode = - %% hopefully this lea is faster than the mov;sub it replaced - [hipe_x86:mk_lea( - hipe_x86:mk_mem(SP, hipe_x86:mk_imm(-MaxStack), 'untagged'), - Temp0), - hipe_x86:mk_cmp( - hipe_x86:mk_mem(Pbase, SP_LIMIT_OFF, Type), Temp0), - hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)], - IncStackCode = - [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'), - mk_minimal_sdesc(Context), not_remote), - hipe_x86:mk_jmp_label(NewStartLab)], - %% - CFG1 = hipe_x86_cfg:bb_add(CFG, NewStartLab, - hipe_bb:mk_bb(NewStartCode)), - CFG2 = hipe_x86_cfg:bb_add(CFG1, IncStackLab, - hipe_bb:mk_bb(IncStackCode)), - hipe_x86_cfg:start_label_update(CFG2, NewStartLab) - end. - -%%% typeof_src -- what's src's type? - -typeof_src(Src) -> - case Src of - #x86_imm{} -> - 'untagged'; - #x86_temp{} -> - hipe_x86:temp_type(Src); - #x86_mem{} -> - hipe_x86:mem_type(Src) - end. - -%%% Cons up an '%sp' Temp. - -mk_sp() -> - hipe_x86:mk_temp(?HIPE_X86_REGISTERS:sp(), 'untagged'). - -%%% Cons up a '%temp0' Temp. - -mk_temp0(Type) -> - hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). - -%%% Cons up a '%temp1' Temp. - -mk_temp1(Type) -> - hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). - -%%% Check if an operand is a pseudo-Temp. - -src_is_pseudo(Src) -> - opnd_is_pseudo(Src). - -opnd_is_pseudo(Opnd) -> - case hipe_x86:is_temp(Opnd) of - true -> temp_is_pseudo(Opnd); - false -> false - end. - -temp_is_pseudo(Temp) -> - case hipe_x86:is_temp(Temp) of - true -> - not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); - false -> - false - end. - - -%%% -%%% Build the set of all temps used in a Defun's body. -%%% - -all_temps(CFG, Formals) -> - S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), - S1 = tset_del_list(S0, Formals), - S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), - S2. - -find_temps(I, S0) -> - S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - tset_add_list(S1, hipe_x86_defuse:insn_use(I)). - -fold_insns(Fun, InitAcc, CFG) -> - hipe_x86_cfg:fold_bbs( - fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, - InitAcc, CFG). - --compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, - tset_filter/2, tset_to_list/1]}). - -tset_empty() -> - #{}. - -tset_size(S) -> - map_size(S). - -tset_insert(S, T) -> - S#{T => []}. - -tset_add_list(S, []) -> S; -tset_add_list(S, [T|Ts]) -> - tset_add_list(S#{T => []}, Ts). - -tset_del_list(S, []) -> S; -tset_del_list(S, [T|Ts]) -> - tset_del_list(maps:remove(T,S), Ts). - -tset_filter(S, F) -> - maps:filter(fun(K, _V) -> F(K) end, S). - -tset_to_list(S) -> - maps:keys(S). - -%%% -%%% Compute minimum permissible frame size, ignoring spilled temps. 
-%%% This is done to ensure that we won't have to adjust the frame size -%%% in the middle of a tailcall. -%%% - -defun_minframe(CFG) -> - MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), - MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), - erlang:max(MaxTailArity - MyArity, 0). - -insn_mta(I, MTA) -> - case I of - #pseudo_tailcall{arity=Arity} -> - erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args()); - _ -> MTA - end. - -%%% -%%% Ensure that we have enough temps to satisfy the minimum frame size, -%%% if necessary by prepending unused dummy temps. -%%% - -ensure_minframe(MinFrame, Temps) -> - ensure_minframe(MinFrame, tset_size(Temps), Temps). - -ensure_minframe(MinFrame, Frame, Temps) -> - if MinFrame > Frame -> - Temp = hipe_x86:mk_new_temp('untagged'), - ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); - true -> Temps - end. - -word_size() -> - ?HIPE_X86_REGISTERS:wordsize(). - -float_size() -> - ?HIPE_X86_REGISTERS:float_size(). diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl deleted file mode 100644 index 470501b46d..0000000000 --- a/lib/hipe/x86/hipe_x86_liveness.erl +++ /dev/null @@ -1,52 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% x86_liveness -- compute register liveness for x86 CFGs - --ifdef(HIPE_AMD64). --define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_DEFUSE, hipe_amd64_defuse). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --else. --define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_DEFUSE, hipe_x86_defuse). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --endif. - --module(?HIPE_X86_LIVENESS). - --export([analyse/1]). --export([liveout/2]). --export([uses/1, defines/1]). % used in hipe_*_spill_restore modules - --include("../x86/hipe_x86.hrl"). % ../x86/ is needed when included in amd64 --include("../flow/liveness.inc"). - -analyse(CFG) -> analyze(CFG). -cfg_bb(CFG, L) -> hipe_x86_cfg:bb(CFG, L). -cfg_postorder(CFG) -> hipe_x86_cfg:postorder(CFG). -cfg_succ(CFG, L) -> hipe_x86_cfg:succ(CFG, L). -uses(Insn) -> ?HIPE_X86_DEFUSE:insn_use(Insn). -defines(Insn) -> ?HIPE_X86_DEFUSE:insn_def(Insn). -liveout_no_succ() -> - ordsets:from_list(lists:map(fun({Reg,Type}) -> - hipe_x86:mk_temp(Reg, Type) - end, - ?HIPE_X86_REGISTERS:live_at_return())). - --ifdef(DEBUG_LIVENESS). -cfg_labels(CFG) -> hipe_x86_cfg:labels(CFG). -cfg_bb_add(CFG,L,NewBB) -> hipe_x86_cfg:bb_add(CFG,L,NewBB). -mk_comment(Text) -> hipe_x86:mk_comment(Text). --endif. diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl deleted file mode 100644 index 7e9fd10e62..0000000000 --- a/lib/hipe/x86/hipe_x86_main.erl +++ /dev/null @@ -1,68 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - --ifdef(HIPE_AMD64). --define(HIPE_X86_MAIN, hipe_amd64_main). --define(RTL_TO_X86, rtl_to_amd64). % XXX: kill this crap --define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). --define(HIPE_X86_RA, hipe_amd64_ra). --define(HIPE_X86_FRAME, hipe_amd64_frame). --define(HIPE_X86_PP, hipe_amd64_pp). --define(X86TAG, amd64). % XXX: kill this crap --define(X86STR, "amd64"). --define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). --else. --define(HIPE_X86_MAIN, hipe_x86_main). --define(RTL_TO_X86, rtl_to_x86). % XXX: kill this crap --define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). --define(HIPE_X86_RA, hipe_x86_ra). --define(HIPE_X86_FRAME, hipe_x86_frame). --define(HIPE_X86_PP, hipe_x86_pp). --define(X86TAG, x86). % XXX: kill this crap --define(X86STR, "x86"). --define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). --endif. - --module(?HIPE_X86_MAIN). --export([?RTL_TO_X86/3]). % XXX: change to 'from_rtl' to avoid $ARCH substring - --ifndef(DEBUG). --define(DEBUG,1). --endif. --define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. --include("../main/hipe.hrl"). - -?RTL_TO_X86(MFA, RTL, Options) -> - Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), - "RTL-to-"?X86STR, Options), - TransCFG = ?option_time(hipe_x86_cfg:init(Translated), - ?X86STR" to cfg", Options), - SpillRestCFG = - case proplists:get_bool(caller_save_spill_restore, Options) of - true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), - ?X86STR" spill restore", Options); - false -> - TransCFG - end, - AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), - ?X86STR" register allocation", Options), - FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), - ?X86STR" frame", Options), - Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), - ?X86STR" linearise", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), - ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), - {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl deleted file mode 100644 index 925054dd68..0000000000 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ /dev/null @@ -1,285 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%%---------------------------------------------------------------------- -%%% File : hipe_x86_postpass.erl -%%% Author : Christoffer Vikström <chvi3471@student.uu.se> -%%% Purpose : Contain postpass optimisations for x86-assembler code. 
-%%% Created : 5 Aug 2003 by Christoffer Vikström <chvi3471@student.uu.se> -%%%---------------------------------------------------------------------- - --ifndef(HIPE_X86_POSTPASS). --define(HIPE_X86_POSTPASS, hipe_x86_postpass). --endif. - --module(?HIPE_X86_POSTPASS). --export([postpass/2]). --include("../x86/hipe_x86.hrl"). - -%%>----------------------------------------------------------------------< -% Procedure : postpass/2 -% Purpose : Function that performs a nr of postpass optimizations on -% the hipe x86-assembler code before it is encoded and loaded. -%%>----------------------------------------------------------------------< -postpass(#defun{code=Code0}=Defun, Options) -> - Code1 = pseudo_insn_expansion(Code0), - Code2 = case proplists:get_bool(peephole, Options) of - true -> peephole_optimization(Code1); - false -> Code1 - end, - Code3 = trivial_goto_elimination(Code2), - Defun#defun{code=Code3}. - - -%%>----------------------------------------------------------------------< -% Procedure : peep/1 -% Purpose : Function that does peephole optimizations. It works by -% moving a window over the code and looking at a sequence of -% a few instructions. Replaces long sequences of instructions -% with shorter ones and removes unnecesary ones. -% Arguments : Insns - List of pseudo x86-assembler records. -% Res - Returned list of pseudo x86-assembler records. -% Kept reversed, until it is returned. -% Return : An optimized list of pseudo x86-assembler records with -% (hopefully) fewer or faster instructions. -%%>----------------------------------------------------------------------< -peephole_optimization(Insns) -> - peep(Insns, [], []). - - -%% MoveSelf related peep-opts -%% ------------------------------ -peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> - peep(Insns, Res, [moveSelf1|Lst]); -peep([I=#fmove{src=Src, dst=Dst}, - #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> - peep(Insns, [I|Res], [moveSelf2|Lst]); -peep([#movsx{src=Src, dst=Src} | Insns], Res,Lst) -> - peep(Insns, Res, [moveSelf3|Lst]); -peep([I=#movsx{src=Src, dst=Dst}, - #movsx{src=Dst, dst=Src} | Insns], Res,Lst) -> - peep(Insns, [I|Res], [moveSelf4|Lst]); -peep([#movzx{src=Src, dst=Src} | Insns], Res,Lst) -> - peep(Insns, Res, [moveSelf5|Lst]); -peep([I=#movzx{src=Src, dst=Dst}, - #movzx{src=Dst, dst=Src} | Insns], Res,Lst) -> - peep(Insns, [I|Res], [moveSelf6|Lst]); -peep([#cmovcc{src=Src, dst=Src} | Insns], Res,Lst) -> - peep(Insns, Res, [moveSelf7|Lst]); -peep([I=#cmovcc{src=Src, dst=Dst}, - #cmovcc{src=Dst, dst=Src}|Insns], Res,Lst) -> - peep(Insns, [I|Res], [moveSelf8|Lst]); -peep([#move{src=#x86_temp{reg=X}, - dst=#x86_temp{reg=X}} | Insns], Res,Lst) -> - peep(Insns, Res, [moveSelf9|Lst]); -peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, - #move{src=#x86_temp{reg=Dst}, dst=#x86_temp{reg=Src}} | Insns], Res,Lst) -> - peep(Insns, [I|Res], [moveSelf0|Lst]); - - -%% ElimBinALMDouble -%% ---------------- -peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) - when not is_record(Dst, x86_mem) -> - peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]); - - -%% ElimFBinDouble -%% -------------- -peep([Move=#fmove{src=Src, dst=Dst}, - BinOp=#fp_binop{src=Src, dst=Dst}|Insns], Res, Lst) -> - peep([BinOp#fp_binop{src=Dst}|Insns], [Move|Res], [elimFBinDouble|Lst]); - - -%% CommuteBinALMD -%% -------------- -peep([#move{src=Src1, dst=Dst}, - #alu{aluop=Op,src=Src2,dst=Dst}|Insns], Res, Lst) - when (Src1 =:= #x86_imm{}) and (Src2 =/= #x86_imm{}) and - ((Op =:= 'add') 
or (Op =:= 'and') or (Op =:= 'or') or (Op =:= 'xor')) -> - peep(Insns, [#alu{aluop=Op,src=Src1,dst=Dst}, - #move{src=Src2, dst=Dst}|Res], - [commuteBinALMD|Lst]); - - -%% ElimCmp0 -%% -------- -peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> - %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in - %% this case to 0). However, since HiPE does not use any instructions that - %% read the adjust flag, we can do this transform safely. - peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]); -peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}}, - J=#jcc{cc=Cond}|Insns],Res,Lst) - when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison - peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]); - -%% ElimCmpTest -%% ----------- -peep([I|Insns],Res,Lst) when (I =:= #cmp{}) or (I =:= #test{}) -> - case check(Insns) of - #jcc{} -> - peep(Insns, [I|Res], Lst); - #jmp_fun{} -> - peep(Insns, [I|Res], Lst); - #jmp_label{} -> - peep(Insns, [I|Res], Lst); - #jmp_switch{} -> - peep(Insns, [I|Res], Lst); - #cmovcc{} -> - peep(Insns, [I|Res], Lst); - #ret{} -> - peep(Insns, [I|Res], Lst); - _ -> - peep(Insns, Res, [elimCmpTest|Lst]) - end; - - -%% ElimPushPop -%% ----------- -peep([#push{src=Opr}, #pop{dst=Opr} | Insns], Res, Lst) -> - peep(Insns, Res, [elimPushPop|Lst]); - - -% %% ElimIFF -% %% ------- -peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> - peep(Insns, [I, #jmp_label{label=Lab}|Res], [elimIFF|Lst]); - - -%% ElimSet0 -%% -------- -peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> - peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); - -%% ElimMDPow2 -%% ---------- -peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> - {IsLog2, Size, Sign} = log2(Val), - case ((Op =:= imul) or (Op =:= idiv)) and IsLog2 of - true -> - Sh = case Sign of positive -> 'bsl'; negative -> 'bsr' end, - peep(Insns, - [#shift{shiftop=Sh, src=#x86_imm{value=Size}, dst=Dst}|Res], - [elimMDPow2|Lst]); - false -> - peep(Insns, [B|Res], Lst) - end; - -%% LeaToAdd -%% This rule transforms lea into add when the destination is the same as one of -%% the operands. Sound because lea is never used where the condition codes are -%% live (and would be clobbered by add). -%% ---------- -peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src}, - temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> - peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); -peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}}, - temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> - peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); - -%% SubToDec -%% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which -%% changes reduction counter tests to use decl instead of subl. -%% However, on Athlon64 this leads to a small but measurable decrease -%% in performance. The use of dec is also not recommended on P4, so -%% this transformation is disabled. -%% peep([#alu{aluop='sub',src=#x86_imm{value=1},dst=Dst},J=#jcc{cc='l'}|Insns], Res, Lst) -> -%% peep(Insns, [J, #dec{dst=Dst} | Res], [subToDec|Lst]); - -%% Standard list recursion clause -%% ------------------------------ -peep([I | Insns], Res, Lst) -> - peep(Insns, [I|Res], Lst); -peep([], Res, _Lst) -> - lists:reverse(Res). - -%% Simple goto elimination -%% ----------------------- -trivial_goto_elimination(Insns) -> goto_elim(Insns, []). 
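
The ElimMDPow2 rule above keys on the log2/1 helper (defined further down in this module) to recognise immediates that are an exact power of two and rewrite the multiply or divide as a shift. A standalone restatement of that helper, runnable outside HiPE, is sketched below; the module name pow2_demo is illustrative only.

    -module(pow2_demo).
    -export([log2/1]).

    %% {true, N, positive} when Nr =:= 1 bsl N,
    %% {true, N, negative} when Nr =:= -(1 bsl N),
    %% {false, 0, positive} otherwise.
    log2(Nr) -> log2(Nr, 0).

    log2(0, _) -> {false, 0, positive};
    log2(Nr, I) ->
      case Nr band 1 of
        1 ->
          case Nr of
            1  -> {true, I, positive};
            -1 -> {true, I, negative};
            _  -> {false, 0, positive}
          end;
        0 -> log2(Nr bsr 1, I+1)
      end.

For example, pow2_demo:log2(8) returns {true,3,positive}, so an imul by 8 is rewritten by the rule above as a bsl by 3.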
- -goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> - goto_elim([I|Insns], Res); -goto_elim([#jcc{cc=CC, label=Label} = IJCC, - #jmp_label{label=BranchTgt}, - #label{label=Label} = ILBL|Insns], Res) -> - goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt}, - ILBL|Insns], Res); -goto_elim([I | Insns], Res) -> - goto_elim(Insns, [I|Res]); -goto_elim([], Res) -> - lists:reverse(Res). - - -%%>----------------------------------------------------------------------< -%% Procedure : expand/1 -%% Purpose : Expands pseudo instructions. -%% Arguments : Insns - An x86-instruction list. -%% Return : An expanded instruction list. -%% Notes : -%%>----------------------------------------------------------------------< -pseudo_insn_expansion(Insns) -> expand(Insns, []). -expand([I|Tail], Res) -> - case I of - #pseudo_jcc{cc=Cc,true_label=TrueLab,false_label=FalseLab} -> - expand(Tail, [hipe_x86:mk_jmp_label(FalseLab), - hipe_x86:mk_jcc(Cc, TrueLab) | Res]); - #pseudo_tailcall_prepare{} -> - expand(Tail, Res); - #pseudo_call{'fun'=Fun,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> - expand(Tail, [hipe_x86:mk_jmp_label(ContLab), - hipe_x86:mk_call(Fun, SDesc, Linkage) | Res]); - _ -> - expand(Tail, [I|Res]) - end; -expand([], Res) -> lists:reverse(Res). - -%% Log2 function -%% ------------- -%% Used by ElimMDPow2 clause of peep(..) -log2(Nr) -> log2(Nr, 0). -log2(0, _) -> {false, 0, positive}; -log2(Nr, I) -> - case (Nr band 1) =:= 1 of - true -> - case Nr of - 1 -> - {true, I, positive}; - -1 -> - {true, I, negative}; - _ -> - {false, 0, positive} - end; - false -> - log2((Nr bsr 1), I+1) - end. - -%% Skips through all comments and move instructions and returns the next one -%% ------------------------------------------------------------------------- -%% Used by ElimCmpTest above. -check([I|Ins]) -> - case I of - #comment{} -> - check(Ins); - #move{} -> - check(Ins); - #fmove{} -> - check(Ins); - #movsx{} -> - check(Ins); - #movzx{} -> - check(Ins); - OtherI -> - OtherI - end. diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl deleted file mode 100644 index 72d2fa80bf..0000000000 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ /dev/null @@ -1,351 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% x86 pretty-printer - --ifdef(HIPE_AMD64). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --else. --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --endif. - --module(?HIPE_X86_PP). --export([% pp/1, pp/2, - pp_insn/1, optional_pp/3]). --include("../x86/hipe_x86.hrl"). 
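
The optional_pp/3 function that opens this module dispatches on the pp_native compiler option; the accepted shapes can be read off the clause heads that follow (true, {only, MFAs}, {file, Name}). Assuming the usual HiPE entry point hipe:c/2 and the pp_native option name from mainline OTP (both are assumptions, not shown in this diff), typical invocations would look like:

    %% in an Erlang shell, with HiPE available:
    1> hipe:c({lists, reverse, 1}, [pp_native]).
    2> hipe:c({lists, reverse, 1}, [{pp_native, {file, "reverse.s"}}]).
    3> hipe:c({lists, reverse, 1}, [{pp_native, {only, [{lists, reverse, 1}]}}]).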
- -optional_pp(Defun, MFA, Options) -> - case proplists:get_value(pp_native, Options) of - true -> - pp(Defun); - {only,Lst} when is_list(Lst) -> - case lists:member(MFA, Lst) of - true -> pp(Defun); - false -> ok - end; - {only,MFA} -> - pp(Defun); - {file,FileName} -> - {ok, File} = file:open(FileName, [write,append]), - pp(File, Defun), - ok = file:close(File); - _ -> - ok - end. - -pp(Defun) -> - pp(standard_io, Defun). - -pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> - Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), - io:format(Dev, "\t.text\n", []), - io:format(Dev, "\t.align 4\n", []), - io:format(Dev, "\t.global ~s\n", [Fname]), - io:format(Dev, "~s:\n", [Fname]), - pp_insns(Dev, Code, Fname), - io:format(Dev, "\t.rodata\n", []), - io:format(Dev, "\t.align 4\n", []), - hipe_data_pp:pp(Dev, Data, x86, Fname), - io:format(Dev, "\n", []). - -pp_insns(Dev, [I|Is], Fname) -> - pp_insn(Dev, I, Fname), - pp_insns(Dev, Is, Fname); -pp_insns(_, [], _) -> - ok. - -pp_insn(I) -> - pp_insn(standard_io, I, ""). - -pp_insn(Dev, I, Pre) -> - case I of - #alu{aluop=AluOp, src=Src, dst=Dst} -> - io:format(Dev, "\t~s ", [alu_op_name(AluOp)]), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> - io:format(Dev, "\tcall ", []), - pp_fun(Dev, Fun), - io:format(Dev, " #", []), - pp_sdesc(Dev, Pre, SDesc), - io:format(Dev, " ~w\n", [Linkage]); - #cmovcc{cc=Cc, src=Src, dst=Dst} -> - io:format(Dev, "\tcmov~s ", [cc_name(Cc)]), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #cmp{src=Src, dst=Dst} -> - io:format(Dev, "\tcmp ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #comment{term=Term} -> - io:format(Dev, "\t# ~p\n", [Term]); - #imul{imm_opt=ImmOpt, src=Src, temp=Temp} -> - io:format(Dev, "\timul ", []), - case ImmOpt of - [] -> ok; - Imm -> - pp_imm(Dev, Imm, true), - io:format(Dev, ", ", []) - end, - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_temp(Dev, Temp), - io:format(Dev, "\n", []); - #jcc{cc=Cc, label=Label} -> - io:format(Dev, "\tj~s .~s_~w\n", [cc_name(Cc), Pre, Label]); - #jmp_fun{'fun'=Fun, linkage=Linkage} -> - io:format(Dev, "\tjmp ", []), - pp_fun(Dev, Fun), - io:format(Dev, " ~w\n", [Linkage]); - #jmp_label{label=Label} -> - io:format(Dev, "\tjmp .~s_~w\n", [Pre, Label]); - #jmp_switch{temp=Temp, jtab=JTab, labels=Labels} -> - io:format(Dev, "\tjmp *{constant,~w}(,", [JTab]), - pp_temp(Dev, Temp), - io:format(Dev, ",4) #", []), - pp_labels(Dev, Labels, Pre), - io:format(Dev, "\n", []); - #label{label=Label} -> - io:format(Dev, ".~s_~w:~n", [Pre, Label]); - #lea{mem=Mem, temp=Temp} -> - io:format(Dev, "\tlea ", []), - pp_mem(Dev, Mem), - io:format(Dev, ", ", []), - pp_temp(Dev, Temp), - io:format(Dev, "\n", []); - #move{src=Src, dst=Dst} -> - io:format(Dev, "\tmov ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #move64{} -> - pp_move64(Dev, I); - #movsx{src=Src, dst=Dst} -> - io:format(Dev, "\tmovsx ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #movzx{src=Src, dst=Dst} -> - io:format(Dev, "\tmovzx ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> - io:format(Dev, "\tpseudo_call ", []), - pp_fun(Dev, Fun), 
- io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), - pp_sdesc(Dev, Pre, SDesc), - io:format(Dev, " ~w\n", [Linkage]); - #pseudo_jcc{cc=Cc, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> - io:format(Dev, "\tpseudo_j~s ", [cc_name(Cc)]), - io:format(Dev, ".~s_~w # .~s_~w ~.2f\n", - [Pre, TrueLab, Pre, FalseLab, Pred]); - #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> - io:format(Dev, "\tpseudo_tailcall ", []), - pp_fun(Dev, Fun), - io:format(Dev, " ~w (", [Arity]), - pp_args(Dev, StkArgs), - io:format(Dev, ") ~w\n", [Linkage]); - #pseudo_tailcall_prepare{} -> - io:format(Dev, "\tpseudo_tailcall_prepare\n", []); - #push{src=Src} -> - io:format(Dev, "\tpush ", []), - pp_src(Dev, Src), - io:format(Dev, "\n", []); - #ret{npop=NPop} -> - io:format(Dev, "\tret $~s\n", [to_hex(NPop)]); - #shift{shiftop=ShiftOp, src=Src, dst=Dst} -> - io:format(Dev, "\t~s ", [alu_op_name(ShiftOp)]), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #test{src=Src, dst=Dst} -> - io:format(Dev, "\ttest ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - #fp_binop{src=Src, dst=Dst, op=Op} -> - io:format(Dev, "\t~s ", [Op]), - pp_dst(Dev, Dst), - io:format(Dev, ", ", []), - pp_src(Dev, Src), - io:format(Dev, "\n", []); - #fp_unop{arg=Arg, op=Op} -> - io:format(Dev, "\t~s ", [Op]), - case Arg of - []-> - io:format(Dev, "\n", []); - _ -> - pp_args(Dev, [Arg]), - io:format(Dev, "\n", []) - end; - #fmove{src=Src, dst=Dst} -> - io:format(Dev, "\tfmove ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []); - _ -> - exit({?MODULE, pp_insn, {"unknown x86 instruction", I}}) - end. - --ifdef(HIPE_AMD64). -pp_move64(Dev, I) -> - #move64{imm=Src, dst=Dst} = I, - io:format(Dev, "\tmov64 ", []), - pp_src(Dev, Src), - io:format(Dev, ", ", []), - pp_dst(Dev, Dst), - io:format(Dev, "\n", []). --else. -pp_move64(_Dev, I) -> exit({?MODULE, I}). --endif. - -to_hex(N) -> - io_lib:format("~.16x", [N, "0x"]). - -pp_sdesc(Dev, Pre, #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> - pp_sdesc_exnlab(Dev, Pre, ExnLab), - io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), - pp_sdesc_live(Dev, Live), - io:format(Dev, "]", []). - -pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); -pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). - -pp_sdesc_live(_, {}) -> ok; -pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). - -pp_sdesc_live(Dev, Live, I) -> - io:format(Dev, "~s", [to_hex(element(I, Live))]), - if I < tuple_size(Live) -> - io:format(Dev, ",", []), - pp_sdesc_live(Dev, Live, I+1); - true -> ok - end. - -pp_labels(Dev, [Label|Labels], Pre) -> - io:format(Dev, " .~s_~w", [Pre, Label]), - pp_labels(Dev, Labels, Pre); -pp_labels(_, [], _) -> - ok. - -pp_fun(Dev, Fun) -> - case Fun of - #x86_mfa{m=M, f=F, a=A} -> - io:format(Dev, "~w:~w/~w", [M, F, A]); - #x86_prim{prim=Prim} -> - io:format(Dev, "~w", [Prim]); - _ -> % temp or mem - io:format(Dev, "*", []), - pp_dst(Dev, Fun) - end. - -alu_op_name(Op) -> Op. - -cc_name(Cc) -> Cc. - -pp_hard_reg(Dev, Reg) -> - io:format(Dev, "~s", [?HIPE_X86_REGISTERS:reg_name(Reg)]). - -type_tag('tagged') -> "t"; -type_tag('untagged') -> "u"; -type_tag('double') -> "d". 
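
type_tag/1 above, together with pp_temp/2 below, fixes the naming scheme used in the pretty-printed listings: a non-precoloured temp prints as a one-letter type tag followed by its number, while a precoloured integer register prints under its hardware name via reg_name/1. A standalone sketch of the same convention (pp_demo is an illustrative module name):

    -module(pp_demo).
    -export([temp_name/2]).

    %% Mirrors type_tag/1: "t" = tagged, "u" = untagged, "d" = double.
    temp_name(Reg, tagged)   -> "t" ++ integer_to_list(Reg);
    temp_name(Reg, untagged) -> "u" ++ integer_to_list(Reg);
    temp_name(Reg, double)   -> "d" ++ integer_to_list(Reg).

For example, temp_name(42, tagged) yields "t42", whereas precoloured register 0 would instead be printed as "%eax".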
- -pp_temp(Dev, #x86_temp{reg=Reg, type=Type}) -> - case Type of - double -> - Tag = type_tag(Type), - io:format(Dev, "~s~w", [Tag, Reg]); - _ -> - case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of - true -> - pp_hard_reg(Dev, Reg); - false -> - Tag = type_tag(Type), - io:format(Dev, "~s~w", [Tag, Reg]) - end - end. - -pp_fpreg(Dev, #x86_fpreg{reg=Reg, pseudo=Pseudo})-> - case Pseudo of - true -> io:format(Dev, "pseudo_fp(~w)", [Reg]); - _ -> io:format(Dev, "st(~w)", [Reg]) - end. - -pp_imm(Dev, #x86_imm{value=Value}, Dollar) -> - if Dollar =:= true -> io:format(Dev, [$$], []); - true -> ok - end, - if is_integer(Value) -> io:format(Dev, "~s", [to_hex(Value)]); - true -> io:format(Dev, "~w", [Value]) - end. - -pp_mem(Dev, #x86_mem{base=Base, off=Off}) -> - pp_off(Dev, Off), - case Base of - [] -> - ok; - _ -> - io:format(Dev, "(", []), - pp_temp(Dev, Base), - io:format(Dev, ")", []) - end. - -pp_off(Dev, Off) -> - pp_src(Dev, Off, false). - -pp_src(Dev, Src) -> - pp_src(Dev, Src, true). - -pp_src(Dev, Src, Dollar) -> - case Src of - #x86_temp{} -> - pp_temp(Dev, Src); - #x86_imm{} -> - pp_imm(Dev, Src, Dollar); - #x86_mem{} -> - pp_mem(Dev, Src); - #x86_fpreg{} -> - pp_fpreg(Dev, Src) - end. - -pp_dst(Dev, Dst) -> - pp_src(Dev, Dst). - -pp_args(Dev, [A|As]) -> - pp_src(Dev, A), - pp_comma_args(Dev, As); -pp_args(_, []) -> - ok. - -pp_comma_args(Dev, [A|As]) -> - io:format(Dev, ", ", []), - pp_src(Dev, A), - pp_comma_args(Dev, As); -pp_comma_args(_, []) -> - ok. diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl deleted file mode 100644 index f358306d49..0000000000 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ /dev/null @@ -1,116 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA, hipe_amd64_ra). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). --define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). --else. --define(HIPE_X86_RA, hipe_x86_ra). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). --define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). --endif. - --module(?HIPE_X86_RA). --export([ra/2]). - -%%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. --include("../main/hipe.hrl"). - --ifdef(HIPE_INSTRUMENT_COMPILER). -code_size(CFG) -> - hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, - 0, CFG). --endif. 
%% ifdef(HIPE_INSTRUMENT_COMPILER) - -ra(CFG0, Options) -> - %% hipe_x86_cfg:pp(CFG0), - Liveness0 = ?HIPE_X86_SPECIFIC:analyze(CFG0, no_context), - {CFG1, Liveness, Coloring_fp, SpillIndex} = ra_fp(CFG0, Liveness0, Options), - %% hipe_x86_cfg:pp(CFG1), - ?start_ra_instrumentation(Options, - code_size(CFG1), - element(2,hipe_gensym:var_range(x86))), - {CFG2, _, Coloring} - = case proplists:get_value(regalloc, Options, coalescing) of - coalescing -> - ra(CFG1, Liveness, SpillIndex, Options, hipe_coalescing_regalloc); - optimistic -> - ra(CFG1, Liveness, SpillIndex, Options, hipe_optimistic_regalloc); - graph_color -> - ra(CFG1, Liveness, SpillIndex, Options, hipe_graph_coloring_regalloc); - linear_scan -> - ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); - naive -> - ?HIPE_X86_RA_NAIVE:ra(CFG1, Liveness, Coloring_fp, Options); - _ -> - exit({unknown_regalloc_compiler_option, - proplists:get_value(regalloc,Options)}) - end, - ?stop_ra_instrumentation(Options, - code_size(CFG2), - element(2,hipe_gensym:var_range(x86))), - %% hipe_x86_cfg:pp(CFG2), - ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). - -ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, - ?HIPE_X86_SPECIFIC, no_context). - --ifdef(HIPE_AMD64). -ra_fp(CFG, Liveness, Options) -> - Regalloc0 = proplists:get_value(regalloc, Options), - {Regalloc, TargetMod} = - case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of - false -> {naive, undefined}; - true -> - case proplists:get_bool(x87, Options) of - true -> {linear_scan, hipe_amd64_specific_x87}; - false -> {Regalloc0, hipe_amd64_specific_sse2} - end - end, - case Regalloc of - coalescing -> - ra_fp(CFG, Liveness, Options, hipe_coalescing_regalloc, TargetMod); - optimistic -> - ra_fp(CFG, Liveness, Options, hipe_optimistic_regalloc, TargetMod); - graph_color -> - ra_fp(CFG, Liveness, Options, hipe_graph_coloring_regalloc, TargetMod); - linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Liveness, Options, TargetMod, - no_context); - naive -> {CFG,Liveness,[],0}; - _ -> - exit({unknown_regalloc_compiler_option, - proplists:get_value(regalloc,Options)}) - end. - -ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod) -> - hipe_regalloc_loop:ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, - no_context). --else. -ra_fp(CFG, Liveness, Options) -> - case proplists:get_bool(inline_fp, Options) of - true -> - hipe_x86_ra_ls:ra_fp(CFG, Liveness, Options, hipe_x86_specific_x87, - no_context); - false -> - {CFG,Liveness,[],0} - end. --endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl deleted file mode 100644 index e8abe78e00..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ /dev/null @@ -1,335 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% - apply temp -> reg/spill map from RA - --ifdef(HIPE_AMD64). 
--define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(HIPE_X86_X87, hipe_amd64_x87). --define(HIPE_X86_SSE2, hipe_amd64_sse2). --define(IF_HAS_SSE2(Expr), Expr). --else. --define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(HIPE_X86_X87, hipe_x86_x87). --define(IF_HAS_SSE2(Expr),). --endif. - --module(?HIPE_X86_RA_FINALISE). --export([finalise/4]). --include("../x86/hipe_x86.hrl"). - -finalise(CFG0, TempMap, FpMap, Options) -> - CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), - case proplists:get_bool(x87, Options) of - true -> - ?HIPE_X86_X87:map(CFG1); - _ -> - case - proplists:get_bool(inline_fp, Options) - and (proplists:get_value(regalloc, Options) =:= linear_scan) - of - %% Ugly, but required to avoid Dialyzer complaints about "Unknown - %% function" hipe_x86_sse2:map/1 - ?IF_HAS_SSE2(true -> - ?HIPE_X86_SSE2:map(CFG1);) - false -> - CFG1 - end - end. - -%%% -%%% Finalise the temp->reg/spill mapping. -%%% (XXX: maybe this should be merged with the main pass, -%%% but I just want this to work now) -%%% - -finalise_ra(CFG, [], [], _Options) -> - CFG; -finalise_ra(CFG, TempMap, FpMap, Options) -> - {_, SpillLimit} = hipe_gensym:var_range(x86), - Map = mk_ra_map(TempMap, SpillLimit), - FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). - -ra_bb(BB, Map, FpMap) -> - hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). - -ra_code(Code, Map, FpMap) -> - [ra_insn(I, Map, FpMap) || I <- Code]. - -ra_insn(I, Map, FpMap) -> - case I of - #alu{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#alu{src=Src,dst=Dst}; - #call{} -> - I; - #cmovcc{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#cmovcc{src=Src,dst=Dst}; - #cmp{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#cmp{src=Src,dst=Dst}; - #comment{} -> - I; - #fmove{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map, FpMap), - Dst = ra_opnd(Dst0, Map, FpMap), - I#fmove{src=Src,dst=Dst}; - #fp_unop{arg=Arg0} -> - Arg = ra_opnd(Arg0, Map, FpMap), - I#fp_unop{arg=Arg}; - #fp_binop{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map, FpMap), - Dst = ra_opnd(Dst0, Map, FpMap), - I#fp_binop{src=Src,dst=Dst}; - #imul{src=Src0,temp=Temp0} -> - Src = ra_opnd(Src0, Map), - Temp = ra_temp(Temp0, Map), - I#imul{src=Src,temp=Temp}; - #jcc{} -> - I; - #jmp_fun{'fun'=Fun0} -> - Fun = ra_opnd(Fun0, Map), - I#jmp_fun{'fun'=Fun}; - #jmp_label{} -> - I; - #jmp_switch{temp=Temp0,jtab=JTab0} -> - Temp = ra_opnd(Temp0, Map), - JTab = ra_opnd(JTab0, Map), - I#jmp_switch{temp=Temp,jtab=JTab}; - #label{} -> - I; - #lea{mem=Mem0,temp=Temp0} -> - Mem = ra_mem(Mem0, Map), - Temp = ra_temp(Temp0, Map), - I#lea{mem=Mem,temp=Temp}; - #move{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#move{src=Src,dst=Dst}; - #move64{dst=Dst0} -> - Dst = ra_opnd(Dst0, Map), - I#move64{dst=Dst}; - #movsx{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#movsx{src=Src,dst=Dst}; - #movzx{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#movzx{src=Src,dst=Dst}; - #pseudo_call{'fun'=Fun0} -> - Fun = ra_opnd(Fun0, Map), - I#pseudo_call{'fun'=Fun}; - #pseudo_jcc{} -> - I; - #pseudo_spill_fmove{src=Src0, temp=Temp0, dst=Dst0} -> - Src = ra_opnd(Src0, Map, FpMap), - Temp = ra_opnd(Temp0, Map, FpMap), - 
Dst = ra_opnd(Dst0, Map, FpMap), - I#pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}; - #pseudo_spill_move{src=Src0, temp=Temp0, dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Temp = ra_opnd(Temp0, Map), - Dst = ra_opnd(Dst0, Map), - I#pseudo_spill_move{src=Src, temp=Temp, dst=Dst}; - #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> - Fun = ra_opnd(Fun0, Map), - StkArgs = ra_args(StkArgs0, Map), - I#pseudo_tailcall{'fun'=Fun,stkargs=StkArgs}; - #pseudo_tailcall_prepare{} -> - I; - #push{src=Src0} -> - Src = ra_opnd(Src0, Map), - I#push{src=Src}; - #ret{} -> - I; - #shift{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#shift{src=Src,dst=Dst}; - #test{src=Src0,dst=Dst0} -> - Src = ra_opnd(Src0, Map), - Dst = ra_opnd(Dst0, Map), - I#test{src=Src,dst=Dst}; - _ -> - exit({?MODULE,ra_insn,I}) - end. - -ra_args(Args, Map) -> - [ra_opnd(Opnd, Map) || Opnd <- Args]. - -ra_opnd(Opnd, Map) -> - ra_opnd(Opnd, Map, gb_trees:empty()). -ra_opnd(Opnd, Map, FpMap) -> - case Opnd of - #x86_temp{} -> ra_temp(Opnd, Map, FpMap); - #x86_mem{} -> ra_mem(Opnd, Map); - _ -> Opnd - end. - -ra_mem(Mem, Map) -> - #x86_mem{base=Base0,off=Off0} = Mem, - Base = ra_opnd(Base0, Map), - Off = ra_opnd(Off0, Map), - Mem#x86_mem{base=Base,off=Off}. - -ra_temp(Temp, Map) -> - ra_temp(Temp, Map, gb_trees:empty()). - -ra_temp(Temp, Map, FpMap) -> - Reg = hipe_x86:temp_reg(Temp), - case hipe_x86:temp_type(Temp) of - double -> - ra_temp_double(Temp, Reg, FpMap); - _-> - case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of - true -> - Temp; - _ -> - case gb_trees:lookup(Reg, Map) of - {value,NewReg} -> Temp#x86_temp{reg=NewReg}; - _ -> Temp - end - end - end. - --ifdef(HIPE_AMD64). -ra_temp_double(Temp, Reg, FpMap) -> - case hipe_amd64_registers:is_precoloured_sse2(Reg) of - true -> - Temp; - _ -> - case gb_trees:lookup(Reg, FpMap) of - {value,NewReg} -> Temp#x86_temp{reg=NewReg}; - _ -> Temp - end - end. --else. -ra_temp_double(Temp, Reg, FpMap) -> - case gb_trees:lookup(Reg, FpMap) of - {value,NewReg} -> - case hipe_x86_registers:is_precoloured_x87(NewReg) of - true -> hipe_x86:mk_fpreg(NewReg); - false -> - Temp#x86_temp{reg=NewReg} - end; - _ -> - Temp - end. --endif. - -mk_ra_map(TempMap, SpillLimit) -> - %% Build a partial map from pseudo to reg or spill. - %% Spills are represented as pseudos with indices above SpillLimit. - %% (I'd prefer to use negative indices, but that breaks - %% ?HIPE_X86_REGISTERS:is_precoloured/1.) - %% The frame mapping proper is unchanged, since spills look just like - %% ordinary (un-allocated) pseudos. - lists:foldl(fun(MapLet, Map) -> - {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, - is_precoloured), - gb_trees:insert(Key, Val, Map) - end, - gb_trees:empty(), - TempMap). - -conv_ra_maplet({From,To}, SpillLimit, IsPrecoloured) - when is_integer(From), From =< SpillLimit -> - %% From should be a pseudo, or a hard reg mapped to itself. - case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of - false -> ok; - _ -> To = {reg, From}, ok - end, - %% end of From check - case To of - {reg, NewReg} when is_integer(NewReg) -> - %% NewReg should be a hard reg, or a pseudo mapped - %% to itself (formals are handled this way). - true = (?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) orelse From =:= NewReg), - {From, NewReg}; - {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 -> - ToTempNum = SpillLimit+SpillIndex+1, - MaxTempNum = hipe_gensym:get_var(x86), - if MaxTempNum >= ToTempNum -> ok; - true -> hipe_gensym:set_var(x86, ToTempNum) - end, - {From, ToTempNum} - end. 
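
conv_ra_maplet/3 above encodes the allocator's result in a single integer namespace: hard registers keep their own numbers, while spill slot K becomes the pseudo number SpillLimit + K + 1, an index guaranteed to lie above every temp that existed before allocation and therefore treated by the frame pass as an ordinary unallocated pseudo (a stack slot). A standalone restatement follows; ra_map_demo is illustrative, and the gensym bookkeeping and sanity checks of the real code are omitted.

    -module(ra_map_demo).
    -export([conv/2]).

    conv({From, {reg, Reg}}, _SpillLimit) when is_integer(Reg) ->
      {From, Reg};
    conv({From, {spill, Slot}}, SpillLimit) when is_integer(Slot), Slot >= 0 ->
      {From, SpillLimit + Slot + 1}.

With SpillLimit = 100, conv({9, {spill, 2}}, 100) gives {9, 103}, and conv({7, {reg, 3}}, 100) gives {7, 3}.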
- -mk_ra_map_x87(FpMap, SpillLimit) -> - lists:foldl(fun(MapLet, Map) -> - {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, - is_precoloured_x87), - gb_trees:insert(Key, Val, Map) - end, - gb_trees:empty(), - FpMap). - --ifdef(HIPE_AMD64). -mk_ra_map_sse2(FpMap, SpillLimit) -> - lists:foldl(fun(MapLet, Map) -> - {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, - is_precoloured_sse2), - gb_trees:insert(Key, Val, Map) - end, - gb_trees:empty(), - FpMap). - -mk_ra_map_fp(FpMap, SpillLimit, Options) -> - case proplists:get_bool(x87, Options) of - true -> mk_ra_map_x87(FpMap, SpillLimit); - false -> mk_ra_map_sse2(FpMap, SpillLimit) - end. --else. -mk_ra_map_fp(FpMap, SpillLimit, _Options) -> - mk_ra_map_x87(FpMap, SpillLimit). --endif. - --ifdef(notdef). -conv_ra_maplet_fp(MapLet = {From,To}, SpillLimit) -> - %% From should be a pseudo - if is_integer(From), From =< SpillLimit -> []; - true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) - end, - %% end of From check - case To of - {reg, NewReg} -> - case hipe_x86_registers:is_precoloured_x87(NewReg) of - true-> []; - false -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) - end, - %% end of NewReg check. - {From, NewReg}; - {spill, SpillIndex} -> - %% SpillIndex should be >= 0. - if is_integer(SpillIndex), SpillIndex >= 0 -> []; - true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) - end, - %% end of SpillIndex check - ToTempNum = SpillLimit+SpillIndex+1, - MaxTempNum = hipe_gensym:get_var(x86), - if MaxTempNum >= ToTempNum -> []; - true -> hipe_gensym:set_var(x86, ToTempNum) - end, - {From, ToTempNum}; - _ -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) - end. --endif. diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl deleted file mode 100644 index 581abd299d..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ /dev/null @@ -1,104 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% Linear Scan register allocator for x86 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). --else. --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). --endif. - --module(?HIPE_X86_RA_LS). --export([ra/4,ra_fp/5]). --define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. --include("../main/hipe.hrl"). - -ra(CFG, Liveness, SpillIndex, Options) -> - SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( - CFG, no_context), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). 
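
ra/4 above is the entry point reached when the regalloc option selects the linear scan allocator (see the dispatch in hipe_x86_ra:ra/2 earlier in this diff); alloc/5 below hands the allocator the allocatable set minus temp0 and temp1, which are kept free as scratch registers for the later rewrite pass. Assuming the standard option interface (an assumption, not shown here; {m, f, 1} is a placeholder MFA), the allocator is selected with:

    %% regalloc values accepted by hipe_x86_ra:ra/2 in this diff:
    %% coalescing (the default), optimistic, graph_color, linear_scan, naive
    1> hipe:c({m, f, 1}, [{regalloc, linear_scan}]).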
- -ra_fp(CFG, Liveness, Options, TargetMod, TargetCtx) -> - ?inc_counter(ra_calls_counter,1), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - - {Coloring,NewSpillIndex} = - regalloc(CFG, Liveness, - TargetMod:allocatable('linearscan', TargetCtx), - [hipe_x86_cfg:start_label(CFG)], - SpillIndex, SpillLimit, Options, - TargetMod, TargetCtx), - - {NewCFG, _DidSpill} = - TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan', TargetCtx), - TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), - {TempMap2, NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, - TargetMod, TargetCtx, TempMap), - Coloring2 = - hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - ?add_spills(Options, NewSpillIndex), - {NewCFG, Liveness, Coloring2, NewSpillIndex2}. - -alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - {Coloring, NewSpillIndex} = - regalloc( - CFG, Liveness, - ?HIPE_X86_REGISTERS:allocatable()-- - [?HIPE_X86_REGISTERS:temp1(), - ?HIPE_X86_REGISTERS:temp0()], - [hipe_x86_cfg:start_label(CFG)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC, no_context), - {NewCFG, _DidSpill} = - ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - CFG, Coloring, 'linearscan'), - %% ?HIPE_X86_PP:pp(NewDefun), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, - ?HIPE_X86_SPECIFIC, no_context, TempMap), - Coloring2 = - hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - case proplists:get_bool(verbose_spills, Options) of - true -> - ?msg("Stack slot size: ~p~n",[NewSpillIndex2-SpillIndex]); - false -> - ok - end, - ?add_spills(Options, NewSpillIndex), - {NewCFG, Liveness, Coloring2}. - -regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, - TgtMod, TgtCtx) -> - hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, - DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl deleted file mode 100644 index f96c662d18..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ /dev/null @@ -1,412 +0,0 @@ -%%% -*- erlang-indent-level: 2 -*- -%%% -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% simple local x86 regalloc - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(HIPE_X86_SPECIFIC_FP, hipe_amd64_specific_sse2). --define(ECX, rcx). --else. --define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). --define(HIPE_X86_REGISTERS, hipe_x86_registers). 
--define(HIPE_X86_SPECIFIC_FP, hipe_x86_specific_x87). --define(ECX, ecx). --endif. - --module(?HIPE_X86_RA_NAIVE). --export([ra/4]). - --include("../x86/hipe_x86.hrl"). --define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation --include("../main/hipe.hrl"). - -ra(CFG0, Liveness, Coloring_fp, Options) -> - CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), - NofSpilledFloats = count_non_float_spills(Coloring_fp), - NofFloats = length(Coloring_fp), - ?add_spills(Options, hipe_gensym:get_var(x86) - - ?HIPE_X86_REGISTERS:first_virtual()- - NofSpilledFloats - - NofFloats), - TempMap = [], - {CFG, Liveness, - TempMap}. - -do_bb(_Lbl, BB) -> - hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). - -count_non_float_spills(Coloring_fp) -> - count_non_float_spills(Coloring_fp, 0). - -count_non_float_spills([{_,To}|Tail], Num) -> - case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To, no_context) of - true -> - count_non_float_spills(Tail, Num); - false -> - count_non_float_spills(Tail, Num+1) - end; -count_non_float_spills([], Num) -> - Num. - -do_insns([I|Insns]) -> - do_insn(I) ++ do_insns(Insns); -do_insns([]) -> - []. - -do_insn(I) -> % Insn -> Insn list - case I of - #alu{} -> - do_alu(I); - #cmp{} -> - do_cmp(I); - #imul{} -> - do_imul(I); - #jmp_switch{} -> - do_jmp_switch(I); - #lea{} -> - do_lea(I); - #move{} -> - do_move(I); - #move64{} -> - do_move64(I); - #movzx{} -> - do_movx(I); - #movsx{} -> - do_movx(I); - #fmove{} -> - do_fmove(I); - #fp_unop{} -> - do_fp_unop(I); - #fp_binop{} -> - do_fp_binop(I); - #shift{} -> - do_shift(I); - #test{} -> - do_test(I); - #label{} -> - [I]; - #pseudo_jcc{} -> - [I]; - #pseudo_call{} -> - [I]; - #ret{} -> - [I]; - #pseudo_tailcall_prepare{} -> - [I]; - #pseudo_tailcall{} -> - [I]; - #push{} -> - [I]; - #jmp_label{} -> - [I]; - #comment{} -> - [I]; - _ -> - io:format("Unknown Instruction = ~w\n", [I]), - exit({?MODULE, unknown_instruction, I}) - end. - -%%% Fix an alu op. - -do_alu(I) -> - #alu{src=Src0,dst=Dst0} = I, - {FixSrc,Src,FixDst,Dst} = do_binary(Src0, Dst0), - FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}]. - -%%% Fix a cmp op. - -do_cmp(I) -> - #cmp{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), - FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}]. - -%%% Fix an imul op. - -do_imul(I) -> - #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, - {FixSrc,Src} = fix_src_operand(Src0), % may use temp0 - {FixTempSrc,Temp,FixTempDst} = - case temp_is_pseudo(Temp0) of - false -> - {[], Temp0, []}; - true -> - Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), 'untagged'), - {case ImmOpt of - [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src - _ -> [] % temp = src * imm - end, - Reg, - [hipe_x86:mk_move(Reg, Temp0)]} - end, - FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst. - -%%% Fix a jmp_switch op. - --ifdef(HIPE_AMD64). -do_jmp_switch(I) -> - #jmp_switch{temp=Temp, jtab=Tab} = I, - case temp_is_pseudo(Temp) of - false -> - case temp_is_pseudo(Tab) of - false -> - [I]; - true -> - Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), - [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{jtab=Reg}] - end; - true -> - Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp1(), 'untagged'), - case temp_is_pseudo(Tab) of - false -> - [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}]; - true -> - Reg2 = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), - [hipe_x86:mk_move(Temp, Reg), - hipe_x86:mk_move(Tab, Reg2), - I#jmp_switch{temp=Reg, jtab=Reg2}] - end - end. --else. 
-do_jmp_switch(I) -> - #jmp_switch{temp=Temp} = I, - case temp_is_pseudo(Temp) of - false -> - [I]; - true -> - Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), - [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}] - end. --endif. - -%%% Fix a lea op. - -do_lea(I) -> - #lea{temp=Temp} = I, - case temp_is_pseudo(Temp) of - false -> - [I]; - true -> - Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), - [I#lea{temp=Reg}, hipe_x86:mk_move(Reg, Temp)] - end. - -%%% Fix a move op. - -do_move(I) -> - #move{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), - FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}]. - --ifdef(HIPE_AMD64). -do_move64(I) -> - #move64{dst=Dst} = I, - case is_mem_opnd(Dst) of - false -> - [I]; - true -> - Reg = hipe_amd64_registers:temp1(), - NewDst = clone(Dst, Reg), - [I#move64{dst=NewDst}, hipe_x86:mk_move(NewDst, Dst)] - end. --else. -do_move64(I) -> exit({?MODULE, I}). --endif. - -do_movx(I) -> - {{FixSrc, Src}, {FixDst, Dst}} = - case I of - #movsx{src=Src0,dst=Dst0} -> - {fix_src_operand(Src0), fix_dst_operand(Dst0)}; - #movzx{src=Src0,dst=Dst0} -> - {fix_src_operand(Src0), fix_dst_operand(Dst0)} - end, - Reg = ?HIPE_X86_REGISTERS:temp0(), - Dst2 = clone(Dst, Reg), - I2 = case is_mem_opnd(Dst) of - true -> - Reg = ?HIPE_X86_REGISTERS:temp0(), - Dst2 = clone(Dst, Reg), - case I of - #movsx{} -> - [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; - #movzx{} -> - [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] - end; - false -> - case I of - #movsx{} -> - [hipe_x86:mk_movsx(Src, Dst)]; - #movzx{} -> - [hipe_x86:mk_movzx(Src, Dst)] - end - end, - FixSrc ++ FixDst ++ I2. - - -%%% Fix a fmove op. -%% conv_to_float -do_fmove(I=#fmove{src=#x86_temp{type=untagged}, - dst=#x86_temp{type=double}}) -> - #fmove{src=Src0,dst=Dst0} = I, - Src = clone(Src0, ?HIPE_X86_REGISTERS:temp0()), - Dst = clone(Dst0, ?HIPE_X86_REGISTERS:temp1()), - [hipe_x86:mk_move(Src0, Src), - I#fmove{src=Src, dst=Dst}, - hipe_x86:mk_fmove(Dst, Dst0)]; -%% fmove -do_fmove(I) -> - #fmove{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), - FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}]. - -do_fp_unop(I) -> - #fp_unop{arg=Arg} = I, - case is_mem_opnd(Arg) of - false -> - [I]; - true -> - Reg = ?HIPE_X86_REGISTERS:temp1(), - NewArg = clone(Arg, Reg), - [hipe_x86:mk_fmove(Arg, NewArg), - I#fp_unop{arg=NewArg}, - hipe_x86:mk_fmove(NewArg, Arg)] - end. - -do_fp_binop(I) -> - #fp_binop{src=Src0, dst=Dst0} = I, - {FixSrc, Src} = fix_src_operand(Src0), - {FixDst, Dst} = fix_dst_operand(Dst0), - Reg = ?HIPE_X86_REGISTERS:temp1(), - Dst2 = clone(Dst, Reg), - FixSrc ++ FixDst ++ [hipe_x86:mk_fmove(Dst, Dst2), - I#fp_binop{src=Src, dst=Dst2}, - hipe_x86:mk_fmove(Dst2, Dst)]. - -do_shift(I) -> - #shift{src=Src0,dst=Dst0} = I, - {FixDst, Dst} = fix_dst_operand(Dst0), - Reg = ?HIPE_X86_REGISTERS:?ECX(), - case Src0 of - #x86_imm{} -> - FixDst ++ [I#shift{dst=Dst}]; - #x86_temp{reg=Reg} -> - FixDst ++ [I#shift{dst=Dst}] - end. - -do_test(I) -> - #test{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), - FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}]. - -%%% Fix the operands of a binary op. -%%% 1. remove pseudos from any explicit memory operands -%%% 2. 
if both operands are (implicit or explicit) memory operands, -%%% move src to a reg and use reg as src in the original insn - -do_binary(Src0, Dst0) -> - {FixSrc, Src} = fix_src_operand(Src0), - {FixDst, Dst} = fix_dst_operand(Dst0), - {FixSrc3, Src3} = - case is_mem_opnd(Src) of - false -> - {FixSrc, Src}; - true -> - case is_mem_opnd(Dst) of - false -> - {FixSrc, Src}; - true -> - Reg = ?HIPE_X86_REGISTERS:temp0(), - Src2 = clone(Src, Reg), - FixSrc2 = FixSrc ++ [mk_move(Src, Src2)], - {FixSrc2, Src2} - end - end, - {FixSrc3, Src3, FixDst, Dst}. - -%%% Fix any x86_mem operand to not refer to any pseudos. -%%% The fixup may use additional instructions and registers. -%%% 'src' operands may clobber '%temp0'. -%%% 'dst' operands may clobber '%temp1'. - -fix_src_operand(Opnd) -> - fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp0()). - -fix_dst_operand(Opnd) -> - fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp1()). - -fix_mem_operand(Opnd, Reg) -> % -> {[fixupcode], newop} - case Opnd of - #x86_mem{base=Base,off=Off} -> - case is_mem_opnd(Base) of - false -> - case src_is_pseudo(Off) of - false -> - {[], Opnd}; - true -> % pseudo(reg) - Temp = clone(Off, Reg), - {[hipe_x86:mk_move(Off, Temp)], - Opnd#x86_mem{off=Temp}} - end; - true -> - Temp = clone(Base, Reg), - case src_is_pseudo(Off) of - false -> % imm/reg(pseudo) - {[hipe_x86:mk_move(Base, Temp)], - Opnd#x86_mem{base=Temp}}; - true -> % pseudo1(pseudo0) - {[hipe_x86:mk_move(Base, Temp), - hipe_x86:mk_alu('add', Off, Temp)], - Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}} - end - end; - _ -> - {[], Opnd} - end. - -%%% Check if an operand denotes a memory cell (mem or pseudo). - -is_mem_opnd(Opnd) -> - case Opnd of - #x86_mem{} -> true; - #x86_temp{} -> temp_is_pseudo(Opnd); - _ -> false - end. - -%%% Check if an operand is a pseudo-Temp. - -src_is_pseudo(Src) -> - case hipe_x86:is_temp(Src) of - true -> temp_is_pseudo(Src); - false -> false - end. - -temp_is_pseudo(Temp) -> - not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))). - -%%% Make Reg a clone of Dst (attach Dst's type to Reg). - -clone(Dst, Reg) -> - Type = - case Dst of - #x86_mem{} -> hipe_x86:mem_type(Dst); - #x86_temp{} -> hipe_x86:temp_type(Dst) - end, - hipe_x86:mk_temp(Reg, Type). - -mk_move(Src, Dst=#x86_temp{type=double}) -> - hipe_x86:mk_fmove(Src, Dst); -mk_move(Src, Dst) -> - hipe_x86:mk_move(Src, Dst). diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl deleted file mode 100644 index db6391d5c1..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ /dev/null @@ -1,474 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). --define(ECX, rcx). --else. --define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). 
--define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). --define(ECX, ecx). --endif. - --module(?HIPE_X86_RA_POSTCONDITIONS). - --export([check_and_rewrite/3]). - --include("../x86/hipe_x86.hrl"). --define(HIPE_INSTRUMENT_COMPILER, true). --include("../main/hipe.hrl"). --define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). - -check_and_rewrite(CFG, Coloring, Strategy) -> - %% io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), - %% io:format("Rewriting\n"), - do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). - -do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; -do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> - Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), - {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), - CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), - do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). - -do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> - {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), - do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); -do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> - {lists:reverse(Accum), DidSpill}. - -do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} - case I of - #alu{} -> - do_alu(I, TempMap, Strategy); - #cmp{} -> - do_cmp(I, TempMap, Strategy); - #imul{} -> - do_imul(I, TempMap, Strategy); - #jmp_switch{} -> - do_jmp_switch(I, TempMap, Strategy); - #lea{} -> - do_lea(I, TempMap, Strategy); - #move{} -> - do_move(I, TempMap, Strategy); - #move64{} -> - do_move64(I, TempMap, Strategy); - #movsx{} -> - do_movx(I, TempMap, Strategy); - #movzx{} -> - do_movx(I, TempMap, Strategy); - #fmove{} -> - do_fmove(I, TempMap, Strategy); - #pseudo_spill_move{} -> - do_pseudo_spill_move(I, TempMap, Strategy); - #shift{} -> - do_shift(I, TempMap, Strategy); - #test{} -> - do_test(I, TempMap, Strategy); - _ -> - %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, - %% pseudo_tailcall_prepare, push, ret - {[I], false} - end. - -%%% Fix an alu op. - -do_alu(I, TempMap, Strategy) -> - #alu{src=Src0,dst=Dst0} = I, - {FixSrc,Src,FixDst,Dst,DidSpill} = - do_binary(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}], DidSpill}. - -%%% Fix a cmp op. - -do_cmp(I, TempMap, Strategy) -> - #cmp{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_binary(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}], DidSpill}. - -%%% Fix an imul op. - -do_imul(I, TempMap, Strategy) -> - #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, - {FixSrc,Src,DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), % temp1 - {FixTempSrc,Temp,FixTempDst,DidSpill2} = - case is_spilled(Temp0, TempMap) of - false -> - {[], Temp0, [], false}; - true -> - Reg = spill_temp0('untagged', Strategy), - {case ImmOpt of - [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src - _ -> [] % temp = src * imm - end, - Reg, - [hipe_x86:mk_move(Reg, Temp0)], - true} - end, - {FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst, - DidSpill1 or DidSpill2}. - -%%% Fix a jmp_switch op. - --ifdef(HIPE_AMD64). 
-do_jmp_switch(I, TempMap, Strategy) -> - #jmp_switch{temp=Temp, jtab=Tab} = I, - case is_spilled(Temp, TempMap) of - false -> - case is_spilled(Tab, TempMap) of - false -> - {[I], false}; - true -> - NewTab = spill_temp('untagged', Strategy), - {[hipe_x86:mk_move(Tab, NewTab), I#jmp_switch{jtab=Tab}], - true} - end; - true -> - case is_spilled(Tab, TempMap) of - false -> - NewTmp = spill_temp('untagged', Strategy), - {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], - true}; - true -> - NewTmp = spill_temp('untagged', Strategy), - NewTab = spill_temp0('untagged', Strategy), - {[hipe_x86:mk_move(Temp, NewTmp), - hipe_x86:mk_move(Tab, NewTab), - I#jmp_switch{temp=NewTmp, jtab=NewTab}], - true} - end - end. --else. % not AMD64 -do_jmp_switch(I, TempMap, Strategy) -> - #jmp_switch{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], - true} - end. --endif. % not AMD64 - -%%% Fix a lea op. - -do_lea(I, TempMap, Strategy) -> - #lea{mem=Mem0,temp=Temp0} = I, - {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), - case Mem of - #x86_mem{base=Base, off=#x86_imm{value=0}} -> - %% We've decayed into a move due to both operands being memory (there's an - %% 'add' in FixMem). - {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; - #x86_mem{} -> - {StoreTemp, Temp, DidSpill2} = - case is_mem_opnd(Temp0, TempMap) of - false -> {[], Temp0, false}; - true -> - Temp1 = clone2(Temp0, temp0(Strategy)), - {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} - end, - {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} - end. - -%%% Fix a move op. - -do_move(I, TempMap, Strategy) -> - #move{src=Src0,dst=Dst0} = I, - case - is_record(Src0, x86_temp) andalso is_record(Dst0, x86_temp) - andalso is_spilled(Src0, TempMap) andalso is_spilled(Dst0, TempMap) - of - true -> - Tmp = clone(Src0, Strategy), - {[hipe_x86:mk_pseudo_spill_move(Src0, Tmp, Dst0)], true}; - false -> - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_check_byte_move(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], - DidSpill} - end. - --ifdef(HIPE_AMD64). - -%%% AMD64 has no issues with byte moves. -do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> - do_binary(Src0, Dst0, TempMap, Strategy). - --else. % not AMD64 - -%%% x86 can only do byte moves to a subset of the integer registers. -do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> - case Dst0 of - #x86_mem{type=byte} -> - do_byte_move(Src0, Dst0, TempMap, Strategy); - _ -> - do_binary(Src0, Dst0, TempMap, Strategy) - end. - -do_byte_move(Src0, Dst0, TempMap, Strategy) -> - {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), - {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), - Reg = hipe_x86_registers:eax(), - {FixSrc3, Src3} = % XXX: this just checks Src, the result is known! - case Src of - #x86_imm{} -> - {FixSrc, Src}; - #x86_temp{reg=Reg} -> % small moves must start from reg 1->4 - {FixSrc, Src} % so variable sources are always put in eax - end, - {FixSrc3, Src3, FixDst, Dst, - DidSpill2 or DidSpill1}. - --endif. % not AMD64 - -%%% Fix a move64 op. - -do_move64(I, TempMap, Strategy) -> - #move64{dst=Dst} = I, - case is_spilled(Dst, TempMap) of - false -> - {[I], false}; - true -> - Reg = clone(Dst, Strategy), - {[I#move64{dst=Reg}, hipe_x86:mk_move(Reg, Dst)], true} - end. - -%%% Fix a movx op. 
- -do_movx(I, TempMap, Strategy) -> - {{FixSrc, Src, DidSpill1}, {FixDst, Dst, DidSpill2}} = - case I of - #movsx{src=Src0,dst=Dst0} -> - {fix_src_operand(Src0, TempMap, Strategy), - fix_dst_operand(Dst0, TempMap, Strategy)}; - #movzx{src=Src0,dst=Dst0} -> - {fix_src_operand(Src0, TempMap, Strategy), - fix_dst_operand(Dst0, TempMap, Strategy)} - end, - {I3, DidSpill3} = - case is_spilled(Dst, TempMap) of - false -> - I2 = case I of - #movsx{} -> - [hipe_x86:mk_movsx(Src, Dst)]; - #movzx{} -> - [hipe_x86:mk_movzx(Src, Dst)] - end, - {I2, false}; - true -> - Dst2 = clone(Dst, Strategy), - I2 = - case I of - #movsx{} -> - [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; - #movzx{} -> - [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] - end, - {I2, true} - end, - {FixSrc++FixDst++I3, - DidSpill3 or DidSpill2 or DidSpill1}. - -%%% Fix an fmove op. - -do_fmove(I, TempMap, Strategy) -> - #fmove{src=Src0,dst=Dst0} = I, - {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), - {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), - %% fmoves from memory position to memory position is handled - %% by the f.p. register allocator. - {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], - DidSpill1 or DidSpill2}. - -%%% Fix an pseudo_spill_move op. - -do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> - %% Temp is above the low water mark and must not have been spilled - false = is_spilled(Temp, TempMap), - {[I], false}. % nothing to do - -%%% Fix a shift operation. -%%% 1. remove pseudos from any explicit memory operands -%%% 2. if the source is a register or memory position -%%% make sure to move it to %ecx - -do_shift(I, TempMap, Strategy) -> - #shift{src=Src0,dst=Dst0} = I, - {FixDst, Dst, DidSpill} = fix_dst_operand(Dst0, TempMap, Strategy), - Reg = ?HIPE_X86_REGISTERS:?ECX(), - case Src0 of - #x86_imm{} -> - {FixDst ++ [I#shift{dst=Dst}], DidSpill}; - #x86_temp{reg=Reg} -> - {FixDst ++ [I#shift{dst=Dst}], DidSpill} - end. - -%%% Fix a test op. - -do_test(I, TempMap, Strategy) -> - #test{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_binary(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}. - -%%% Fix the operands of a binary op. -%%% 1. remove pseudos from any explicit memory operands -%%% 2. if both operands are (implicit or explicit) memory operands, -%%% move src to a reg and use reg as src in the original insn - -do_binary(Src0, Dst0, TempMap, Strategy) -> - {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), - {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), - {FixSrc3, Src3, DidSpill3} = - case is_mem_opnd(Src, TempMap) of - false -> - {FixSrc, Src, false}; - true -> - case is_mem_opnd(Dst, TempMap) of - false -> - {FixSrc, Src, false}; - true -> - Src2 = clone(Src, Strategy), - FixSrc2 = FixSrc ++ [hipe_x86:mk_move(Src, Src2)], - {FixSrc2, Src2, true} - end - end, - {FixSrc3, Src3, FixDst, Dst, - DidSpill3 or DidSpill2 or DidSpill1}. - -%%% Fix any x86_mem operand to not refer to any spilled temps. - -fix_src_operand(Opnd, TmpMap, Strategy) -> - fix_mem_operand(Opnd, TmpMap, temp1(Strategy)). - -temp1('normal') -> []; -temp1('linearscan') -> ?HIPE_X86_REGISTERS:temp1(). - -fix_dst_operand(Opnd, TempMap, Strategy) -> - fix_mem_operand(Opnd, TempMap, temp0(Strategy)). - -temp0('normal') -> []; -temp0('linearscan') -> ?HIPE_X86_REGISTERS:temp0(). 
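
temp1/1 and temp0/1 above encode the two rewrite strategies: under 'normal' (the iterated allocators) no register is reserved and the rewrite creates fresh pseudos, which triggers another allocation round, while under 'linearscan' the reserved scratch registers are used directly since linear scan is not rerun. A standalone sketch of that split (scratch_demo and its atoms stand in for the real register numbers):

    -module(scratch_demo).
    -export([scratch/2]).

    %% src operands may clobber temp1 and dst operands temp0,
    %% matching fix_src_operand/3 and fix_dst_operand/3 above.
    scratch(_Pos, normal)     -> fresh_pseudo;
    scratch(src,  linearscan) -> temp1;
    scratch(dst,  linearscan) -> temp0.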
- -fix_mem_operand(Opnd, TempMap, RegOpt) -> % -> {[fixupcode], newop, DidSpill} - case Opnd of - #x86_mem{base=Base,off=Off} -> - case is_mem_opnd(Base, TempMap) of - false -> - case is_mem_opnd(Off, TempMap) of - false -> - {[], Opnd, false}; - true -> - Temp = clone2(Off, RegOpt), - {[hipe_x86:mk_move(Off, Temp)], - Opnd#x86_mem{off=Temp}, - true} - end; - true -> - Temp = clone2(Base, RegOpt), - case is_mem_opnd(Off, TempMap) of - false -> % imm/reg(pseudo) - {[hipe_x86:mk_move(Base, Temp)], - Opnd#x86_mem{base=Temp}, - true}; - true -> % pseudo(pseudo) - {[hipe_x86:mk_move(Base, Temp), - hipe_x86:mk_alu('add', Off, Temp)], - Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}, - true} - end - end; - _ -> - {[], Opnd, false} - end. - -%%% Check if an operand denotes a memory cell (mem or pseudo). - -is_mem_opnd(Opnd, TempMap) -> - R = - case Opnd of - #x86_mem{} -> true; - #x86_temp{} -> - Reg = hipe_x86:temp_reg(Opnd), - case hipe_x86:temp_is_allocatable(Opnd) of - true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - false -> true - end; - _ -> false - end, - %% io:format("Op ~w mem: ~w\n",[Opnd,R]), - R. - -%%% Check if an operand is a spilled Temp. - -is_spilled(Temp, TempMap) -> - case hipe_x86:temp_is_allocatable(Temp) of - true -> - Reg = hipe_x86:temp_reg(Temp), - case hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> - false - end; - false -> true - end. - -%%% Make Reg a clone of Dst (attach Dst's type to Reg). - -clone(Dst, Strategy) -> - Type = - case Dst of - #x86_mem{} -> hipe_x86:mem_type(Dst); - #x86_temp{} -> hipe_x86:temp_type(Dst) - end, - spill_temp(Type, Strategy). - -spill_temp0(Type, 'normal') when Type =/= double -> - hipe_x86:mk_new_temp(Type); -spill_temp0(Type, 'linearscan') when Type =/= double -> - hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). - -spill_temp(Type, 'normal') when Type =/= double -> - hipe_x86:mk_new_temp(Type); -spill_temp(Type, 'linearscan') when Type =/= double -> - hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). - -%%% Make a certain reg into a clone of Dst - -clone2(Dst, RegOpt) -> - Type = - case Dst of - #x86_mem{} -> hipe_x86:mem_type(Dst); - #x86_temp{} -> hipe_x86:temp_type(Dst) - end, - case RegOpt of - [] when Type =/= double -> hipe_x86:mk_new_temp(Type); - Reg -> hipe_x86:mk_temp(Reg, Type) - end. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl deleted file mode 100644 index dbff68ad28..0000000000 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ /dev/null @@ -1,249 +0,0 @@ -%%% Licensed under the Apache License, Version 2.0 (the "License"); -%%% you may not use this file except in compliance with the License. -%%% You may obtain a copy of the License at -%%% -%%% http://www.apache.org/licenses/LICENSE-2.0 -%%% -%%% Unless required by applicable law or agreed to in writing, software -%%% distributed under the License is distributed on an "AS IS" BASIS, -%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%%% See the License for the specific language governing permissions and -%%% limitations under the License. -%%% -%%% TODO: -%%% - Do we need a pseudo reg for the condition codes? - --module(hipe_x86_registers). 
- --export([reg_name/1, - first_virtual/0, - is_precoloured/1, - is_precoloured_x87/1, - all_precoloured/0, - eax/0, - ecx/0, - temp0/0, - temp1/0, - sp/0, - proc_pointer/0, - heap_limit/0, - fcalls/0, - proc_offset/1, - sp_limit_offset/0, - is_fixed/1, - %% fixed/0, - allocatable/0, - allocatable_x87/0, - nr_args/0, - arg/1, - is_arg/1, - args/1, - nr_rets/0, - ret/1, - call_clobbered/0, - tailcall_clobbered/0, - live_at_return/0, - float_size/0, - wordsize/0, - alignment/0]). - --include("../rtl/hipe_literals.hrl"). - --ifdef(X86_HP_IN_ESI). --export([heap_pointer/0]). --endif. - --define(EAX, 0). --define(ECX, 1). --define(EDX, 2). --define(EBX, 3). --define(ESP, 4). --define(EBP, 5). --define(ESI, 6). --define(EDI, 7). --define(FCALLS, 8). % proc field alias --define(HEAP_LIMIT, 9). % proc field alias --define(LAST_PRECOLOURED, 9). - --define(ARG0, ?EAX). --define(ARG1, ?EDX). --define(ARG2, ?ECX). --define(ARG3, ?EBX). --define(ARG4, ?EDI). - --define(RET0, ?EAX). --define(RET1, ?EDX). --define(RET2, ?ECX). --define(RET3, ?EBX). --define(RET4, ?EDI). - --define(TEMP0, ?EBX). % XXX: was EAX --define(TEMP1, ?EDI). % XXX: was EDX then EDI - --define(PROC_POINTER, ?EBP). - -reg_name(R) -> - case R of - ?EAX -> "%eax"; - ?ECX -> "%ecx"; - ?EDX -> "%edx"; - ?EBX -> "%ebx"; - ?ESP -> "%esp"; - ?EBP -> "%ebp"; - ?ESI -> "%esi"; - ?EDI -> "%edi"; - ?FCALLS -> "%fcalls"; - ?HEAP_LIMIT -> "%hplim"; - Other -> "%r" ++ integer_to_list(Other) - end. - -first_virtual() -> ?LAST_PRECOLOURED + 1. - -is_precoloured(X) -> X =< ?LAST_PRECOLOURED. - -is_precoloured_x87(X) -> X =< 6. - -all_precoloured() -> - [?EAX, - ?ECX, - ?EDX, - ?EBX, - ?ESP, - ?EBP, - ?ESI, - ?EDI, - ?FCALLS, - ?HEAP_LIMIT]. - -eax() -> ?EAX. -ecx() -> ?ECX. -temp0() -> ?TEMP0. -temp1() -> ?TEMP1. -sp() -> ?ESP. -proc_pointer() -> ?PROC_POINTER. -fcalls() -> ?FCALLS. -heap_limit() -> ?HEAP_LIMIT. - --ifdef(X86_HP_IN_ESI). --define(ESI_IS_FIXED,1). --define(HEAP_POINTER, ?ESI). -heap_pointer() -> ?HEAP_POINTER. -is_heap_pointer(?HEAP_POINTER) -> true; -is_heap_pointer(_) -> false. --define(LIST_HP_FIXED,[?HEAP_POINTER]). --define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]). --else. -is_heap_pointer(_) -> false. --define(LIST_HP_FIXED,[]). --define(LIST_HP_LIVE_AT_RETURN,[]). --endif. - --ifdef(ESI_IS_FIXED). --define(LIST_ESI_ALLOCATABLE,[]). --define(LIST_ESI_CALL_CLOBBERED,[]). --else. --define(LIST_ESI_ALLOCATABLE,[?ESI]). --define(LIST_ESI_CALL_CLOBBERED,[{?ESI,tagged},{?ESI,untagged}]). --endif. - -proc_offset(?FCALLS) -> ?P_FCALLS; -proc_offset(?HEAP_LIMIT) -> ?P_HP_LIMIT; -proc_offset(_) -> false. - -sp_limit_offset() -> ?P_NSP_LIMIT. - -is_fixed(?ESP) -> true; -is_fixed(?PROC_POINTER) -> true; -is_fixed(?FCALLS) -> true; -is_fixed(?HEAP_LIMIT) -> true; -is_fixed(R) -> is_heap_pointer(R). - -%% fixed() -> -%% [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED]. - -allocatable() -> - [?EDX, ?ECX, ?EBX, ?EAX, ?EDI| ?LIST_ESI_ALLOCATABLE]. - -allocatable_x87() -> - [0,1,2,3,4,5,6]. - -nr_args() -> ?X86_NR_ARG_REGS. - -arg(N) -> - if N < ?X86_NR_ARG_REGS -> - case N of - 0 -> ?ARG0; - 1 -> ?ARG1; - 2 -> ?ARG2; - 3 -> ?ARG3; - 4 -> ?ARG4; - _ -> exit({?MODULE, arg, N}) - end; - true -> - exit({?MODULE, arg, N}) - end. - -is_arg(R) -> - case R of - ?ARG0 -> ?X86_NR_ARG_REGS > 0; - ?ARG1 -> ?X86_NR_ARG_REGS > 1; - ?ARG2 -> ?X86_NR_ARG_REGS > 2; - ?ARG3 -> ?X86_NR_ARG_REGS > 3; - ?ARG4 -> ?X86_NR_ARG_REGS > 4; - _ -> false - end. 
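The ?ARG0 to ?ARG4 macros above pin the first Erlang-level arguments to %eax, %edx, %ecx, %ebx and %edi, capped by ?X86_NR_ARG_REGS; arg/1 exits for any index beyond that. A throwaway sketch of the same mapping by register name (module and function names invented, NrArgRegs standing in for the macro):

%% Hypothetical mirror of arg/1 that returns names instead of the
%% numeric encodings defined by the ?EAX .. ?EDI macros.
-module(arg_regs_sketch).
-export([arg_name/2]).

arg_name(N, NrArgRegs) when N >= 0, N < NrArgRegs, N =< 4 ->
  lists:nth(N + 1, ["%eax", "%edx", "%ecx", "%ebx", "%edi"]);
arg_name(N, _NrArgRegs) ->
  erlang:error({not_an_argument_register, N}).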
- -args(Arity) when is_integer(Arity), Arity >= 0 -> - N = erlang:min(Arity, ?X86_NR_ARG_REGS), - args(N-1, []). - -args(I, Rest) when I < 0 -> Rest; -args(I, Rest) -> args(I-1, [arg(I) | Rest]). - -nr_rets() -> ?X86_NR_RET_REGS. - -ret(N) -> - if N < ?X86_NR_RET_REGS -> - case N of - 0 -> ?RET0; - 1 -> ?RET1; - 2 -> ?RET2; - 3 -> ?RET3; - 4 -> ?RET4; - _ -> exit({?MODULE, ret, N}) - end; - true -> - exit({?MODULE, ret, N}) - end. - -%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of -%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1 -call_clobbered() -> - [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? - {?EDX,tagged},{?EDX,untagged}, - {?ECX,tagged},{?ECX,untagged}, - {?EBX,tagged},{?EBX,untagged}, - {?EDI,tagged},{?EDI,untagged} - | ?LIST_ESI_CALL_CLOBBERED] ++ all_x87_pseudos(). - -tailcall_clobbered() -> % tailcall crapola needs two temps - [{?TEMP0,tagged},{?TEMP0,untagged}, - {?TEMP1,tagged},{?TEMP1,untagged}] ++ all_x87_pseudos(). - -all_x87_pseudos() -> - [{0,double}, {1,double}, {2,double}, {3,double}, - {4,double}, {5,double}, {6,double}]. - -live_at_return() -> - [{?ESP,untagged} - ,{?PROC_POINTER,untagged} - ,{?FCALLS,untagged} - ,{?HEAP_LIMIT,untagged} - | ?LIST_HP_LIVE_AT_RETURN - ]. - -alignment() -> 4. - -float_size() -> 8. - -wordsize() -> 4. diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl deleted file mode 100644 index 90edef31f3..0000000000 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ /dev/null @@ -1,334 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% ==================================================================== -%% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008) -%% ==================================================================== - --ifdef(HIPE_AMD64). --define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). --define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --define(X86STR, "amd64"). --else. --define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). --define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --define(X86STR, "x86"). --endif. - --module(?HIPE_X86_SPILL_RESTORE). - --export([spill_restore/2]). - -%% controls which set library is used to keep temp variables. --define(SET_MODULE, ordsets). - -%% Turn on instrumentation. --define(HIPE_INSTRUMENT_COMPILER, true). - --include("../main/hipe.hrl"). --include("../x86/hipe_x86.hrl"). % Added for the definition of #pseudo_call{} --include("../flow/cfg.hrl"). % Added for the definition of #cfg{} - -%% Main function -spill_restore(CFG0, Options) -> - CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), - ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). - -%% Performs the first pass of the algorithm. -%% By working bottom up, introduce the pseudo_spills. 
-firstPass(CFG0) -> - %% get the labels bottom up - Labels = hipe_x86_cfg:postorder(CFG0), - Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - %% spill around the function will be introduced below the move - %% formals, so get all labels except it. - LabelsExceptMoveFormals = lists:sublist(Labels, length(Labels)-1), - %% all work is done by the helper function firstPassHelper - %% saveTree keeps the all newly introduced spills. Keys are the labels. - {CFG1, SaveTree} = firstPassHelper(LabelsExceptMoveFormals, Liveness, CFG0), - case hipe_x86_cfg:reverse_postorder(CFG0) of - [Label1, Label2|_] -> - SaveTreeElement = saveTreeLookup(Label2, SaveTree), - %% FilteredSaveTreeElement is the to be spilled temps around the - %% function call. They are spilled just before move formals. - FilteredSaveTreeElement = [T || T <- SaveTreeElement, temp_is_pseudo(T)], - Block = hipe_x86_cfg:bb(CFG1, Label1), - Code = hipe_bb:code(Block), - %% The following statements are tedious but work ok. - %% Put spills between move formals and the jump code. - %% This disgusting thing is done because spills should be - %% introduced after move formals. - %% Another solution may be to introduce another block. - MoveCodes = lists:sublist(Code, length(Code)-1), - JumpCode = lists:last(Code), - hipe_x86_cfg:bb_add(CFG1, Label1, hipe_bb:mk_bb(MoveCodes ++ [hipe_x86:mk_pseudo_spill(FilteredSaveTreeElement), JumpCode])); - _ -> - CFG1 - end. - -%% helper function of firstPass - -%% processes all labels recursively and decides the spills to be put. -%% spills are introduced before each function call (pseudo_call) as well as -%% global spill is found -firstPassHelper(Labels, Liveness, CFG) -> - firstPassHelper(Labels, Liveness, CFG, gb_trees:empty()). - -firstPassHelper([Label|Labels], Liveness, CFG, SaveTree) -> - LiveOut = from_list(?HIPE_X86_LIVENESS:liveout(Liveness, Label)), - Block = hipe_x86_cfg:bb(CFG, Label), - Code = hipe_bb:code(Block), - Succ = hipe_x86_cfg:succ(CFG, Label), - IntersectedSaveList = findIntersectedSaveList(Succ,SaveTree), - %% call firstPassDoBlock which will give the updated block - %% code(including spills) as well as Intersected Save List which - %% should be passed above blocks - {_,NewIntersectedList,NewCode} = - firstPassDoBlock(Code, LiveOut,IntersectedSaveList), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - SizeOfSet = setSize(NewIntersectedList), - %% if the Intersected Save List is not empty, insert it in the save tree. - if SizeOfSet =/= 0 -> - UpdatedSaveTree = gb_trees:insert(Label, NewIntersectedList, SaveTree), - firstPassHelper(Labels, Liveness, NewCFG, UpdatedSaveTree); - true -> - firstPassHelper(Labels, Liveness, NewCFG, SaveTree) - end; -firstPassHelper([], _, CFG, SaveTree) -> - {CFG, SaveTree}. - -%% handle each instruction in the block bottom up -firstPassDoBlock(Insts, LiveOut, IntersectedSaveList) -> - lists:foldr(fun firstPassDoInsn/2, {LiveOut,IntersectedSaveList,[]}, Insts). - -firstPassDoInsn(I, {LiveOut,IntersectedSaveList,PrevInsts}) -> - case I of - #pseudo_call{} -> - do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}); - _ -> % other instructions - DefinedList = from_list( ?HIPE_X86_LIVENESS:defines(I)), - UsedList = from_list(?HIPE_X86_LIVENESS:uses(I)), - NewLiveOut = subtract(union(LiveOut, UsedList), DefinedList), - NewIntersectedSaveList = subtract(IntersectedSaveList, DefinedList), - {NewLiveOut, NewIntersectedSaveList, [I|PrevInsts]} - end. 
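firstPassDoInsn above walks each block backwards: for an ordinary instruction the uses are added to the live set, the defines are removed from it, and defined temps also drop out of the pending save list. With ordsets (the library behind the ?SET_MODULE wrappers) one such step can be sketched as follows (module and argument names invented):

%% Hypothetical single backward step; LiveOut, SaveList, Uses and Defs
%% are ordsets of temps, mirroring the formulation in firstPassDoInsn/2.
-module(liveness_step_sketch).
-export([step/4]).

step(LiveOut, SaveList, Uses, Defs) ->
  NewLiveOut  = ordsets:subtract(ordsets:union(LiveOut, Uses), Defs),
  NewSaveList = ordsets:subtract(SaveList, Defs),
  {NewLiveOut, NewSaveList}.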
- -do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}) -> - LiveTemps = [Temp || Temp <- to_list(LiveOut), temp_is_pseudo(Temp)], - NewIntersectedSaveList = union(IntersectedSaveList, LiveOut), - {LiveOut, NewIntersectedSaveList, [hipe_x86:mk_pseudo_spill(LiveTemps), I | PrevInsts]}. - -findIntersectedSaveList(LabelList, SaveTree) -> - findIntersectedSaveList([saveTreeLookup(Label,SaveTree) || Label <- LabelList]). - -findIntersectedSaveList([]) -> - []; -findIntersectedSaveList([List1]) -> - List1; -findIntersectedSaveList([List1,List2|Rest]) -> - findIntersectedSaveList([intersection(List1, List2)|Rest]). - -saveTreeLookup(Label, SaveTree) -> - case gb_trees:lookup(Label, SaveTree) of - {value, SaveList} -> - SaveList; - _ -> - [] - end. - -%% Performs the second pass of the algorithm. -%% It basically eliminates the unnecessary spills and introduces restores. -%% Works top down -secondPass(CFG0) -> - Labels = hipe_x86_cfg:reverse_postorder(CFG0), - Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - secondPassHelper(Labels,Liveness,CFG0). - -%% helper function of secondPass. - -%% recursively handle all labels given. -secondPassHelper(Labels, Liveness, CFG) -> - secondPassHelper(Labels, Liveness, CFG, gb_trees:empty(), CFG). - -%% AccumulatedCFG stands for the CFG that has restore edges incrementally. -%% UnmodifiedCFG is the CFG created after first pass. - -%% AccumulatedSaveTree is used to eliminate the unnecessary saves. The -%% saves (spills) in above blocks are traversed down (if still live -%% and not redefined) and redundant saves are eliminated in the lower -%% blocks. -%% For memory efficiency, it may be better not to maintain the -%% AccumulatedSaveTree but traverse the tree recursively and pass the -%% save lists to the childs individually. -%% But current approach may be faster even though it needs bigger memory. - -secondPassHelper([Label|RestOfLabels], Liveness, - AccumulatedCFG, AccumulatedSaveTree, UnmodifiedCFG) -> - LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(AccumulatedCFG, Label), - Code = hipe_bb:code(Block), - - %% UnmodifiedCFG is needed for getting the correct predecessors. - %% (i.e. not to get the restore edge blocks) - PredList = hipe_x86_cfg:pred(UnmodifiedCFG, Label), - %% find the spills coming from all the parents by intersecting - InitialAccumulatedSaveList = - findIntersectedSaveList(PredList, AccumulatedSaveTree), - AccumulatedSaveList = - keepLiveVarsInAccumSaveList(InitialAccumulatedSaveList, LiveOut), - - {NewCode, CFGUpdateWithRestores, NewAccumulatedSaveList} = - secondPassDoBlock(Label, Code, AccumulatedCFG, AccumulatedSaveList), - - UpdatedAccumulatedSaveTree = - gb_trees:insert(Label, NewAccumulatedSaveList, AccumulatedSaveTree), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFGUpdateWithRestores, Label, NewBlock), - secondPassHelper(RestOfLabels, Liveness, NewCFG, - UpdatedAccumulatedSaveTree, UnmodifiedCFG); -secondPassHelper([], _, AccumulatedCFG, _, _) -> - AccumulatedCFG. - -secondPassDoBlock(CurrentLabel, Insts, CFG, AccumulatedSaveList) -> - {NewAccumulatedSaveList,NewInsts,_,_,CFGUpdateWithRestores} = - lists:foldl(fun secondPassDoInsn/2, {AccumulatedSaveList,[],[],CurrentLabel,CFG}, Insts), - {NewInsts, CFGUpdateWithRestores, NewAccumulatedSaveList}. 
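findIntersectedSaveList above keeps only the temps that every listed block agrees should be saved, which is how spills from several successors (or predecessors, in the second pass) are merged. A standalone equivalent over ordsets (invented module name), where an empty block list yields an empty save list:

%% Hypothetical fold computing the intersection of a list of ordsets.
-module(save_intersect_sketch).
-export([intersect_all/1]).

intersect_all([]) ->
  [];
intersect_all([First | Rest]) ->
  lists:foldl(fun ordsets:intersection/2, First, Rest).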
- -secondPassDoInsn(I, {AccumulatedSaveList,PrevInsts,SpillList,CurrentLabel,CFG}) -> - case I of - #pseudo_spill{} -> - %% spill variables that are not accumulated from top down - %% (which are not already saved) - VariablesAlreadySaved = [X || {X,_} <- to_list(AccumulatedSaveList)], - VariablesToBeSpilled = I#pseudo_spill.args -- VariablesAlreadySaved, - NewSpillList = [{Temp, hipe_x86:mk_new_temp(Temp#x86_temp.type)} || Temp <- VariablesToBeSpilled], - %% update accumulated saved list by adding the newly spilled variables. - NewAccumulatedSaveList = union(AccumulatedSaveList, from_list(NewSpillList)), - {NewAccumulatedSaveList, PrevInsts ++ secondPassDoPseudoSpill(NewSpillList), NewSpillList, CurrentLabel, CFG}; - #pseudo_call{} -> - {CFGUpdateWithRestores, NewPseudoCall} = - secondPassDoPseudoCall(I, AccumulatedSaveList, CFG), - %% spill list is emptied after use - {AccumulatedSaveList, PrevInsts ++ [NewPseudoCall], CurrentLabel, [], CFGUpdateWithRestores}; - _ -> - %% remove the defined variables from the accumulated save - %% list since they need to be saved again in later occasions. - DefinedList = from_list(?HIPE_X86_LIVENESS:defines(I)), - NewAccumulatedSaveList = removeRedefVarsFromAccumSaveList(AccumulatedSaveList, DefinedList), - {NewAccumulatedSaveList, PrevInsts ++ [I], SpillList, CurrentLabel, CFG} - end. - -%% remove dead vars from accumulated save list so that they are not restored. -keepLiveVarsInAccumSaveList([], _) -> - []; -keepLiveVarsInAccumSaveList([{Var,Temp}|Rest], DefinedList) -> - IsDefined = is_element(Var, DefinedList), - case IsDefined of - true -> [{Var,Temp}|keepLiveVarsInAccumSaveList(Rest, DefinedList)]; - false -> keepLiveVarsInAccumSaveList(Rest, DefinedList) - end. - -%% remove the redefined variables from accumulated save list since -%% they are changed. -removeRedefVarsFromAccumSaveList([], _) -> - []; -removeRedefVarsFromAccumSaveList([{Var,Temp}|Rest], DefinedList) -> - IsDefined = is_element(Var, DefinedList), - case IsDefined of - true -> removeRedefVarsFromAccumSaveList(Rest, DefinedList); - false -> [{Var,Temp}|removeRedefVarsFromAccumSaveList(Rest, DefinedList)] - end. - -%% convert pseudo_spills to move instructions. -secondPassDoPseudoSpill(SpillList) -> - lists:foldl(fun convertPseudoSpillToMov/2, [], SpillList). - -%% if there are variables to be restored, then call addRestoreBlockToEdge to -%% place them in a new block on the edge of the blocks. -secondPassDoPseudoCall(I, RestoreList, CFG) -> - ContLabel = I#pseudo_call.contlab, - SizeOfSet = setSize(RestoreList), - if SizeOfSet =/= 0 -> - addRestoreBlockToEdge(I, ContLabel, CFG, RestoreList); - true -> - {CFG, I} - end. - -%% prepares the moves for the spills. -convertPseudoSpillToMov({Temp, NewTemp}, OtherMoves) -> - OtherMoves ++ [mkMove(Temp, NewTemp)]. - -%% prepares the moves for the restores. -%% Called by addRestoreBlockToEdge while introducing the restores. -convertPseudoRestoreToMov({Temp, NewTemp}, OtherMoves) -> - OtherMoves ++ [mkMove(NewTemp, Temp)]. - -%% makes the move record, special care is taken for doubles. -mkMove(NewTemp,Temp) -> - if Temp#x86_temp.type =:= 'double' -> - hipe_x86:mk_fmove(NewTemp, Temp); - true -> - hipe_x86:mk_move(NewTemp, Temp) - end. - -%% adds a new block (on the edge) that includes introduced restore moves. 
-addRestoreBlockToEdge(PseudoCall, ContLabel, CFG, TempArgsList) -> - NextLabel = hipe_gensym:get_next_label(x86), - NewCode = lists:foldl(fun convertPseudoRestoreToMov/2, [], TempArgsList) ++ [hipe_x86:mk_jmp_label(ContLabel)], - NewBlock = hipe_bb:mk_bb(NewCode), - NewPseudoCall = redirect_pseudo_call(PseudoCall, ContLabel, NextLabel), - NewCFG = hipe_x86_cfg:bb_add(CFG, NextLabel, NewBlock), - {NewCFG, NewPseudoCall}. - -%% used instead of hipe_x86_cfg:redirect_jmp since it does not handle -%% pseudo_call calls. -redirect_pseudo_call(I = #pseudo_call{contlab=ContLabel}, Old, New) -> - case Old =:= ContLabel of - true -> I#pseudo_call{contlab=New}; - false -> I - end. - -temp_is_pseudo(Temp) -> - case hipe_x86:is_temp(Temp) of - true -> not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); - false -> false - end. - -%%--------------------------------------------------------------------- -%% Set operations where the module name is an easily changeable macro -%%--------------------------------------------------------------------- - -union(Set1, Set2) -> - ?SET_MODULE:union(Set1, Set2). - -setSize(Set) -> - ?SET_MODULE:size(Set). - -from_list(List) -> - ?SET_MODULE:from_list(List). - -to_list(Set) -> - ?SET_MODULE:to_list(Set). - -subtract(Set1, Set2) -> - ?SET_MODULE:subtract(Set1, Set2). - -intersection(Set1, Set2) -> - ?SET_MODULE:intersection(Set1, Set2). - -is_element(Element, Set) -> - ?SET_MODULE:is_element(Element, Set). diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl deleted file mode 100644 index 7db3b23d92..0000000000 --- a/lib/hipe/x86/hipe_x86_subst.erl +++ /dev/null @@ -1,112 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - --ifdef(HIPE_AMD64). --define(HIPE_X86_SUBST, hipe_amd64_subst). --else. --define(HIPE_X86_SUBST, hipe_x86_subst). --endif. - --module(?HIPE_X86_SUBST). --export([insn_temps/2, insn_lbls/2]). --include("../x86/hipe_x86.hrl"). - -%% These should be moved to hipe_x86 and exported --type temp() :: #x86_temp{}. --type oper() :: temp() | #x86_imm{} | #x86_mem{}. --type mfarec() :: #x86_mfa{}. --type prim() :: #x86_prim{}. --type funv() :: mfarec() | prim() | temp(). --type label() :: non_neg_integer(). --type insn() :: tuple(). % for now - --type subst_fun() :: fun((temp()) -> temp()). - -%% @doc Maps over the temporaries in an instruction --spec insn_temps(subst_fun(), insn()) -> insn(). 
-insn_temps(SubstTemp, I) -> - O = fun(O) -> oper_temps(SubstTemp, O) end, - case I of - #alu {src=S, dst=D} -> I#alu {src=O(S), dst=O(D)}; - #cmovcc {src=S, dst=D} -> I#cmovcc {src=O(S), dst=O(D)}; - #cmp {src=S, dst=D} -> I#cmp {src=O(S), dst=O(D)}; - #fmove {src=S, dst=D} -> I#fmove {src=O(S), dst=O(D)}; - #fp_binop{src=S, dst=D} -> I#fp_binop{src=O(S), dst=O(D)}; - #imul {src=S, temp=T} -> I#imul {src=O(S), temp=O(T)}; - #lea {mem=M, temp=T} -> I#lea {mem=O(M), temp=O(T)}; - #move {src=S, dst=D} -> I#move {src=O(S), dst=O(D)}; - #movsx {src=S, dst=D} -> I#movsx {src=O(S), dst=O(D)}; - #movzx {src=S, dst=D} -> I#movzx {src=O(S), dst=O(D)}; - #shift {src=S, dst=D} -> I#shift {src=O(S), dst=O(D)}; - #test {src=S, dst=D} -> I#test {src=O(S), dst=O(D)}; - #fp_unop{arg=[]} -> I; - #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; - #move64 {dst=D} -> I#move64 {dst=O(D)}; - #push {src=S} -> I#push {src=O(S)}; - #pop {dst=D} -> I#pop {dst=O(D)}; - #jmp_switch{temp=T, jtab=J} -> - I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; - #pseudo_call{'fun'=F} -> - I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; - #pseudo_spill_fmove{src=S, temp=T, dst=D} -> - I#pseudo_spill_fmove{src=O(S), temp=O(T), dst=O(D)}; - #pseudo_spill_move{src=S, temp=T, dst=D} -> - I#pseudo_spill_move{src=O(S), temp=O(T), dst=O(D)}; - #pseudo_tailcall{'fun'=F, stkargs=Stk} -> - I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), - stkargs=lists:map(O, Stk)}; - #comment{} -> I; - #jmp_label{} -> I; - #pseudo_tailcall_prepare{} -> I; - #pseudo_jcc{} -> I; - #ret{} -> I - end. - --spec oper_temps(subst_fun(), oper()) -> oper(). -oper_temps(_SubstTemp, I=#x86_imm{}) -> I; -oper_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T); -oper_temps(SubstTemp, M=#x86_mem{base=Base,off=Off}) -> - M#x86_mem{base=oper_temps(SubstTemp, Base), - off =oper_temps(SubstTemp, Off)}. - --spec funv_temps(subst_fun(), funv()) -> funv(). -funv_temps(_SubstTemp, MFA=#x86_mfa{}) -> MFA; -funv_temps(_SubstTemp, P=#x86_prim{}) -> P; -funv_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). - -%% TODO: Undo this ifdeffery at the source (make jtab an #x86_imm{} on x86) --ifdef(HIPE_AMD64). -jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). --else. -jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. --endif. - --type lbl_subst_fun() :: fun((label()) -> label()). - -%% @doc Maps over the branch targets in an instruction --spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). -insn_lbls(SubstLbl, I) -> - case I of - #jmp_label{label=Label} -> - I#jmp_label{label=SubstLbl(Label)}; - #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> - I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), - contlab=SubstLbl(Contlab)}; - #pseudo_jcc{true_label=T, false_label=F} -> - I#pseudo_jcc{true_label=SubstLbl(T), false_label=SubstLbl(F)} - end. - -sdesc_lbls(_SubstLbl, Sdesc=#x86_sdesc{exnlab=[]}) -> Sdesc; -sdesc_lbls(SubstLbl, Sdesc=#x86_sdesc{exnlab=Exnlab}) -> - Sdesc#x86_sdesc{exnlab=SubstLbl(Exnlab)}. diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl deleted file mode 100644 index 85268ab85a..0000000000 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ /dev/null @@ -1,629 +0,0 @@ -%% -*- erlang-indent-level: 2 -*- -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% Floating point handling. - --ifdef(HIPE_AMD64). --define(HIPE_X86_X87, hipe_amd64_x87). --define(HIPE_X86_DEFUSE, hipe_amd64_defuse). --define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_REGISTERS, hipe_amd64_registers). --else. --define(HIPE_X86_X87, hipe_x86_x87). --define(HIPE_X86_DEFUSE, hipe_x86_defuse). --define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_REGISTERS, hipe_x86_registers). --endif. - --module(?HIPE_X86_X87). - --export([map/1]). - --include("../x86/hipe_x86.hrl"). --include("../main/hipe.hrl"). - -%%---------------------------------------------------------------------- - -map(CFG0) -> - %% hipe_x86_cfg:pp(CFG0), - Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - StartLabel = hipe_x86_cfg:start_label(CFG0), - {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - CFG1. - -do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> - case gb_trees:lookup(Lbl, BlockMap) of - none -> - %% This block has not been visited. - Block = hipe_x86_cfg:bb(CFG, Lbl), - Succ = hipe_x86_cfg:succ(CFG, Lbl), - NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap), - LiveOut = [X || X <- ?HIPE_X86_LIVENESS:liveout(Liveness, Lbl), - is_fp(X)], - Code = hipe_bb:code(Block), - ReverseCode = lists:reverse(Code), - {NewCode0, NewMap, NewBlockMap1, Dirty} = - do_block(ReverseCode, LiveOut, Map, NewBlockMap), - NewCFG1 = - case Dirty of - true -> - NewBlock = hipe_bb:code_update(Block, NewCode0), - hipe_x86_cfg:bb_add(CFG, Lbl, NewBlock); - _ -> - CFG - end, - {NewCFG3, NewBlockMap2} = - do_blocks(Lbl, Succ, NewCFG1, Liveness, NewMap, NewBlockMap1), - do_blocks(Pred, Lbls, NewCFG3, Liveness, Map, NewBlockMap2); - {value, fail} -> - %% Don't have to follow this trace any longer. - do_blocks(Pred,Lbls, CFG, Liveness, Map, BlockMap); - {value, ExistingMap} -> - %% This block belongs to a trace already handled. - %% The Map coming in must be identical to the one used - %% when the block was processed. - if ExistingMap =:= Map -> - do_blocks(Pred, Lbls, CFG, Liveness, Map, BlockMap); - true -> - NewCFG = do_shuffle(Pred, Lbl, CFG, Map, ExistingMap), - do_blocks(Pred, Lbls, NewCFG, Liveness, Map, BlockMap) - end - end; -do_blocks(_Pred, [], CFG, _Liveness, _Map, BlockMap) -> - {CFG, BlockMap}. - -do_block(Ins, LiveOut, Map, BlockMap) -> - do_block(Ins, LiveOut, Map, BlockMap, false). 
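do_blocks above memoises, per label, the simulated x87 stack (Map) in force when the block was first reached; if a later trace arrives with a different stack, do_shuffle inserts a block that permutes the stack into the recorded shape. A rough gb_trees model of that bookkeeping (names invented; the real code additionally handles a fail marker for traces that need not be followed):

%% Hypothetical per-label check: first visit records the layout, a
%% matching revisit is a no-op, a mismatch signals that a shuffle block
%% must be placed on the edge.
-module(blockmap_sketch).
-export([visit/3]).

visit(Lbl, StackMap, BlockMap) ->
  case gb_trees:lookup(Lbl, BlockMap) of
    none ->
      {first_visit, gb_trees:insert(Lbl, StackMap, BlockMap)};
    {value, StackMap} ->                    % identical layout: nothing to do
      {ok, BlockMap};
    {value, _Different} ->
      {shuffle, BlockMap}
  end.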
- -do_block([I|Is], LiveOut, Map, BlockMap, Dirty) -> - case handle_insn(I) of - false -> - {NewCode, NewMap, NewBlockMap, NewDirty} = - do_block(Is, LiveOut, Map, BlockMap, Dirty), - {NewCode++[I], NewMap, NewBlockMap, NewDirty}; - true -> - Def = ordsets:from_list(?HIPE_X86_DEFUSE:insn_def(I)), - Use = ordsets:from_list(?HIPE_X86_DEFUSE:insn_use(I)), - NewLiveOut = - ordsets:filter(fun(X) -> is_fp(X) end, - ordsets:union(ordsets:subtract(LiveOut, Def), Use)), - {NewCode, NewMap, NewBlockMap, NewDirty} = - do_block(Is, NewLiveOut, Map, BlockMap, Dirty), - {NewI, NewMap1, NewBlockMap1} = - do_insn(I, LiveOut, NewMap, NewBlockMap), - NewDirty1 = - if NewDirty =:= true -> true; - NewI =:= [I] -> false; - true -> true - end, - {NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1} - end; -do_block([], LiveOut, Map, BlockMap, Dirty) -> - case [X || X <- Map, not lists:member(X, LiveOut)] of - [] -> - {[], Map, BlockMap, Dirty}; - Pop -> - {PopIns, NewMap} = pop_dead(Pop, Map), - {PopIns, NewMap, BlockMap, true} - end. - -do_shuffle(Pred, Lbl, CFG, OldMap, NewMap) -> - %% First make sure both maps have the same members. - Push = NewMap -- OldMap, - Pop = OldMap -- NewMap, - {PopInsn, OldMap0} = pop_dead(Pop, OldMap), - {PushInsn, OldMap1} = - case Push of - []-> {[], OldMap0}; - _-> push_list(lists:reverse(Push), OldMap0) - end, - Code = - if OldMap1 =:= NewMap -> - %% It was enough to push and pop. - PopInsn ++ PushInsn ++ [hipe_x86:mk_jmp_label(Lbl)]; - true -> - %% Shuffle the positions so the maps match - Cycles = find_swap_cycles(OldMap1, NewMap), - SwitchInsns = do_switching(Cycles), - PopInsn ++ PushInsn ++ SwitchInsns ++ [hipe_x86:mk_jmp_label(Lbl)] - end, - %% Update the CFG. - NewLabel = hipe_gensym:get_next_label(x86), - NewCFG1 = hipe_x86_cfg:bb_add(CFG, NewLabel, hipe_bb:mk_bb(Code)), - OldPred = hipe_x86_cfg:bb(NewCFG1, Pred), - PredCode = hipe_bb:code(OldPred), - NewLast = redirect(lists:last(PredCode), Lbl,NewLabel), - NewPredCode = butlast(PredCode) ++ [NewLast], - NewPredBB = hipe_bb:code_update(OldPred, NewPredCode), - hipe_x86_cfg:bb_add(NewCFG1, Pred, NewPredBB). - -find_swap_cycles(OldMap, NewMap) -> - Moves = [get_pos(X, NewMap, 1) || X <- OldMap], - find_swap_cycles(OldMap, Moves, lists:seq(1, length(OldMap)), []). - -find_swap_cycles(OldMap, Moves, NotHandled, Cycles) -> - if NotHandled =:= [] -> Cycles; - true -> - Cycle = find_cycle(Moves, [hd(NotHandled)]), - NewNotHandled = NotHandled -- Cycle, - case lists:member(1, Cycle) of - true -> - %% The cycle that contains the first element on the stack - %% must be processed last. - NewCycle = format_cycle(Cycle), - find_swap_cycles(OldMap, Moves, NewNotHandled, Cycles ++ [NewCycle]); - _ -> - NewCycle = format_cycle(Cycle), - find_swap_cycles(OldMap, Moves, NewNotHandled, [NewCycle|Cycles]) - end - end. - -find_cycle(Moves, Cycle) -> - To = lists:nth(lists:last(Cycle), Moves), - if To =:= hd(Cycle) -> Cycle; - true -> find_cycle(Moves, Cycle ++ [To]) - end. - -format_cycle(C) -> - %% The position numbers start with 1 - should start with 0. - %% If position 0 is in the cycle it will be permuted until - %% the 0 is first and then remove it. - %% Otherwise the first element is also added last. - NewCycle = [X - 1 || X <- C], - case lists:member(0, NewCycle) of - true -> format_cycle(NewCycle, []); - _ -> NewCycle ++ [hd(NewCycle)] - end. - -format_cycle([H|T], NewCycle) -> - case H of - 0 -> T ++ NewCycle; - _ -> format_cycle(T, NewCycle ++ [H]) - end. - -do_switching(Cycles) -> - do_switching(Cycles, []). 
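do_shuffle above reconciles two stack layouts by popping, pushing and then emitting fxch instructions for each swap cycle, with the compile-time Map mirroring the runtime x87 stack. A toy list model of what a single fxch st(N) does to such a map, with position 0 as the stack top (module name invented):

%% Hypothetical list model of fxch: swap the top of the stack with the
%% element at position N, e.g. fxch(2, [a,b,c,d]) =:= [c,b,a,d].
-module(fxch_sketch).
-export([fxch/2]).

fxch(0, Stack) ->
  Stack;
fxch(N, [Top | Rest]) when N > 0 ->
  {Front, [Nth | Back]} = lists:split(N - 1, Rest),
  [Nth | Front ++ [Top | Back]].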
- -do_switching([C|Cycles], Insns) -> - NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X <- C], - do_switching(Cycles, NewInsns); -do_switching([], Insns) -> - Insns. - -redirect(Insn, OldLbl, NewLbl) -> - case Insn of - #pseudo_call{contlab = ContLab, sdesc = SDesc} -> - #x86_sdesc{exnlab = ExnLab} = SDesc, - if ContLab =:= OldLbl -> - Insn#pseudo_call{contlab = NewLbl}; - ExnLab =:= OldLbl -> - Insn#pseudo_call{sdesc = SDesc#x86_sdesc{exnlab = NewLbl}} - end; - _ -> - hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl) - end. - -do_insn(I, LiveOut, Map, BlockMap) -> - case I of - #pseudo_call{'fun' = Fun, contlab = ContLab} -> - case Fun of - %% We don't want to spill anything if an exception has been thrown. - {_, 'handle_fp_exception'} -> - NewBlockMap = - case gb_trees:lookup(ContLab, BlockMap) of - {value, fail} -> - BlockMap; - {value, _} -> - gb_trees:update(ContLab, fail, BlockMap); - none -> - gb_trees:insert(ContLab, fail, BlockMap) - end, - {[I], [], NewBlockMap}; - _ -> - {pop_all(Map)++[I],[],BlockMap} - end; - #fp_unop{op = 'fwait'} -> - Store = pseudo_pop(Map), - {Store ++ [I], Map, BlockMap}; - #fp_unop{} -> - {NewI, NewMap} = do_fp_unop(I, LiveOut, Map), - {NewI, NewMap, BlockMap}; - #fp_binop{} -> - {NewI, NewMap} = do_fp_binop(I, LiveOut, Map), - {NewI, NewMap, BlockMap}; - #fmove{src = Src, dst = Dst} -> - if Src =:= Dst -> - %% Don't need to keep this instruction! - %% However, we may need to pop from the stack. - case is_liveOut(Src, LiveOut) of - true-> - {[], Map, BlockMap}; - false -> - {SwitchInsn, NewMap0} = switch_first(Dst, Map), - NewMap = pop(NewMap0), - {SwitchInsn++pop_insn(), NewMap, BlockMap} - end; - true -> - {NewI, NewMap} = do_fmove(Src, Dst, LiveOut, Map), - {NewI, NewMap, BlockMap} - end; - _ -> - {[I], Map, BlockMap} - end. - -do_fmove(Src, Dst = #x86_mem{}, LiveOut, Map) -> - %% Storing a float from the stack into memory. - {SwitchInsn, NewMap0} = switch_first(Src, Map), - case is_liveOut(Src, LiveOut) of - true -> - {SwitchInsn ++ [hipe_x86:mk_fp_unop(fst, Dst)], NewMap0}; - _ -> - NewMap1 = pop(NewMap0), - {SwitchInsn ++ [hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1} - end; -do_fmove(Src = #x86_mem{}, Dst, _LiveOut, Map) -> - %% Pushing a float into the stack. - case in_map(Dst, Map) of - true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}}); - _ -> ok - end, - {PushOp, [_|NewMap0]} = push(Src, Map), - %% We want Dst in the map rather than Src. - NewMap = [Dst|NewMap0], - {PushOp, NewMap}; -do_fmove(Src, Dst, LiveOut, Map) -> - %% Copying a float that either is spilled or is on the fp stack, - %% or converting a fixnum in a temp to a float on the fp stack. - case in_map(Dst, Map) of - true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}}); - _ -> ok - end, - IsConv = - case Src of - #x86_temp{type = Type} -> Type =/= 'double'; - _ -> false - end, - case IsConv of - true -> - do_conv(Src, Dst, Map); - _ -> - %% Copying. - case {is_liveOut(Src, LiveOut), in_map(Src, Map)} of - {false, true} -> - %% Just remap Dst to Src - {Head, [_|T]} = lists:splitwith(fun(X) -> X =/= Src end, Map), - {[], Head ++ [Dst|T]}; - _ -> - {PushOp, [_|NewMap0]} = push(Src, Map), - %% We want Dst in the map rather than Src. - NewMap = [Dst|NewMap0], - {PushOp, NewMap} - end - end. - -do_conv(Src = #x86_temp{reg = Reg}, Dst, Map) -> - %% Converting. Src must not be a register, so we - %% might have to put it into memory in between. 
- {Move, NewSrc} = - case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of - true -> - Temp = hipe_x86:mk_new_temp('untagged'), - {[hipe_x86:mk_move(Src,Temp)], Temp}; - _ -> - {[], Src} - end, - {PushOp, [_|NewMap0]} = push(NewSrc, Map), - %% We want Dst in the map rather than NewSrc. - NewMap = [Dst|NewMap0], - case length(PushOp) of - 1 -> %% No popping of memory object on fpstack - {Move ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}; - _ -> %% H contains pop instructions. Must be kept! - Head = butlast(PushOp), - {Move ++ Head ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap} - end. - -do_fp_unop(I = #fp_unop{arg = Arg, op = fchs}, Liveout, Map) -> - %% This is fchs, the only operation without a - %% popping version. Needs special handling. - case is_liveOut(Arg, Liveout) of - true -> - {SwitchIns, NewMap} = switch_first(Arg, Map), - {SwitchIns ++ [I#fp_unop{arg = []}], NewMap}; - false -> - %% Don't need to keep this instruction! - %% However, we may need to pop Src from the stack. - case in_map(Arg, Map) of - true -> - {SwitchInsn, NewMap0} = switch_first(Arg, Map), - NewMap = pop(NewMap0), - {SwitchInsn ++ pop_insn(), NewMap}; - _ -> - {[],Map} - end - end. - -do_fp_binop(#fp_binop{src = Src, dst = Dst, op = Op}, LiveOut, Map) -> - case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of - {true, true} -> - keep_both(Op, Src, Dst, Map); - {true, false} -> - keep_src(Op, Src, Dst, Map); - {false, true} -> - keep_dst(Op, Src, Dst, Map); - {false, false} -> - %% Both Dst and Src are popped. - keep_none(Op, Src, Dst, Map) - end. - -keep_both(Op, Src, Dst, Map) -> - %% Keep both Dst and Src if it is there. - {SwitchInsn, NewMap} = switch_first(Dst, Map), - NewSrc = get_new_opnd(Src, NewMap), - Insn = format_fp_binop(Op, NewSrc, mk_st(0)), - {SwitchInsn++Insn, NewMap}. - -keep_src(Op, Src, Dst, Map) -> - %% Pop Dst but keep Src in stack if it is there. - {SwitchInsn, NewMap0} = switch_first(Dst, Map), - NewSrc = get_new_opnd(Src, NewMap0), - NewMap = pop(NewMap0), - Insn = format_fp_binop(Op, NewSrc, mk_st(0)), - {SwitchInsn ++ Insn ++ pop_insn(), NewMap}. - -keep_dst(Op, Src, Dst, Map) -> - %% Keep Dst but pop Src. - %% Dst must be in stack. - DstInMap = in_map(Dst, Map), - SrcInMap = in_map(Src, Map), - case SrcInMap of - true -> - case DstInMap of - true -> - %% Src must be popped. If Dst is on top of the stack we can - %% alter the operation rather than shuffle the stack. - {SwitchInsn, Insn, NewMap} = - if hd(Map) =:= Dst -> - NewOp = mk_op_pop(reverse_op(Op)), - NewDst = get_new_opnd(Src, Map), - TmpMap = lists:map(fun(X) -> - if X =:= Src -> Dst; true -> X end - end, Map), - {[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)}; - true -> - {SwitchInsn1, NewMap0} = switch_first(Src, Map), - NewDst = get_new_opnd(Dst,NewMap0), - NewOp = mk_op_pop(Op), - {SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)} - end, - {SwitchInsn ++ Insn, NewMap}; - _ -> - %% Src is on the stack, but Dst isn't. Use memory command to avoid - %% unnecessary loading instructions. - {SwitchInsn, NewMap0} = switch_first(Src, Map), - NewOp = reverse_op(Op), - NewMap = [Dst] ++ tl(NewMap0), - Insn = format_fp_binop(NewOp, Dst, mk_st(0)), - {SwitchInsn ++ Insn, NewMap} - end; - _ -> - %% Src isn't in the map so it doesn't have to be popped. - {SwitchInsn, NewMap} = switch_first(Dst, Map), - {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}], NewMap} - end. - -keep_none(Op, Src, Dst, Map) -> - %% Dst must be on stack. 
- {PushInsn, NewMap0} = - case in_map(Dst, Map) of - true -> {[], Map}; - _ -> push(Dst, Map) - end, - case in_map(Src, NewMap0) of - true -> - %% Src must be popped. - {SwitchInsn1, NewMap1} = switch_first(Src, NewMap0), - NewOp = mk_op_pop(Op), - NewDst = get_new_opnd(Dst,NewMap1), - NewMap2 = pop(NewMap1), - %% Then Dst has to be popped. - {PopInsn, NewMap} = pop_member(Dst, NewMap2), - Insn = format_fp_binop(NewOp, mk_st(0), NewDst), - {PushInsn ++ SwitchInsn1 ++ Insn ++ PopInsn, NewMap}; - _ -> - %% Src isn't in the map so it doesn't have to be popped. - {SwitchInsn, NewMap1} = switch_first(Dst, NewMap0), - NewMap = pop(NewMap1), - {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}] ++ pop_insn(), NewMap} - end. - -format_fp_binop(Op, Src = #x86_temp{}, Dst = #x86_fpreg{reg = Reg}) -> - %% Handle that st(0) is sometimes implicit. - if Reg =:= 0 -> [hipe_x86:mk_fp_unop(Op, Src)]; - true -> [hipe_x86:mk_fp_binop(Op, Src, Dst)] - end; -format_fp_binop(Op, Src, Dst) -> - [hipe_x86:mk_fp_binop(Op, Src, Dst)]. - -in_map(X, Map) -> - lists:member(X, Map). - -push_list(L, Map) -> - push_list(L, Map, []). -push_list([H|T], Map, Acc) -> - {Insn, NewMap} = push(H,Map), - push_list(T, NewMap, Acc++Insn); -push_list([], Map, Acc) -> - {Acc, Map}. - -push(X, Map0) -> - {PopInsn, Map} = - if length(Map0) > 7 -> pop_a_temp(Map0); - true -> {[], Map0} - end, - NewX = get_new_opnd(X,Map), - NewMap = [X | Map], - PushOp = [hipe_x86:mk_fp_unop(fld, NewX)], - {PopInsn ++ PushOp, NewMap}. - -pop([_|Map]) -> - Map. - -pop_insn() -> - [hipe_x86:mk_fp_unop('fstp',mk_st(0))]. - -pop_dead(Dead, Map) -> - Dead0 = [X || X <- Map, lists:member(X,Dead)], - pop_dead(Dead0, Map, []). - -pop_dead([D|Dead], Map, Code) -> - {I, NewMap0} = switch_first(D, Map), - NewMap = pop(NewMap0), - Store = case D of - #x86_temp{} -> [hipe_x86:mk_fp_unop('fstp', D)]; - _ -> pop_insn() - end, - pop_dead(Dead, NewMap, Code++I++Store); -pop_dead([], Map, Code) -> - {Code,Map}. - -pop_all(Map) -> - {Code, _} = pop_dead(Map, Map), - Code. - -pop_member(Member, Map) -> - {Head,[_|T]} = lists:splitwith(fun(X)-> X =/= Member end, Map), - {[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))], - Head++T}. - -pop_a_temp(Map) -> - Temp = find_a_temp(Map), - {SwitchInsn, NewMap0} = switch_first(Temp, Map), - NewMap = pop(NewMap0), - {SwitchInsn ++ [hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}. - -find_a_temp([H = #x86_temp{}|_]) -> - H; -find_a_temp([_|T]) -> - find_a_temp(T); -find_a_temp([]) -> - ?EXIT({noTempOnFPStack,{}}). - -switch_first(X, Map = [H|_]) -> - Pos = get_pos(X, Map, 0), - case Pos of - 0 -> - {[], Map}; - notFound -> - push(X, Map); - _ -> - {[_|Head], [_|Tail]} = lists:splitwith(fun(Y)-> Y =/= X end, Map), - NewMap = [X|Head] ++ [H|Tail], - Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)), - {[Ins], NewMap} - end; -switch_first(X, Map) -> - push(X, Map). - -get_pos(X, [H|T], Pos) -> - if X =:= H -> Pos; - true -> get_pos(X, T, Pos+1) - end; -get_pos(_, [], _) -> - notFound. - -get_new_opnd(X, Map) -> - I = get_pos(X, Map, 0), - case I of - notFound -> - %% The operand is probably a spilled float. - X; - _ -> - mk_st(I) - end. - -is_fp(#x86_fpreg{}) -> - true; -is_fp(#x86_mem{type = Type}) -> - Type =:= 'double'; -is_fp(#x86_temp{type = Type}) -> - Type =:= 'double'. - -handle_insn(I) -> - case I of - #fmove{} -> true; - #fp_unop{} -> true; - #fp_binop{} -> true; - #pseudo_call{} ->true; - %% #ret{} -> true; - _ -> false - end. - -is_liveOut(X, LiveOut) -> - ordsets:is_element(X, LiveOut). 
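push above also guards against hardware stack overflow: the x87 has only eight floating point registers, so when the simulated stack already holds eight entries a temp is first stored back (pop_a_temp emits an fstp) before the new fld. A compressed model of that guard (invented names; the real code emits hipe_x86 instruction records):

%% Hypothetical depth-limited push onto the simulated fp stack. Evict is
%% a fun returning {StoreCode, SmallerStack}, playing the role of
%% pop_a_temp/1.
-module(fpstack_push_sketch).
-export([push/3]).

push(X, Stack, Evict) ->
  {Pre, Stack1} =
    case length(Stack) > 7 of
      true  -> Evict(Stack);                % make room by spilling a temp
      false -> {[], Stack}
    end,
  {Pre ++ [{fld, X}], [X | Stack1]}.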
- -mk_st(X) -> - hipe_x86:mk_fpreg(X, false). - -reverse_op(Op) -> - case Op of - 'fsub' -> 'fsubr'; - 'fdiv' -> 'fdivr'; - 'fsubr'-> 'fsub'; - 'fdivr' -> 'fdiv'; - _ -> Op - end. - -mk_op_pop(Op) -> - case Op of - 'fadd'-> 'faddp'; - 'fdiv' -> 'fdivp'; - 'fdivr' -> 'fdivrp'; - 'fmul' -> 'fmulp'; - 'fsub' -> 'fsubp'; - 'fsubr' -> 'fsubrp'; - _ -> ?EXIT({operandHasNoPopVariant,{Op}}) - end. - -butlast([X|Xs]) -> butlast(Xs,X). - -butlast([],_) -> []; -butlast([X|Xs],Y) -> [Y|butlast(Xs,X)]. - -%%pp_insn(Op, Src, Dst) -> -%% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]). - -%%pp([I|Ins]) -> -%% hipe_x86_pp:pp_insn(I), -%% pp(Ins); -%%pp([]) -> -%% []. - -pseudo_pop(Map) when length(Map) > 0 -> - Dst = hipe_x86:mk_new_temp('double'), - pseudo_pop(Dst, length(Map), []); -pseudo_pop(_) -> - []. - -pseudo_pop(Dst, St, Acc) when St > 1 -> - %% Store all members of the stack to a single temporary to force - %% any floating point overflow exceptions to occur even though we - %% don't have overflow for the extended double precision in the x87. - pseudo_pop(Dst, St-1, - [hipe_x86:mk_fp_unop('fxch', mk_st(St-1)), - hipe_x86:mk_fp_unop('fst', Dst), - hipe_x86:mk_fp_unop('fxch', mk_st(St-1)) - |Acc]); -pseudo_pop(Dst, _St, Acc) -> - [hipe_x86:mk_fp_unop('fst', Dst)|Acc]. |
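pseudo_pop above stores every slot of the simulated stack to a single scratch temp before an fwait, so that pending floating point exceptions are raised; the deeper slots are reached with fxch pairs that leave the stack order unchanged. A simplified mirror that only lists the resulting opcode sequence for a given depth (atoms instead of hipe_x86 instruction records, module name invented):

%% Hypothetical mirror of pseudo_pop/3, e.g.
%% flush(3) =:= [{fst,dst},{fxch,1},{fst,dst},{fxch,1},
%%               {fxch,2},{fst,dst},{fxch,2}]
-module(fp_flush_sketch).
-export([flush/1]).

flush(Depth) when Depth > 0 ->
  flush(Depth, []);
flush(_Depth) ->
  [].

flush(St, Acc) when St > 1 ->
  flush(St - 1, [{fxch, St - 1}, {fst, dst}, {fxch, St - 1} | Acc]);
flush(_St, Acc) ->
  [{fst, dst} | Acc].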