summaryrefslogtreecommitdiff
path: root/gen_ujit_examples.rb
diff options
context:
space:
mode:
Diffstat (limited to 'gen_ujit_examples.rb')
-rw-r--r--gen_ujit_examples.rb108
1 files changed, 108 insertions, 0 deletions
diff --git a/gen_ujit_examples.rb b/gen_ujit_examples.rb
new file mode 100644
index 0000000000..eaacd988b8
--- /dev/null
+++ b/gen_ujit_examples.rb
@@ -0,0 +1,108 @@
# Looks up the numeric value of the YARVINSN_ujit_call_example enumerator by
# scanning the output of `dwarfdump` run against vm.o.
#
# @return [Integer] the value from the first DW_AT_const_value line found
# @raise [RuntimeError] if the output contains no DW_AT_const_value line
def get_example_instruction_id
  # TODO: we could get this from the script that generates vm.inc instead of doing this song and dance
  `dwarfdump --name='YARVINSN_ujit_call_example' vm.o`.each_line do |line|
    # Capture only the digits; the surrounding parentheses are matched but not captured.
    id = line[/DW_AT_const_value\s\((\d+)\)/, 1]
    next unless id

    p [__method__, line] if $DEBUG
    return id.to_i
  end
  raise 'failed to find DW_AT_const_value for YARVINSN_ujit_call_example in dwarfdump output for vm.o'
end
+
# Uses the load command listing printed by `otool -l vm.o` to figure out the
# offset to the start of the content of vm.o.
#
# @return [Integer] the number from the first "fileoff N" line in the output
# @raise [RuntimeError] if the output contains no "fileoff N" line
def get_fileoff
  `otool -l vm.o`.each_line do |line|
    fileoff = line[/fileoff (\d+)/, 1]
    next unless fileoff

    p [__method__, line] if $DEBUG
    return fileoff.to_i
  end
  raise 'failed to find a fileoff entry in the output of `otool -l vm.o`'
end
+
# Finds the hexadecimal value that `nm vm.o` prints for a symbol.
#
# A line matches when it contains a run of hex digits, at least one more
# character, and then ends with +symbol+.
#
# @param symbol [String] symbol name to look for (matched literally)
# @return [Integer] the hex value from the first matching line
# @raise [RuntimeError] if no line of the nm output matches
def get_symbol_offset(symbol)
  # Build the pattern once instead of recompiling it for every line of output.
  pattern = /(\h+).+#{Regexp.escape(symbol)}\Z/
  `nm vm.o`.each_line do |line|
    offset = line[pattern, 1]
    next unless offset

    p [__method__, line] if $DEBUG
    return Integer(offset, 16)
  end
  raise "failed to find symbol #{symbol.inspect} in the output of `nm vm.o`"
end
+
# Reads a signed 64-bit little-endian integer from vm.o.
#
# @param offset [Integer] byte offset into vm.o at which to start reading
# @return [Integer] the decoded signed 64-bit value
# @raise [RuntimeError] if fewer than 8 bytes are available at +offset+
def readint8b(offset)
  bytes = IO.binread('vm.o', 8, offset)
  # binread returns nil at/after EOF and a shorter string near EOF.
  raise "failed to read 8 bytes at offset #{offset} of vm.o" unless bytes && bytes.bytesize == 8

  # 'q<' forces little endian, so the result does not depend on the host machine.
  bytes.unpack('q<').first
end
+
+
# Parses objdump output into [bytes, mnemonic, full_line] triples.
# Lines before the first instruction are skipped; parsing stops after the
# first jmp. A non-instruction line in the middle of the sequence is an error.
def parse_disassembly_lines(disassembly)
  instructions = []
  disassembly.each_line do |line|
    line = line.strip
    match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line)
    if match_data
      bytes = match_data[1]
      mnemonic = match_data[2]
      instructions << [bytes, mnemonic, line]
      break if mnemonic == 'jmp'
    elsif !instructions.empty?
      p line
      raise "expected a continuous sequence of disassembly lines"
    end
  end
  instructions
end

# Returns the instructions up to and including the jmp after checking that
# they are safe to copy: short enough, no rip references, only recognized
# mnemonics, and exactly one jmp and one call.
def select_handler_instructions(instructions)
  jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' }
  raise 'failed to find jmp' unless jmp_idx
  raise 'generated code for example too long' unless jmp_idx < 10

  handler_instructions = instructions[0..jmp_idx]
  if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') }
    raise 'rip reference in example makes copying unsafe'
  end

  acceptable_mnemonics = %w[mov jmp lea call]
  # Reports the first instruction whose mnemonic is not in the allowed list.
  unrecognized = handler_instructions.find { |_, mnemonic, _| !acceptable_mnemonics.include?(mnemonic) }
  if unrecognized
    raise "found a unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}"
  end

  raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1

  call_count = handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' }
  raise 'found multiple call instructions' if call_count > 1
  # The bytes are split around the call, so a handler without one cannot be used.
  raise 'failed to find call' if call_count.zero?

  handler_instructions
end

# Formats hex byte strings such as ["4c", "89"] as "0x4c, 0x89".
def format_c_byte_list(bytes)
  bytes.map { |byte| "0x#{byte}" }.join(', ')
end

# Disassembles up to 50 bytes of vm.o starting at +offset+ with objdump,
# validates the instruction sequence ending at the first jmp, and writes the
# machine code before and after the call instruction to ujit_examples.h as
# two C byte arrays (the bytes of the call itself are not emitted).
#
# @param offset [Integer] start address passed to objdump
# @return [void]
# @raise [RuntimeError] if the disassembly cannot be parsed or the handler
#   fails any of the safety checks (see select_handler_instructions)
def disassemble(offset)
  command = "objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset + 50} -d vm.o"
  puts "Running: #{command}"
  puts "feel free to verify with --reloc"
  disassembly = `#{command}`
  puts disassembly if $DEBUG

  handler_instructions = select_handler_instructions(parse_disassembly_lines(disassembly))
  call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' }

  puts "Disassembly for the example handler:"
  puts handler_instructions.map { |_, _, line| line }

  pre_call_bytes = handler_instructions.take(call_idx).flat_map { |bytes, _, _| bytes.split }
  post_call_bytes = handler_instructions.drop(call_idx + 1).flat_map { |bytes, _, _| bytes.split }

  header = "static const uint8_t ujit_pre_call_bytes[] = { #{format_c_byte_list(pre_call_bytes)} };\n" \
           "static const uint8_t ujit_post_call_bytes[] = { #{format_c_byte_list(post_call_bytes)} };\n"
  File.write("ujit_examples.h", header)

  if $DEBUG
    puts "file:"
    puts File.binread("ujit_examples.h")
  end
end
+
# Script body: find where the example instruction's handler lives in vm.o and
# hand that address to disassemble, printing intermediate values under $DEBUG.
debug_print = ->(value) { p value if $DEBUG }

instruction_id = get_example_instruction_id
fileoff = get_fileoff
tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table')
vm_exec_core_offset = get_symbol_offset('vm_exec_core')
debug_print.call(instruction_id)
debug_print.call(fileoff)
debug_print.call(tc_table_offset.to_s(16))

# Table entries are read as 8-byte integers (see readint8b), hence the stride of 8.
entry_size = 8
offset_to_insn_in_tc_table = fileoff + tc_table_offset + (entry_size * instruction_id)
debug_print.call(offset_to_insn_in_tc_table)

# The value stored in the table is added to the vm_exec_core symbol offset below,
# i.e. it is treated as relative to vm_exec_core.
offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table)
debug_print.call(offset_to_handler_code_from_vm_exec_core)

disassemble(vm_exec_core_offset + offset_to_handler_code_from_vm_exec_core)
+