summaryrefslogtreecommitdiff
path: root/gen_ujit_examples.rb
blob: dc400bc920c4cc1051f38a99972d5aaf86d65c43 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Finds the numeric id of the example instruction by scanning the output of
# dwarfdump, run against vm.o, for the name YARVINSN_ujit_call_example.
#
# Returns the first DW_AT_const_value found in that output, as an Integer.
# Raises RuntimeError when the output contains no DW_AT_const_value line.
def get_example_instruction_id
  # TODO we could get this from the script that generates vm.inc instead of doing this song and dance
  `dwarfdump --name='YARVINSN_ujit_call_example' vm.o`.each_line do |line|
    # Capture only the digits; the closing parenthesis stays outside the group.
    if (id = line[/DW_AT_const_value\s\((\d+)\)/, 1])
      p [__method__, line] if $DEBUG
      return id.to_i
    end
  end
  raise "could not find DW_AT_const_value for YARVINSN_ujit_call_example in the dwarfdump output for vm.o"
end

# Reads the load commands of vm.o via `otool -l` to figure out the offset to
# the start of the content of vm.o.
#
# Returns the value of the first "fileoff N" entry in the output, as an Integer.
# Raises RuntimeError when the output has no such entry.
def get_fileoff
  `otool -l vm.o`.each_line do |line|
    if (fileoff = line[/fileoff (\d+)/, 1])
      p [__method__, line] if $DEBUG
      return fileoff.to_i
    end
  end
  raise "could not find a fileoff entry in the `otool -l vm.o` output"
end

# Looks up +symbol+ in the output of `nm vm.o`.
#
# symbol - String that must appear at the very end of an nm output line.
#
# Returns the leading hexadecimal field of the first matching line, as an Integer.
# Raises RuntimeError when no line ends with +symbol+.
def get_symbol_offset(symbol)
  # Build the pattern once instead of once per line of nm output.
  pattern = /(\h+).+#{Regexp.escape(symbol)}\Z/
  `nm vm.o`.each_line do |line|
    if (offset = line[pattern, 1])
      p [__method__, line] if $DEBUG
      return Integer(offset, 16)
    end
  end
  raise "could not find symbol #{symbol.inspect} in the `nm vm.o` output"
end

# Reads a signed 64-bit integer stored at byte +offset+ of vm.o.
#
# offset - Integer byte position within the file.
#
# Returns the decoded Integer.
# Raises RuntimeError when fewer than 8 bytes are available at +offset+.
def readint8b(offset)
  bytes = File.binread('vm.o', 8, offset)
  raise "could not read 8 bytes at offset #{offset} of vm.o" if bytes.nil? || bytes.bytesize < 8

  # 'q<' decodes as little endian no matter what the host machine's byte order is.
  bytes.unpack1('q<')
end


# Disassembles 50 bytes of vm.o starting at +offset+ with objdump, checks that
# the instructions up to and including the first jmp can be copied verbatim,
# and writes the bytes before and after the single call instruction to
# ujit_examples.h as two C arrays.
#
# offset - Integer start address handed to objdump.
#
# Raises RuntimeError when the disassembly cannot be parsed or when the
# handler fails one of the copy-safety checks.
def disassemble(offset)
  command = "objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset+50} -d vm.o"
  puts "Running: #{command}"
  puts "feel free to verify with --reloc"
  disassembly = `#{command}`
  puts disassembly if $DEBUG

  handler_instructions = parse_handler_instructions(disassembly)

  puts "Disassembly for the example handler:"
  puts handler_instructions.map { |_, _, line| line }

  call_idx = validate_handler_instructions(handler_instructions)

  # The call instruction itself is left out; only its surroundings are emitted.
  pre_call_bytes = handler_instructions.take(call_idx).flat_map { |bytes, _, _| bytes.split }
  post_call_bytes = handler_instructions.drop(call_idx + 1).flat_map { |bytes, _, _| bytes.split }
  pre_call_list = pre_call_bytes.map { |byte| "0x#{byte}" }.join(', ')
  post_call_list = post_call_bytes.map { |byte| "0x#{byte}" }.join(', ')

  File.write("ujit_examples.h", <<-EOF)
static const uint8_t ujit_pre_call_bytes[] = { #{pre_call_list} };
static const uint8_t ujit_post_call_bytes[] = { #{post_call_list} };
  EOF
  if $DEBUG
    puts "file:"
    puts File.binread("ujit_examples.h")
  end
end

# Extracts [bytes, mnemonic, full_line] triples from objdump output, stopping
# at the first jmp. Lines before the first instruction line are skipped; a
# non-instruction line after that point is an error.
def parse_handler_instructions(disassembly)
  instructions = []
  disassembly.each_line do |line|
    line = line.strip
    match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line)
    if match_data
      mnemonic = match_data[2]
      instructions << [match_data[1], mnemonic, line]
      break if mnemonic == 'jmp'
    elsif !instructions.empty?
      p line
      raise "expected a continuous sequence of disassembly lines"
    end
  end

  jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' }
  raise 'failed to find jmp' unless jmp_idx
  raise 'generated code for example too long' unless jmp_idx < 10

  instructions[0..jmp_idx]
end

# Runs the copy-safety checks on the handler (which ends with its jmp) and
# returns the index of its single call instruction.
def validate_handler_instructions(handler_instructions)
  raise 'rip reference in example makes copying unsafe' if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') }

  acceptable_mnemonics = %w(mov jmp lea call)
  unrecognized = handler_instructions.find { |_, mnemonic, _| !acceptable_mnemonics.include?(mnemonic) }
  raise "found an unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}" if unrecognized
  raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1
  # A jmp encoded in more than 4 bytes is treated as relative.
  raise "the jmp instruction seems to be relative which isn't copiable" if handler_instructions.last[0].split.size > 4
  raise 'found multiple call instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } > 1

  call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' }
  # Without this check a call-free handler fails later with an unrelated TypeError.
  raise 'found no call instruction in the example' unless call_idx

  call_idx
end

# Driver: gather the pieces needed to locate the example instruction's entry
# in vm_exec_core.insns_address_table inside vm.o, read that entry, and hand
# the resulting address to disassemble.
example_insn_id = get_example_instruction_id
content_start = get_fileoff
insns_table_addr = get_symbol_offset('vm_exec_core.insns_address_table')
exec_core_addr = get_symbol_offset('vm_exec_core')
if $DEBUG
  p example_insn_id
  p content_start
  p insns_table_addr.to_s(16)
end

# Entries are indexed by instruction id and read as 8-byte integers.
table_entry_pos = content_start + insns_table_addr + 8 * example_insn_id
p table_entry_pos if $DEBUG

# The entry's value is added to the vm_exec_core address to reach the handler code.
handler_delta = readint8b(table_entry_pos)
p handler_delta if $DEBUG
disassemble(exec_core_addr + handler_delta)