udis86/scripts/ud_itab.py

457 lines
16 KiB
Python

# udis86 - scripts/ud_itab.py
#
# Copyright (c) 2009, 2013 Vivek Thampi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import sys
from typing import TextIO, List
from ud_opcode import UdOpcodeTable, UdOpcodeTables, UdInsnDef
class UdItabGenerator:
    """Emit the udis86 instruction tables as C source (``itab.c`` / ``itab.h``).

    The generator is constructed from a ``UdOpcodeTables`` object and assigns
    a dense integer index to every instruction and every opcode table that
    object reports.  Those indices are what the generated C arrays contain.
    """

    # Operand declaration name -> [operand type constant, operand size constant].
    # Each entry is emitted as an ``O_<name>`` macro by generate_itab_source().
    OperandDict = {
        "Av": ["OP_A", "SZ_V"],
        "B": ["OP_B", "SZ_DQ"],
        "BM": ["OP_BM", "SZ_DQ"],
        "BMqR": ["OP_BMR", "SZ_QO"],
        "BMdqR": ["OP_BMR", "SZ_DQO"],
        "E": ["OP_E", "SZ_NA"],
        "Eb": ["OP_E", "SZ_B"],
        "Ew": ["OP_E", "SZ_W"],
        "Ev": ["OP_E", "SZ_V"],
        "Ed": ["OP_E", "SZ_D"],
        "Ey": ["OP_E", "SZ_Y"],
        "Eq": ["OP_E", "SZ_Q"],
        "Ez": ["OP_E", "SZ_Z"],
        "Erdq": ["OP_E", "SZ_RDQ"],
        "Fv": ["OP_F", "SZ_V"],
        "G": ["OP_G", "SZ_NA"],
        "Gb": ["OP_G", "SZ_B"],
        "Gw": ["OP_G", "SZ_W"],
        "Gv": ["OP_G", "SZ_V"],
        "Gy": ["OP_G", "SZ_Y"],
        "Gd": ["OP_G", "SZ_D"],
        "Gq": ["OP_G", "SZ_Q"],
        "Gz": ["OP_G", "SZ_Z"],
        "M": ["OP_M", "SZ_NA"],
        "Mb": ["OP_M", "SZ_B"],
        "Mw": ["OP_M", "SZ_W"],
        "Ms": ["OP_M", "SZ_W"],
        "Md": ["OP_M", "SZ_D"],
        "Mq": ["OP_M", "SZ_Q"],
        "Mdq": ["OP_M", "SZ_DQ"],
        "Mqq": ["OP_M", "SZ_QQ"],
        "Mrdq": ["OP_M", "SZ_RDQ"],
        "Mv": ["OP_M", "SZ_V"],
        "Mx": ["OP_M", "SZ_X"],
        "Mt": ["OP_M", "SZ_T"],
        "Mo": ["OP_M", "SZ_O"],
        "MbRd": ["OP_MR", "SZ_BD"],
        "MbRv": ["OP_MR", "SZ_BV"],
        "MwRv": ["OP_MR", "SZ_WV"],
        "MwRd": ["OP_MR", "SZ_WD"],
        "MwRy": ["OP_MR", "SZ_WY"],
        "MdRy": ["OP_MR", "SZ_DY"],
        "Kb": ["OP_K", "SZ_B"],
        "Kw": ["OP_K", "SZ_W"],
        "Kd": ["OP_K", "SZ_D"],
        "Kq": ["OP_K", "SZ_Q"],
        "KMb": ["OP_KM", "SZ_B"],
        "KMw": ["OP_KM", "SZ_W"],
        "KMd": ["OP_KM", "SZ_D"],
        "KMq": ["OP_KM", "SZ_Q"],
        "KHb": ["OP_KH", "SZ_B"],
        "KHw": ["OP_KH", "SZ_W"],
        "KHd": ["OP_KH", "SZ_D"],
        "KHq": ["OP_KH", "SZ_Q"],
        "I1": ["OP_I1", "SZ_NA"],
        "I3": ["OP_I3", "SZ_NA"],
        "Ib": ["OP_I", "SZ_B"],
        "Id": ["OP_I", "SZ_D"],
        "Iw": ["OP_I", "SZ_W"],
        "Iv": ["OP_I", "SZ_V"],
        "Iz": ["OP_I", "SZ_Z"],
        "sIb": ["OP_sI", "SZ_B"],
        "sIz": ["OP_sI", "SZ_Z"],
        "sIv": ["OP_sI", "SZ_V"],
        "Jv": ["OP_J", "SZ_V"],
        "Jz": ["OP_J", "SZ_Z"],
        "Jb": ["OP_J", "SZ_B"],
        "R": ["OP_R", "SZ_RDQ"],
        "Rv": ["OP_R", "SZ_V"],
        "C": ["OP_C", "SZ_NA"],
        "D": ["OP_D", "SZ_NA"],
        "S": ["OP_S", "SZ_W"],
        "Ob": ["OP_O", "SZ_B"],
        "Ow": ["OP_O", "SZ_W"],
        "Ov": ["OP_O", "SZ_V"],
        "U": ["OP_U", "SZ_O"],
        "Ux": ["OP_U", "SZ_X"],
        "V": ["OP_V", "SZ_DQ"],
        "Vdq": ["OP_V", "SZ_DQ"],
        "Vqq": ["OP_V", "SZ_QQ"],
        "Vsd": ["OP_V", "SZ_Q"],
        "Vx": ["OP_V", "SZ_X"],
        "HRv": ["OP_HR", "SZ_V"],
        "HRd": ["OP_HR", "SZ_D"],
        "HRq": ["OP_HR", "SZ_Q"],
        "H": ["OP_H", "SZ_X"],
        "Hx": ["OP_H", "SZ_X"],
        "Hdq": ["OP_H", "SZ_DQ"],
        "Hqq": ["OP_H", "SZ_QQ"],
        "W": ["OP_W", "SZ_DQ"],
        "Wdq": ["OP_W", "SZ_DQ"],
        "Wqq": ["OP_W", "SZ_QQ"],
        "Wsd": ["OP_W", "SZ_Q"],
        "Wx": ["OP_W", "SZ_X"],
        "L": ["OP_L", "SZ_O"],
        "Lx": ["OP_L", "SZ_X"],
        "Ldq": ["OP_L", "SZ_DQ"],
        "Lqq": ["OP_L", "SZ_QQ"],
        "MbU": ["OP_MU", "SZ_BO"],
        "MwU": ["OP_MU", "SZ_WO"],
        "MdU": ["OP_MU", "SZ_DO"],
        "MqU": ["OP_MU", "SZ_QO"],
        "MdqU": ["OP_MU", "SZ_DQO"],
        "MqqU": ["OP_MU", "SZ_QQO"],
        "XSd": ["OP_XS", "SZ_D"],
        "XSq": ["OP_XS", "SZ_Q"],
        "XSXd": ["OP_XSX", "SZ_D"],
        "XSXq": ["OP_XSX", "SZ_Q"],
        "XSYd": ["OP_XSY", "SZ_D"],
        "XSYq": ["OP_XSY", "SZ_Q"],
        "N": ["OP_N", "SZ_Q"],
        "P": ["OP_P", "SZ_Q"],
        "Q": ["OP_Q", "SZ_Q"],
        "AL": ["OP_AL", "SZ_B"],
        "AX": ["OP_AX", "SZ_W"],
        "eAX": ["OP_eAX", "SZ_Z"],
        "rAX": ["OP_rAX", "SZ_V"],
        "CL": ["OP_CL", "SZ_B"],
        "CX": ["OP_CX", "SZ_W"],
        "eCX": ["OP_eCX", "SZ_Z"],
        "rCX": ["OP_rCX", "SZ_V"],
        "DL": ["OP_DL", "SZ_B"],
        "DX": ["OP_DX", "SZ_W"],
        "eDX": ["OP_eDX", "SZ_Z"],
        "rDX": ["OP_rDX", "SZ_V"],
        "R0b": ["OP_R0", "SZ_B"],
        "R1b": ["OP_R1", "SZ_B"],
        "R2b": ["OP_R2", "SZ_B"],
        "R3b": ["OP_R3", "SZ_B"],
        "R4b": ["OP_R4", "SZ_B"],
        "R5b": ["OP_R5", "SZ_B"],
        "R6b": ["OP_R6", "SZ_B"],
        "R7b": ["OP_R7", "SZ_B"],
        "R0w": ["OP_R0", "SZ_W"],
        "R1w": ["OP_R1", "SZ_W"],
        "R2w": ["OP_R2", "SZ_W"],
        "R3w": ["OP_R3", "SZ_W"],
        "R4w": ["OP_R4", "SZ_W"],
        "R5w": ["OP_R5", "SZ_W"],
        "R6w": ["OP_R6", "SZ_W"],
        "R7w": ["OP_R7", "SZ_W"],
        "R0v": ["OP_R0", "SZ_V"],
        "R1v": ["OP_R1", "SZ_V"],
        "R2v": ["OP_R2", "SZ_V"],
        "R3v": ["OP_R3", "SZ_V"],
        "R4v": ["OP_R4", "SZ_V"],
        "R5v": ["OP_R5", "SZ_V"],
        "R6v": ["OP_R6", "SZ_V"],
        "R7v": ["OP_R7", "SZ_V"],
        "R0z": ["OP_R0", "SZ_Z"],
        "R1z": ["OP_R1", "SZ_Z"],
        "R2z": ["OP_R2", "SZ_Z"],
        "R3z": ["OP_R3", "SZ_Z"],
        "R4z": ["OP_R4", "SZ_Z"],
        "R5z": ["OP_R5", "SZ_Z"],
        "R6z": ["OP_R6", "SZ_Z"],
        "R7z": ["OP_R7", "SZ_Z"],
        "R0y": ["OP_R0", "SZ_Y"],
        "R1y": ["OP_R1", "SZ_Y"],
        "R2y": ["OP_R2", "SZ_Y"],
        "R3y": ["OP_R3", "SZ_Y"],
        "R4y": ["OP_R4", "SZ_Y"],
        "R5y": ["OP_R5", "SZ_Y"],
        "R6y": ["OP_R6", "SZ_Y"],
        "R7y": ["OP_R7", "SZ_Y"],
        "ES": ["OP_ES", "SZ_NA"],
        "CS": ["OP_CS", "SZ_NA"],
        "DS": ["OP_DS", "SZ_NA"],
        "SS": ["OP_SS", "SZ_NA"],
        "GS": ["OP_GS", "SZ_NA"],
        "FS": ["OP_FS", "SZ_NA"],
        "ST0": ["OP_ST0", "SZ_NA"],
        "ST1": ["OP_ST1", "SZ_NA"],
        "ST2": ["OP_ST2", "SZ_NA"],
        "ST3": ["OP_ST3", "SZ_NA"],
        "ST4": ["OP_ST4", "SZ_NA"],
        "ST5": ["OP_ST5", "SZ_NA"],
        "ST6": ["OP_ST6", "SZ_NA"],
        "ST7": ["OP_ST7", "SZ_NA"],
        "IMP_XMM0": ["OP_IMP_XMM0", "SZ_NA"],
        "NONE": ["OP_NONE", "SZ_NA"],
    }

    # Access specifier -> C expression placed in the instruction table entry.
    # "RW" and "WR" are synonyms.
    AccessDict = {
        "N": "UD_ACCESS_NONE",
        "R": "UD_ACCESS_READ",
        "W": "UD_ACCESS_WRITE",
        "RW": "UD_ACCESS_READ|UD_ACCESS_WRITE",
        "WR": "UD_ACCESS_READ|UD_ACCESS_WRITE",
    }

    # opcode prefix dictionary
    PrefixDict = {
        "rep": "P_str",
        "repz": "P_strz",
        "aso": "P_aso",
        "oso": "P_oso",
        "rexw": "P_rexw",
        "rexb": "P_rexb",
        "rexx": "P_rexx",
        "rexr": "P_rexr",
        "vexl": "P_vexl",
        "vexw": "P_vexw",
        "seg": "P_seg",
        "inv64": "P_inv64",
        "def64": "P_def64",
        "cast": "P_cast",
    }

    # Extra mnemonics appended after those reported by the opcode tables.
    MnemonicAliases = ("invalid", "3dnow", "none", "db", "pause")

    # EFLAGS specifier character -> C flag constant.  Kept at class level so
    # it is built once rather than once per instruction.
    _EflagsDict = {
        '_': 'UD_FLAG_UNCHANGED',
        'T': 'UD_FLAG_TESTED',
        'M': 'UD_FLAG_MODIFIED',
        'R': 'UD_FLAG_RESET',
        'S': 'UD_FLAG_SET',
        'U': 'UD_FLAG_UNDEFINED',
        'P': 'UD_FLAG_PRIOR',
    }

    def __init__(self, tables: "UdOpcodeTables"):
        """Index every instruction and opcode table reported by *tables*.

        Indices follow the iteration order of ``tables.get_instructions()``
        and ``tables.get_tables()`` respectively, starting at 0.
        """
        self.tables = tables
        self._insnIndexMap = {
            insn: index for index, insn in enumerate(tables.get_instructions())
        }
        self._tableIndexMap = {
            table: index for index, table in enumerate(tables.get_tables())
        }

    def get_insn_index(self, insn: "UdInsnDef") -> int:
        """Return the index assigned to *insn*; raises KeyError if unknown."""
        return self._insnIndexMap[insn]

    def get_table_index(self, table: "UdOpcodeTable") -> int:
        """Return the index assigned to *table*; raises KeyError if unknown."""
        return self._tableIndexMap[table]

    def get_table_name(self, table: "UdOpcodeTable") -> str:
        """Return the C identifier used for *table* (``ud_itab__<index>``)."""
        return f"ud_itab__{self.get_table_index(table)}"

    def gen_opcode_table(self, table: "UdOpcodeTable", fh: TextIO, is_global: bool = False):
        """
        Emit Opcode Table in C.

        Writes a ``const uint16_t`` array for *table* to *fh*, four entries
        per row.  The array is declared ``static`` unless *is_global* is true.
        Empty slots are written as ``INVALID``, nested tables as
        ``GROUP(<table index>)`` and instructions as their instruction index.

        Side effect: calls ``table.set_limit()`` with the number of emitted
        entries minus one.
        """
        fh.write("\n")
        if not is_global:
            fh.write('static ')
        fh.write(f"const uint16_t {self.get_table_name(table)}[] = {{\n")
        limit = 0
        for i in range(table.size()):
            if i % 4 == 0:
                # Start a new row (with its offset comment) every 4 entries.
                if i > 0:
                    fh.write("\n")
                fh.write(f" /* {i:2x} */")
            e = table.get_entry(i)
            if e is None:
                fh.write(f"{'INVALID':>12},")
                limit += 1
            elif isinstance(e, UdOpcodeTable):
                fh.write(f"{f'GROUP({self.get_table_index(e)})':>12},")
                limit += 1
            elif isinstance(e, UdInsnDef):
                fh.write(f"{self.get_insn_index(e):>12},")
                limit += 1
            # Entries of any other type are neither emitted nor counted.
        table.set_limit(limit - 1)
        fh.write("\n")
        fh.write("};\n")

    def gen_opcode_tables(self, fh: TextIO):
        """Emit every opcode table; only the root table is non-static."""
        for table in self.tables.get_tables():
            self.gen_opcode_table(table, fh, table is self.tables.root)

    def gen_opcode_tables_lookup_index(self, fh: TextIO):
        """Emit ``ud_lookup_table_list``: one entry per opcode table.

        Each entry holds the table's C name, label, meta string and limit.
        A limit above 255 is reported on stdout but the entry is still
        written.  NOTE(review): the limit is presumably the value stored by
        gen_opcode_table(), so that must run first -- confirm against
        UdOpcodeTable.
        """
        fh.write("\n\n")
        fh.write("struct ud_lookup_table_list_entry ud_lookup_table_list[] = {\n")
        for table in self.tables.get_tables():
            if table.limit() > 255:
                print(f"error: invalid table limit: {table.limit()} \n")
            fh.write(f' /* {self.get_table_index(table):03d} */ '
                     f'{{ {self.get_table_name(table)}, {table.label()}, "{table.meta()}", {table.limit()} }},\n')
        fh.write("};")

    @staticmethod
    def _op_access_expr(access) -> str:
        """Translate a first/second operand access specifier to C flags.

        "W" is write-only, "RW"/"WR" is read+write (matching the synonyms in
        AccessDict); anything else is treated as read-only.
        """
        if access == "W":
            return "UD_OP_ACCESS_WRITE"
        if access in ("RW", "WR"):
            return "UD_OP_ACCESS_READ | UD_OP_ACCESS_WRITE"
        return "UD_OP_ACCESS_READ"

    def gen_insn_table(self, fh: TextIO):
        """Emit the ``ud_itab`` array: one initializer per instruction.

        Unknown operand declarations are reported on stdout and still emitted
        as ``O_<name>``.  Unknown access or prefix specifiers are reported on
        stdout and then raise KeyError from the dictionary lookup.  More than
        four operands or access specifiers raise IndexError.
        """
        fh.write("struct ud_itab_entry ud_itab[] = {\n")
        for insn in self.tables.get_instructions():
            # Operands: up to four slots, unused ones stay O_NONE.
            opr_c = ["O_NONE"] * 4
            for i, operand in enumerate(insn.operands):
                if operand not in self.OperandDict:
                    print("error: invalid operand declaration: %s\n" % operand)
                opr_c[i] = "O_" + operand
            opr = ", ".join(opr_c)

            op1_access = self._op_access_expr(insn.firstOpAccess)
            op2_access = self._op_access_expr(insn.secondOpAccess)

            # Per-operand access flags: up to four slots, default NONE.
            acc_c = ["UD_ACCESS_NONE"] * 4
            for i, spec in enumerate(insn.access):
                if spec not in self.AccessDict:
                    print("error: invalid operand access declaration: %s\n" % spec)
                acc_c[i] = self.AccessDict[spec]
            acc = ", ".join(acc_c)

            pfx_c = []
            for p in insn.prefixes:
                if p not in self.PrefixDict:
                    print(f"error: invalid prefix specification: {p} \n")
                pfx_c.append(self.PrefixDict[p])
            if not pfx_c:
                pfx_c.append("P_none")
            pfx = "|".join(pfx_c)

            eflags = ", ".join(self._EflagsDict[flag] for flag in insn.eflags)

            # Implicit register lists are always terminated by UD_NONE.
            implicit_uses = ", ".join(
                ["UD_R_" + r.upper() for r in insn.implicitRegUse] + ["UD_NONE"])
            implicit_defs = ", ".join(
                ["UD_R_" + r.upper() for r in insn.implicitRegDef] + ["UD_NONE"])

            fh.write(" /* %04d */ { UD_I%s, %s, %s, %s, %s, { {%s} }, {%s}, {%s}, %s },\n" % (
                self.get_insn_index(insn), insn.mnemonic, opr, op1_access, op2_access, pfx, eflags,
                implicit_uses, implicit_defs, acc
            ))
        fh.write("};\n")

    def get_mnemonics(self) -> List[str]:
        """Return the table mnemonics followed by MnemonicAliases.

        A new list is built on every call so the sequence returned by
        ``self.tables.get_mnemonics()`` is never mutated; repeated calls
        therefore cannot accumulate duplicate aliases.
        """
        return [*self.tables.get_mnemonics(), *self.MnemonicAliases]

    def gen_mnemonics_list(self, fh: TextIO):
        """Emit ``ud_mnemonics_str``: the mnemonic names as C string literals."""
        fh.write("\n\n")
        fh.write("const char* ud_mnemonics_str[] = {\n ")
        fh.write(",\n ".join([f'"{m}"' for m in self.get_mnemonics()]))
        fh.write("\n};\n")

    def generate_itab_header(self, file_path: str):
        """Write the C header (table-type and mnemonic enums) to *file_path*."""
        with open(file_path, "w") as fh:
            # Generate Table Type Enumeration
            fh.write("#ifndef UD_ITAB_H\n")
            fh.write("#define UD_ITAB_H\n\n")
            fh.write("/* itab.h -- generated by udis86:scripts/ud_itab.py, do no edit */\n\n")

            # table type enumeration
            fh.write("/* ud_table_type -- lookup table types (see decode.c) */\n")
            fh.write("enum ud_table_type {\n ")
            enum = UdOpcodeTable.get_labels()
            fh.write(",\n ".join(enum))
            fh.write("\n};\n\n")

            # mnemonic enumeration
            fh.write("/* ud_mnemonic -- mnemonic constants */\n")
            enum = "enum ud_mnemonic_code {\n "
            enum += ",\n ".join([f"UD_I{m}" for m in self.get_mnemonics()])
            enum += ",\n UD_MAX_MNEMONIC_CODE"
            enum += "\n} UD_ATTR_PACKED;\n"
            fh.write(enum)
            fh.write("\n")

            fh.write("extern const char * ud_mnemonics_str[];\n")
            fh.write("\n#endif /* UD_ITAB_H */\n")

    def generate_itab_source(self, file_path: str):
        """Write the C source (tables, operand macros, itab, mnemonics) to *file_path*."""
        with open(file_path, "w") as fh:
            fh.write("/* itab.c -- generated by udis86:scripts/ud_itab.py, do no edit */\n")
            fh.write("#include \"decode.h\"\n\n")
            fh.write("#define GROUP(n) (0x8000 | (n))\n")
            fh.write(f"#define INVALID {self.get_insn_index(self.tables.invalidInsn)}\n\n")
            self.gen_opcode_tables(fh)
            self.gen_opcode_tables_lookup_index(fh)

            # Macros defining short-names for operands
            fh.write("\n\n/* itab entry operand definitions (for readability) */\n")
            for name in sorted(self.OperandDict):
                op_type, op_size = self.OperandDict[name]
                fh.write(f"#define O_{name:<7} {{ {op_type + ',':<12} {op_size:<8} }}\n")
            fh.write("\n")

            self.gen_insn_table(fh)
            self.gen_mnemonics_list(fh)

    def generate_itab_files(self, location: str):
        """Write ``itab.c`` and then ``itab.h`` into the directory *location*."""
        self.generate_itab_source(os.path.join(location, "itab.c"))
        self.generate_itab_header(os.path.join(location, "itab.h"))
def usage():
    """Print the command-line synopsis for this script to stdout."""
    synopsis = "usage: ud_itab.py <optable.xml> <output-path>"
    print(synopsis)
def main():
    """Script entry point: build the opcode tables and write itab.c / itab.h.

    Expects exactly two command-line arguments, the opcode table XML file
    and the output directory; otherwise prints the usage text and exits
    with status 1.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 2:
        usage()
        sys.exit(1)
    optable_xml, output_path = arguments
    generator = UdItabGenerator(UdOpcodeTables(xml=optable_xml))
    generator.generate_itab_files(output_path)
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()