From 6eecd3d61ff248964fcf6d54b47edabebe654dc3 Mon Sep 17 00:00:00 2001 From: vincent <vincent@trmlabs.com> Date: Thu, 2 Feb 2023 17:48:52 +0100 Subject: [PATCH] WIP --- symbolic_execution/instruction/exceptions.py | 8 + symbolic_execution/instruction/general.py | 93 ++++ symbolic_execution/instruction/interfaces.py | 10 +- symbolic_execution/instruction/stack_based.py | 66 ++- symbolic_execution/language/bitcoin/helper.py | 19 - .../language/bitcoin/op_codes.py | 471 +++++++++++++----- symbolic_execution/language/bitcoin/parser.py | 104 +++- 7 files changed, 597 insertions(+), 174 deletions(-) create mode 100644 symbolic_execution/instruction/general.py delete mode 100644 symbolic_execution/language/bitcoin/helper.py diff --git a/symbolic_execution/instruction/exceptions.py b/symbolic_execution/instruction/exceptions.py index 3fe648e..bb6a5a7 100644 --- a/symbolic_execution/instruction/exceptions.py +++ b/symbolic_execution/instruction/exceptions.py @@ -5,3 +5,11 @@ class InstructionInitializationError(Exception): """Raised by the constructor when an instruction failed to initialize.""" pass + + +class DisabledInstructionError(Exception): + """Raised when attempting to init. a disabled instruction.""" + + def __init__(self, message: str): + """Init the exception and specify the location of the instruction.""" + super().__init__(message) diff --git a/symbolic_execution/instruction/general.py b/symbolic_execution/instruction/general.py new file mode 100644 index 0000000..699e0e5 --- /dev/null +++ b/symbolic_execution/instruction/general.py @@ -0,0 +1,93 @@ +"""Instructions for a stack-based machine.""" +from enum import Enum +from typing import List + +from .exceptions import ( + DisabledInstructionError, InstructionInitializationError +) +from .interfaces import IInstruction + +from symbolic_execution.machines.interfaces import IMachine + + +class InstructionTag(Enum): + """Reason why an instruction is not implemented.""" + + NOT_IMPLEMENTED = 0 + RESERVED = 1 + DISABLED = 2 + + def __str__(self): + """Return the name in lowercase.""" + return f'{self.name}'.lower() + + +class NOP(IInstruction): + """Instruction that does nothing.""" + + def __init__(self, pc: int): + """Initialize a the instruction.""" + super().__init__(pc) + + def run(self, machine: IMachine): + """Run the operator on any machine.""" + super().run(machine) + + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}({self._destination_pcs[0]})" + + +class UnconditionalJump(IInstruction): + """Instruction that modifies the pc.""" + + def __init__(self, pc: int, destination_pcs: List[int], **args): + """Initialize a the instruction.""" + super().__init__(pc, **args) + + if len(destination_pcs) != 1: + raise InstructionInitializationError( + f"""{self.__class__.__name__}: Expects exactly 1 possible + destination pc, while the argument passed + is {destination_pcs}""" + ) + + self._destination_pcs = destination_pcs + + def run(self, machine: IMachine): + """Run the operator on any machine.""" + super().run(machine) + + machine.pc = self._destination_pcs[0] + + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}({self._destination_pcs[0]})" + + +class InvalidInstruction(IInstruction): + """Instruction that raises an exception when run. + + Raise a `DisabledInstructionError` exception. + """ + + def __init__(self, + pc: int, + token: str, + tag: InstructionTag = + InstructionTag.NOT_IMPLEMENTED, + **args): + """Init an instruction that raises an exception when run.""" + super().__init__(pc, **args) + self._token = token + self._tag = tag + + def run(self, machine: IMachine): + """Raise a DisabledInstructionError exception.""" + msg = f"""The opcode {self._token} at position {self._pc} stands \ +for a {self._tag} instruction.""" + raise DisabledInstructionError(msg) + + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}({self._token})" diff --git a/symbolic_execution/instruction/interfaces.py b/symbolic_execution/instruction/interfaces.py index c35a781..ceb2d29 100644 --- a/symbolic_execution/instruction/interfaces.py +++ b/symbolic_execution/instruction/interfaces.py @@ -16,7 +16,7 @@ class IInstruction(ABC): """ return self._pc - def __init__(self, pc: int): + def __init__(self, pc: int, **args): """Initialize an instruction for a language. In particular, the instruction is associated with an byte. @@ -28,6 +28,10 @@ class IInstruction(ABC): """Run the instruction on a machine.""" machine.pc = machine.pc + 1 + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{self.pc}: {self.__class__.__name__}" + class InstructionBlock(): """Represent a block of consecutive instructions. @@ -71,6 +75,10 @@ class InstructionBlock(): """Set the program counter of this block.""" self.pc = pc + def set_instructions(self, instructions: List[IInstruction]): + """Set the instructions for this block.""" + self.instructions = instructions + def __str__(self, tab=''): """Return the string representation of the block and its children.""" s = self.__repr__() + '\n' diff --git a/symbolic_execution/instruction/stack_based.py b/symbolic_execution/instruction/stack_based.py index 7c0be04..6cc477f 100644 --- a/symbolic_execution/instruction/stack_based.py +++ b/symbolic_execution/instruction/stack_based.py @@ -1,5 +1,5 @@ """Instructions for a stack-based machine.""" -from typing import Any +from typing import Any, List from .exceptions import ( InstructionInitializationError ) @@ -12,13 +12,13 @@ from symbolic_execution.machines.interfaces import IStackBasedMachine class ArithmeticOperator(IInstruction): """IInstruction that applies an arithmetic operator on a stack.""" - def __init__(self, pc: int, operator: str, arity: int): + def __init__(self, pc: int, operator: str, arity: int, **args): """Initialize a stack-based instruction for a language. - In particular, the instruction is associated with an pc, an arity + In particular, the instruction is associated with a pc, an arity and an operator to apply on the operands. """ - super().__init__(pc) + super().__init__(pc, **args) # Arity initialization if arity < 0: @@ -50,19 +50,27 @@ class ArithmeticOperator(IInstruction): machine.push_onto_stack(OPS[self._operator](*ops)) - machine.pc = machine.pc + 1 + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}({self._operator})" class PushDataOperator(IInstruction): """IInstruction that pushes some data on the stack.""" - def __init__(self, pc: int, data: Any): + def __init__(self, pc: int, data: str, **args): """Initialize a stack-based instruction for a language. - In particular, the instruction is associated with an pc and some data + In particular, the instruction is associated with a pc and some data to push onto the stack. """ - super().__init__(pc) + super().__init__(pc, **args) + + if not isinstance(data, str): + raise InstructionInitializationError( + f"""{self.__class__.__name__}: The data must be a string, but + is of type {type(data)}.""" + ) self._data = data def run(self, machine: IStackBasedMachine): @@ -70,3 +78,45 @@ class PushDataOperator(IInstruction): super().run(machine) machine.push_onto_stack(self._data) + + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}({self._data})" + + +class ConditionalJump(IInstruction): + """Jump instruction based on the top stack item. + + It pops and evaluates the top element on the stack. + """ + + def __init__(self, pc: int, destination_pcs: List[int], **args): + """Initialize the instruction. + + In particular, the instruction is associated with a pc and 2 possible + destinations for the jump instructioN. + """ + super().__init__(pc, **args) + + if len(destination_pcs) != 2: + raise InstructionInitializationError( + f"""{self.__class__.__name__}: Expects exactly 2 possible + destination pcs, while the argument passed + is {destination_pcs}""" + ) + self._destination_pcs = destination_pcs + + def run(self, machine: IStackBasedMachine): + """Push {self._data} on the stack.""" + super().run(machine) + + condition = machine.pop_from_stack() + + if machine.is_true(condition): + machine.pc = self._destination_pcs[0] + else: + machine.pc = self._destination_pcs[1] + + def __repr__(self) -> str: + """Return the string representation of the instruction.""" + return f"{super().__repr__()}([{self._destination_pcs}])" diff --git a/symbolic_execution/language/bitcoin/helper.py b/symbolic_execution/language/bitcoin/helper.py deleted file mode 100644 index f01791e..0000000 --- a/symbolic_execution/language/bitcoin/helper.py +++ /dev/null @@ -1,19 +0,0 @@ -"""A few helper functions.""" -from .op_codes import OpCodes, OpCode - - -def is_branching(op_code: OpCode): - """Return whether an op code is a branching operator or not. - - A branching operator is an operator which mights modify the execution flow. - """ - return op_code.branching - - -def non_branching_tokens(): - """Return the tokens for the opcodes unrelated to control flow.""" - tokens = [] - for op_code in OpCodes.d.values(): - if (not is_branching(op_code)): - tokens.append(op_code.name) - return tokens diff --git a/symbolic_execution/language/bitcoin/op_codes.py b/symbolic_execution/language/bitcoin/op_codes.py index 3c294b5..ba7b937 100644 --- a/symbolic_execution/language/bitcoin/op_codes.py +++ b/symbolic_execution/language/bitcoin/op_codes.py @@ -1,26 +1,27 @@ """Code by Vincent Jacquot - University of Liège. - -Imported from source code of bitcoin core: -https://github.com/bitcoin/bitcoin/blob/v0.14.0/src/script/script.h#L46L187 -// Copyright (c) 2009-2010 Satoshi Nakamoto -// Copyright (c) 2009-2016 The Bitcoin Core developers -// Distributed under the MIT software license see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. """ -from typing import Callable, Dict +from typing import Any, Callable, Dict +from symbolic_execution.instruction.general import ( + NOP, InvalidInstruction, InstructionTag, UnconditionalJump +) from symbolic_execution.instruction.interfaces import IInstruction +from symbolic_execution.instruction.stack_based import ( + ArithmeticOperator, + ConditionalJump, + PushDataOperator +) class OpCode(): """Represent an opcode in a language.""" def __init__(self, - byte: int, - name: str, + token: str, instruction_constructor: Callable[[ Dict[str, Any]], IInstruction] = None, branching: bool = False, + default_args: dict = None): """Initialize an opcode. In particular, @@ -30,156 +31,352 @@ class OpCode(): instruction. * {branching} is a boolean indicating if the opcode is a branching operator. + * {default_args} the args to be passed along with any call to + {instruction_constructor} """ - self.byte = byte - self.name = name + self.token = token self.instruction_constructor = instruction_constructor self.branching = branching + self.default_args = default_args if default_args else {} class OpCodes(): """Different types of scripts symbols for BTC.""" - __op_codes = [ - OpCode(0, "OP_FALSE", None), - - OpCode(0x4c, "OP_PUSHDATA1", None), - OpCode(0x4d, "OP_PUSHDATA2", None), - OpCode(0x4e, "OP_PUSHDATA4", None), - OpCode(0x4f, "OP_1NEGATE", None), - OpCode(0x50, "OP_RESERVED", None), - OpCode(0x51, "OP_1", None), - OpCode(0x52, "OP_2", None), - OpCode(0x53, "OP_3", None), - OpCode(0x54, "OP_4", None), - OpCode(0x55, "OP_5", None), - OpCode(0x56, "OP_6", None), - OpCode(0x57, "OP_7", None), - OpCode(0x58, "OP_8", None), - OpCode(0x59, "OP_9", None), - OpCode(0x5a, "OP_10", None), - OpCode(0x5b, "OP_11", None), - OpCode(0x5c, "OP_12", None), - OpCode(0x5d, "OP_13", None), - OpCode(0x5e, "OP_14", None), - OpCode(0x5f, "OP_15", None), - OpCode(0x60, "OP_16", None), + __int_to_tokens = { + 0: "OP_FALSE", + 0x4f: "OP_1NEGATE", + 0x50: "OP_RESERVED", + **{0x50 + i: f"OP_{i}" for i in range(1, 17)}, + 0x62: "OP_VER", + 0x63: "OP_IF", + 0x64: "OP_NOTIF", + 0x65: "OP_VERIF", + 0x66: "OP_VERNOTIF", + 0x67: "OP_ELSE", + 0x68: "OP_ENDIF", + 0x69: "OP_VERIFY", + 0x6a: "OP_RETURN", + 0x6b: "OP_TOALTSTACK", + 0x6c: "OP_FROMALTSTACK", + 0x6d: "OP_2DROP", + 0x6e: "OP_2DUP", + 0x6f: "OP_3DUP", + 0x70: "OP_2OVER", + 0x71: "OP_2ROT", + 0x72: "OP_2SWAP", + 0x73: "OP_IFDUP", + 0x74: "OP_DEPTH", + 0x75: "OP_DROP", + 0x76: "OP_DUP", + 0x77: "OP_NIP", + 0x78: "OP_OVER", + 0x79: "OP_PICK", + 0x7a: "OP_ROLL", + 0x7b: "OP_ROT", + 0x7c: "OP_SWAP", + 0x7d: "OP_TUCK", + 0x7e: "OP_CAT", + 0x7f: "OP_SUBSTR", + 0x80: "OP_LEFT", + 0x81: "OP_RIGHT", + 0x82: "OP_SIZE", + 0x83: "OP_INVERT", + 0x84: "OP_AND", + 0x85: "OP_OR", + 0x86: "OP_XOR", + 0x87: "OP_EQUAL", + 0x88: "OP_EQUAL OP_VERIFY", + 0x89: "OP_RESERVED1", + 0x8a: "OP_RESERVED2", + 0x8b: "OP_1ADD", + 0x8c: "OP_1SUB", + 0x8d: "OP_2MUL", + 0x8e: "OP_2DIV", + 0x8f: "OP_NEGATE", + 0x90: "OP_ABS", + 0x91: "OP_NOT", + 0x92: "OP_0NOTEQUAL", + 0x93: "OP_ADD", + 0x94: "OP_SUB", + 0x95: "OP_MUL", + 0x96: "OP_DIV", + 0x97: "OP_MOD", + 0x98: "OP_LSHIFT", + 0x99: "OP_RSHIFT", + 0x9a: "OP_BOOLAND", + 0x9b: "OP_BOOLOR", + 0x9c: "OP_NUMEQUAL", + 0x9d: "OP_NUMEQUAL OP_VERIFY", + 0x9e: "OP_NUMNOTEQUAL", + 0x9f: "OP_LESSTHAN", + 0xa0: "OP_GREATERTHAN", + 0xa1: "OP_LESSTHANOREQUAL", + 0xa2: "OP_GREATERTHANOREQUAL", + 0xa3: "OP_MIN", + 0xa4: "OP_MAX", + 0xa5: "OP_WITHIN", + 0xa6: "OP_RIPEMD160", + 0xa7: "OP_SHA1", + 0xa8: "OP_SHA256", + 0xa9: "OP_HASH160", + 0xaa: "OP_HASH256", + 0xab: "OP_CODESEPARATOR", + 0xac: "OP_CHECKSIG", + 0xad: "OP_CHECKSIG OP_VERIFY", + 0xae: "OP_CHECKMULTISIG", + 0xaf: "OP_CHECKMULTISIG OP_VERIFY", + 0xb0: "OP_NOP1", + 0xb1: "OP_CHECKLOCKTIME OP_VERIFY", + 0xb2: "OP_CHECKSEQUENCE OP_VERIFY", + 0xb3: "OP_NOP4", + 0xb4: "OP_NOP5", + 0xb5: "OP_NOP6", + 0xb6: "OP_NOP7", + 0xb7: "OP_NOP8", + 0xb8: "OP_NOP9", + 0xb9: "OP_NOP10", + 0xfa: "OP_SMALLINTEGER", + 0xfb: "OP_PUBKEYS", + 0xfd: "OP_PUBKEYHASH", + 0xfe: "OP_PUBKEY", + 0xff: "OP_INVALIDOPCODE", + } + __token_to_op_codes = { + "OP_FALSE": OpCode("OP_FALSE", + instruction_constructor=PushDataOperator, + default_args={ + "data": "0" + }), + "OP_PUSHDATA1": OpCode("OP_PUSHDATA1", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_PUSHDATA1", + }), + "OP_PUSHDATA2": OpCode("OP_PUSHDATA2", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_PUSHDATA2", + }), + "OP_PUSHDATA4": OpCode("OP_PUSHDATA4", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_PUSHDATA4", + }), + "OP_1NEGATE": OpCode("OP_1NEGATE", + instruction_constructor=PushDataOperator, + default_args={ + "data": "-1" + }), + "OP_RESERVED": OpCode("OP_RESERVED", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_RESERVED", + "tag": InstructionTag.RESERVED, + }), + **{ + "OP_{i}": OpCode(f"OP_{i}", + instruction_constructor=PushDataOperator, + default_args={ + "data": str(i) + }) + for i in range(1, 17) + }, # control - OpCode(0x61, "OP_NOP", None), - OpCode(0x62, "OP_VER", None), - OpCode(0x63, "OP_IF", None, branching=True), - OpCode(0x64, "OP_NOTIF", None, branching=True), - OpCode(0x65, "OP_VERIF", None), - OpCode(0x66, "OP_VERNOTIF", None), - OpCode(0x67, "OP_ELSE", None, branching=True), - OpCode(0x68, "OP_ENDIF", None, branching=True), - OpCode(0x69, "OP_VERIFY", None), - OpCode(0x6a, "OP_RETURN", None), + "OP_NOP": OpCode("OP_NOP", instruction_constructor=NOP), + "OP_VER": OpCode("OP_VER", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_VER", + "tag": InstructionTag.RESERVED, + }), + "OP_IF": OpCode("OP_IF", branching=True, + instruction_constructor=ConditionalJump), + "OP_NOTIF": OpCode("OP_NOTIF", branching=True, + instruction_constructor=ConditionalJump), + "OP_VERIF": OpCode("OP_VERIF", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_VERIF", + "tag": InstructionTag.RESERVED, + }), + "OP_VERNOTIF": OpCode("OP_VERNOTIF", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_VERNOTIF", + "tag": InstructionTag.RESERVED, + }), + # Removed during second pass + "OP_ELSE": OpCode("OP_ELSE", branching=True, + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_ELSE", + }), + # Removed during second pass + "OP_ENDIF": OpCode("OP_ENDIF", branching=True, + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_ENDIF", + }), + "OP_VERIFY": OpCode("OP_VERIFY", None), + "OP_RETURN": OpCode("OP_RETURN", None), # stack ops - OpCode(0x6b, "OP_TOALTSTACK", None), - OpCode(0x6c, "OP_FROMALTSTACK", None), - OpCode(0x6d, "OP_2DROP", None), - OpCode(0x6e, "OP_2DUP", None), - OpCode(0x6f, "OP_3DUP", None), - OpCode(0x70, "OP_2OVER", None), - OpCode(0x71, "OP_2ROT", None), - OpCode(0x72, "OP_2SWAP", None), - OpCode(0x73, "OP_IFDUP", None), - OpCode(0x74, "OP_DEPTH", None), - OpCode(0x75, "OP_DROP", None), - OpCode(0x76, "OP_DUP", None), - OpCode(0x77, "OP_NIP", None), - OpCode(0x78, "OP_OVER", None), - OpCode(0x79, "OP_PICK", None), - OpCode(0x7a, "OP_ROLL", None), - OpCode(0x7b, "OP_ROT", None), - OpCode(0x7c, "OP_SWAP", None), - OpCode(0x7d, "OP_TUCK", None), + "OP_TOALTSTACK": OpCode("OP_TOALTSTACK", None), + "OP_FROMALTSTACK": OpCode("OP_FROMALTSTACK", None), + "OP_2DROP": OpCode("OP_2DROP", None), + "OP_2DUP": OpCode("OP_2DUP", None), + "OP_3DUP": OpCode("OP_3DUP", None), + "OP_2OVER": OpCode("OP_2OVER", None), + "OP_2ROT": OpCode("OP_2ROT", None), + "OP_2SWAP": OpCode("OP_2SWAP", None), + "OP_IFDUP": OpCode("OP_IFDUP", None), + "OP_DEPTH": OpCode("OP_DEPTH", None), + "OP_DROP": OpCode("OP_DROP", None), + "OP_DUP": OpCode("OP_DUP", None), + "OP_NIP": OpCode("OP_NIP", None), + "OP_OVER": OpCode("OP_OVER", None), + "OP_PICK": OpCode("OP_PICK", None), + "OP_ROLL": OpCode("OP_ROLL", None), + "OP_ROT": OpCode("OP_ROT", None), + "OP_SWAP": OpCode("OP_SWAP", None), + "OP_TUCK": OpCode("OP_TUCK", None), # splice ops - OpCode(0x7e, "OP_CAT", None), - OpCode(0x7f, "OP_SUBSTR", None), - OpCode(0x80, "OP_LEFT", None), - OpCode(0x81, "OP_RIGHT", None), - OpCode(0x82, "OP_SIZE", None), + "OP_CAT": OpCode("OP_CAT", None), # Disabled + "OP_SUBSTR": OpCode("OP_SUBSTR", None), # Disabled + "OP_LEFT": OpCode("OP_LEFT", None), # Disabled + "OP_RIGHT": OpCode("OP_RIGHT", None), # Disabled + "OP_SIZE": OpCode("OP_SIZE", None), # bit logic - OpCode(0x83, "OP_INVERT", None), - OpCode(0x84, "OP_AND", None), - OpCode(0x85, "OP_OR", None), - OpCode(0x86, "OP_XOR", None), - OpCode(0x87, "OP_EQUAL", None), - OpCode(0x88, "OP_EQUALVERIFY", None), - OpCode(0x89, "OP_RESERVED1", None), - OpCode(0x8a, "OP_RESERVED2", None), + "OP_INVERT": OpCode("OP_INVERT", None), + "OP_AND": OpCode("OP_AND", None), + "OP_OR": OpCode("OP_OR", None), + "OP_XOR": OpCode("OP_XOR", None), + "OP_EQUAL": OpCode("OP_EQUAL", None), + "OP_RESERVED1": OpCode("OP_RESERVED1", None), + "OP_RESERVED2": OpCode("OP_RESERVED2", None), # numeric - OpCode(0x8b, "OP_1ADD", None), - OpCode(0x8c, "OP_1SUB", None), - OpCode(0x8d, "OP_2MUL", None), - OpCode(0x8e, "OP_2DIV", None), - OpCode(0x8f, "OP_NEGATE", None), - OpCode(0x90, "OP_ABS", None), - OpCode(0x91, "OP_NOT", None), - OpCode(0x92, "OP_0NOTEQUAL", None), - - OpCode(0x93, "OP_ADD", None), - OpCode(0x94, "OP_SUB", None), - OpCode(0x95, "OP_MUL", None), - OpCode(0x96, "OP_DIV", None), - OpCode(0x97, "OP_MOD", None), - OpCode(0x98, "OP_LSHIFT", None), - OpCode(0x99, "OP_RSHIFT", None), - - OpCode(0x9a, "OP_BOOLAND", None), - OpCode(0x9b, "OP_BOOLOR", None), - OpCode(0x9c, "OP_NUMEQUAL", None), - OpCode(0x9d, "OP_NUMEQUALVERIFY", None), - OpCode(0x9e, "OP_NUMNOTEQUAL", None), - OpCode(0x9f, "OP_LESSTHAN", None), - OpCode(0xa0, "OP_GREATERTHAN", None), - OpCode(0xa1, "OP_LESSTHANOREQUAL", None), - OpCode(0xa2, "OP_GREATERTHANOREQUAL", None), - OpCode(0xa3, "OP_MIN", None), - OpCode(0xa4, "OP_MAX", None), - - OpCode(0xa5, "OP_WITHIN", None), + "OP_1ADD": OpCode("OP_1ADD", None), + "OP_1SUB": OpCode("OP_1SUB", None), + "OP_2MUL": OpCode("OP_2MUL", None), + "OP_2DIV": OpCode("OP_2DIV", None), + "OP_NEGATE": OpCode("OP_NEGATE", None), + "OP_ABS": OpCode("OP_ABS", None), + "OP_NOT": OpCode("OP_NOT", None), + "OP_0NOTEQUAL": OpCode("OP_0NOTEQUAL", None), + + "OP_ADD": OpCode("OP_ADD", + instruction_constructor=ArithmeticOperator, + default_args={ + "arity": 2, + "operator": '+' + }), + "OP_SUB": OpCode("OP_SUB", + instruction_constructor=ArithmeticOperator, + default_args={ + "arity": 2, + "operator": '-' + }), + "OP_MUL": OpCode("OP_MUL", + instruction_constructor=ArithmeticOperator, + default_args={ + "arity": 2, + "operator": '*' + }), + "OP_DIV": OpCode("OP_DIV", + instruction_constructor=ArithmeticOperator, + default_args={ + "arity": 2, + "operator": '/' + }), + "OP_MOD": OpCode("OP_MOD", + instruction_constructor=ArithmeticOperator, + default_args={ + "arity": 2, + "operator": '%' + }), + "OP_LSHIFT": OpCode("OP_LSHIFT", + instruction_constructor=InvalidInstruction, + default_args={ + "token": "OP_LSHIFT", + "tag": InstructionTag.DISABLED, + }), + "OP_RSHIFT": OpCode("OP_RSHIFT", None), # Disabled + + "OP_BOOLAND": OpCode("OP_BOOLAND", None), + "OP_BOOLOR": OpCode("OP_BOOLOR", None), + "OP_NUMEQUAL": OpCode("OP_NUMEQUAL", None), + "OP_NUMNOTEQUAL": OpCode("OP_NUMNOTEQUAL", None), + "OP_LESSTHAN": OpCode("OP_LESSTHAN", None), + "OP_GREATERTHAN": OpCode("OP_GREATERTHAN", None), + "OP_LESSTHANOREQUAL": OpCode("OP_LESSTHANOREQUAL", None), + "OP_GREATERTHANOREQUAL": OpCode("OP_GREATERTHANOREQUAL", None), + "OP_MIN": OpCode("OP_MIN", None), + "OP_MAX": OpCode("OP_MAX", None), + + "OP_WITHIN": OpCode("OP_WITHIN", None), # crypto - OpCode(0xa6, "OP_RIPEMD160", None), - OpCode(0xa7, "OP_SHA1", None), - OpCode(0xa8, "OP_SHA256", None), - OpCode(0xa9, "OP_HASH160", None), - OpCode(0xaa, "OP_HASH256", None), - OpCode(0xab, "OP_CODESEPARATOR", None), - OpCode(0xac, "OP_CHECKSIG", None), - OpCode(0xad, "OP_CHECKSIGVERIFY", None), - OpCode(0xae, "OP_CHECKMULTISIG", None), - OpCode(0xaf, "OP_CHECKMULTISIGVERIFY", None), + "OP_RIPEMD160": OpCode("OP_RIPEMD160", None), + "OP_SHA1": OpCode("OP_SHA1", None), + "OP_SHA256": OpCode("OP_SHA256", None), + "OP_HASH160": OpCode("OP_HASH160", None), + "OP_HASH256": OpCode("OP_HASH256", None), + "OP_CODESEPARATOR": OpCode("OP_CODESEPARATOR", None), + "OP_CHECKSIG": OpCode("OP_CHECKSIG", None), + "OP_CHECKMULTISIG": OpCode("OP_CHECKMULTISIG", None), # expansion - OpCode(0xb0, "OP_NOP1", None), - OpCode(0xb1, "OP_CHECKLOCKTIMEVERIFY", None), - OpCode(0xb2, "OP_CHECKSEQUENCEVERIFY", None), - OpCode(0xb3, "OP_NOP4", None), - OpCode(0xb4, "OP_NOP5", None), - OpCode(0xb5, "OP_NOP6", None), - OpCode(0xb6, "OP_NOP7", None), - OpCode(0xb7, "OP_NOP8", None), - OpCode(0xb8, "OP_NOP9", None), - OpCode(0xb9, "OP_NOP10", None), + "OP_NOP1": OpCode("OP_NOP1", None), + "OP_NOP4": OpCode("OP_NOP4", None), + "OP_NOP5": OpCode("OP_NOP5", None), + "OP_NOP6": OpCode("OP_NOP6", None), + "OP_NOP7": OpCode("OP_NOP7", None), + "OP_NOP8": OpCode("OP_NOP8", None), + "OP_NOP9": OpCode("OP_NOP9", None), + "OP_NOP10": OpCode("OP_NOP10", None), # template matching params - OpCode(0xfa, "OP_SMALLINTEGER", None), - OpCode(0xfb, "OP_PUBKEYS", None), - OpCode(0xfd, "OP_PUBKEYHASH", None), - OpCode(0xfe, "OP_PUBKEY", None), + "OP_SMALLINTEGER": OpCode("OP_SMALLINTEGER", None), + "OP_PUBKEYS": OpCode("OP_PUBKEYS", None), + "OP_PUBKEYHASH": OpCode("OP_PUBKEYHASH", None), + "OP_PUBKEY": OpCode("OP_PUBKEY", None), + + "OP_INVALIDOPCODE": OpCode("OP_INVALIDOPCODE", None), + + # /!\ Not part of BTC protocol. Custome defined operator. + # Thus, we do not need to add a integer representation for this opcode. + # No script will ever contain a JUMP opcode. + "OP_JUMP": OpCode("OP_JUMP", + instruction_constructor=UnconditionalJump, branching=True), + } + + def int_to_tokens(opcode: int) -> str: + """Return the token(s) associated to an opcode (int). + + To reduce the number of operators that need to be implemented, an + opcode can return several tokens. + + Examples: + * OpCodes._opcode_to_token(0x88) + >> "OP_EQUAL OP_VERIFY" + * OpCodes._opcode_to_token(0x9d) + >> "OP_NUMEQUAL OP_VERIFY" + """ + return OpCodes.__int_to_tokens[opcode] - OpCode(0xff, "OP_INVALIDOPCODE", None), - ] + def token_to_opcode(token: str) -> OpCode: + """Return the Opcode (object) associated to a token.""" + return OpCodes.__token_to_op_codes[token] - d = {} - for op_code in __op_codes: - d[op_code.byte] = op_code + def non_branching_tokens(): + """Return the tokens for the opcodes unrelated to control flow.""" + tokens = [] + for op_code in OpCodes.__token_to_op_codes.values(): + if (not op_code.branching): + tokens.append(op_code.token) + return tokens diff --git a/symbolic_execution/language/bitcoin/parser.py b/symbolic_execution/language/bitcoin/parser.py index 793aac8..a07d46a 100644 --- a/symbolic_execution/language/bitcoin/parser.py +++ b/symbolic_execution/language/bitcoin/parser.py @@ -2,11 +2,12 @@ from typing import List from pyparsing import (Literal, hexnums, ZeroOrMore, countedArray, Optional, Or, Word, Combine, Forward, OneOrMore, Optional, - MatchFirst) + MatchFirst, Keyword) from collections import deque +from symbolic_execution.instruction.general import UnconditionalJump from symbolic_execution.instruction.interfaces import InstructionBlock -from .helper import is_branching, non_branching_tokens +from symbolic_execution.instruction.stack_based import PushDataOperator from ..interfaces import ILanguage from .op_codes import OpCodes @@ -17,9 +18,11 @@ class BitcoinParser (ILanguage): def parse_code(self, code: str) -> List[InstructionBlock]: """Parse {code} and returns the list of instructions in the code.""" tokens = BitcoinParser.__tokenify_script(code) + print(tokens) control_flow_graph = BitcoinParser.__build_control_flow_graph(tokens) BitcoinParser.__set_block_pc(control_flow_graph) - return control_flow_graph + program = BitcoinParser.__build_instructions(control_flow_graph) + return program, control_flow_graph def __tokenify_script(script: str) -> str: """First pass on the script to produce tokens and parse the data. @@ -33,7 +36,7 @@ class BitcoinParser (ILanguage): # converted into a token. OP_CODE = Word(hexnums, exact=2) OP_CODE.addParseAction(lambda tokens: list(map( - lambda t: OpCodes.d[int(t, 16)].name, tokens))) + lambda t: OpCodes.int_to_tokens(int(t, 16)), tokens))) # CONSTANTS OP_PUSH_0 = Or([format(op, '02x') @@ -56,7 +59,12 @@ class BitcoinParser (ILanguage): def __build_control_flow_graph(tokenified_script: str): """Build the control flow graph of the script.""" # PARSER - NON_FLOW_OPCODES = Or([Word(hexnums), *non_branching_tokens()]) + NON_FLOW_OPCODES = Or( + [ + Word(hexnums), + *list(map(lambda token: Keyword(token), + OpCodes.non_branching_tokens())) + ]) CONSECUTIVE_INSTRUCTIONS = OneOrMore(NON_FLOW_OPCODES) BLOCK = Forward() @@ -71,6 +79,15 @@ class BitcoinParser (ILanguage): + Literal("OP_ENDIF") + Optional(BLOCK) ), + ( + Optional(CONSECUTIVE_INSTRUCTIONS) + + (Literal("OP_NOTIF")) + + Optional(BLOCK) + + Optional(Literal("OP_ELSE") + + Optional(BLOCK)) + + Literal("OP_ENDIF") + + Optional(BLOCK) + ), ])) # Parse actions @@ -86,18 +103,16 @@ class BitcoinParser (ILanguage): # A block takes this format: # [block_A] <IF> [block_B] [<ELSE> [block_C]] <ENDIF> [block_D] for i, token in enumerate(tokens): - if token == "OP_IF": + if token in ["OP_IF", "OP_NOTIF"]: blockA = tokens[i-1] if i-1 >= 0 else InstructionBlock() blockA.add_token(token) blockB = InstructionBlock() if isinstance( tokens[i+1], str) else tokens[i+1] - blockB.add_token("JUMP") elif token == "OP_ELSE": blockC = InstructionBlock() if isinstance( tokens[i+1], str) else tokens[i+1] - blockC.add_token("JUMP") elif token == "OP_ENDIF": blockD = tokens[i+1] if i+1 < len( @@ -114,17 +129,88 @@ class BitcoinParser (ILanguage): return parsed_code[0] def __set_block_pc(control_flow_graph: InstructionBlock): - """Set the program counters for every block of instructions.""" + """ + First pass on the control flow graph. + + * Set the program counters for every block of instructions. + * Set a JUMP op code at the end of every block if there's not + already a (conditionnal or unconditonnal) JUMP. + """ fifo_queue = deque() # used for a breadth first serach over the graph fifo_queue.append(control_flow_graph) + visited = set() + block_counter = 0 # pc of the first instruction in the current block # First pass to set the pc of the first instruction in every block while len(fifo_queue) > 0: block = fifo_queue.pop() + # Do not revisit the same block + if block in visited: + continue + + visited.add(block) block.set_pc(block_counter) for c in block.children: fifo_queue.appendleft(c) + # Add another token 'JUMP' if the block has a child and the final + # token is not a branching token + last_opcode = OpCodes.token_to_opcode( + block.tokens[-1]) if len(block.tokens) > 0 else None + branching = last_opcode.branching if last_opcode else False + if len(block.children) > 0 and not branching: + block.add_token("OP_JUMP") + block_counter += len(block.tokens) + + def __build_instructions(control_flow_graph: InstructionBlock): + """Build the instructions of the script. + + Parse the control flow graph of the script and replaces the tokens + with real instructions. + """ + fifo_queue = deque() # used for a breadth first serach over the graph + fifo_queue.append(control_flow_graph) + instructions = {} + + while len(fifo_queue) > 0: + block = fifo_queue.pop() + block_instructions = [] + + for i, token in enumerate(block.tokens): + pc = block.pc + i + + if token.startswith("OP_"): + generator = OpCodes.token_to_opcode( + token).instruction_constructor + if not generator: + raise ValueError( + f"""No generator implemented for this token: + {token}""") + + default_args = OpCodes.token_to_opcode(token).default_args + destination_pcs = list( + map(lambda c: c.pc, block.children)) + + instr = generator( + pc=pc, + **{ + **default_args, + "destination_pcs": destination_pcs + } + ) + + else: + instr = PushDataOperator(pc=pc, data=token) + + block_instructions.append(instr) + instructions[instr.pc] = instr + + block.set_instructions(block_instructions) + + for c in block.children: + fifo_queue.appendleft(c) + + return instructions -- GitLab