PyPy and conda-forge

a lot

  $ conda create -c conda-forge -n my-pypy-env pypy python=3.8
  $ conda activate my-pypy-env

  $ conda config --set channel_priority strict

import pytest
from typing import Optional, Any


class Value:
    def find(self):
        raise NotImplementedError("abstract")

    def _set_forwarded(self, value):
        raise NotImplementedError("abstract")


class Operation(Value):
    def __init__(
        self, name: str, args: list[Value]
    ):
        self.name = name
        self.args = args
        self.forwarded = None
        self.info = None

    def __repr__(self):
        return (
            f"Operation({self.name}, "
            f"{self.args}, {self.forwarded}, "
            f"{self.info})"
        )

    def find(self) -> Value:
        op = self
        while isinstance(op, Operation):
            next = op.forwarded
            if next is None:
                return op
            op = next
        return op

    def arg(self, index):
        return self.args[index].find()

    def make_equal_to(self, value: Value):
        self.find()._set_forwarded(value)

    def _set_forwarded(self, value: Value):
        self.forwarded = value


class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

    def find(self):
        return self

    def _set_forwarded(self, value: Value):
        assert (
            isinstance(value, Constant)
            and value.value == self.value
        )

class Block(list):
    def opbuilder(opname):
        def wraparg(arg):
            if not isinstance(arg, Value):
                arg = Constant(arg)
            return arg
        def build(self, *args):
            # construct an Operation, wrap the
            # arguments in Constants if necessary
            op = Operation(opname,
                [wraparg(arg) for arg in args])
            # add it to self, the basic block
            self.append(op)
            return op
        return build

    # a bunch of operations we support
    add = opbuilder("add")
    mul = opbuilder("mul")
    getarg = opbuilder("getarg")
    dummy = opbuilder("dummy")
    lshift = opbuilder("lshift")
    # some new one for this post
    alloc = opbuilder("alloc")
    load = opbuilder("load")
    store = opbuilder("store")
    print = opbuilder("print")

def bb_to_str(bb: Block, varprefix: str = "var"):
    def arg_to_str(arg: Value):
        if isinstance(arg, Constant):
            return str(arg.value)
        else:
            return varnames[arg]

    varnames = {}
    res = []
    for index, op in enumerate(bb):
        var = f"{varprefix}{index}"
        varnames[op] = var
        arguments = ", ".join(
            arg_to_str(op.arg(i))
                for i in range(len(op.args))
        )
        strop = f"{var} = {op.name}({arguments})"
        res.append(strop)
    return "\n".join(res)

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
def test_interpret():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    assert interpret(bb, 17) == 17

class Object:
    def __init__(self):
        self.contents: dict[int, Any] = {}

    def store(self, idx : int, value : Any):
        self.contents[idx] = value

    def load(self, idx : int):
        return self.contents[idx]

def get_num(op, index=1):
    assert isinstance(op.arg(index), Constant)
    return op.arg(index).value

def interpret(bb : Block, *args : tuple[Any]):
    def argval(op, i):
        arg = op.arg(i)
        if isinstance(arg, Constant):
            return arg.value
        else:
            assert isinstance(arg, Operation)
            return arg.info

    for index, op in enumerate(bb):
        if op.name == "getarg":
            res = args[get_num(op, 0)]
        elif op.name == "alloc":
            res = Object()
        elif op.name == "load":
            fieldnum = get_num(op)
            res = argval(op, 0).load(fieldnum)
        elif op.name == "store":
            obj = argval(op, 0)
            fieldnum = get_num(op)
            fieldvalue = argval(op, 2)
            obj.store(fieldnum, fieldvalue)
            # no result, only side effect
            continue
        elif op.name == "print":
            res = argval(op, 0)
            print(res)
            return res
        else:
            raise NotImplementedError(
                f"{op.name} not supported")
        op.info = res

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
var0 = getarg(0)
print(var0)
def test_remove_unused_allocation():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    # the virtual object looks like this:
    #  obj
    # ┌──────────┐
    # │ 0: var0  │
    # └──────────┘
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

class VirtualObject:
    def __init__(self):
        self.contents: dict[int, Value] = {}

    def store(self, idx, value):
        self.contents[idx] = value

    def load(self, idx):
        return self.contents[idx]

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            field = get_num(op)
            info.store(field, op.arg(2))
            continue
        opt_bb.append(op)
    return opt_bb

def test_remove_two_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    sto1 = bb.store(obj0, 0, var0)
    obj1 = bb.alloc()
    sto2 = bb.store(obj1, 0, obj0)
    var1 = bb.load(obj1, 0)
    var2 = bb.load(var1, 0)
    bb.print(var2)
    # the virtual objects look like this:
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: var0 │
    #   └─────────┘
    # therefore
    # var1 is the same as obj0
    # var2 is the same as var0
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

def test_materialize():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    #  obj is virtual, without any fields
    # ┌───────┐
    # │ empty │
    # └───────┘
    # then we store a reference to obj into
    # field 0 of var0. Since var0 is not virtual,
    # obj escapes, so we have to put it back
    # into the optimized basic block
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)"""
    # so far, fails like this:
    # the line:
    # info.store(field, op.arg(2))
    # produces an AttributeError because info
    # is None

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    assert isinstance(info, VirtualObject)
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_dont_materialize_twice():
    # obj is again an empty virtual object,
    # and we store it into var0 *twice*.
    # this should only materialize it once
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto0 = bb.store(var0, 0, obj)
    sto1 = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)
optvar3 = store(optvar0, 0, optvar1)"""
    # fails so far: the operations that we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar2 = store(optvar0, 0, optvar1)
    # optvar3 = alloc()
    # optvar4 = store(optvar0, 0, optvar3)
    # ie the object is materialized twice,
    # which is incorrect

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_non_virtuals():
    # in this example we store a non-virtual var1
    # into another non-virtual var0
    # this should just lead to no optimization at
    # all
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    sto = bb.store(var0, 0, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = store(optvar0, 0, optvar1)"""

def test_materialization_constants():
    # in this example we store the constant 17
    # into the non-virtual var0
    # again, this will not be optimized
    bb = Block()
    var0 = bb.getarg(0)
    sto = bb.store(var0, 0, 17)
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails so far, triggering
    # the assert:
    # assert not isinstance(value, Constant)
    # in materialize
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = store(optvar0, 0, 17)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_fields():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    obj = bb.alloc()
    contents0 = bb.store(obj, 0, 8)
    contents1 = bb.store(obj, 1, var1)
    sto = bb.store(var0, 0, obj)

    # the virtual obj looks like this
    #  obj
    # ┌──────┬──────────┐
    # │ 0: 8 │ 1: var1  │
    # └──────┴──────────┘
    # then it needs to be materialized
    # this is the first example where a virtual
    # object that we want to materialize has any
    # content and is not just an empty object
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = alloc()
optvar3 = store(optvar2, 0, 8)
optvar4 = store(optvar2, 1, optvar1)
optvar5 = store(optvar0, 0, optvar2)"""
    # fails so far! the operations we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = getarg(1)
    # optvar2 = alloc()
    # optvar3 = store(optvar0, 0, optvar2)
    # which is wrong, because the store operations
    # into optvar1 got lost

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in info.contents.items():
        # re-create store operation
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_chained_objects():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    obj1 = bb.alloc()
    contents = bb.store(obj0, 0, obj1)
    const = bb.store(obj1, 0, 1337)
    sto = bb.store(var0, 0, obj0)
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: 1337 │
    #   └─────────┘
    # now obj0 escapes
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = alloc()
optvar3 = store(optvar2, 0, 1337)
optvar4 = store(optvar1, 0, optvar2)
optvar5 = store(optvar0, 0, optvar1)"""
    # fails in an annoying way! the resulting
    # basic block is not in proper SSA form
    # so printing it fails. The optimized
    # block would look like this:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar3 = store(optvar1, 0, optvar2)
    # optvar4 = store(optvar0, 0, optvar1)
    # where optvar2 is an ``alloc`` Operation
    # that is not itself in the output block

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_object_graph_cycles():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, var1)
    var3 = bb.store(var0, 1, var1)
    #   ┌────────┐
    #   ▼        │
    #  obj0      │
    # ┌──────┐   │
    # │ 0: ╷ │   │
    # └────┼─┘   │
    #      │     │
    #      └─────┘
    # obj0 points to itself, and then it is
    # escaped
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails with an
    # InfiniteRecursionError
    # materialize calls itself, infinitely

    # what we want is instead this output:
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar1, 0, optvar1)
optvar3 = store(optvar0, 1, optvar1)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # only materialize once
    value.info = None
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def test_load_non_virtual():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.load(var0, 0)
    bb.print(var1)
    # the next line fails in the line
    # op.make_equal_to(info.load(field))
    # because info is None
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = load(optvar0, 0)
optvar2 = print(optvar1)"""

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
            # otherwise not virtual, use the
            # general path below
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_materialize_on_other_ops():
    # materialize not just on store
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = print(optvar1)"""
    # again, the resulting basic block is not in
    # valid SSA form

# materialize is unchanged
def materialize(opt_bb, value: Value) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if not info:
        # Already materialized
        return
    assert value.name == "alloc"
    opt_bb.append(value)
    value.info = None
    for idx, val in sorted(info.contents.items()):
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
        # materialize all the arguments of
        # operations that are put into the
        # output basic block
        for arg in op.args:
            materialize(opt_bb, arg.find())
        opt_bb.append(op)
    return opt_bb

def test_sink_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, 123)
    var3 = bb.store(var1, 1, 456)
    var4 = bb.load(var1, 0)
    var5 = bb.load(var1, 1)
    var6 = bb.add(var4, var5)
    var7 = bb.store(var1, 0, var6)
    var8 = bb.store(var0, 1, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(123, 456)
optvar2 = alloc()
optvar3 = store(optvar2, 0, optvar1)
optvar4 = store(optvar2, 1, 456)
optvar5 = store(optvar0, 1, optvar2)"""

var1 = add(b, 17)
var2 = mul(a, var1)
var3 = add(b, 17)
var4 = add(var2, var3)
import pytest
from typing import Optional, Any

class Value:
    pass

class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

class Operation(Value):
    def __init__(self, name: str, args: list[Value]):
        self.name = name
        self.args = args

    def __repr__(self):
        return f"Operation({self.name}, {self.args})"

    def arg(self, index: int):
        return self.args[index]

def test_construct_example():
    # first we need something to represent
    # "a" and "b". In our limited view, we don't
    # know where they come from, so we will define
    # them with a pseudo-operation called "getarg"
    # which takes a number n as an argument and
    # returns the n-th input argument. The proper
    # SSA way to do this would be phi-nodes.

    a = Operation("getarg", [Constant(0)])
    b = Operation("getarg", [Constant(1)])
    # var1 = add(b, 17)
    var1 = Operation("add", [b, Constant(17)])
    # var2 = mul(a, var1)
    var2 = Operation("mul", [a, var1])
    # var3 = add(b, 17)
    var3 = Operation("add", [b, Constant(17)])
    # var4 = add(var2, var3)
    var4 = Operation("add", [var2, var3])

    sequence = [a, b, var1, var2, var3, var4]
    # nothing to test really, it shouldn't crash

class Block(list):
    def opbuilder(opname):
        def wraparg(arg):
            if not isinstance(arg, Value):
                arg = Constant(arg)
            return arg
        def build(self, *args):
            # construct an Operation, wrap the
            # arguments in Constants if necessary
            op = Operation(opname,
                [wraparg(arg) for arg in args])
            # add it to self, the basic block
            self.append(op)
            return op
        return build

    # a bunch of operations we support
    add = opbuilder("add")
    mul = opbuilder("mul")
    getarg = opbuilder("getarg")
    dummy = opbuilder("dummy")
    lshift = opbuilder("lshift")

def test_convencience_block_construction():
    bb = Block()
    # a again with getarg, the following line
    # defines the Operation instance and
    # immediately adds it to the basic block bb
    a = bb.getarg(0)
    assert len(bb) == 1
    assert bb[0].name == "getarg"

    # it's a Constant
    assert bb[0].args[0].value == 0

    # b with getarg
    b = bb.getarg(1)
    # var1 = add(b, 17)
    var1 = bb.add(b, 17)
    # var2 = mul(a, var1)
    var2 = bb.mul(a, var1)
    # var3 = add(b, 17)
    var3 = bb.add(b, 17)
    # var4 = add(var2, var3)
    var4 = bb.add(var2, var3)
    assert len(bb) == 6

[Operation('getarg', [Constant(0)]),
 Operation('getarg', [Constant(1)]),
 Operation('add',
           [Operation('getarg',
                      [Constant(1)]),
                 Constant(17)]),
 Operation('mul',
           [Operation('getarg',
                      [Constant(0)]),
                 Operation('add',
                           [Operation('getarg',
                                      [Constant(1)]),
                            Constant(17)])]),
 Operation('add',
           [Operation('getarg',
                      [Constant(1)]),
            Constant(17)]),
 Operation('add',
           [Operation('mul',
                       [Operation('getarg',
                                  [Constant(0)]),
                             Operation('add',
                                       [Operation('getarg',
                                                  [Constant(1)]),
                                        Constant(17)])]),
                 Operation('add',
                           [Operation('getarg',
                                           [Constant(1)]),
                                 Constant(17)])])]

def bb_to_str(bb: Block, varprefix: str = "var"):
    # the implementation is not too important,
    # look at the test below to see what the
    # result looks like

    def arg_to_str(arg: Value):
        if isinstance(arg, Constant):
            return str(arg.value)
        else:
            # the key must exist, otherwise it's
            # not a valid SSA basic block:
            # the variable must be defined before
            # its first use
            return varnames[arg]

    varnames = {}
    res = []
    for index, op in enumerate(bb):
        # give the operation a name used while
        # printing:
        var = f"{varprefix}{index}"
        varnames[op] = var
        arguments = ", ".join(
            arg_to_str(op.arg(i))
                for i in range(len(op.args))
        )
        strop = f"{var} = {op.name}({arguments})"
        res.append(strop)
    return "\n".join(res)

def test_basicblock_to_str():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, var0)

    assert bb_to_str(bb) == """\
var0 = getarg(0)
var1 = add(5, 4)
var2 = add(var1, var0)"""

    # with a different prefix for the invented
    # variable names:
    assert bb_to_str(bb, "x") == """\
x0 = getarg(0)
x1 = add(5, 4)
x2 = add(x1, x0)"""

    # and our running example:
    bb = Block()
    a = bb.getarg(0)
    b = bb.getarg(1)
    var1 = bb.add(b, 17)
    var2 = bb.mul(a, var1)
    var3 = bb.add(b, 17)
    var4 = bb.add(var2, var3)

    assert bb_to_str(bb, "v") == """\
v0 = getarg(0)
v1 = getarg(1)
v2 = add(v1, 17)
v3 = mul(v0, v2)
v4 = add(v1, 17)
v5 = add(v3, v4)"""
    # Note the re-numbering of the variables! We
    # don't attach names to Operations at all, so
    # the printing will just number them in
    # sequence, can sometimes be a source of
    # confusion.

v0 = getarg(0)
v1 = getarg(1)
v2 = add(v1, 17)
v3 = mul(v0, v2)
v4 = add(v1, 17)
v5 = add(v3, v4)
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar1, 17)
optvar3 = mul(optvar0, optvar2)
optvar4 = add(optvar3, optvar2)
class Value:
    def find(self):
        raise NotImplementedError("abstract")
    def _set_forwarded(self, value):
        raise NotImplementedError("abstract")


class Operation(Value):
    def __init__(self, name: str, args: list[Value]):
        self.name = name
        self.args = args
        self.forwarded = None

    def __repr__(self):
        return (
            f"Operation({self.name},"
            f"{self.args}, {self.forwarded})"
        )

    def find(self) -> Value:
        # returns the "representative" value of
        # self, in the union-find sense
        op = self
        while isinstance(op, Operation):
            # could do path compression here too
            # but not essential
            next = op.forwarded
            if next is None:
                return op
            op = next
        return op

    def arg(self, index):
        # change to above: return the
        # representative of argument 'index'
        return self.args[index].find()

    def make_equal_to(self, value: Value):
        # this is "union" in the union-find sense,
        # but the direction is important! The
        # representative of the union of Operations
        # must be either a Constant or an operation
        # that we know for sure is not optimized
        # away.

        self.find()._set_forwarded(value)

    def _set_forwarded(self, value: Value):
        self.forwarded = value


class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

    def find(self):
        return self

    def _set_forwarded(self, value: Value):
        # if we found out that an Operation is
        # equal to a constant, it's a compiler bug
        # to find out that it's equal to another
        # constant
        assert isinstance(value, Constant) and \
            value.value == self.value

def test_union_find():
    # construct three operation, and unify them
    # step by step
    bb = Block()
    a1 = bb.dummy(1)
    a2 = bb.dummy(2)
    a3 = bb.dummy(3)

    # at the beginning, every op is its own
    # representative, that means every
    # operation is in a singleton set
    # {a1} {a2} {a3}
    assert a1.find() is a1
    assert a2.find() is a2
    assert a3.find() is a3

    # now we unify a2 and a1, then the sets are
    # {a1, a2} {a3}
    a2.make_equal_to(a1)
    # they both return a1 as the representative
    assert a1.find() is a1
    assert a2.find() is a1
    # a3 is still different
    assert a3.find() is a3

    # now they are all in the same set {a1, a2, a3}
    a3.make_equal_to(a2)
    assert a1.find() is a1
    assert a2.find() is a1
    assert a3.find() is a1

    # now they are still all the same, and we
    # also learned that they are the same as the
    # constant 6
    # the single remaining set then is
    # {6, a1, a2, a3}
    c = Constant(6)
    a2.make_equal_to(c)
    assert a1.find() is c
    assert a2.find() is c
    assert a3.find() is c

    # union with the same constant again is fine
    a2.make_equal_to(c)

def constfold_buggy(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        # basic idea: go over the list and do
        # constant folding of add where possible
        if op.name == "add":
            arg0 = op.args[0]
            arg1 = op.args[1]
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                # can constant-fold! that means we
                # learned a new equality, namely
                # that op is equal to a specific
                # constant
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                # don't need to have the operation
                # in the optimized basic block
                continue
        # otherwise the operation is not
        # constant-foldable and we put into the
        # output list
        opt_bb.append(op)
    return opt_bb


def test_constfold_simple():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, var0)

    opt_bb = constfold_buggy(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(9, optvar0)"""

@pytest.mark.xfail
def test_constfold_buggy_limitation():
    # this test fails! it shows the problem with
    # the above simple constfold_buggy pass

    bb = Block()
    var0 = bb.getarg(0)
    # this is folded
    var1 = bb.add(5, 4)
    # we want this folded too, but it doesn't work
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)

    opt_bb = constfold_buggy(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

optvar0 = getarg(0)
optvar1 = add(9, 10)
optvar2 = add(optvar1, optvar0)
def constfold(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        # basic idea: go over the list and do
        # constant folding of add where possible
        if op.name == "add":
            # >>> changed
            arg0 = op.arg(0) # uses .find()
            arg1 = op.arg(1) # uses .find()
            # <<< end changes
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                # can constant-fold! that means we
                # learned a new equality, namely
                # that op is equal to a specific
                # constant
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                # don't need to have the operation
                # in the optimized basic block
                continue
        # otherwise the operation is not
        # constant-foldable and we put into the
        # output list
        opt_bb.append(op)
    return opt_bb


def test_constfold_two_ops():
    # now it works!
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)
    opt_bb = constfold(bb)

    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

def cse(bb: Block) -> Block:
    # structure is the same, loop over the input,
    # add some but not all operations to the
    # output

    opt_bb = Block()

    for op in bb:
        # only do CSE for add here, but it
        # generalizes
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)
            # Check whether we have emitted the
            # same operation already
            prev_op = find_prev_add_op(
                arg0, arg1, opt_bb)
            if prev_op is not None:
                # if yes, we can optimize op away
                # and replace it with the earlier
                # result, which is an Operation
                # that was already emitted to
                # opt_bb
                op.make_equal_to(prev_op)
                continue
        opt_bb.append(op)
    return opt_bb


def eq_value(val0, val1):
    if isinstance(val0, Constant) and \
            isinstance(val1, Constant):
        # constants compare by their value
        return val0.value == val1.value
    # everything else by identity
    return val0 is val1


def find_prev_add_op(arg0: Value, arg1: Value,
        opt_bb: Block) -> Optional[Operation]:
    # Really naive and quadratic implementation.
    # What we do is walk over the already emitted
    # operations and see whether we emitted an add
    # with the current arguments already. A real
    # implementation might use a hashmap of some
    # kind, or at least only look at a limited
    # window of instructions.
    for opt_op in opt_bb:
        if opt_op.name != "add":
            continue
        # It's important to call arg here,
        # for the same reason why we
        # needed it in constfold: we need to
        # make sure .find() is called
        if eq_value(arg0, opt_op.arg(0)) and \
                eq_value(arg1, opt_op.arg(1)):
            return opt_op
    return None


def test_cse():
    bb = Block()
    a = bb.getarg(0)
    b = bb.getarg(1)
    var1 = bb.add(b, 17)
    var2 = bb.mul(a, var1)
    var3 = bb.add(b, 17)
    var4 = bb.add(var2, var3)

    opt_bb = cse(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar1, 17)
optvar3 = mul(optvar0, optvar2)
optvar4 = add(optvar3, optvar2)"""

def strength_reduce(bb: Block) -> Block:
    opt_bb = Block()
    for op in bb:
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)
            if arg0 is arg1:
                # x + x turns into x << 1
                newop = opt_bb.lshift(arg0, 1)
                op.make_equal_to(newop)
                continue
        opt_bb.append(op)
    return opt_bb

def test_strength_reduce():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(var0, var0)

    opt_bb = strength_reduce(bb)

    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = lshift(optvar0, 1)"""

def optimize(bb: Block) -> Block:
    opt_bb = Block()

    for op in bb:
        if op.name == "add":
            arg0 = op.arg(0)
            arg1 = op.arg(1)

            # constant folding
            if isinstance(arg0, Constant) and \
                    isinstance(arg1, Constant):
                value = arg0.value + arg1.value
                op.make_equal_to(Constant(value))
                continue

            # cse
            prev_op = find_prev_add_op(
                arg0, arg1, opt_bb)
            if prev_op is not None:
                op.make_equal_to(prev_op)
                continue

            # strength reduce:
            # x + x turns into x << 1
            if arg0 is arg1:
                newop = opt_bb.lshift(arg0, 1)
                op.make_equal_to(newop)
                continue

            # and while we are at it, let's do some
            # arithmetic simplification:
            # a + 0 => a
            if eq_value(arg0, Constant(0)):
                op.make_equal_to(arg1)
                continue
            if eq_value(arg1, Constant(0)):
                op.make_equal_to(arg0)
                continue
        opt_bb.append(op)
    return opt_bb


def test_single_pass():
    bb = Block()
    # constant folding
    var0 = bb.getarg(0)
    var1 = bb.add(5, 4)
    var2 = bb.add(var1, 10)
    var3 = bb.add(var2, var0)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(19, optvar0)"""

    # cse + strength reduction
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    var2 = bb.add(var0, var1)
    var3 = bb.add(var0, var1) # the same as var3
    var4 = bb.add(var2, 2)
    var5 = bb.add(var3, 2) # the same as var4
    var6 = bb.add(var4, var5)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = add(optvar0, optvar1)
optvar3 = add(optvar2, 2)
optvar4 = lshift(optvar3, 1)"""

    # removing + 0
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.add(16, -16)
    var2 = bb.add(var0, var1)
    var3 = bb.add(0, var2)
    var4 = bb.add(var2, var3)

    opt_bb = optimize(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = lshift(optvar0, 1)"""

class TestW_IntObject:
    ...

    def test_hash(self):
        w_x = W_IntObject(42)
        w_result = w_x.descr_hash(self.space)
        assert isinstance(w_result, W_IntObject)
        assert w_result.intval == 42

def test_hash():
    assert hash(42) == 42

def test_load_attr(self):
    src = '''
        class A(object):
            pass
        a = A()
        a.x = 1
        def main(n):
            i = 0
            while i < n:
                i = i + a.x
            return i
    '''
    log = self.run(src, [1000])
    assert log.result == 1000
    loop, = log.loops_by_filename(self.filepath)
    assert loop.match("""
        i9 = int_lt(i5, i6)
        guard_true(i9, descr=...)
        guard_not_invalidated(descr=...)
        i10 = int_add(i5, 1)
        --TICK--
        jump(..., descr=...)
    """)

Greatest Hits

Personal Favourites

Interpreter

Version 1: Naive Attempt

Version 2: Re-Materializing Allocations

Version 3: Don't Materialize Twice

Version 4: Materialization of Constants

Version 5: Materializing Fields

Version 6: Recursive Materialization

Version 7: Dealing with Object Cycles

Version 8: Loading from non-virtual objects

Version 9 (Final): Materialize on Other Operations

Conclusion

Footnotes

Topics and goals

What is a sprint?

Location

Exact times

Registration

Implementing the Intermediate Representation

Storing Equivalences between Operations Using a Union-Find Data Structure

Constant Folding

Common Subexpression Elimination

Strength Reduction

Putting Things Together

Conclusion

Some Further Pointers

How is PyPy Tested?

Background

PyPy Testing History

Interpreter-Level Tests

Application-Level Tests

The CPython Test Suite

Testing for Performance Regressions

Conclusion

Footnotes

PyPy v7.3.9 security release

What is PyPy?

Known Issues with PyPy3.9

What else is new?

PyPy v7.3.8: release of python 2.7, 3.7, 3.8, and 3.9-beta

What is PyPy?

Known Issues with PyPy3.9

What else is new?

Natural Language Processing for Icelandic with PyPy: A Case Study

The PyPy blogposts

Recent Posts

Archives

Tags