KiBot/kibot/mcpyrate/quotes.py

515 lines
22 KiB
Python

# -*- coding: utf-8; -*-
"""Quasiquotes. Build ASTs in your macros, using syntax that mostly looks like regular code."""
__all__ = ['capture', 'lookup', 'astify', 'unastify',
'q', 'u', 'n', 'a', 's', 'h',
'expand1q', 'expandq',
'expand1', 'expand']
import ast
import pickle
from .core import global_bindings
from .expander import MacroExpander
from .markers import ASTMarker, get_markers
from .unparser import unparse
from .utils import gensym, NestingLevelTracker
# --------------------------------------------------------------------------------
class QuasiquoteMarker(ASTMarker):
"""Base class for AST markers used by quasiquotes. Compiled away by `astify`."""
pass
class ASTLiteral(QuasiquoteMarker): # like `macropy`'s `Literal`
"""Keep the given subtree as-is."""
pass
class CaptureLater(QuasiquoteMarker): # like `macropy`'s `Captured`
"""Capture the value the given subtree evaluates to at the use site of `q`."""
def __init__(self, body, name):
super().__init__(body)
self.name = name
self._fields += ["name"]
# --------------------------------------------------------------------------------
# Hygienically captured run-time values... but to support `.pyc` caching, we can't use a per-process dictionary.
# _hygienic_registry = {}
def _mcpyrate_quotes_attr(attr):
"""Create an AST that, when compiled and run, looks up `mcpyrate.quotes.attr` in `Load` context."""
mcpyrate_quotes_module = ast.Attribute(value=ast.Name(id="mcpyrate", ctx=ast.Load()),
attr="quotes",
ctx=ast.Load())
return ast.Attribute(value=mcpyrate_quotes_module,
attr=attr,
ctx=ast.Load())
def _capture_into(mapping, value, basename):
for k, v in mapping.items():
if v is value:
key = k
break
else:
key = gensym(basename)
mapping[key] = value
return key
def capture(value, name):
"""Hygienically capture a run-time value.
`value`: A run-time value. Must be picklable.
`name`: A human-readable name.
The return value is an AST that, when compiled and run, returns the
captured value (even in another Python process later).
Hygienically captured macro invocations are treated using a different
mechanism; see `mcpyrate.core.global_bindings`.
"""
# If we didn't need to consider bytecode caching, we could just store the
# value in a registry that is populated at macro expansion time. Each
# unique value (by `id`) could be stored only once.
#
# key = _capture_into(_hygienic_registry, value, name)
# return ast.Call(_mcpyrate_quotes_attr("lookup"),
# [ast.Constant(value=key)],
# [])
# But we want to support bytecode caching. To avoid introducing hard-to-find
# bugs into user code, we must provide consistent semantics, regardless of
# whether updating of the bytecode cache is actually enabled or not (see
# `sys.dont_write_bytecode`).
#
# If the macro expansion result is to be re-used from a `.pyc`, we must
# serialize and store the captured value to disk, so that values from
# "macro expansion time last week" remain available when the `.pyc` is
# loaded in another Python process, much later.
#
# Modules are macro-expanded independently (no global finalization for the
# whole codebase), and a `.pyc` may indeed later get loaded into some other
# codebase that imports the same module, so we can't make a centralized
# registry, like we could without bytecode caching (for the current process).
#
# So really pretty much the only thing we can do reliably and simply is to
# store a fresh serialized copy of the value at the capture location in the
# source code, independently at each capture location.
#
# Putting these considerations together, we pickle the value, causing a copy
# and serialization.
#
frozen_value = pickle.dumps(value)
return ast.Call(_mcpyrate_quotes_attr("lookup"),
[ast.Tuple(elts=[ast.Constant(value=name), # for human-readability of expanded code
ast.Constant(value=frozen_value)])],
[])
_lookup_cache = {}
def lookup(key):
"""Look up a hygienically captured run-time value."""
# if type(key) is str: # captured in sys.dont_write_bytecode mode, in this process
# return _hygienic_registry[key]
# else: # frozen into macro-expanded code
# name, frozen_value = key
# return pickle.loads(frozen_value)
name, frozen_value = key
cachekey = (name, id(frozen_value)) # id() so each capture instance behaves independently
if cachekey not in _lookup_cache:
_lookup_cache[cachekey] = pickle.loads(frozen_value)
return _lookup_cache[cachekey]
# --------------------------------------------------------------------------------
def astify(x, expander=None): # like `macropy`'s `ast_repr`
"""Lift a value into its AST representation, if possible.
When the AST is compiled and run, it will evaluate to `x`.
If `x` itself is an AST, then produce an AST that, when compiled and run,
will generate the AST `x`.
If the input is a `list` of ASTs for a statement suite, the return value
is a single `ast.List` node, with its `elts` taken from the input list.
However, most of the time it's not used this way, because `BaseMacroExpander`
already translates a `visit` to a statement suite into visits to individual
nodes, because otherwise `ast.NodeTransformer` chokes on the input. (The only
exception is `q` in block mode; it'll produce a `List` this way.)
`expander` is a `BaseMacroExpander` instance, used for detecting macro names
inside `CaptureLater` markers. If no `expander` is provided, macros cannot be
hygienically captured.
Raises `TypeError` when the lifting fails.
"""
def recurse(x): # second layer just to auto-pass `expander` by closure.
T = type(x)
# Drop the ASTLiteral wrapper; it only tells us to pass through this subtree as-is.
if T is ASTLiteral:
return x.body
# This is the magic part of q[h[]].
elif T is CaptureLater:
if expander and type(x.body) is ast.Name:
function = expander.isbound(x.body.id)
if function:
# Hygienically capture a macro name. We do this immediately,
# during the expansion of `q`. This allows macros in scope at
# the use site of `q` to be hygienically propagated out to the
# use site of the macro that used `q`. So you can write macros
# that `q[h[macroname][...]]` and `macroname` doesn't have to be
# macro-imported wherever that code gets spliced in.
macroname = x.body.id
uniquename = _capture_into(global_bindings, function, macroname)
return recurse(ast.Name(id=uniquename))
# Hygienically capture a garden variety run-time value.
# At the use site of q[], this captures the value, and rewrites itself
# into a lookup. At the use site of the macro that used q[], that
# rewritten code looks up the captured value.
return ast.Call(_mcpyrate_quotes_attr('capture'),
[x.body,
ast.Constant(value=x.name)],
[])
elif T in (int, float, str, bytes, bool, type(None)):
return ast.Constant(value=x)
elif T is list:
return ast.List(elts=list(recurse(elt) for elt in x))
elif T is tuple:
return ast.Tuple(elts=list(recurse(elt) for elt in x))
elif T is dict:
return ast.Dict(keys=list(recurse(k) for k in x.keys()),
values=list(recurse(v) for v in x.values()))
elif T is set:
return ast.Set(elts=list(recurse(elt) for elt in x))
elif isinstance(x, ast.AST):
# TODO: Add support for astifying ASTMarkers?
# TODO: Otherwise the same as regular AST node, but need to refer to the
# TODO: module it is defined in, and we don't have everything in scope here.
if isinstance(x, ASTMarker):
raise TypeError(f"Cannot astify internal AST markers, got {unparse(x)}")
# The magic is in the Call. Take apart the input AST, and construct a
# new AST, that (when compiled and run) will re-generate the input AST.
#
# We refer to the stdlib `ast` module as `mcpyrate.quotes.ast` to avoid
# name conflicts at the use site of `q[]`.
fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
node = ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr('ast'),
attr=x.__class__.__name__,
ctx=ast.Load()),
[],
fields)
# Copy source location info for correct coverage reporting of a quoted block.
#
# The location info we fill in here is for the use site of `q`, which is
# typically inside a macro definition. Coverage for a quoted line of code
# means that the expansion of the quote contains input from that line.
# It says nothing about the run-time behavior of that code.
#
# Running the AST produced by the quote re-produces the input AST, which is
# indeed the whole point of quoting stuff. The AST is re-produced **without
# any source location info**. The fact that *this* location info is missing,
# on purpose, is the magic that allows the missing location fixer to fill
# the correct location info at the final use site, i.e. the use site of the
# macro that used `q`.
node = ast.copy_location(node, x)
return node
raise TypeError(f"Don't know how to astify {repr(x)}")
return recurse(x)
def unastify(tree):
"""Inverse of `astify`.
`tree` must have been produced by `astify`. Otherwise raises `TypeError`.
Essentially, this turns an AST representing quoted code back into an AST
that represents that code directly, not quoted. So in a sense, `unastify`
is a top-level unquote operator.
Note subtle difference in meaning to `u[]`. The `u[]` operator interpolates
a value from outside the quote context into the quoted representation - so
that the value actually becomes quoted! - whereas `unastify` inverts the
quote operation.
"""
# CAUTION: in `unastify`, we implement only what we minimally need.
def attr_ast_to_dotted_name(tree):
# Input is like:
# (mcpyrate.quotes).thing
# ((mcpyrate.quotes).ast).thing
if type(tree) is not ast.Attribute:
raise TypeError
acc = []
def recurse(tree):
acc.append(tree.attr)
if type(tree.value) is ast.Attribute:
recurse(tree.value)
elif type(tree.value) is ast.Name:
acc.append(tree.value.id)
else:
raise NotImplementedError
recurse(tree)
return ".".join(reversed(acc))
our_module_globals = globals()
def lookup_thing(dotted_name):
if not dotted_name.startswith("mcpyrate.quotes"):
raise NotImplementedError
path = dotted_name.split(".")
if len(path) < 3:
raise NotImplementedError
name_of_thing = path[2]
thing = our_module_globals[name_of_thing]
if len(path) > 3:
for attrname in path[3:]:
thing = getattr(thing, attrname)
return thing
T = type(tree)
if T is ast.Constant:
return tree.value
# Support machinery for `Call` AST node. This serendipitously supports also
# *args and **kwargs, because as of Python 3.6 those appear in `args` and
# `keywords`, and `Starred` needs no special support here.
elif T is list:
return [unastify(elt) for elt in tree]
elif T is ast.keyword:
return tree.arg, unastify(tree.value)
elif T is ast.List:
return [unastify(elt) for elt in tree.elts]
elif T is ast.Tuple:
return tuple(unastify(elt) for elt in tree.elts)
elif T is ast.Dict:
return {unastify(k): unastify(v) for k, v in zip(tree.keys, tree.values)}
elif T is ast.Set:
return {unastify(elt) for elt in tree.elts}
elif T is ast.Call:
dotted_name = attr_ast_to_dotted_name(tree.func)
callee = lookup_thing(dotted_name)
args = unastify(tree.args)
kwargs = {k: v for k, v in unastify(tree.keywords)}
node = callee(*args, **kwargs)
node = ast.copy_location(node, tree)
return node
raise TypeError(f"Don't know how to unastify {unparse(tree)}")
# --------------------------------------------------------------------------------
# Quasiquote macros
#
# These operators are named after Qu'nash, the goddess of quasiquotes in high-tech-elven mythology.
_quotelevel = NestingLevelTracker()
def _unquote_expand(tree, expander):
"""Expand quasiquote macros in `tree`. If quotelevel is zero, expand all macros in `tree`."""
if _quotelevel.value == 0:
tree = expander.visit_recursively(tree) # result should be runnable, so always use recursive mode.
else:
tree = _expand_quasiquotes(tree, expander)
def _expand_quasiquotes(tree, expander):
"""Expand quasiquote macros only."""
# Use a second expander instance, with different bindings. Copy only the
# bindings of the quasiquote macros from the main `expander`, accounting
# for possible as-imports. This second expander won't even see other macros,
# thus leaving them alone.
bindings = {k: v for k, v in expander.bindings.items() if v in (q, u, n, a, s, h)}
return MacroExpander(bindings, expander.filename).visit(tree)
def q(tree, *, syntax, expander, **kw):
"""[syntax, expr/block] quasiquote. Lift code into its AST representation."""
if syntax not in ("expr", "block"):
raise SyntaxError("`q` is an expr and block macro only")
with _quotelevel.changed_by(+1):
tree = _expand_quasiquotes(tree, expander) # expand any inner quotes and unquotes first
tree = astify(tree, expander=expander) # Magic part of `q`. Supply `expander` for `h[macro]` detection.
ps = get_markers(tree, QuasiquoteMarker) # postcondition: no remaining QuasiquoteMarkers
if ps:
assert False, f"QuasiquoteMarker instances remaining in output: {ps}"
if syntax == 'block':
target = kw['optional_vars'] # List, Tuple, Name
if type(target) is not ast.Name:
raise SyntaxError(f"expected a single asname, got {unparse(target)}")
# Note this `Assign` runs at the use site of `q`, it's not part of the quoted code section.
tree = ast.Assign([target], tree) # Here `tree` is a List.
return tree
def u(tree, *, syntax, expander, **kw):
"""[syntax, expr] unquote. Splice a simple value into a quasiquote.
The value is lifted into an AST that re-constructs that value.
"""
if syntax != "expr":
raise SyntaxError("`u` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`u` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
_unquote_expand(tree, expander)
# We want to generate an AST that compiles to the *value* of `v`. But when
# this runs, it is too early. We must astify *at the use site*. So use an
# `ast.Call` to delay, and in there, splice in `tree` as-is.
return ASTLiteral(ast.Call(_mcpyrate_quotes_attr("astify"), [tree], []))
def n(tree, *, syntax, **kw):
"""[syntax, expr] name-unquote. Splice a string, lifted into a lexical identifier, into a quasiquote.
The resulting node's `ctx` is filled in automatically by the macro expander later.
"""
if syntax != "expr":
raise SyntaxError("`n` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`n` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
return ASTLiteral(astify(ast.Name(id=ASTLiteral(tree))))
def a(tree, *, syntax, **kw):
"""[syntax, expr] AST-unquote. Splice an AST into a quasiquote."""
if syntax != "expr":
raise SyntaxError("`a` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`a` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
return ASTLiteral(tree)
def s(tree, *, syntax, **kw):
"""[syntax, expr] list-unquote. Splice a `list` of ASTs, as an `ast.List`, into a quasiquote."""
if syntax != "expr":
raise SyntaxError("`s` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`s` encountered while quotelevel < 1")
return ASTLiteral(ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr('ast'),
attr='List'),
[],
[ast.keyword("elts", tree)]))
def h(tree, *, syntax, expander, **kw):
"""[syntax, expr] hygienic-unquote. Splice any value, from the macro definition site, into a quasiquote.
Supports also values that have no meaningful `repr`. The value is captured
at the use site of the surrounding `q`.
The value is frozen into the expanded source code as a pickled blob,
separately at each use site of `h[]`. Thus the value must be picklable,
and each capture will pickle it again.
This is done to ensure the value will remain available, when the
already-expanded code (due to `.pyc` caching) runs again in another
Python process. (In other words, values from "macro expansion time
last week" would not otherwise be available.)
Supports also macros. To hygienically splice a macro invocation, `h[]` only
the macro name. Macro captures are not pickled; they simply extend the bindings
of the expander (with a uniqified macro name) that is expanding the use site of
the surrounding `q`.
"""
if syntax != "expr":
raise SyntaxError("`h` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`h` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
name = unparse(tree)
_unquote_expand(tree, expander)
return CaptureLater(tree, name)
# --------------------------------------------------------------------------------
def expand1q(tree, *, syntax, **kw):
'''[syntax, expr/block] quote-then-expand-once.
Quasiquote `tree`, then expand one layer of macros in it. Return the result
quasiquoted.
If your tree is already quasiquoted, use `expand1` instead.
'''
if syntax not in ("expr", "block"):
raise SyntaxError("`expand1q` is an expr and block macro only")
tree = q(tree, syntax=syntax, **kw)
return expand1(tree, syntax=syntax, **kw)
def expandq(tree, *, syntax, **kw):
'''[syntax, expr/block] quote-then-expand.
Quasiquote `tree`, then expand it until no macros remain. Return the result
quasiquoted. This operator is equivalent to `macropy`'s `q`.
If your tree is already quasiquoted, use `expand` instead.
'''
if syntax not in ("expr", "block"):
raise SyntaxError("`expandq` is an expr and block macro only")
tree = q(tree, syntax=syntax, **kw)
return expand(tree, syntax=syntax, **kw)
# --------------------------------------------------------------------------------
def expand1(tree, *, syntax, expander, **kw):
'''[syntax, expr/block] expand one layer of macros in quasiquoted `tree`.
The result remains in quasiquoted form.
Like calling `expander.visit_once(tree)`, but for quasiquoted `tree`.
`tree` must be a quasiquoted AST; i.e. output from, or an invocation of,
`q`, `expand1q`, `expandq`, `expand1`, or `expand`. Passing any other AST
as `tree` raises `TypeError`.
If your `tree` is not quasiquoted, `expand1q[...]` is a shorthand for
`expand1[q[...]]`.
'''
if syntax not in ("expr", "block"):
raise SyntaxError("`expand1` is an expr and block macro only")
# We first invert the quasiquote operation, then use the garden variety
# `expander` on the result, and then re-quote the expanded AST.
#
# The first `visit_once` makes any quote invocations inside this macro invocation expand first.
# If the input `tree` is an already expanded `q`, it will do nothing, because any macro invocations
# are then in a quoted form, which don't look like macro invocations to the expander.
# If the input `tree` is a `Done`, it will likewise do nothing.
tree = expander.visit_once(tree) # -> Done(body=...)
tree = expander.visit_once(unastify(tree.body)) # On wrong kind of input, `unastify` will `TypeError` for us.
# The final piece of the magic, why this works in the expander's recursive mode,
# without wrapping the result with `Done`, is that after `q` has finished, the output
# will be a **quoted** AST, so macro invocations in it don't look like macro invocations.
# Hence upon looping on the output, the expander finds no more macros.
return q(tree.body, syntax=syntax, expander=expander, **kw)
def expand(tree, *, syntax, expander, **kw):
'''[syntax, expr/block] expand quasiquoted `tree` until no macros remain.
The result remains in quasiquoted form.
Like calling `expander.visit_recursively(tree)`, but for quasiquoted `tree`.
`tree` must be a quasiquoted AST; i.e. output from, or an invocation of,
`q`, `expand1q`, `expandq`, `expand1`, or `expand`. Passing any other AST
as `tree` raises `TypeError`.
If your `tree` is not quasiquoted, `expandq[...]` is a shorthand for
`expand[q[...]]`.
'''
if syntax not in ("expr", "block"):
raise SyntaxError("`expand` is an expr and block macro only")
tree = expander.visit_once(tree) # make the quotes inside this invocation expand first; -> Done(body=...)
# Always use recursive mode, because `expand[...]` may appear inside
# another macro invocation that uses `visit_once` (which sets the expander
# mode to non-recursive for the dynamic extent of the visit).
tree = expander.visit_recursively(unastify(tree.body)) # On wrong kind of input, `unastify` will `TypeError` for us.
return q(tree, syntax=syntax, expander=expander, **kw)