515 lines
22 KiB
Python
515 lines
22 KiB
Python
# -*- coding: utf-8; -*-
|
|
"""Quasiquotes. Build ASTs in your macros, using syntax that mostly looks like regular code."""
|
|
|
|
# Public API: value capture/lookup, AST lifting and its inverse, the quasiquote
# operators, and the quote-aware expansion macros.
__all__ = ['capture', 'lookup', 'astify', 'unastify',
           'q', 'u', 'n', 'a', 's', 'h',
           'expand1q', 'expandq',
           'expand1', 'expand']
|
|
|
|
import ast
|
|
import pickle
|
|
|
|
from .core import global_bindings
|
|
from .expander import MacroExpander
|
|
from .markers import ASTMarker, get_markers
|
|
from .unparser import unparse
|
|
from .utils import gensym, NestingLevelTracker
|
|
|
|
# --------------------------------------------------------------------------------
|
|
|
|
class QuasiquoteMarker(ASTMarker):
    """Common base class for the AST markers that quasiquotes use internally.

    `astify` compiles these away.
    """
|
|
|
|
class ASTLiteral(QuasiquoteMarker):  # like `macropy`'s `Literal`
    """Marker: pass the wrapped subtree through unchanged."""
|
|
|
|
class CaptureLater(QuasiquoteMarker):  # like `macropy`'s `Captured`
    """Capture the value the given subtree evaluates to at the use site of `q`.

    `body`: the subtree whose value is to be captured.
    `name`: a human-readable name for the capture; `h` passes the unparsed
            source code of `body`.
    """
    def __init__(self, body, name):
        super().__init__(body)
        self.name = name
        # Register the extra field, so that field iteration over this marker sees `name` too.
        self._fields += ["name"]
|
|
|
|
# --------------------------------------------------------------------------------
|
|
|
|
# Hygienically captured run-time values... but to support `.pyc` caching, we can't use a per-process dictionary.
|
|
# _hygienic_registry = {}
|
|
|
|
def _mcpyrate_quotes_attr(attr):
|
|
"""Create an AST that, when compiled and run, looks up `mcpyrate.quotes.attr` in `Load` context."""
|
|
mcpyrate_quotes_module = ast.Attribute(value=ast.Name(id="mcpyrate", ctx=ast.Load()),
|
|
attr="quotes",
|
|
ctx=ast.Load())
|
|
return ast.Attribute(value=mcpyrate_quotes_module,
|
|
attr=attr,
|
|
ctx=ast.Load())
|
|
|
|
def _capture_into(mapping, value, basename):
|
|
for k, v in mapping.items():
|
|
if v is value:
|
|
key = k
|
|
break
|
|
else:
|
|
key = gensym(basename)
|
|
mapping[key] = value
|
|
return key
|
|
|
|
def capture(value, name):
    """Hygienically capture a run-time value.

    `value`: A run-time value. Must be picklable.
    `name`:  A human-readable name.

    Returns an AST that, when compiled and run, returns the captured value
    (even in another Python process later).

    Hygienically captured macro invocations are handled by a different
    mechanism; see `mcpyrate.core.global_bindings`.
    """
    # Design note.
    #
    # Without bytecode caching, it would be enough to store the value in a
    # per-process registry populated at macro expansion time (each unique value
    # stored only once), and to emit a `lookup` call on the registry key.
    #
    # But we want to support bytecode caching, and the semantics must be the
    # same regardless of whether the bytecode cache actually gets updated (see
    # `sys.dont_write_bytecode`), to avoid introducing hard-to-find bugs into
    # user code.
    #
    # When a macro expansion result is re-used from a `.pyc`, values from
    # "macro expansion time last week" must remain available in another Python
    # process, much later. So the captured value must be serialized and stored
    # to disk.
    #
    # Modules are macro-expanded independently (there is no global finalization
    # for the whole codebase), and a `.pyc` may later be loaded into some other
    # codebase that imports the same module. Hence a centralized registry is
    # not possible either.
    #
    # The only thing that works reliably and simply is to store a fresh
    # serialized copy of the value at each capture location in the source code,
    # independently. So we pickle the value, which both copies and serializes it.
    frozen_value = pickle.dumps(value)
    key = ast.Tuple(elts=[ast.Constant(value=name),  # for human-readability of expanded code
                          ast.Constant(value=frozen_value)])
    return ast.Call(_mcpyrate_quotes_attr("lookup"), [key], [])
|
|
|
|
_lookup_cache = {}
def lookup(key):
    """Return the hygienically captured run-time value that `key` refers to.

    `key`: a `(name, frozen_value)` pair as emitted by `capture`, where
           `frozen_value` is the pickled value.

    The unpickled value is memoized, so repeated lookups through the same
    capture instance return the same object.
    """
    name, frozen_value = key
    # `id()` of the blob, so that each capture instance behaves independently.
    cachekey = (name, id(frozen_value))
    try:
        return _lookup_cache[cachekey]
    except KeyError:
        thawed = _lookup_cache[cachekey] = pickle.loads(frozen_value)
        return thawed
|
|
|
|
# --------------------------------------------------------------------------------
|
|
|
|
def astify(x, expander=None):  # like `macropy`'s `ast_repr`
    """Lift a value into its AST representation, if possible.

    When the AST is compiled and run, it will evaluate to `x`.

    If `x` itself is an AST, then produce an AST that, when compiled and run,
    will generate the AST `x`.

    Supported atoms are `int`, `float`, `complex`, `str`, `bytes`, `bool`,
    `None` and `...` (the value types an `ast.Constant` can hold). Supported
    containers are `list`, `tuple`, `dict` and `set`, lifted recursively.

    If the input is a `list` of ASTs for a statement suite, the return value
    is a single `ast.List` node, with its `elts` taken from the input list.
    However, most of the time it's not used this way, because `BaseMacroExpander`
    already translates a `visit` to a statement suite into visits to individual
    nodes, because otherwise `ast.NodeTransformer` chokes on the input. (The only
    exception is `q` in block mode; it'll produce a `List` this way.)

    `expander` is a `BaseMacroExpander` instance, used for detecting macro names
    inside `CaptureLater` markers. If no `expander` is provided, macros cannot be
    hygienically captured.

    Raises `TypeError` when the lifting fails.
    """
    def recurse(x):  # second layer just to auto-pass `expander` by closure.
        T = type(x)

        # Drop the ASTLiteral wrapper; it only tells us to pass through this subtree as-is.
        if T is ASTLiteral:
            return x.body

        # This is the magic part of q[h[]].
        elif T is CaptureLater:
            if expander and type(x.body) is ast.Name:
                function = expander.isbound(x.body.id)
                if function:
                    # Hygienically capture a macro name. We do this immediately,
                    # during the expansion of `q`. This allows macros in scope at
                    # the use site of `q` to be hygienically propagated out to the
                    # use site of the macro that used `q`. So you can write macros
                    # that `q[h[macroname][...]]` and `macroname` doesn't have to be
                    # macro-imported wherever that code gets spliced in.
                    macroname = x.body.id
                    uniquename = _capture_into(global_bindings, function, macroname)
                    return recurse(ast.Name(id=uniquename))
            # Hygienically capture a garden variety run-time value.
            # At the use site of q[], this captures the value, and rewrites itself
            # into a lookup. At the use site of the macro that used q[], that
            # rewritten code looks up the captured value.
            return ast.Call(_mcpyrate_quotes_attr('capture'),
                            [x.body,
                             ast.Constant(value=x.name)],
                            [])

        # Atoms. `complex` and `Ellipsis` are included, because they occur as the
        # `value` of `ast.Constant` nodes in quoted code (e.g. `1j`, `...`).
        elif T in (int, float, complex, str, bytes, bool, type(None), type(...)):
            return ast.Constant(value=x)

        elif T is list:
            return ast.List(elts=[recurse(elt) for elt in x])
        elif T is tuple:
            return ast.Tuple(elts=[recurse(elt) for elt in x])
        elif T is dict:
            return ast.Dict(keys=[recurse(k) for k in x],
                            values=[recurse(v) for v in x.values()])
        elif T is set:
            return ast.Set(elts=[recurse(elt) for elt in x])

        elif isinstance(x, ast.AST):
            # TODO: Add support for astifying ASTMarkers?
            # TODO: Otherwise the same as regular AST node, but need to refer to the
            # TODO: module it is defined in, and we don't have everything in scope here.
            if isinstance(x, ASTMarker):
                raise TypeError(f"Cannot astify internal AST markers, got {unparse(x)}")

            # The magic is in the Call. Take apart the input AST, and construct a
            # new AST, that (when compiled and run) will re-generate the input AST.
            #
            # We refer to the stdlib `ast` module as `mcpyrate.quotes.ast` to avoid
            # name conflicts at the use site of `q[]`.
            fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
            node = ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr('ast'),
                                          attr=x.__class__.__name__,
                                          ctx=ast.Load()),
                            [],
                            fields)
            # Copy source location info for correct coverage reporting of a quoted block.
            #
            # The location info we fill in here is for the use site of `q`, which is
            # typically inside a macro definition. Coverage for a quoted line of code
            # means that the expansion of the quote contains input from that line.
            # It says nothing about the run-time behavior of that code.
            #
            # Running the AST produced by the quote re-produces the input AST, which is
            # indeed the whole point of quoting stuff. The AST is re-produced **without
            # any source location info**. The fact that *this* location info is missing,
            # on purpose, is the magic that allows the missing location fixer to fill
            # the correct location info at the final use site, i.e. the use site of the
            # macro that used `q`.
            node = ast.copy_location(node, x)
            return node

        raise TypeError(f"Don't know how to astify {repr(x)}")
    return recurse(x)
|
|
|
|
|
|
def unastify(tree):
    """Inverse of `astify`.

    `tree` must have been produced by `astify`. Otherwise raises `TypeError`.

    This converts an AST that represents quoted code back into an AST that
    represents that code directly, i.e. not quoted. In a sense, `unastify`
    is a top-level unquote operator.

    Note the subtle difference in meaning to `u[]`. The `u[]` operator
    interpolates a value from outside the quote context into the quoted
    representation - so that the value actually becomes quoted! - whereas
    `unastify` inverts the quote operation.
    """
    # CAUTION: in `unastify`, we implement only what we minimally need.
    def attr_ast_to_dotted_name(tree):
        # Input is like:
        #     (mcpyrate.quotes).thing
        #     ((mcpyrate.quotes).ast).thing
        if type(tree) is not ast.Attribute:
            raise TypeError
        parts = []  # collected from the outermost attribute inward
        node = tree
        while True:
            parts.append(node.attr)
            inner = node.value
            if type(inner) is ast.Attribute:
                node = inner
            elif type(inner) is ast.Name:
                parts.append(inner.id)
                break
            else:
                raise NotImplementedError
        return ".".join(reversed(parts))

    module_namespace = globals()
    def lookup_thing(dotted_name):
        # Resolve `mcpyrate.quotes.x.y...` by looking up `x` in this module's
        # globals, and then following the remaining attributes, if any.
        if not dotted_name.startswith("mcpyrate.quotes"):
            raise NotImplementedError
        path = dotted_name.split(".")
        if len(path) < 3:
            raise NotImplementedError
        thing = module_namespace[path[2]]
        for attrname in path[3:]:
            thing = getattr(thing, attrname)
        return thing

    T = type(tree)

    if T is ast.Constant:
        return tree.value

    # Support machinery for `Call` AST node. This serendipitously supports also
    # *args and **kwargs, because as of Python 3.6 those appear in `args` and
    # `keywords`, and `Starred` needs no special support here.
    if T is list:
        return [unastify(elt) for elt in tree]
    if T is ast.keyword:
        return tree.arg, unastify(tree.value)

    if T is ast.List:
        return [unastify(elt) for elt in tree.elts]
    if T is ast.Tuple:
        return tuple(unastify(elt) for elt in tree.elts)
    if T is ast.Dict:
        return {unastify(k): unastify(v) for k, v in zip(tree.keys, tree.values)}
    if T is ast.Set:
        return {unastify(elt) for elt in tree.elts}

    if T is ast.Call:
        # Run the constructor call that `astify` emitted, to rebuild the node.
        callee = lookup_thing(attr_ast_to_dotted_name(tree.func))
        args = unastify(tree.args)
        kwargs = dict(unastify(tree.keywords))  # each keyword unastifies to a `(name, value)` pair
        return ast.copy_location(callee(*args, **kwargs), tree)

    raise TypeError(f"Don't know how to unastify {unparse(tree)}")
|
|
|
|
# --------------------------------------------------------------------------------
|
|
# Quasiquote macros
|
|
#
|
|
# These operators are named after Qu'nash, the goddess of quasiquotes in high-tech-elven mythology.
|
|
|
|
# Current quasiquote nesting level. `q` raises it by one while processing its
# contents; `u`, `n`, `a` and `h` lower it by one while processing theirs.
_quotelevel = NestingLevelTracker()
|
|
|
|
def _unquote_expand(tree, expander):
    """Expand quasiquote macros in `tree`. If quotelevel is zero, expand all macros in `tree`.

    `tree`: the AST to expand.
    `expander`: the macro expander instance that is expanding the use site.

    Returns the expanded tree. Use the return value instead of the input `tree`,
    because the visit may replace the top-level node.
    """
    if _quotelevel.value == 0:
        tree = expander.visit_recursively(tree)  # result should be runnable, so always use recursive mode.
    else:
        tree = _expand_quasiquotes(tree, expander)
    return tree
|
|
|
|
def _expand_quasiquotes(tree, expander):
    """Expand only the quasiquote macros in `tree`, and return the result.

    `expander`: the main expander; its bindings and filename are consulted.
    """
    # A second expander instance does the work, with different bindings. From the
    # main `expander`, we copy only the bindings that refer to the quasiquote
    # macros; going by the bound function accounts for possible as-imports.
    # The second expander won't even see other macros, thus leaving them alone.
    quasiquote_macros = (q, u, n, a, s, h)
    bindings = {name: function
                for name, function in expander.bindings.items()
                if function in quasiquote_macros}
    quote_expander = MacroExpander(bindings, expander.filename)
    return quote_expander.visit(tree)
|
|
|
|
|
|
def q(tree, *, syntax, expander, **kw):
    """[syntax, expr/block] quasiquote. Lift code into its AST representation.

    In block mode, the `with` statement must have a single asname; the lifted
    statement suite is assigned to that name at the use site of `q`.
    """
    if syntax not in ("expr", "block"):
        raise SyntaxError("`q` is an expr and block macro only")
    with _quotelevel.changed_by(+1):
        # Inner quotes and unquotes expand first.
        tree = _expand_quasiquotes(tree, expander)
        # The magic part of `q`. The `expander` is supplied for `h[macro]` detection.
        tree = astify(tree, expander=expander)
        # Postcondition: `astify` has compiled away all QuasiquoteMarkers.
        ps = get_markers(tree, QuasiquoteMarker)
        assert not ps, f"QuasiquoteMarker instances remaining in output: {ps}"
        if syntax != 'block':
            return tree
        target = kw['optional_vars']  # List, Tuple, Name
        if type(target) is not ast.Name:
            raise SyntaxError(f"expected a single asname, got {unparse(target)}")
        # This `Assign` runs at the use site of `q`; it is not part of the quoted
        # code section. Here `tree` is a List.
        return ast.Assign([target], tree)
|
|
|
|
|
|
def u(tree, *, syntax, expander, **kw):
    """[syntax, expr] unquote. Splice a simple value into a quasiquote.

    The value is lifted into an AST that re-constructs that value.

    Raises `SyntaxError` if invoked in any other mode than expr, or outside
    a quasiquote.
    """
    if syntax != "expr":
        raise SyntaxError("`u` is an expr macro only")
    if _quotelevel.value < 1:
        raise SyntaxError("`u` encountered while quotelevel < 1")
    with _quotelevel.changed_by(-1):
        # Splice in the *expanded* tree. Ignoring the result of the expansion
        # would lose any replacement of the top-level node. If the expansion
        # hands back nothing (`None`), keep `tree` as it is.
        expanded = _unquote_expand(tree, expander)
        if expanded is not None:
            tree = expanded
        # We want to generate an AST that compiles to the *value* of `v`. But when
        # this runs, it is too early. We must astify *at the use site*. So use an
        # `ast.Call` to delay, and in there, splice in `tree` as-is.
        return ASTLiteral(ast.Call(_mcpyrate_quotes_attr("astify"), [tree], []))
|
|
|
|
|
|
def n(tree, *, syntax, **kw):
    """[syntax, expr] name-unquote. Splice a string, lifted into a lexical identifier, into a quasiquote.

    The `ctx` of the resulting node is filled in automatically by the macro
    expander later.

    Raises `SyntaxError` if invoked in any other mode than expr, or outside
    a quasiquote.
    """
    if syntax != "expr":
        raise SyntaxError("`n` is an expr macro only")
    if _quotelevel.value < 1:
        raise SyntaxError("`n` encountered while quotelevel < 1")
    with _quotelevel.changed_by(-1):
        # Quote an `ast.Name` whose `id` is `tree` itself, passed through as-is.
        name_node = ast.Name(id=ASTLiteral(tree))
        return ASTLiteral(astify(name_node))
|
|
|
|
|
|
def a(tree, *, syntax, **kw):
    """[syntax, expr] AST-unquote. Splice an AST into a quasiquote.

    Raises `SyntaxError` if invoked in any other mode than expr, or outside
    a quasiquote.
    """
    if syntax != "expr":
        raise SyntaxError("`a` is an expr macro only")
    if _quotelevel.value < 1:
        raise SyntaxError("`a` encountered while quotelevel < 1")
    with _quotelevel.changed_by(-1):
        literal = ASTLiteral(tree)  # `astify` passes the wrapped subtree through as-is
    return literal
|
|
|
|
|
|
def s(tree, *, syntax, **kw):
    """[syntax, expr] list-unquote. Splice a `list` of ASTs, as an `ast.List`, into a quasiquote.

    Raises `SyntaxError` if invoked in any other mode than expr, or outside
    a quasiquote.
    """
    if syntax != "expr":
        raise SyntaxError("`s` is an expr macro only")
    if _quotelevel.value < 1:
        raise SyntaxError("`s` encountered while quotelevel < 1")
    # Emit the call `mcpyrate.quotes.ast.List(elts=<tree>)`, with `tree` spliced in as-is.
    # The `Attribute` gets an explicit `Load` context, like the other attribute
    # lookups generated in this module.
    return ASTLiteral(ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr('ast'),
                                             attr='List',
                                             ctx=ast.Load()),
                               [],
                               [ast.keyword("elts", tree)]))
|
|
|
|
|
|
def h(tree, *, syntax, expander, **kw):
    """[syntax, expr] hygienic-unquote. Splice any value, from the macro definition site, into a quasiquote.

    Supports also values that have no meaningful `repr`. The value is captured
    at the use site of the surrounding `q`.

    The value is frozen into the expanded source code as a pickled blob,
    separately at each use site of `h[]`. Thus the value must be picklable,
    and each capture will pickle it again.

    This is done to ensure the value will remain available, when the
    already-expanded code (due to `.pyc` caching) runs again in another
    Python process. (In other words, values from "macro expansion time
    last week" would not otherwise be available.)

    Supports also macros. To hygienically splice a macro invocation, apply
    `h[]` to the macro name only. Macro captures are not pickled; they simply
    extend the bindings of the expander (with a uniqified macro name) that is
    expanding the use site of the surrounding `q`.

    Raises `SyntaxError` if invoked in any other mode than expr, or outside
    a quasiquote.
    """
    if syntax != "expr":
        raise SyntaxError("`h` is an expr macro only")
    if _quotelevel.value < 1:
        raise SyntaxError("`h` encountered while quotelevel < 1")
    with _quotelevel.changed_by(-1):
        # The human-readable name is the source code as written, before expansion.
        name = unparse(tree)
        # Capture the *expanded* tree. Ignoring the result of the expansion
        # would lose any replacement of the top-level node. If the expansion
        # hands back nothing (`None`), keep `tree` as it is.
        expanded = _unquote_expand(tree, expander)
        if expanded is not None:
            tree = expanded
        return CaptureLater(tree, name)
|
|
|
|
# --------------------------------------------------------------------------------
|
|
|
|
def expand1q(tree, *, syntax, **kw):
    '''[syntax, expr/block] quote-then-expand-once.

    First quasiquote `tree`, then expand one layer of macros in it. The result
    is returned in quasiquoted form.

    For a tree that is already quasiquoted, use `expand1` instead.
    '''
    if syntax not in ("expr", "block"):
        raise SyntaxError("`expand1q` is an expr and block macro only")
    quoted = q(tree, syntax=syntax, **kw)
    return expand1(quoted, syntax=syntax, **kw)
|
|
|
|
|
|
def expandq(tree, *, syntax, **kw):
    '''[syntax, expr/block] quote-then-expand.

    First quasiquote `tree`, then expand it until no macros remain. The result
    is returned in quasiquoted form. This operator is equivalent to `macropy`'s `q`.

    For a tree that is already quasiquoted, use `expand` instead.
    '''
    if syntax not in ("expr", "block"):
        raise SyntaxError("`expandq` is an expr and block macro only")
    quoted = q(tree, syntax=syntax, **kw)
    return expand(quoted, syntax=syntax, **kw)
|
|
|
|
# --------------------------------------------------------------------------------
|
|
|
|
def expand1(tree, *, syntax, expander, **kw):
    '''[syntax, expr/block] expand one layer of macros in quasiquoted `tree`.

    The result remains in quasiquoted form.

    Like calling `expander.visit_once(tree)`, but for quasiquoted `tree`.

    `tree` must be a quasiquoted AST; i.e. output from, or an invocation of,
    `q`, `expand1q`, `expandq`, `expand1`, or `expand`. Passing any other AST
    as `tree` raises `TypeError`.

    If your `tree` is not quasiquoted, `expand1q[...]` is a shorthand for
    `expand1[q[...]]`.
    '''
    if syntax not in ("expr", "block"):
        raise SyntaxError("`expand1` is an expr and block macro only")
    # Strategy: invert the quasiquote operation, run the garden variety
    # `expander` on the result, and finally re-quote the expanded AST.
    #
    # Step 1. Make any quote invocations inside this macro invocation expand
    # first. For an already expanded `q`, this does nothing, because any macro
    # invocations are then in a quoted form, which don't look like macro
    # invocations to the expander. For a `Done`, it likewise does nothing.
    done = expander.visit_once(tree)  # -> Done(body=...)
    # Step 2. Unquote, and expand once. On wrong kind of input, `unastify`
    # will `TypeError` for us.
    expanded = expander.visit_once(unastify(done.body))
    # Step 3. Re-quote. This works in the expander's recursive mode, without
    # wrapping the result with `Done`, because after `q` has finished, the
    # output is a **quoted** AST, so macro invocations in it don't look like
    # macro invocations. Hence upon looping on the output, the expander finds
    # no more macros.
    return q(expanded.body, syntax=syntax, expander=expander, **kw)
|
|
|
|
|
|
def expand(tree, *, syntax, expander, **kw):
    '''[syntax, expr/block] expand quasiquoted `tree` until no macros remain.

    The result remains in quasiquoted form.

    Like calling `expander.visit_recursively(tree)`, but for quasiquoted `tree`.

    `tree` must be a quasiquoted AST; i.e. output from, or an invocation of,
    `q`, `expand1q`, `expandq`, `expand1`, or `expand`. Passing any other AST
    as `tree` raises `TypeError`.

    If your `tree` is not quasiquoted, `expandq[...]` is a shorthand for
    `expand[q[...]]`.
    '''
    if syntax not in ("expr", "block"):
        raise SyntaxError("`expand` is an expr and block macro only")
    # Make the quotes inside this invocation expand first.
    done = expander.visit_once(tree)  # -> Done(body=...)
    # Always use recursive mode here, because `expand[...]` may appear inside
    # another macro invocation that uses `visit_once` (which sets the expander
    # mode to non-recursive for the dynamic extent of the visit).
    # On wrong kind of input, `unastify` will `TypeError` for us.
    expanded = expander.visit_recursively(unastify(done.body))
    return q(expanded, syntax=syntax, expander=expander, **kw)
|