# -*- coding: utf-8; -*-
"""Quasiquotes. Build ASTs in your macros, using syntax that mostly looks like regular code.
The macro operators `q`, `u`, `n`, `a`, `s`, `t`, `h` are the primary API.
The functions `capture_value` and `capture_as_macro` are public, so you can get the
benefits of hygienic capture also in old-school macros that build ASTs manually
without using quasiquotes.
The `astify` and `unastify` functions are the low-level quasiquote compiler
and uncompiler, respectively.
"""
__all__ = ["capture_value", "capture_macro", "capture_as_macro",
"is_captured_value", "is_captured_macro",
"astify", "unastify",
"q", "u", "n", "a", "s", "t", "h"]
import ast
import copy
import pickle
import sys
from .core import Done, MacroExpansionError, global_bindings
from .coreutils import _mcpyrate_attr
from .expander import MacroExpander, isnamemacro
from .markers import ASTMarker, check_no_markers_remaining, delete_markers
from .unparser import unparse, unparse_with_fallbacks
from .utils import (NestingLevelTracker, extract_bindings, flatten, gensym,
scrub_uuid)
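# A minimal usage sketch of these operators, assuming a separate macro-enabled
# module (`mymacro` is a hypothetical expr macro; `q` quotes, `a[]` splices the
# input AST, `u[]` interpolates a plain value)::
#
#     from mcpyrate.quotes import macros, q, u, a  # noqa: F401 (macro-import)
#
#     def mymacro(tree, *, syntax, **kw):
#         return q[a[tree] + u[21 * 2]]   # expands `mymacro[x]` into `x + 42`
#
# The macros near the end of this module implement the compile-time parts of the
# operators; the functions below implement the corresponding run-time parts.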
def _mcpyrate_quotes_attr(attr, *, force_import=False):
"""Create an AST that, when compiled and run, looks up `mcpyrate.quotes.attr`.
If `force_import` is `True`, use the builtin `__import__` function to
first import the `mcpyrate.quotes` module. This is useful for e.g.
hygienically unquoted values, whose eventual use site might not import
any `mcpyrate` modules.
"""
return _mcpyrate_attr(f"quotes.{attr}", force_import=force_import)
class QuasiquoteMarker(ASTMarker):
"""Base class for AST markers used by quasiquotes. Compiled away by `astify`."""
class SpliceNodes(QuasiquoteMarker):
"""Splice a `list` of AST nodes into the surrounding context.
Command sent by `ast_literal` (run-time part of `a`)
to `splice_ast_literals` (run-time part of the surrounding `q`).
"""
class QuasiquoteSearchDone(Done, QuasiquoteMarker):
"""Marker used by nested quasiquotes to tell the expander a subtree is already done.
This inherits from the usual `Done`, but is a separate type, because:
1. We need to tell the expander that is processing the nested quasiquotes
to stop expanding an invocation that has already been considered.
2. We need to be able to eliminate these (and only these) before
generating the final quoted output.
"""
# --------------------------------------------------------------------------------
# Unquote commands for `astify`. Each type corresponds to an unquote macro.
class Unquote(QuasiquoteMarker):
"""Interpolate the value of the given subtree into the quoted tree. Emitted by `u[]`."""
class LiftSourcecode(QuasiquoteMarker):
"""Parse a string as a Python expression, interpolate the resulting AST. Emitted by `n[]`.
This allows e.g. computing names of lexical variables.
"""
def __init__(self, body, filename):
super().__init__(body)
self.filename = filename
self._fields += ["filename"]
class ASTLiteral(QuasiquoteMarker): # similar to `macropy`'s `Literal`, but supports block mode, too.
"""Interpolate the given AST. Emitted by `a`."""
def __init__(self, body, syntax):
super().__init__(body)
self.syntax = syntax
self._fields += ["syntax"]
class ASTList(QuasiquoteMarker):
"""Interpolate the given iterable of AST nodes as an `ast.List` node. Emitted by `s[]`."""
class ASTTuple(QuasiquoteMarker):
"""Interpolate the given iterable of AST nodes as an `ast.Tuple` node. Emitted by `t[]`."""
class Capture(QuasiquoteMarker): # like `macropy`'s `Captured`
"""Capture given subtree hygienically. Emitted by `h[]`.
Details: capture the value or macro name the given subtree evaluates to,
at the use site of `q`. The value or macro reference is frozen (by pickle)
so that it can be restored also in another Python process later.
(It is important hygienic captures can be restored across process boundaries,
to support bytecode caching for source files that invoke a macro that uses
`h[]` in its output.)
"""
def __init__(self, body, name):
super().__init__(body)
self.name = name
self._fields += ["name"]
# --------------------------------------------------------------------------------
# Run-time parts of the operators.
# Unquote doesn't have its own function here, because it's a special case of `astify`.
def lift_sourcecode(value, filename="<unknown>"):
"""Parse a string as a Python expression. Run-time part of `n[]`.
Main use case is to access lexical variables with names computed at your macro definition site::
lift_sourcecode("kitty") -> Name(id='kitty')
More complex expressions work, too::
lift_sourcecode("kitty.tail") -> Attribute(value=Name(id='kitty'),
attr='tail')
lift_sourcecode("kitty.tail.color") -> Attribute(value=Attribute(value=Name(id='kitty'),
attr='tail'),
attr='color')
lift_sourcecode("kitties[3].paws[2].claws")
"""
if not isinstance(value, str):
raise TypeError(f"`n[]`: expected an expression that evaluates to str, result was {type(value)} with value {repr(value)}")
return ast.parse(value, filename=f"<invocation of n[] in '{filename}'>", mode="eval").body
def _typecheck(node, cls, macroname):
if isinstance(node, ASTMarker):
if isinstance(node.body, list): # statement suite inside a marker
for child in node.body:
_typecheck(child, cls, macroname)
return
# single AST node inside a marker
_typecheck(node.body, cls, macroname)
return
if not isinstance(node, cls):
raise TypeError(f"{macroname}: expected {cls}, got {type(node)} with value {repr(node)}")
def _flatten_and_typecheck_iterable(nodes, cls, macroname):
try:
lst = list(nodes)
except TypeError:
raise TypeError(f"{macroname}: expected an iterable of AST nodes, got {type(nodes)} with value {repr(nodes)}")
lst = flatten(lst)
for node in lst:
_typecheck(node, cls, macroname)
return lst
def ast_literal(tree, syntax):
"""Perform run-time typecheck on AST literal `tree`. Run-time part of `a`.
If `tree` is a run-time iterable, convert it to a `list`, flatten that `list`
locally, and inject a run-time marker for `splice_ast_literals`, to indicate
where splicing into the surrounding context is needed.
"""
if syntax not in ("expr", "block"):
raise ValueError(f"expected `syntax` either 'expr' or 'block', got {repr(syntax)}")
if syntax == "expr":
if isinstance(tree, ast.AST):
_typecheck(tree, ast.expr, "`a` (expr mode)")
return tree
else:
lst = _flatten_and_typecheck_iterable(tree, ast.expr, "`a` (expr mode)")
return SpliceNodes(lst)
assert syntax == "block"
# Block mode `a` always produces a `list` of the items in its body.
# Each item may refer, at run time, to a statement AST node or to a `list`
# of statement AST nodes.
#
# We flatten locally here to get rid of the sublists, so that all statement
# nodes injected by this invocation of block mode `a` become gathered into
# a single flat "master list".
#
# However, there's a piece of postprocessing we cannot do here: the splice
# of the master list into the surrounding context. For that, we mark the
# place for `splice_ast_literals`, which is the run-time part of the
# surrounding block mode `q` (which allows it to operate on the whole
# quoted tree).
#
# The splicer must splice only places marked by us, because lists occur
# in many places in a Python AST beside statement suites (e.g. `Assign`
# targets, the parameter list in a function definition, ...).
lst = _flatten_and_typecheck_iterable(tree, ast.stmt, "`a` (block mode)")
return SpliceNodes(lst)
def splice_ast_literals(tree, filename):
"""Splice list-valued `a` AST literals into the surrounding context. Run-time part of `q`."""
# We do this recursively to splice also at any inner levels of the quoted
# AST (e.g. `with a` inside an `if`).
def doit(thing):
if isinstance(thing, list):
newthing = []
for item in thing:
if isinstance(item, SpliceNodes):
doit(item.body)
# Discard the `SpliceNodes` marker and splice the `list` that was contained in it.
newthing.extend(item.body)
else:
doit(item)
newthing.append(item)
thing[:] = newthing
# As of Python 3.9, `Global` and `Nonlocal` are the only AST node types
# where a field contains a `list` of bare strings.
elif isinstance(thing, (ast.Global, ast.Nonlocal)):
pass
elif isinstance(thing, ast.AST):
for fieldname, value in ast.iter_fields(thing):
if isinstance(value, list):
doit(value)
else:
raise TypeError(f"Expected `list` or AST node, got {type(thing)} with value {repr(thing)}")
doit(tree)
try:
check_no_markers_remaining(tree, filename=filename, cls=SpliceNodes)
except MacroExpansionError:
err = RuntimeError("`q`: `SpliceNodes` markers remaining after expansion, likely a misplaced `a` unquote; did you mean `s[]` or `t[]`?")
# The list of remaining markers is not very useful, suppress it
# (but leave it available for introspection in the `__context__` attribute).
err.__suppress_context__ = True
raise err
return tree
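# Sketch of the `a` / `splice_ast_literals` interplay, in a macro-enabled module
# (`with_logging` is a hypothetical block macro; `stmts` holds statement AST nodes)::
#
#     from mcpyrate.quotes import macros, q, a  # noqa: F401 (macro-import)
#
#     def with_logging(tree, *, syntax, **kw):
#         stmts = tree if isinstance(tree, list) else [tree]
#         with q as quoted:
#             print("entering block")
#             with a:
#                 stmts
#             print("exiting block")
#         return quoted
#
# When `with_logging` runs, `ast_literal` wraps the value of `stmts` in a
# `SpliceNodes` marker, and `splice_ast_literals` (injected by `q`) then splices
# those statements into `quoted` in place of the `with a` block.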
def ast_list(nodes):
"""Interpolate an iterable of expression AST nodes as an `ast.List` node. Run-time part of `s[]`."""
lst = _flatten_and_typecheck_iterable(nodes, ast.expr, "`s[]`")
return ast.List(elts=lst)
def ast_tuple(nodes):
"""Interpolate an iterable of expression AST nodes as an `ast.Tuple` node. Run-time part of `t[]`."""
lst = _flatten_and_typecheck_iterable(nodes, ast.expr, "`t[]`")
return ast.Tuple(elts=lst)
def capture_value(value, name):
"""Hygienically capture a run-time value. Used by `h[]`.
`value`: A run-time value. Must be picklable.
`name`: For human-readability.
The return value is an AST that, when compiled and run, returns the
captured value (even in another Python process later).
"""
# If we didn't need to consider bytecode caching, we could just store the
# value in a dictionary (that lives at the top level of `mcpyrate.quotes`)
# that is populated at macro expansion time. Each unique value (by `id`)
# could be stored only once.
#
# But we want to support bytecode caching. To avoid introducing hard-to-find
# bugs into user code, we must provide consistent semantics, regardless of
# whether updating of the bytecode cache is actually enabled or not (see
# `sys.dont_write_bytecode`). So we must do the same thing regardless of
# whether the captured value is used in the current process, or in another
# Python process later.
#
# If the macro expansion result is to remain available for reuse from a
# `.pyc`, we must serialize and store the captured value to disk, so that
# values from "macro expansion time last week" are still available when the
# `.pyc` is loaded in another Python process later.
#
# Modules are macro-expanded independently (no global finalization for the
# whole codebase), and a `.pyc` may indeed later get loaded into some other
# codebase that imports the same module, so we can't make a centralized
# registry, like we could without bytecode caching.
#
# So really pretty much the only thing we can do reliably and simply is to
# store a fresh serialized copy of the value at the capture location in the
# source code, independently at each capture location.
#
# Putting these considerations together, we pickle the value, causing a copy
# and serialization.
#
frozen_value = pickle.dumps(value)
return ast.Call(_mcpyrate_quotes_attr("lookup_value", force_import=True),
[ast.Tuple(elts=[ast.Constant(value=name),
ast.Constant(value=frozen_value)])],
[])
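# Sketch: round-tripping a value through `capture_value` by hand (normally `h[]`
# generates this call for you; names below are illustrative only)::
#
#     node = capture_value({"answer": 42}, "cfg")
#     expr = ast.fix_missing_locations(ast.Expression(body=node))
#     assert eval(compile(expr, "<sketch>", "eval")) == {"answer": 42}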
_lookup_cache = {}
def lookup_value(key):
"""Look up a hygienically captured run-time value. Used by `h[]`.
Usually there's no need to call this function manually; `capture_value`
(and thus also `h[]`) will generate an AST that calls this automatically.
**NOTE**: For advanced macrology: if your own macros need to detect hygienic
captures using `is_captured_value`, and you want to look up the captured
value based on a key returned by that function, be aware that `lookup_value`
will only succeed if a value has been captured.
Trying to look up a key that was extracted from a pre-capture AST
raises `ValueError`. In terms of the discussion in the docstring of
`is_captured_value`, you need a `lookup_value` AST for a value to
be present; a `capture_value` AST is too early. The transition occurs
when the use site of `q` runs.
In that scenario, before you call `lookup_value` on your key, check that
`frozen_value is not None` (see docstring of `is_captured_value`);
that indicates that a value has been captured and can be decoded by
this function.
"""
name, frozen_value = key
# Trying to look up a result of `is_captured_value` that isn't captured yet.
if frozen_value is None:
raise ValueError(f"The given key does not (yet) point to a value: {repr(key)}")
cachekey = (name, id(frozen_value)) # id() so each capture instance behaves independently
if cachekey not in _lookup_cache:
_lookup_cache[cachekey] = pickle.loads(frozen_value)
return _lookup_cache[cachekey]
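# Sketch: decoding a key returned by `is_captured_value` (defined further below;
# `node` stands for some AST node picked out of an expanded tree)::
#
#     key = is_captured_value(node)
#     if key and key[1] is not None:   # a value has actually been captured
#         value = lookup_value(key)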
def capture_macro(macro, name):
"""Hygienically capture a macro. Used by `h[]`.
`macro`: A macro function. Must be picklable.
`name`: For human-readability. The recommended value is the name of
the macro, as it appeared in the bindings of the expander
it was captured from.
The name of the captured macro is automatically uniqified using
`gensym(name)`.
The return value is an AST that, when compiled and run, injects the macro
into the expander's global macro bindings table (even in another Python
process later), and then evaluates to the uniqified macro name as an
`ast.Name`.
"""
if not callable(macro):
raise TypeError(f"`macro` must be callable (a macro function), got {type(macro)} with value {repr(macro)}")
# Scrub any previous UUID suffix from the macro name. We'll get those when
# `unastify` uncompiles a hygienic macro capture, and then `astify`
# compiles the result again.
frozen_macro = pickle.dumps(macro)
name = scrub_uuid(name)
return ast.Call(_mcpyrate_quotes_attr("lookup_macro"),
[ast.Tuple(elts=[ast.Constant(value=name),
ast.Constant(value=gensym(name)),
ast.Constant(value=frozen_macro)])],
[])
def capture_as_macro(macro):
"""Hygienically capture a macro function as a macro, manually.
Like `capture_macro`, but with one less level of delay. This injects the
macro into the expander's global bindings table immediately, and returns
the uniqified `ast.Name` that can be used to refer to it hygienically,
using `a[]`.
The name is taken automatically from the name of the macro function.
"""
if not callable(macro):
raise TypeError(f"`macro` must be callable (a macro function), got {type(macro)} with value {repr(macro)}")
frozen_macro = pickle.dumps(macro)
name = macro.__name__
return lookup_macro((name, gensym(name), frozen_macro))
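# Sketch: capture a macro function manually so that manually built (or quoted)
# ASTs can refer to it hygienically (`mymacro` is a hypothetical macro function)::
#
#     ref = capture_as_macro(mymacro)   # an `ast.Name` with a uniqified id, now
#                                       # bound in the expander's global bindings
#     tree = q[a[ref][42]]              # an invocation of `mymacro` on the constant 42
#
# The use site of `tree` does not need to macro-import `mymacro`.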
def lookup_macro(key):
"""Look up a hygienically captured macro. Used by `h[]`.
This injects the macro to the expander's global macro bindings table,
and then returns the macro name, as an `ast.Name`.
Usually there's no need to call this function manually; `capture_macro`
(and thus also `h[]`) will generate an AST that calls this automatically.
"""
name, unique_name, frozen_macro = key
if unique_name not in global_bindings:
global_bindings[unique_name] = pickle.loads(frozen_macro)
return ast.Name(id=unique_name)
# --------------------------------------------------------------------------------
# Advanced macrology support.
# TODO: In a future version, do we want to add an ASTMarker for captured values
# TODO: that are ready for consumption? We could save the actual AST (which is
# TODO: now detected directly) into the `body` attribute of the marker, and make
# TODO: the compiler delete `HygienicValue` markers (replacing each by its `.body`)
# TODO: as the last step before handing the AST over to Python.
def is_captured_value(tree):
"""Test whether `tree` is a hygienically captured run-time value.
This function is sometimes useful for advanced macrology. It allows user-defined
macros to work together in an environment where hygienic captures are present:
one macro, using quasiquotes, builds an AST, and another macro analyzes the
expanded AST later.
Consider first, however, if you can arrange things so that the second macro
could analyze an *unexpanded* AST; that's often much easier. When the first
macro simply must expand first (for whatever reason), that's where this function
comes in.
With this function, you can check (either by name or by value) whether some
`q[h[myfunction]]` points to the desired `"myfunction"`, so that e.g. the AST
produced by `q[h[myfunction](a0, ...)]` can be recognized as a call to your
`myfunction`. This allows your second macro to know it's `myfunction`,
so that it'll know how to interpret the args of that call.
Real-world examples of where this is useful are too unwieldy to explain
here, but can be found in `unpythonic.syntax`. Particularly, see any use
sites of the helper function `unpythonic.syntax.nameutil.isx`.
To detect a hygienically captured *macro*, use `is_captured_macro` instead.
Return value:
- On no match, return `False`.
- On match, return a tuple `(name, frozen_value)`, where:
- `name` (str) is the name of the captured identifier, or when the captured
value is from an arbitrary expression, the unparsed source code of that
expression. There is no name mangling for identifiers; it's the exact
original name that appeared in the source code.
- `frozen_value` is either a `bytes` object that stores the frozen value
as opaque binary data, or `None` if the value has not been captured yet.
The `bytes` object can be decoded by passing the whole return value as `key`
to `lookup_value`. That function will decode the data and return the actual
value, just as if the hygienic reference was decoded normally at run time.
**NOTE**:
Stages in the life of a hygienically captured *run-time value* in `mcpyrate`:
1. When the surrounding `q` expands, it first expands any unquotes nested
within it, but only those where the quote level hits zero. The `h[]` is
converted into a `Capture` AST marker; see the `h` operator for details.
2. Then, still while the surrounding `q` expands, `q` compiles quasiquote
markers. A `Capture` marker, in particular, compiles into a call to
the function `capture_value`. This is the output at macro expansion time
(of the use site of `q`).
3. When the use site of `q` reaches run time, the `capture_value` runs
(thus actually performing the capture), and replaces itself (in the
AST that was produced by `q`) with a call to the function `lookup_value`.
That `lookup_value` call is still an AST node.
4. In typical usage, that use site of `q` is inside the implementation
of some user-defined macro. When *that macro's use site* reaches run
time, the `lookup_value` runs (each time that expression is executed).
So in the macro expansion of `q`, we have a call to `capture_value`
representing the hygienically captured run-time value. But once the macro
that uses `q` has returned its output, then we instead have a call to
`lookup_value`. The latter is the most likely scenario for advanced
user-defined macros that work together.
"""
if type(tree) is not ast.Call:
return False
# The format is one of:
#
# - direct reference: `(mcpyrate.quotes).xxx`
# - reference by import: `(__import__("mcpyrate.quotes", ...).quotes).xxx`
#
# First check the `xxx` part:
callee = tree.func
if not (type(callee) is ast.Attribute and callee.attr in ("capture_value", "lookup_value")):
return False
# Then the rest:
if not _is_mcpyrate_quotes_reference(callee.value):
return False
# This AST destructuring and constant extraction must match the format
# of the argument lists produced by the quasiquote system for calls to
# `capture_value` and `lookup_value`.
if callee.attr == "capture_value": # the call is `capture_value(..., name)`
name_node = tree.args[1]
assert type(name_node) is ast.Constant and type(name_node.value) is str
return (name_node.value, None) # the value hasn't been captured yet
elif callee.attr == "lookup_value": # the call is `lookup_value(key)`
key_node = tree.args[0]
name_node, frozen_value_node = key_node.elts
assert type(name_node) is ast.Constant and type(name_node.value) is str
assert type(frozen_value_node) is ast.Constant and type(frozen_value_node.value) is bytes
return (name_node.value, frozen_value_node.value)
assert False # cannot happen
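# Sketch: recognizing a hygienic reference to a known function in an expanded AST
# (`myfunction` is hypothetical; this mirrors the `q[h[myfunction](...)]` scenario
# discussed in the docstring above)::
#
#     def _is_call_to_myfunction(node):
#         if type(node) is not ast.Call:
#             return False
#         key = is_captured_value(node.func)
#         return bool(key) and key[0] == "myfunction"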
def is_captured_macro(tree):
"""Just like `is_captured_value`, but detect a hygienically captured macro instead.
To detect a hygienically captured *run-time value*, use `is_captured_value` instead.
Return value:
- On no match, return `False`.
- On match, return a tuple `(name, unique_name, frozen_macro)`, where:
- `name` (str) is the name of the macro, as it appeared in the bindings
of the expander instance it was captured from.
- `unique_name` (str) is `name` with an underscore and UUID appended,
to make it unique. This is the name the macro will be injected as
into the expander's global bindings table.
(By unique, we mean "universally unique anywhere for approximately
the next one thousand years"; see `mcpyrate.gensym`, which links to
the UUID spec used by the implementation.)
- `frozen_macro` is a `bytes` object that stores a reference to the
frozen macro function as opaque binary data.
The `bytes` object can be decoded by passing the whole return value as `key`
to `lookup_macro`. That function will decode the data, inject the macro into
the expander's global bindings table (if not already there), and give you an
`ast.Name` node whose `id` attribute contains the unique name (str), just as
if the hygienic reference was decoded normally at macro expansion time.
Then, once the injection has taken place, you can obtain the actual macro
function object by calling `expander.isbound(id)`.
**NOTE**:
Stages in the life of a hygienically captured *macro* in `mcpyrate` are as follows.
Note that unlike `capture_value`, a call to `capture_macro` never appears in the AST.
1. When the surrounding `q` expands, it first expands any unquotes nested
within it, but only those where the quote level hits zero. The `h[]` is
converted into a `Capture` AST marker; see the `h` operator for details.
2. Then, still while the surrounding `q` expands, `q` compiles quasiquote
markers. A `Capture` marker for a macro, in particular, triggers an
immediate call to the function `capture_macro`. The result is an AST
representing a call to the function `lookup_macro`. This gets injected
into the AST produced by `q`.
3. When the use site of `q` reaches run time, the `lookup_macro` runs,
injecting the macro (under its unique name) into the expander's global
bindings table. The `lookup_macro` call replaces itself with an `ast.Name`
whose `id` attribute contains the unique name of the macro.
4. In typical usage, that use site of `q` is inside the implementation
of some user-defined macro. Upon further macro expansion of *that macro's
use site*, the expander finds the now-bound unique name of the macro, and
proceeds to expand that macro.
So in the macro expansion of `q`, we have a call to `lookup_macro`
representing the hygienically captured macro. But this disappears after
a very brief window of time, namely when the use site of `q` reaches run
time. Thus, this function likely has far fewer use cases than
`is_captured_value`, but is provided for completeness.
(The point of hygienic macro capture is that a macro can safely return a further
macro invocation, and guarantee that this will invoke the intended macro - without
requiring the user to import that other macro, and without being forced to expand
it away before returning from the original macro.)
"""
if type(tree) is not ast.Call:
return False
callee = tree.func
if not (type(callee) is ast.Attribute and callee.attr == "lookup_macro"):
return False
if not _is_mcpyrate_quotes_reference(callee.value):
return False
# This AST destructuring and constant extraction must match the format
# of the argument lists produced by the quasiquote system for calls to
# `lookup_macro`.
key_node = tree.args[0] # the call is `lookup_macro(key)`
name_node, unique_name_node, frozen_macro_node = key_node.elts
assert type(name_node) is ast.Constant and type(name_node.value) is str
assert type(unique_name_node) is ast.Constant and type(unique_name_node.value) is str
assert type(frozen_macro_node) is ast.Constant and type(frozen_macro_node.value) is bytes
return (name_node.value, unique_name_node.value, frozen_macro_node.value)
def _is_mcpyrate_quotes_reference(tree):
"""Detect whether `tree` is a reference to `mcpyrate.quotes`.
This matches the ASTs corresponding to:
- direct reference: `mcpyrate.quotes`
- reference by import: `__import__("mcpyrate.quotes", ...).quotes`
Note `__import__` of a dotted module name returns the top-level module,
so we have the name `quotes` appear twice in different places.
See `_mcpyrate_quotes_attr` and `mcpyrate.coreutils._mcpyrate_attr`.
"""
if not (type(tree) is ast.Attribute and tree.attr == "quotes"):
return False
moduleref = tree.value
if type(moduleref) is ast.Name and moduleref.id == "mcpyrate":
return "direct" # ok, direct reference
elif (type(moduleref) is ast.Call and type(moduleref.func) is ast.Name and
moduleref.func.id == "__import__" and type(moduleref.args[0]) is ast.Constant and
moduleref.args[0].value == "mcpyrate.quotes"):
return "import" # ok, reference by import
else:
return False
# --------------------------------------------------------------------------------
# The quasiquote compiler and uncompiler.
def astify(x, expander=None): # like `macropy`'s `ast_repr`
"""Quasiquote compiler. Lift a value into its AST representation, if possible.
When the AST is compiled and run, it will evaluate to `x`.
Note the above implies that if `x` itself is an AST, then this produces
an AST that, when compiled and run, will generate the AST `x`. This is
the mechanism that `q` uses to produce the quoted AST.
If the input is a `list` of ASTs (e.g. body of block mode `q`), the return value
is a single `ast.List` node, with its `elts` taken from the input list
(after recursing into each element).
`expander` is a `BaseMacroExpander` instance, used for detecting macros
inside `Capture` markers. Macros can be hygienically captured only if
an `expander` is provided.
Raises `TypeError` if the lifting fails.
"""
def recurse(x): # second layer just to auto-pass `expander` by closure.
T = type(x)
# Compile the unquote commands.
#
# Minimally, `astify` must support `ASTLiteral`; the others could be
# implemented inside the unquote operators, as `ASTLiteral(ast.Call(...), "expr")`.
# But maybe this approach is cleaner.
if T is Unquote: # `u[]`
# We want to generate an AST that compiles to the *value* of `x.body`,
# evaluated at the use site of `q`. But when the `q` expands, it is
# too early. We must `astify` *at the use site* of `q`. So use an
# `ast.Call` to delay until run-time, and pass in `x.body` as-is.
return ast.Call(_mcpyrate_quotes_attr("astify"), [x.body], [])
elif T is LiftSourcecode: # `n[]`
# Delay the identifier lifting, so it runs at the use site of `q`,
# where the actual value of `x.body` becomes available.
return ast.Call(_mcpyrate_quotes_attr("lift_sourcecode"),
[x.body,
ast.Constant(value=x.filename)],
[])
elif T is ASTLiteral: # `a`
# Pass through this subtree as-is, but apply a run-time typecheck,
# as well as some special run-time handling for `list`s of AST nodes.
return ast.Call(_mcpyrate_quotes_attr("ast_literal"),
[x.body,
ast.Constant(value=x.syntax)],
[])
elif T is ASTList: # `s[]`
return ast.Call(_mcpyrate_quotes_attr("ast_list"), [x.body], [])
elif T is ASTTuple: # `t[]`
return ast.Call(_mcpyrate_quotes_attr("ast_tuple"), [x.body], [])
elif T is Capture: # `h[]`
if expander and type(x.body) is ast.Name:
function = expander.isbound(x.body.id)
if function:
# Hygienically capture a macro. We do this immediately,
# during the expansion of `q`, because the value we want to
# store, i.e. the macro function, is available only at
# macro-expansion time.
#
# This allows macros in scope at the use site of `q` to be
# hygienically propagated out to the use site of the macro
# that used `q`. So you can write macros that `q[h[macroname][...]]`,
# and `macroname` doesn't have to be macro-imported wherever
# that code gets spliced in.
return capture_macro(function, x.body.id)
# Hygienically capture a garden variety run-time value.
# At the use site of q[], this captures the value, and rewrites itself
# into an AST that represents a lookup. At the use site of the macro
# that used q[], that code runs, and looks up the captured value.
return ast.Call(_mcpyrate_quotes_attr("capture_value"),
[x.body,
ast.Constant(value=x.name)],
[])
# Builtin types. Mainly support for `u[]`, but also used by the
# general case for AST node fields that contain bare values.
elif T in (int, float, str, bytes, bool, type(None), type(...)):
return ast.Constant(value=x)
elif T is list:
return ast.List(elts=list(recurse(elt) for elt in x))
elif T is tuple:
return ast.Tuple(elts=list(recurse(elt) for elt in x))
elif T is dict:
return ast.Dict(keys=list(recurse(k) for k in x.keys()),
values=list(recurse(v) for v in x.values()))
elif T is set:
return ast.Set(elts=list(recurse(elt) for elt in x))
# We must support at least the `Done` AST marker, so that things like
# coverage dummy nodes and expanded name macros can be astified.
# (Note we support only exactly `Done`, not arbitrary descendants.)
elif T is Done:
fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
# We have imported `Done`, so we can refer to it as `mcpyrate.quotes.Done`.
node = ast.Call(_mcpyrate_quotes_attr("Done"),
[],
fields)
return node
# General case.
elif isinstance(x, ast.AST):
# TODO: Add support for astifying general ASTMarkers?
# Otherwise the same as regular AST node, but need to refer to the
# module it is defined in, and we don't have everything in scope here.
if isinstance(x, ASTMarker):
raise TypeError(f"Cannot astify internal AST markers, got {unparse(x)}")
# The magic is in the Call. Take apart the input AST, and construct a
# new AST, that (when compiled and run) will re-generate the input AST.
#
# We refer to the stdlib `ast` module as `mcpyrate.quotes.ast` to avoid
# name conflicts at the use site of `q[]`.
fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
node = ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr("ast"),
attr=x.__class__.__name__),
[],
fields)
# Copy source location info for correct coverage reporting of a quoted block.
#
# The location info we fill in here is for the use site of `q`, which is
# typically inside a macro definition. Coverage for a quoted line of code
# means that the expansion of the quote contains input from that line.
# It says nothing about the run-time behavior of that code.
#
# Running the AST produced by the quote re-produces the input AST, which is
# indeed the whole point of quoting stuff. The AST is re-produced **without
# any source location info**. The fact that *this* location info is missing,
# on purpose, is the magic that allows the missing location fixer to fill
# the correct location info at the final use site, i.e. the use site of the
# macro that used `q`.
node = ast.copy_location(node, x)
return node
raise TypeError(f"Don't know how to astify {repr(x)}")
return recurse(x)
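# Sketch: what `astify` does to a plain value and to an AST node::
#
#     astify(42)                 # -> ast.Constant(value=42)
#     astify(ast.Name(id="x"))   # -> an ast.Call that, when compiled and run,
#                                #    builds `ast.Name(id='x')`
#
# `q` uses the latter mode on the whole quoted tree; the unquote markers compiled
# in by the unquote operators tell `astify` where to interpolate instead.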
def unastify(tree):
"""Quasiquote uncompiler. Approximate inverse of `astify`.
`tree` must have been produced by `astify`. Otherwise raises `TypeError`.
This turns an "astified" AST, that represents code to construct a run-time
AST value, back into a direct AST. So in a sense, `unastify` is a top-level
unquote operator.
Note subtle difference in meaning to `u[]`. The `u[]` operator interpolates
a value from outside the quote context into the quoted representation - so
that the value actually becomes quoted! - whereas `unastify` inverts the
quote operation.
Note also that `astify` compiles unquote AST markers into ASTs for calls to
the run-time parts of the unquote operators. `unastify` uncompiles those
calls back into the corresponding AST markers. That's the best we can do;
the only context that has the user-provided names (where the unquoted data
comes from) in scope is each particular use site of `q`, at its run time.
The use case of `unastify` is to transform a quoted AST at macro expansion
time when the extra AST layer added by `astify` is still present. The
recipe is `unastify`, process just like any AST, then quote again.
(`expands` and `expand1s` in `mcpyrate.metatools` are examples of this.)
If you just want to macro-expand a quoted AST in the REPL, see the `expand`
family of macros. Prefer the `r` variants; they expand at run time, so
you'll get the final AST with the actual unquoted values spliced in.
"""
# CAUTION: in `unastify`, we implement only what we minimally need.
our_module_globals = globals()
def lookup_thing(dotted_name):
if not dotted_name.startswith("mcpyrate.quotes"):
raise NotImplementedError(f"Don't know how to look up {repr(dotted_name)}")
path = dotted_name.split(".")
if not all(component.isidentifier() for component in path):
raise NotImplementedError(f"Dotted name {repr(dotted_name)} contains at least one non-identifier component")
if len(path) < 3:
raise NotImplementedError(f"Dotted name {repr(dotted_name)} has fewer than two dots (expected 'mcpyrate.quotes.something')")
name_of_thing = path[2]
thing = our_module_globals[name_of_thing]
if len(path) > 3:
for attrname in path[3:]:
thing = getattr(thing, attrname)
return thing
T = type(tree)
if T is ast.Constant:
return tree.value
# Support machinery for `Call` AST node. This serendipitously supports also
# *args and **kwargs, because as of Python 3.6 those appear in `args` and
# `keywords`, and `Starred` needs no special support here.
elif T is list:
return [unastify(elt) for elt in tree]
elif T is ast.keyword:
return tree.arg, unastify(tree.value)
elif T is ast.List:
return [unastify(elt) for elt in tree.elts]
elif T is ast.Tuple:
return tuple(unastify(elt) for elt in tree.elts)
elif T is ast.Dict:
return {unastify(k): unastify(v) for k, v in zip(tree.keys, tree.values)}
elif T is ast.Set:
return {unastify(elt) for elt in tree.elts}
elif T is ast.Call:
dotted_name = unparse(tree.func)
# Drop the run-time part of `q`, if present. This is added by `q` itself,
# not `astify`, but `unastify` is usually applied to the output of `q`.
if dotted_name == "mcpyrate.quotes.splice_ast_literals": # `q[]`
body = tree.args[0]
return unastify(body)
# Even though the unquote operators compile into calls, `unastify`
# must not apply their run-time parts, because it's running in the
# wrong context. Those only work properly at run time, and they
# must run at the use site of `q`, where the user-provided names
# (where the unquoted data comes from) will be in scope.
#
# So we undo what `astify` did, converting the unquote calls back into
# the corresponding AST markers.
elif dotted_name == "mcpyrate.quotes.astify": # `u[]`
body = tree.args[0]
return Unquote(body)
elif dotted_name == "mcpyrate.quotes.lift_sourcecode": # `n[]`
body, filename = tree.args[0], tree.args[1].value
return LiftSourcecode(body, filename)
elif dotted_name == "mcpyrate.quotes.ast_literal": # `a[]`
body, syntax = tree.args[0], tree.args[1].value
return ASTLiteral(body, syntax)
elif dotted_name == "mcpyrate.quotes.ast_list": # `s[]`
body = tree.args[0]
return ASTList(body)
elif dotted_name == "mcpyrate.quotes.ast_tuple": # `t[]`
body = tree.args[0]
return ASTTuple(body)
elif dotted_name == "mcpyrate.quotes.capture_value": # `h[]` (run-time value)
body, name = tree.args[0], tree.args[1].value
return Capture(body, name)
elif dotted_name == "mcpyrate.quotes.lookup_macro": # `h[]` (macro)
# `capture_macro` is done and gone by the time we get here.
# `astify` has generated an `ast.Call` to `lookup_macro`.
#
# To make this work properly even across process boundaries,
# we cannot simply run the `lookup_macro`. It injects the binding
# once, and then becomes an inert lexical name (pointing to that
# binding) - so that strategy only works inside the same process.
#
# We can't just leave the `lookup_macro` call in the AST, either,
# since that doesn't make any sense when the tree is later sent
# to `astify` to compile it again (we don't want another `ast.Call`
# layer around it).
#
# So we need something that triggers `capture_macro` when the
# result is astified again.
#
# Hence, we uncompile the `lookup_macro` into a `Capture` marker.
#
# But if the astified tree comes from an earlier run (in another
# Python process), the original macro name might not be in the
# expander's bindings any more.
#
# So we inject the captured macro into the expander's global
# bindings table now (by calling `lookup_macro`), and make the
# uncompiled capture command capture that macro.
#
# This does make the rather mild assumption that our input tree
# will be astified again in the same Python process, in order for
# the uncompiled capture to succeed when `astify` compiles it.
key = tree.args[0]
assert type(key) is ast.Tuple
assert all(type(elt) is ast.Constant for elt in key.elts)
name, unique_name, frozen_macro = [elt.value for elt in key.elts]
uniquename_node = lookup_macro((name, unique_name, frozen_macro))
return Capture(uniquename_node, name)
else:
# General case: an astified AST node.
callee = lookup_thing(dotted_name)
args = unastify(tree.args)
kwargs = {k: v for k, v in unastify(tree.keywords)}
node = callee(*args, **kwargs)
node = ast.copy_location(node, tree)
return node
raise TypeError(f"Don't know how to unastify {unparse_with_fallbacks(tree, debug=True, color=True)}")
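# Rough sketch of the "unastify, process, quote again" recipe mentioned above
# (`my_transform` is a hypothetical AST-to-AST function; the real recipe is
# implemented by `expands`/`expand1s` in `mcpyrate.metatools`)::
#
#     plain_tree = unastify(quoted_tree)      # strip the astification layer
#     plain_tree = my_transform(plain_tree)   # process like any ordinary AST
#     quoted_tree = astify(plain_tree)        # lift back into astified form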
# --------------------------------------------------------------------------------
# Quasiquote macros
#
# These operators are named after Qu'nasth, the goddess of quasiquotes in high-tech-elven mythology.
_quotelevel = NestingLevelTracker()
def _expand_quasiquotes(tree, expander):
"""Expand quasiquote macros only."""
# Use a second expander instance, with different bindings. Copy only the
# bindings of the quasiquote macros from the main `expander`, accounting
# for possible as-imports. This second expander won't even see other macros,
# thus leaving them alone.
bindings = extract_bindings(expander.bindings, q, u, n, a, s, t, h)
return MacroExpander(bindings, expander.filename).visit(tree)
# TODO: maybe make the rest of this a method of `MacroExpander`, and only wrap with `QuasiquoteSearchDone` here?
def _replace_tree_in_macro_invocation(invocation, newtree):
"""Helper function for handling nested quasiquotes.
Output a new invocation of the same macro, but wrapped in `QuasiquoteSearchDone`,
and with the `tree` inside replaced by `newtree`.
`expr` and `block` modes are supported; this is autodetected from `invocation`.
"""
new_invocation = copy.copy(invocation)
if type(new_invocation) is ast.Subscript:
if sys.version_info >= (3, 9, 0): # Python 3.9+: no ast.Index wrapper
new_invocation.slice = newtree
else:
new_invocation.slice = copy.copy(invocation.slice)
new_invocation.slice.value = newtree
elif type(new_invocation) is ast.With:
new_invocation.body = newtree
else:
raise NotImplementedError
return QuasiquoteSearchDone(body=new_invocation)
def q(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr/block] quasiquote. Lift code into its AST representation."""
if syntax not in ("expr", "block"):
raise SyntaxError("`q` is an expr and block macro only")
with _quotelevel.changed_by(+1):
tree = _expand_quasiquotes(tree, expander) # expand any unquotes corresponding to this level first
if _quotelevel.value > 1: # nested inside an outer quote?
# TODO: Implications when in block mode and not the only context manager in the `with`?
# TODO: Probably doesn't work in that case. Document that `q`, when used,
# TODO: should be the only ctxmgr in that particular `with`.
return _replace_tree_in_macro_invocation(invocation, tree)
tree = delete_markers(tree, cls=QuasiquoteSearchDone)
tree = astify(tree, expander) # Magic part of `q`. Supply `expander` for `h[macro]` detection.
# `astify` should compile the unquote command markers away, and `SpliceNodes`
# markers only spring into existence when the run-time part of `a` runs
# (for communication with the run-time part of the surrounding `q`).
# So at this point, there should be no quasiquote markers in `tree`.
try:
check_no_markers_remaining(tree, filename=expander.filename, cls=QuasiquoteMarker)
except MacroExpansionError as err:
raise RuntimeError("`q`: internal error in quasiquote system") from err
# `a` introduces the need to splice any interpolated `list`s of ASTs at
# run time into the surrounding context (which is only available to the
# surrounding `q`). Inject a handler for that.
#
# Block mode `a` always produces a `list` of statement AST nodes.
#
# For expression mode `a`, a `list` of expression AST nodes is valid
# e.g. in a function call argument position, to splice the list into
# positional arguments of the `Call`.
tree = ast.Call(_mcpyrate_quotes_attr("splice_ast_literals"),
[tree,
ast.Constant(value=expander.filename)],
[])
if syntax == "block":
# Generate AST to perform the assignment for `with q as quoted`.
target = kw["optional_vars"] # List, Tuple, Name
if target is None:
raise SyntaxError("`q` (block mode) requires an asname to receive the quoted code")
if type(target) is not ast.Name:
raise SyntaxError(f"`q` (block mode) expected a single asname, got {unparse(target)}")
# This `Assign` runs at the use site of `q`, it's not part of the
# quoted code block. The statement nodes are packed into a `List` node,
# because the original `tree` was a `list` of AST nodes (because block mode),
# and we ran it through `astify`.
tree = ast.Assign([target], tree)
return tree
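# Sketch of `q` from a macro author's point of view, in a macro-enabled module
# (`zero` and `setup` are hypothetical macros)::
#
#     from mcpyrate.quotes import macros, q  # noqa: F401 (macro-import)
#
#     def zero(tree, *, syntax, **kw):
#         return q[0]             # expr mode: an AST for the literal 0
#
#     def setup(tree, *, syntax, **kw):
#         with q as quoted:       # block mode: `quoted` becomes a list of stmt nodes
#             x = 1
#             y = 2
#         return quoted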
def u(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr] unquote. Splice a simple value into a quasiquote.
The value is lifted into an AST that re-constructs that value.
"""
if syntax != "expr":
raise SyntaxError("`u` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`u` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
return _replace_tree_in_macro_invocation(invocation, tree)
return Unquote(tree)
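# Sketch (macro-imports as in the sketch after `q` above; `greeting` is hypothetical)::
#
#     def greeting(tree, *, syntax, **kw):
#         text = "hello, " + "world"
#         return q[u[text]]       # an AST for the string constant "hello, world"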
def n(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr] name-unquote. Parse a string, as Python source code, into an AST.
With `n[]`, you can e.g. compute a name (e.g. by `mcpyrate.gensym`) for a
variable and then use that variable in quasiquoted code - also as an assignment
target. Things like `n[f"self.{x}"]` and `n[f"kitties[{j}].paws[{k}].claws"]`
are also valid.
The use case this operator was designed for is variable access (identifiers,
attributes, subscripts, in any syntactically allowed nested combination) with
computed names, but who knows what else can be done with it?
The correct `ctx` is filled in automatically by the macro expander later.
See also `n[]`'s sister, `a`.
Generalized from `macropy`'s `n`, which converts a string into a variable access.
"""
if syntax != "expr":
raise SyntaxError("`n` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`n` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
return _replace_tree_in_macro_invocation(invocation, tree)
return LiftSourcecode(tree, expander.filename)
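# Sketch (macro-imports as above, plus `n`; `tailcolor` is hypothetical)::
#
#     def tailcolor(tree, *, syntax, **kw):
#         name = "kitty"                     # typically computed, e.g. by `mcpyrate.gensym`
#         return q[n[f"{name}.tail.color"]]  # an AST for the access `kitty.tail.color`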
def a(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr/block] ast-unquote. Splice an AST into a quasiquote.
**Expression mode**::
a[expr]
The expression `expr` must evaluate, at run time at the use site of the
surrounding `q`, to an *expression* AST node, an AST marker containing
an *expression* AST node in its `body` attribute, or in certain contexts
where that is valid in the AST, a `list` of such values.
Typically, `expr` is the name of a variable that holds such data, but
it doesn't have to be; any expression that evaluates to acceptable data
is fine.
An example of a context that accepts a `list` of expression nodes is the
positional arguments of a function call. `q[myfunc(a[args])]` will splice
the `list` `args` into the positional arguments of the `Call`. Of course,
ast-unquoting single positional arguments such as `q[myfunc(a[arg1], a[arg2])]`
is also fine.
**Block mode**::
with a:
stmts
...
Each `stmts` must evaluate, at run time at the use site of the surrounding `q`,
to a *statement* AST node, an AST marker containing a *statement* AST node in
its `body` attribute, or a `list` of such values.
Typically, `stmts` is the name of a variable that holds such data, but
it doesn't have to be; any expression that evaluates to acceptable data
is fine.
This expands as if all those statements appeared in the `with` body,
in the order listed.
The `with` body must not contain anything else.
See also `a`'s sister, `n[]`.
"""
if syntax not in ("expr", "block"):
raise SyntaxError("`a` is an expr and block macro only")
if _quotelevel.value < 1:
raise SyntaxError("`a` encountered while quotelevel < 1")
if syntax == "block" and kw['optional_vars'] is not None:
raise SyntaxError("`a` (block mode) does not take an asname")
with _quotelevel.changed_by(-1):
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
# TODO: implications for block mode?
return _replace_tree_in_macro_invocation(invocation, tree)
if syntax == "expr":
return ASTLiteral(tree, syntax)
assert syntax == "block"
# Block mode: strip `Expr` wrappers.
#
# When `with a` expands, the elements of the list `tree` are `Expr` nodes
# containing expressions. Typically each expression is just a `Name`
# node, or in general, any expression that at run time evaluates to
# a statement AST node, or to an iterable of statement AST nodes.
#
# We want to return, as an AST, a list of those expressions for processing
# later.
#
# The values of the expressions become available when the use site of
# `q` reaches run time. Because each expression may refer to a list of
# AST nodes, `q` injects a call to a postprocessor that, at run time
# (once the values are available), will flatten the quoted AST structure.
out = []
for stmt in tree:
if type(stmt) is not ast.Expr:
raise SyntaxError("`a` (block mode): each item in the body must be an expression (that at run time evaluates to a statement node or iterable of statement nodes)")
out.append(stmt.value)
return ASTLiteral(ast.List(elts=out), syntax)
def s(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr] ast-list-unquote. Splice an iterable of ASTs, as an `ast.List`, into a quasiquote."""
if syntax != "expr":
raise SyntaxError("`s` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`s` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
return _replace_tree_in_macro_invocation(invocation, tree)
return ASTList(tree)
def t(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr] ast-tuple-unquote. Splice an iterable of ASTs, as an `ast.Tuple`, into a quasiquote."""
if syntax != "expr":
raise SyntaxError("`t` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`t` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
return _replace_tree_in_macro_invocation(invocation, tree)
return ASTTuple(tree)
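# Sketch (macro-imports as above, plus `s`; `listify` is hypothetical)::
#
#     def listify(tree, *, syntax, **kw):
#         elements = [q[1], q[2], tree]   # a `list` of expression AST nodes
#         return q[s[elements]]           # an AST for the list literal `[1, 2, <tree>]`
#
# `t[]` works the same way, but produces a tuple literal.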
def h(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr] hygienic-unquote. Splice any value, from the macro definition site, into a quasiquote.
Supports also values that have no meaningful `repr`. The value is captured
at the use site of the surrounding `q`.
The value is frozen into the expanded source code as a pickled blob,
separately at each use site of `h[]`. Thus the value must be picklable,
and each capture will pickle it again.
This is done to ensure the value will remain available, when the
already-expanded code (due to `.pyc` caching) runs again in another
Python process. (In other words, values from "macro expansion time
last week" would not otherwise be available.)
Supports also macros. To hygienically splice a macro invocation,
`h[]` only the macro name.
"""
if syntax != "expr":
raise SyntaxError("`h` is an expr macro only")
if _quotelevel.value < 1:
raise SyntaxError("`h` encountered while quotelevel < 1")
with _quotelevel.changed_by(-1):
name = unparse(tree)
# TODO: This logic never does anything - any correctly placed `h[]`
# is always inside a `q`, which recurses with an expander that
# only knows about the quasiquote macros.
# Expand macros in the unquoted expression. The only case we need to
# look out for is a `@namemacro` if we have a `h[macroname]`. We're
# only capturing it, so don't expand it just yet.
expand = True
if type(tree) is ast.Name:
function = expander.isbound(tree.id)
if function and isnamemacro(function):
expand = False
if expand:
tree = expander.visit_recursively(tree)
if _quotelevel.value > 0:
return _replace_tree_in_macro_invocation(invocation, tree)
return Capture(tree, name)
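# Sketch (macro-imports as above, plus `a` and `h`; `_log` and `logged` are hypothetical)::
#
#     def _log(x):
#         print("value:", x)
#         return x
#
#     def logged(tree, *, syntax, **kw):
#         # The expanded code calls the macro definition site's `_log`, even though
#         # the use site never imports it. `_log` must be picklable, which a
#         # module-level function is (pickled by reference).
#         return q[h[_log](a[tree])]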