Updated mcpyrate; the time is now 260 ms, just 13% over mcpy

This commit is contained in:
Salvador E. Tropea 2020-10-17 10:17:25 -03:00
parent 43278717e9
commit 5a24d72772
5 changed files with 98 additions and 32 deletions

View File

@ -5,10 +5,10 @@ __all__ = ["Dialect",
"expand_dialects"]
import ast
from collections import deque
import importlib
import importlib.util
import re
from sys import stderr
import tokenize
from .coreutils import ismacroimport, get_macros
from .unparser import unparse_with_fallbacks
@ -154,7 +154,7 @@ class DialectExpander:
Return value is an AST for the module.
'''
text = _decode_source_content(data)
text = importlib.util.decode_source(data)
text = self.transform_source(text)
try:
tree = ast.parse(data, filename=self.filename, mode="exec")
@ -276,9 +276,11 @@ class DialectExpander:
'''Find the first dialect-import statement by scanning the AST `tree`.
Transform the dialect-import into `import ...`, where `...` is the absolute
module name the dialects are being imported from.
module name the dialects are being imported from. As a side effect, import
the dialect definition module.
As a side effect, import the dialect definition module.
Primarily meant to be called with `tree` the AST of a module that
uses dialects, but works with any `tree` that has a `body` attribute.
A dialect-import is a statement of the form::
@ -302,15 +304,6 @@ class DialectExpander:
statement)
return module_absname, bindings
def _decode_source_content(data):
'''Decode a .py source file from bytes to string, parsing the encoding tag like `tokenize`.'''
lines = deque(data.split(b"\n"))
def readline():
return lines.popleft()
encoding, lines_read = tokenize.detect_encoding(readline)
return data.decode(encoding)
# --------------------------------------------------------------------------------
def expand_dialects(data, *, filename):

View File

@ -8,6 +8,7 @@ import importlib.util
from importlib.machinery import FileFinder, SourceFileLoader
import tokenize
import os
import pickle
import sys
from .core import MacroExpansionError
@ -25,7 +26,6 @@ def source_to_xcode(self, data, path, *, _optimize=-1):
Intercepts the source to bytecode transformation.
'''
tree = expand_dialects(data, filename=path)
# tree = ast.parse(data)
module_macro_bindings = find_macros(tree, filename=path)
expansion = expand_macros(tree, bindings=module_macro_bindings, filename=path)
@ -61,20 +61,69 @@ def path_xstats(self, path):
if path in _xstats_cache:
return _xstats_cache[path]
# TODO: This can be slow, the point of `.pyc` is to avoid the parse-and-compile cost.
# TODO: We do save the macro-expansion cost, though, and that's likely much more expensive.
stat_result = os.stat(path)
# Try for cached macro-import statements for `path` to avoid the parse cost.
#
# If that becomes an issue, maybe make our own cache file storing the
# macro-imports found in source file `path`, store it in the pyc
# directory, and invalidate it based on the mtime of `path` (only)?
with tokenize.open(path) as sourcefile:
tree = ast.parse(sourcefile.read())
# This is a single node in the dependency graph; the result depends only
# on the content of the source file `path` itself. So we invalidate the
# macro-import statement cache for `path` based on the mtime of `path` only.
#
# For a given source file `path`, the `.pyc` sometimes becomes newer than
# the macro-dependency cache. This is normal. Unlike the bytecode, the
# macro-dependency cache only needs to be refreshed when the text of the
# source file `path` changes.
#
# So if some of the macro-dependency source files have changed (so `path`
# must be re-expanded and recompiled), but `path` itself hasn't, the text
# of the source file `path` will still have the same macro-imports it did
# last time.
#
pycpath = importlib.util.cache_from_source(path)
if pycpath.endswith(".pyc"):
pycpath = pycpath[:-4]
importcachepath = pycpath + ".mcpyrate.pickle"
try:
cache_valid = False
with open(importcachepath, "rb") as importcachefile:
data = pickle.load(importcachefile)
if data["st_mtime_ns"] == stat_result.st_mtime_ns:
cache_valid = True
except Exception:
pass
macroimports = [stmt for stmt in tree.body if ismacroimport(stmt)]
dialectimports = [stmt for stmt in tree.body if ismacroimport(stmt, magicname="dialects")]
macro_and_dialect_imports = macroimports + dialectimports
has_relative_macroimports = any(macroimport.level for macroimport in macro_and_dialect_imports)
if cache_valid:
macro_and_dialect_imports = data["macroimports"] + data["dialectimports"]
has_relative_macroimports = data["has_relative_macroimports"]
else:
# This can be slow, the point of `.pyc` is to avoid the parse-and-compile cost.
# We do save the macro-expansion cost, though, and that's likely much more expensive.
#
# TODO: Dialects may inject imports in the template that the dialect transformer itself
# TODO: doesn't need. How to detect those? Regex-search the source text?
with tokenize.open(path) as sourcefile:
tree = ast.parse(sourcefile.read())
macroimports = [stmt for stmt in tree.body if ismacroimport(stmt)]
dialectimports = [stmt for stmt in tree.body if ismacroimport(stmt, magicname="dialects")]
macro_and_dialect_imports = macroimports + dialectimports
has_relative_macroimports = any(macroimport.level for macroimport in macro_and_dialect_imports)
# macro-import statement cache goes with the .pyc
if not sys.dont_write_bytecode:
data = {"st_mtime_ns": stat_result.st_mtime_ns,
"macroimports": macroimports,
"dialectimports": dialectimports,
"has_relative_macroimports": has_relative_macroimports}
try:
with open(importcachepath, "wb") as importcachefile:
pickle.dump(data, importcachefile)
except Exception:
pass
# The rest of the lookup process depends on the configuration of the currently
# running Python, particularly its `sys.path`, so we do it dynamically.
#
# TODO: some duplication with code in mcpyrate.coreutils.get_macros, including the error messages.
package_absname = None
if has_relative_macroimports:
@ -96,7 +145,6 @@ def path_xstats(self, path):
stats = path_xstats(self, origin)
mtimes.append(stats['mtime'])
stat_result = os.stat(path)
mtime = stat_result.st_mtime_ns * 1e-9
# size = stat_result.st_size
mtimes.append(mtime)

View File

@ -22,6 +22,7 @@ import code
import textwrap
from .. import __version__ as mcpyrate_version
from ..core import MacroExpansionError
from ..debug import format_bindings
from ..expander import find_macros, MacroExpander, global_postprocess
from .utils import get_makemacro_sourcecode
@ -110,7 +111,7 @@ class MacroConsole(code.InteractiveConsole):
tree = ast.Interactive(tree.body)
code = compile(tree, filename, symbol, self.compile.compiler.flags, 1)
except (OverflowError, SyntaxError, ValueError):
except (OverflowError, SyntaxError, ValueError, MacroExpansionError):
self.showsyntaxerror(filename)
return False # erroneous input
except ModuleNotFoundError as err: # during macro module lookup
@ -138,5 +139,10 @@ class MacroConsole(code.InteractiveConsole):
self._macro_bindings_changed = False
for asname, function in self.expander.bindings.items():
source = f"from {function.__module__} import {function.__qualname__} as {asname}"
self._internal_execute(source)
if not function.__module__: # Macros defined in the REPL have `__module__=None`.
continue
try:
source = f"from {function.__module__} import {function.__qualname__} as {asname}"
self._internal_execute(source)
except (ModuleNotFoundError, ImportError):
pass

View File

@ -194,6 +194,8 @@ class IMcpyrateExtension:
silent=True)
for asname, function in self.macro_transformer.expander.bindings.items():
if not function.__module__:
continue
commands = ["%%ignore_importerror",
f"from {function.__module__} import {function.__qualname__} as {asname}"]
internal_execute("\n".join(commands))

View File

@ -4,18 +4,21 @@
# TODO: Currently tested in CPython 3.6, and PyPy3 7.3.0 (Python 3.6). Test in 3.7+.
import argparse
import atexit
from importlib import import_module
from importlib.util import resolve_name, module_from_spec
import pathlib
import os
import pathlib
import sys
import argparse
from ..coreutils import relativize
from .. import activate # noqa: F401
__version__ = "3.0.0"
_config_dir = "~/.config/mcpyrate"
_macropython_module = None # sys.modules doesn't always seem to keep it, so stash it locally too.
def import_module_as_main(name, script_mode):
@ -153,6 +156,18 @@ def main():
readline.set_completer(rlcompleter.Completer(namespace=repl_locals).complete)
readline.parse_and_bind("tab: complete") # PyPy ignores this, but not needed there.
config_dir = pathlib.Path(_config_dir).expanduser().resolve()
try:
readline.read_history_file(config_dir / "macropython_history")
except FileNotFoundError:
pass
def save_history():
config_dir.mkdir(parents=True, exist_ok=True)
readline.set_history_length(1000)
readline.write_history_file(config_dir / "macropython_history")
atexit.register(save_history)
# Add CWD to import path like the builtin interactive console does.
if sys.path[0] != "":
sys.path.insert(0, "")
@ -209,6 +224,8 @@ def main():
# if not spec:
# raise ImportError(f"Not a Python module: '{opts.filename}'"
# module = module_from_spec(spec)
# TODO: if we use this approach, we should first initialize parent packages.
# sys.modules[module.__name__] = module
# spec.loader.exec_module(module)
root_path, relative_path = relativize(opts.filename)