Fragment: Helping Learners Read Code

Picking up on Helping Learners Look at Their Code, where I showed how we can use the pyflowchart Python package to render a flowchart equivalent of code in a notebook code cell using the flowchart.js package, I started wondering about also generating text-based descriptions of simple fragments of code. I half expected there to be a simple package out there that would do this — a Python code summariser, or human readable text description generator — but couldn’t find anything offhand.

So as a really quick proof of concept knocked up over a coffee break, here are some sketches of a really naive way in to parsing some simple Python code (and that’s all we need to handle…) on the way to creating a simple human readable text version of it.

#  Have a look at the AST of some Python code

# Pretty print AST
#https://github.com/clarketm/pprintast
#%pip install pprintast

from pprintast import pprintast as ppast # OR: from pprintast import ppast

# Pretty print the AST of Python source code held in a string.
# `exp` is the sample program used for the rest of this post: three import
# statements and two small functions. Everything below only parses this
# text (ppast, ast.parse); nothing here executes it.
exp = '''
import os, math
import pandas as pd
from pprintast import pprintast2 as ppast
def test_fn(a, b=1, c=2):
    """Add two numbers"""
    out = a+b
    print(out)
    return out
def test_fn2(a, b=1):
    out = a+b
    if a>b:
        print(a)
    else:
        print(b)
    print(out)
'''

# Print the sample's AST in an indented layout so we can review its structure.
ppast(exp)

This gives a pretty printed output that lets us review the AST:

Module(body=[
    Import(names=[
        alias(name='os', asname=None),
        alias(name='math', asname=None),
      ]),
    Import(names=[
        alias(name='pandas', asname='pd'),
      ]),
    ImportFrom(module='pprintast', names=[
        alias(name='pprintast2', asname='ppast'),
      ], level=0),
    FunctionDef(name='test_fn', args=arguments(posonlyargs=[], args=[
        arg(arg='a', annotation=None, type_comment=None),
        arg(arg='b', annotation=None, type_comment=None),
        arg(arg='c', annotation=None, type_comment=None),
      ], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[
        Constant(value=1, kind=None),
        Constant(value=2, kind=None),
      ]), body=[
        Expr(value=Constant(value='Add two numbers', kind=None)),
        Assign(targets=[
            Name(id='out', ctx=Store()),
          ], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load())), type_comment=None),
        Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
            Name(id='out', ctx=Load()),
          ], keywords=[])),
        Return(value=Name(id='out', ctx=Load())),
      ], decorator_list=[], returns=None, type_comment=None),
    FunctionDef(name='test_fn2', args=arguments(posonlyargs=[], args=[
        arg(arg='a', annotation=None, type_comment=None),
        arg(arg='b', annotation=None, type_comment=None),
      ], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[
        Constant(value=1, kind=None),
      ]), body=[
        Assign(targets=[
            Name(id='out', ctx=Store()),
          ], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load())), type_comment=None),
        If(test=Compare(left=Name(id='a', ctx=Load()), ops=[
            Gt(),
          ], comparators=[
            Name(id='b', ctx=Load()),
          ]), body=[
            Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
                Name(id='a', ctx=Load()),
              ], keywords=[])),
          ], orelse=[
            Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
                Name(id='b', ctx=Load()),
              ], keywords=[])),
          ]),
        Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
            Name(id='out', ctx=Load()),
          ], keywords=[])),
      ], decorator_list=[], returns=None, type_comment=None),
  ], type_ignores=[])

We can now parse that into a dict, for example:

#https://www.mattlayman.com/blog/2018/decipher-python-ast/
import re
import ast
from pprint import pprint

# TO DO update generic_visit to capture other nodes
# A NodeVisitor can respond to any type of node in the Python AST.
# To visit a particular type of node, we must implement a method named visit_<NodeType> (for example, visit_Import).
class Analyzer(ast.NodeVisitor):
    """Summarise the imports and function definitions found in a Python AST.

    Results accumulate in ``self.stats``, a dict holding three lists:

    * ``"import"``: one ``{'name', 'alias'}`` dict per name in an
      ``import x [as y]`` statement.
    * ``"from"``: one ``{'from', 'import'}`` dict per ``from m import ...``
      statement, where ``'import'`` is a list of ``{'name', 'as'}`` dicts.
    * ``"function"``: one dict per ``def`` (see ``visit_FunctionDef``).

    Only ``ast.FunctionDef`` nodes are recorded as functions; ``async def``
    definitions have no visitor here and are not reported themselves.

    Args:
        source: The source text the visited tree was parsed from. It is needed
            to recover the original text of functions, defaults and return
            expressions. When omitted, the module-level ``exp`` string is used
            so that existing ``Analyzer()`` callers keep working.
    """

    def __init__(self, source=None):
        self.source = source
        self.stats = {"import": [], "from": [], "function": []}

    def _segment(self, node):
        """Return the source text that produced *node*."""
        # Fall back to the module-level sample string only when no source was
        # supplied at construction time.
        source = exp if self.source is None else self.source
        return ast.get_source_segment(source, node)

    def visit_Import(self, node):
        """Record every name brought in by an ``import ...`` statement."""
        self.stats["import"].extend(
            {'name': alias.name, 'alias': alias.asname} for alias in node.names
        )
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Record a ``from module import ...`` statement and its names."""
        names = [{'name': alias.name, 'as': alias.asname} for alias in node.names]
        self.stats["from"].append({'from': node.module, 'import': names})
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        """Record a function's name, docstring, arguments, return and source.

        The recorded dict has the keys:

        * ``name``: the function name.
        * ``docstring``: the cleaned docstring, or ``None``.
        * ``returns``: source text of the expression in the last top-level
          ``return`` statement; ``None`` if there is no such statement or
          it is a bare ``return``. Returns nested inside ``if``/``for``
          blocks are not inspected.
        * ``args``: positional parameter names (positional-only first).
        * ``args2``: default values; the Python value for literal
          defaults, otherwise the default's source text.
        * ``argvals``: parameter names, with ``name=default`` (default as
          written in the source) for those that have one.
        * ``src``: the full source text of the function.

        Keyword-only parameters, ``*args`` and ``**kwargs`` are not reported.
        """
        # `defaults` applies to positional-only and regular parameters taken
        # together, so both lists are needed to line defaults up correctly.
        params = node.args.posonlyargs + node.args.args
        defaults = node.args.defaults

        args = [param.arg for param in params]
        # Non-literal defaults (e.g. ``-1``, ``[]``, ``os.sep``) are not
        # ast.Constant nodes; fall back to their source text.
        args2 = [
            default.value if isinstance(default, ast.Constant)
            else self._segment(default)
            for default in defaults
        ]

        # Defaults always belong to the trailing parameters.
        first_with_default = len(args) - len(defaults)
        argvals = list(args)
        for offset, default in enumerate(defaults):
            idx = first_with_default + offset
            argvals[idx] = f"{args[idx]}={self._segment(default)}"

        ret = None
        for stmt in node.body:
            if isinstance(stmt, ast.Return):
                # Take the text of the returned expression directly rather
                # than stripping a "return" prefix off the statement text.
                ret = None if stmt.value is None else self._segment(stmt.value)

        self.stats["function"].append({'name': node.name,
                                       'docstring': ast.get_docstring(node),
                                       'returns': ret,
                                       'args': args, 'args2': args2, 'argvals': argvals,
                                       'src': self._segment(node)})
        # Keep descending so that nested function definitions are recorded too.
        self.generic_visit(node)

    def report(self):
        """Pretty print the collected statistics."""
        pprint(self.stats)
 

And that then generates output of the form:

# Parse the sample source into an AST, walk it with the Analyzer so that it
# collects the import and function details, then pretty print what it found.
tree = ast.parse(exp)
analyzer = Analyzer()
analyzer.visit(tree)
analyzer.report()

'''
{'from': [{'from': 'pprintast',
           'import': [{'as': 'ppast', 'name': 'pprintast2'}]}],
 'function': [{'args': ['a', 'b', 'c'],
               'args2': [1, 2],
               'argvals': ['a', 'b=1', 'c=2'],
               'docstring': 'Add two numbers',
               'name': 'test_fn',
               'returns': 'out',
               'src': 'def test_fn(a, b=1, c=2):\n'
                      '    """Add two numbers"""\n'
                      '    out = a+b\n'
                      '    print(out)\n'
                      '    return out'},
              {'args': ['a', 'b'],
               'args2': [1],
               'argvals': ['a', 'b=1'],
               'docstring': None,
               'name': 'test_fn2',
               'returns': None,
               'src': 'def test_fn2(a, b=1):\n'
                      '    out = a+b\n'
                      '    if a>b:\n'
                      '        print(a)\n'
                      '    else:\n'
                      '        print(b)\n'
                      '    print(out)'}],
 'import': [{'alias': None, 'name': 'os'},
            {'alias': None, 'name': 'math'},
            {'alias': 'pd', 'name': 'pandas'}]}
'''

It’s not hard to see how we could then convert that to various text sentences, such as:

# N packages are imported directly: os and math without any aliases, pandas with the alias pd
# The pprintast2 name is imported from the pprintast module with the alias ppast
# Two functions are defined: test_fn, which will add two numbers, and...
# The test_fn function takes three arguments TO DO N required and M optional

It would be trivial to create some magic to wrap all that together, then let the user use a block cell magic such as %%summarise_this_code to generate the text description, or play it out using a simple text to speech function.

PS in passing, it’s also worth noting pindent.py (via) which will add #end of block comments at the end of each code block in a Python program. Backup gist: https://gist.github.com/psychemedia/2c3fe0466aca1f760d67d5ca4f6e00b1

Author: Tony Hirst

I'm a Senior Lecturer at The Open University, with an interest in #opendata policy and practice, as well as general web tinkering...

%d bloggers like this: