Picking up on Helping Learners Look at Their Code, where I showed how we can use the pyflowchart Python package to render a flowchart equivalent of code in a notebook code cell using the flowchart.js package, I started wondering about also generating text-based descriptions of simple fragments of code. I half expected there to be a simple package out there that would do this — a Python code summariser, or human-readable text description generator — but couldn’t find anything offhand.
So, as a really quick proof of concept knocked up over a coffee break, here are some sketches of a really naive way into parsing some simple Python code (and that’s all we need to handle…) on the way to creating a simple human-readable text version of it.
# Have a look at the AST of some Python code
# Pretty print AST
#https://github.com/clarketm/pprintast
#%pip install pprintast
from pprintast import pprintast as ppast # OR: from pprintast import ppast
# 2. pretty print AST from a "string".
exp = '''
import os, math
import pandas as pd
from pprintast import pprintast2 as ppast
def test_fn(a, b=1, c=2):
"""Add two numbers"""
out = a+b
print(out)
return out
def test_fn2(a, b=1):
out = a+b
if a>b:
print(a)
else:
print(b)
print(out)
'''
# Pretty-print the AST of the sample source held in `exp`.
ppast(exp)
This gives a pretty printed output that lets us review the AST:
Module(body=[
Import(names=[
alias(name='os', asname=None),
alias(name='math', asname=None),
]),
Import(names=[
alias(name='pandas', asname='pd'),
]),
ImportFrom(module='pprintast', names=[
alias(name='pprintast2', asname='ppast'),
], level=0),
FunctionDef(name='test_fn', args=arguments(posonlyargs=[], args=[
arg(arg='a', annotation=None, type_comment=None),
arg(arg='b', annotation=None, type_comment=None),
arg(arg='c', annotation=None, type_comment=None),
], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[
Constant(value=1, kind=None),
Constant(value=2, kind=None),
]), body=[
Expr(value=Constant(value='Add two numbers', kind=None)),
Assign(targets=[
Name(id='out', ctx=Store()),
], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load())), type_comment=None),
Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
Name(id='out', ctx=Load()),
], keywords=[])),
Return(value=Name(id='out', ctx=Load())),
], decorator_list=[], returns=None, type_comment=None),
FunctionDef(name='test_fn2', args=arguments(posonlyargs=[], args=[
arg(arg='a', annotation=None, type_comment=None),
arg(arg='b', annotation=None, type_comment=None),
], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[
Constant(value=1, kind=None),
]), body=[
Assign(targets=[
Name(id='out', ctx=Store()),
], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load())), type_comment=None),
If(test=Compare(left=Name(id='a', ctx=Load()), ops=[
Gt(),
], comparators=[
Name(id='b', ctx=Load()),
]), body=[
Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
Name(id='a', ctx=Load()),
], keywords=[])),
], orelse=[
Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
Name(id='b', ctx=Load()),
], keywords=[])),
]),
Expr(value=Call(func=Name(id='print', ctx=Load()), args=[
Name(id='out', ctx=Load()),
], keywords=[])),
], decorator_list=[], returns=None, type_comment=None),
], type_ignores=[])
We can now parse that into a dict, for example:
#https://www.mattlayman.com/blog/2018/decipher-python-ast/
import re
import ast
from pprint import pprint
# TO DO update generic_visit to capture other nodes
# A NodeVisitor can respond to any type of node in the Python AST.
# To visit a particular type of node, we must implement a method named visit_<NodeType> (for example, visit_Import).
class Analyzer(ast.NodeVisitor):
    """Collect import and function-definition details from a Python AST.

    Results accumulate in ``self.stats`` under three keys: ``"import"``
    (plain ``import`` statements), ``"from"`` (``from ... import ...``
    statements) and ``"function"`` (one dict per ``def``).

    Args:
        source: The text the visited tree was parsed from; it is used to
            recover source snippets. When omitted, the module-level ``exp``
            string is used, so ``Analyzer()`` keeps working as before.
    """

    def __init__(self, source=None):
        self.source = source
        self.stats = {"import": [], "from": [], "function": []}

    def _segment(self, node):
        """Return the source text of *node*, or None if it has no location."""
        # Fall back to the module-level ``exp`` for backward compatibility.
        text = exp if self.source is None else self.source
        return ast.get_source_segment(text, node)

    def _default_value(self, node):
        """Return a default as a Python value, else as its source text."""
        try:
            # Handles constants plus literals such as -1, [] or (1, 2).
            return ast.literal_eval(node)
        except (ValueError, TypeError):
            # Names, calls, etc. cannot be evaluated safely; keep the text.
            return self._segment(node)

    def visit_Import(self, node):
        """Record each module named in an ``import`` statement."""
        for alias in node.names:
            self.stats["import"].append(
                {'name': alias.name, 'alias': alias.asname}
            )
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Record a ``from <module> import ...`` statement."""
        imports = {
            'from': node.module,
            'import': [
                {'name': alias.name, 'as': alias.asname}
                for alias in node.names
            ],
        }
        self.stats["from"].append(imports)
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        """Record name, docstring, arguments, return value and source.

        Only ``return`` statements directly in the function body are
        considered; if there are several, the last one wins. A bare
        ``return`` is reported as ``None``.
        """
        # ``defaults`` covers positional-only and regular arguments together.
        positional = [*node.args.posonlyargs, *node.args.args]
        args = [a.arg for a in positional]
        defaults = node.args.defaults
        args2 = [self._default_value(d) for d in defaults]

        # Defaults belong to the *last* len(defaults) positional arguments.
        first_default = len(args) - len(defaults)
        argvals = args[:first_default] + [
            f"{name}={self._segment(default)}"
            for name, default in zip(args[first_default:], defaults)
        ]

        ret = None
        for stmt in node.body:
            if isinstance(stmt, ast.Return):
                # Take the returned expression itself rather than stripping
                # the ``return`` keyword from the statement text.
                ret = None if stmt.value is None else self._segment(stmt.value)

        self.stats["function"].append({'name': node.name,
                                       'docstring': ast.get_docstring(node),
                                       'returns': ret,
                                       'args': args,
                                       'args2': args2,
                                       'argvals': argvals,
                                       'src': self._segment(node)})
        # Keep descending so nested function definitions are recorded too.
        self.generic_visit(node)

    def report(self):
        """Pretty-print the collected statistics."""
        pprint(self.stats)
And that then generates output of the form:
# Parse the sample source held in `exp` into an AST.
tree = ast.parse(exp)
# Walk the tree, collecting import and function details into analyzer.stats.
analyzer = Analyzer()
analyzer.visit(tree)
# Pretty-print the collected stats dict.
analyzer.report()
'''
{'from': [{'from': 'pprintast',
'import': [{'as': 'ppast', 'name': 'pprintast2'}]}],
'function': [{'args': ['a', 'b', 'c'],
'args2': [1, 2],
'argvals': ['a', 'b=1', 'c=2'],
'docstring': 'Add two numbers',
'name': 'test_fn',
'returns': 'out',
'src': 'def test_fn(a, b=1, c=2):\n'
' """Add two numbers"""\n'
' out = a+b\n'
' print(out)\n'
' return out'},
{'args': ['a', 'b'],
'args2': [1],
'argvals': ['a', 'b=1'],
'docstring': None,
'name': 'test_fn2',
'returns': None,
'src': 'def test_fn2(a, b=1):\n'
' out = a+b\n'
' if a>b:\n'
' print(a)\n'
' else:\n'
' print(b)\n'
' print(out)'}],
'import': [{'alias': None, 'name': 'os'},
{'alias': None, 'name': 'math'},
{'alias': 'pd', 'name': 'pandas'}]}
'''
It’s not hard to see how we could then convert that to various text sentences, such as:
# N packages are imported directly: os and math without any aliases, pandas with the alias pd
# The pprintast2 package is loaded in from the pprintast module with alias ppast
# Two functions are defined: test_fn, which will add two numbers, and...
# The test_fn function takes three arguments TO DO N required and M optional
It would be trivial to create some magic to wrap all that together, then let the user use a block cell magic such as %%summarise_this_code to generate the text description, or play it out using a simple text-to-speech function.
PS in passing, it’s also worth noting pindent.py
(via) which will add #end of block comments at the end of each code block in a Python program. Backup gist: https://gist.github.com/psychemedia/2c3fe0466aca1f760d67d5ca4f6e00b1