From 4fdc1ecc57a13edba18af33ca275457ed9f57cfa Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:17:04 +0200 Subject: [PATCH] Added `pyclbr` from 3.13.5 --- Lib/pyclbr.py | 314 +++++++++++++++++++++++++++++++++++++++ Lib/test/pyclbr_input.py | 85 +++++++++++ Lib/test/test_pyclbr.py | 272 +++++++++++++++++++++++++++++++++ 3 files changed, 671 insertions(+) create mode 100644 Lib/pyclbr.py create mode 100644 Lib/test/pyclbr_input.py create mode 100644 Lib/test/test_pyclbr.py diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py new file mode 100644 index 0000000000..37f86995d6 --- /dev/null +++ b/Lib/pyclbr.py @@ -0,0 +1,314 @@ +"""Parse a Python module and describe its classes and functions. + +Parse enough of a Python file to recognize imports and class and +function definitions, and to find out the superclasses of a class. + +The interface consists of a single function: + readmodule_ex(module, path=None) +where module is the name of a Python module, and path is an optional +list of directories where the module is to be searched. If present, +path is prepended to the system search path sys.path. The return value +is a dictionary. The keys of the dictionary are the names of the +classes and functions defined in the module (including classes that are +defined via the from XXX import YYY construct). The values are +instances of classes Class and Function. One special key/value pair is +present for packages: the key '__path__' has a list as its value which +contains the package search path. + +Classes and Functions have a common superclass: _Object. 
Every instance +has the following attributes: + module -- name of the module; + name -- name of the object; + file -- file in which the object is defined; + lineno -- line in the file where the object's definition starts; + end_lineno -- line in the file where the object's definition ends; + parent -- parent of this object, if any; + children -- nested objects contained in this object. +The 'children' attribute is a dictionary mapping names to objects. + +Instances of Function describe functions with the attributes from _Object, +plus the following: + is_async -- if a function is defined with an 'async' prefix + +Instances of Class describe classes with the attributes from _Object, +plus the following: + super -- list of super classes (Class instances if possible); + methods -- mapping of method names to beginning line numbers. +If the name of a super class is not recognized, the corresponding +entry in the list of super classes is not a class instance but a +string giving the name of the super class. Since import statements +are recognized and imported modules are scanned as well, this +shouldn't happen often. +""" + +import ast +import sys +import importlib.util + +__all__ = ["readmodule", "readmodule_ex", "Class", "Function"] + +_modules = {} # Initialize cache of modules we've seen. + + +class _Object: + "Information about Python class or function." + def __init__(self, module, name, file, lineno, end_lineno, parent): + self.module = module + self.name = name + self.file = file + self.lineno = lineno + self.end_lineno = end_lineno + self.parent = parent + self.children = {} + if parent is not None: + parent.children[name] = self + + +# Odd Function and Class signatures are for back-compatibility. +class Function(_Object): + "Information about a Python function, including methods." 
+ def __init__(self, module, name, file, lineno, + parent=None, is_async=False, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.is_async = is_async + if isinstance(parent, Class): + parent.methods[name] = lineno + + +class Class(_Object): + "Information about a Python class." + def __init__(self, module, name, super_, file, lineno, + parent=None, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.super = super_ or [] + self.methods = {} + + +# These 2 functions are used in these tests +# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py +def _nest_function(ob, func_name, lineno, end_lineno, is_async=False): + "Return a Function after nesting within ob." + return Function(ob.module, func_name, ob.file, lineno, + parent=ob, is_async=is_async, end_lineno=end_lineno) + +def _nest_class(ob, class_name, lineno, end_lineno, super=None): + "Return a Class after nesting within ob." + return Class(ob.module, class_name, super, ob.file, lineno, + parent=ob, end_lineno=end_lineno) + + +def readmodule(module, path=None): + """Return Class objects for the top-level classes in module. + + This is the original interface, before Functions were added. + """ + + res = {} + for key, value in _readmodule(module, path or []).items(): + if isinstance(value, Class): + res[key] = value + return res + +def readmodule_ex(module, path=None): + """Return a dictionary with all functions and classes in module. + + Search for module in PATH + sys.path. + If possible, include imported superclasses. + Do this by reading source, without importing (and executing) it. + """ + return _readmodule(module, path or []) + + +def _readmodule(module, path, inpackage=None): + """Do the hard work for readmodule[_ex]. 
+ + If inpackage is given, it must be the dotted name of the package in + which we are searching for a submodule, and then PATH must be the + package search path; otherwise, we are searching for a top-level + module, and path is combined with sys.path. + """ + # Compute the full module name (prepending inpackage if set). + if inpackage is not None: + fullmodule = "%s.%s" % (inpackage, module) + else: + fullmodule = module + + # Check in the cache. + if fullmodule in _modules: + return _modules[fullmodule] + + # Initialize the dict for this module's contents. + tree = {} + + # Check if it is a built-in module; we don't do much for these. + if module in sys.builtin_module_names and inpackage is None: + _modules[module] = tree + return tree + + # Check for a dotted module name. + i = module.rfind('.') + if i >= 0: + package = module[:i] + submodule = module[i+1:] + parent = _readmodule(package, path, inpackage) + if inpackage is not None: + package = "%s.%s" % (inpackage, package) + if not '__path__' in parent: + raise ImportError('No package named {}'.format(package)) + return _readmodule(submodule, parent['__path__'], package) + + # Search the path for the module. + f = None + if inpackage is not None: + search_path = path + else: + search_path = path + sys.path + spec = importlib.util._find_spec_from_path(fullmodule, search_path) + if spec is None: + raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule) + _modules[fullmodule] = tree + # Is module a package? + if spec.submodule_search_locations is not None: + tree['__path__'] = spec.submodule_search_locations + try: + source = spec.loader.get_source(fullmodule) + except (AttributeError, ImportError): + # If module is not Python source, we cannot do anything. 
+ return tree + else: + if source is None: + return tree + + fname = spec.loader.get_filename(fullmodule) + return _create_tree(fullmodule, path, fname, source, tree, inpackage) + + +class _ModuleBrowser(ast.NodeVisitor): + def __init__(self, module, path, file, tree, inpackage): + self.path = path + self.tree = tree + self.file = file + self.module = module + self.inpackage = inpackage + self.stack = [] + + def visit_ClassDef(self, node): + bases = [] + for base in node.bases: + name = ast.unparse(base) + if name in self.tree: + # We know this super class. + bases.append(self.tree[name]) + elif len(names := name.split(".")) > 1: + # Super class form is module.class: + # look in module for class. + *_, module, class_ = names + if module in _modules: + bases.append(_modules[module].get(class_, name)) + else: + bases.append(name) + + parent = self.stack[-1] if self.stack else None + class_ = Class(self.module, node.name, bases, self.file, node.lineno, + parent=parent, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = class_ + self.stack.append(class_) + self.generic_visit(node) + self.stack.pop() + + def visit_FunctionDef(self, node, *, is_async=False): + parent = self.stack[-1] if self.stack else None + function = Function(self.module, node.name, self.file, node.lineno, + parent, is_async, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = function + self.stack.append(function) + self.generic_visit(node) + self.stack.pop() + + def visit_AsyncFunctionDef(self, node): + self.visit_FunctionDef(node, is_async=True) + + def visit_Import(self, node): + if node.col_offset != 0: + return + + for module in node.names: + try: + try: + _readmodule(module.name, self.path, self.inpackage) + except ImportError: + _readmodule(module.name, []) + except (ImportError, SyntaxError): + # If we can't find or parse the imported module, + # too bad -- don't die here. 
+ continue + + def visit_ImportFrom(self, node): + if node.col_offset != 0: + return + try: + module = "." * node.level + if node.module: + module += node.module + module = _readmodule(module, self.path, self.inpackage) + except (ImportError, SyntaxError): + return + + for name in node.names: + if name.name in module: + self.tree[name.asname or name.name] = module[name.name] + elif name.name == "*": + for import_name, import_value in module.items(): + if import_name.startswith("_"): + continue + self.tree[import_name] = import_value + + +def _create_tree(fullmodule, path, fname, source, tree, inpackage): + mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage) + mbrowser.visit(ast.parse(source)) + return mbrowser.tree + + +def _main(): + "Print module output (default this file) for quick visual check." + import os + try: + mod = sys.argv[1] + except: + mod = __file__ + if os.path.exists(mod): + path = [os.path.dirname(mod)] + mod = os.path.basename(mod) + if mod.lower().endswith(".py"): + mod = mod[:-3] + else: + path = [] + tree = readmodule_ex(mod, path) + lineno_key = lambda a: getattr(a, 'lineno', 0) + objs = sorted(tree.values(), key=lineno_key, reverse=True) + indent_level = 2 + while objs: + obj = objs.pop() + if isinstance(obj, list): + # Value is a __path__ key. 
+ continue + if not hasattr(obj, 'indent'): + obj.indent = 0 + + if isinstance(obj, _Object): + new_objs = sorted(obj.children.values(), + key=lineno_key, reverse=True) + for ob in new_objs: + ob.indent = obj.indent + indent_level + objs.extend(new_objs) + if isinstance(obj, Class): + print("{}class {} {} {}" + .format(' ' * obj.indent, obj.name, obj.super, obj.lineno)) + elif isinstance(obj, Function): + print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno)) + +if __name__ == "__main__": + _main() diff --git a/Lib/test/pyclbr_input.py b/Lib/test/pyclbr_input.py new file mode 100644 index 0000000000..5535edbfa7 --- /dev/null +++ b/Lib/test/pyclbr_input.py @@ -0,0 +1,85 @@ +"""Test cases for test_pyclbr.py""" + +def f(): pass + +class Other(object): + @classmethod + def foo(c): pass + + def om(self): pass + +class B (object): + def bm(self): pass + +class C (B): + d = 10 + + # This one is correctly considered by both test_pyclbr.py and pyclbr.py + # as a non-method of C. + foo = Other().foo + + # This causes test_pyclbr.py to fail, but only because the + # introspection-based is_method() code in the test can't + # distinguish between this and a genuine method function like m(). + # + # The pyclbr.py module gets this right as it parses the text. 
+ om = Other.om + f = f + + def m(self): pass + + @staticmethod + def sm(self): pass + + @classmethod + def cm(self): pass + +# Check that mangling is correctly handled + +class a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class ___: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py new file mode 100644 index 0000000000..ad26fe1dba --- /dev/null +++ b/Lib/test/test_pyclbr.py @@ -0,0 +1,272 @@ +''' + Test cases for pyclbr.py + Nick Mathewson +''' + +import sys +from textwrap import dedent +from types import FunctionType, MethodType, BuiltinFunctionType +import pyclbr +from unittest import TestCase, main as unittest_main +from test.test_importlib import util as test_importlib_util +import warnings +from test.support.testcase import ExtraAssertions + +import unittest # TODO: RUSTPYTHON + + +StaticMethodType = type(staticmethod(lambda: None)) +ClassMethodType = type(classmethod(lambda c: None)) + +# Here we test the python class browser code. +# +# The main function in this suite, 'checkModule', compares the output +# of pyclbr with the introspected members of a module. Because pyclbr +# is imperfect (as designed), checkModule is called with a set of +# members to ignore. 
+ +class PyclbrTest(TestCase, ExtraAssertions): + + def assertListEq(self, l1, l2, ignore): + ''' succeed iff {l1} - {ignore} == {l2} - {ignore} ''' + missing = (set(l1) ^ set(l2)) - set(ignore) + if missing: + print("l1=%r\nl2=%r\nignore=%r" % (l1, l2, ignore), file=sys.stderr) + self.fail("%r missing" % missing.pop()) + + def assertHaskey(self, obj, key, ignore): + ''' succeed iff key in obj or key in ignore. ''' + if key in ignore: return + if key not in obj: + print("***",key, file=sys.stderr) + self.assertIn(key, obj) + + def assertEqualsOrIgnored(self, a, b, ignore): + ''' succeed iff a == b or a in ignore or b in ignore ''' + if a not in ignore and b not in ignore: + self.assertEqual(a, b) + + def checkModule(self, moduleName, module=None, ignore=()): + ''' succeed iff pyclbr.readmodule_ex(modulename) corresponds + to the actual module object, module. Any identifiers in + ignore are ignored. If no module is provided, the appropriate + module is loaded with __import__.''' + + ignore = set(ignore) | set(['object']) + + if module is None: + # Import it. + # ('' is to work around an API silliness in __import__) + module = __import__(moduleName, globals(), {}, ['']) + + dict = pyclbr.readmodule_ex(moduleName) + + def ismethod(oclass, obj, name): + classdict = oclass.__dict__ + if isinstance(obj, MethodType): + # could be a classmethod + if (not isinstance(classdict[name], ClassMethodType) or + obj.__self__ is not oclass): + return False + elif not isinstance(obj, FunctionType): + return False + + objname = obj.__name__ + if objname.startswith("__") and not objname.endswith("__"): + if stripped_typename := oclass.__name__.lstrip('_'): + objname = f"_{stripped_typename}{objname}" + return objname == name + + # Make sure the toplevel functions and classes are the same. 
+ for name, value in dict.items(): + if name in ignore: + continue + self.assertHasAttr(module, name, ignore) + py_item = getattr(module, name) + if isinstance(value, pyclbr.Function): + self.assertIsInstance(py_item, (FunctionType, BuiltinFunctionType)) + if py_item.__module__ != moduleName: + continue # skip functions that came from somewhere else + self.assertEqual(py_item.__module__, value.module) + else: + self.assertIsInstance(py_item, type) + if py_item.__module__ != moduleName: + continue # skip classes that came from somewhere else + + real_bases = [base.__name__ for base in py_item.__bases__] + pyclbr_bases = [ getattr(base, 'name', base) + for base in value.super ] + + try: + self.assertListEq(real_bases, pyclbr_bases, ignore) + except: + print("class=%s" % py_item, file=sys.stderr) + raise + + actualMethods = [] + for m in py_item.__dict__.keys(): + if ismethod(py_item, getattr(py_item, m), m): + actualMethods.append(m) + + if stripped_typename := name.lstrip('_'): + foundMethods = [] + for m in value.methods.keys(): + if m.startswith('__') and not m.endswith('__'): + foundMethods.append(f"_{stripped_typename}{m}") + else: + foundMethods.append(m) + else: + foundMethods = list(value.methods.keys()) + + try: + self.assertListEq(foundMethods, actualMethods, ignore) + self.assertEqual(py_item.__module__, value.module) + + self.assertEqualsOrIgnored(py_item.__name__, value.name, + ignore) + # can't check file or lineno + except: + print("class=%s" % py_item, file=sys.stderr) + raise + + # Now check for missing stuff. 
+ def defined_in(item, module): + if isinstance(item, type): + return item.__module__ == module.__name__ + if isinstance(item, FunctionType): + return item.__globals__ is module.__dict__ + return False + for name in dir(module): + item = getattr(module, name) + if isinstance(item, (type, FunctionType)): + if defined_in(item, module): + self.assertHaskey(dict, name, ignore) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_easy(self): + self.checkModule('pyclbr') + # XXX: Metaclasses are not supported + # self.checkModule('ast') + self.checkModule('doctest', ignore=("TestResults", "_SpoofOut", + "DocTestCase", '_DocTestSuite')) + self.checkModule('difflib', ignore=("Match",)) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_cases(self): + # see test.pyclbr_input for the rationale behind the ignored symbols + self.checkModule('test.pyclbr_input', ignore=['om', 'f']) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_nested(self): + mb = pyclbr + # Set arguments for descriptor creation and _create_tree call. + m, p, f, t, i = 'test', '', 'test.py', {}, None + source = dedent("""\ + def f0(): + def f1(a,b,c): + def f2(a=1, b=2, c=3): pass + return f1(a,b,d) + class c1: pass + class C0: + "Test class." + def F1(): + "Method." + return 'return' + class C1(): + class C2: + "Class nested within nested class." + def F3(): return 1+1 + + """) + actual = mb._create_tree(m, p, f, source, t, i) + + # Create descriptors, linked together, and expected dict. 
+ f0 = mb.Function(m, 'f0', f, 1, end_lineno=5) + f1 = mb._nest_function(f0, 'f1', 2, 4) + f2 = mb._nest_function(f1, 'f2', 3, 3) + c1 = mb._nest_class(f0, 'c1', 5, 5) + C0 = mb.Class(m, 'C0', None, f, 6, end_lineno=14) + F1 = mb._nest_function(C0, 'F1', 8, 10) + C1 = mb._nest_class(C0, 'C1', 11, 14) + C2 = mb._nest_class(C1, 'C2', 12, 14) + F3 = mb._nest_function(C2, 'F3', 14, 14) + expected = {'f0':f0, 'C0':C0} + + def compare(parent1, children1, parent2, children2): + """Return equality of tree pairs. + + Each parent,children pair define a tree. The parents are + assumed equal. Comparing the children dictionaries as such + does not work due to comparison by identity and double + linkage. We separate comparing string and number attributes + from comparing the children of input children. + """ + self.assertEqual(children1.keys(), children2.keys()) + for ob in children1.values(): + self.assertIs(ob.parent, parent1) + for ob in children2.values(): + self.assertIs(ob.parent, parent2) + for key in children1.keys(): + o1, o2 = children1[key], children2[key] + t1 = type(o1), o1.name, o1.file, o1.module, o1.lineno, o1.end_lineno + t2 = type(o2), o2.name, o2.file, o2.module, o2.lineno, o2.end_lineno + self.assertEqual(t1, t2) + if type(o1) is mb.Class: + self.assertEqual(o1.methods, o2.methods) + # Skip superclasses for now as not part of example + compare(o1, o1.children, o2, o2.children) + + compare(None, actual, None, expected) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_others(self): + cm = self.checkModule + + # These were once some of the longest modules. 
+ cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator + cm('pickle', ignore=('partial', 'PickleBuffer')) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property + cm( + 'pdb', + # pyclbr does not handle elegantly `typing` or properties + ignore=('Union', '_ModuleTarget', '_ScriptTarget', '_ZipTarget'), + ) + cm('pydoc', ignore=('input', 'output',)) # properties + + # Tests for modules inside packages + cm('email.parser') + cm('test.test_pyclbr') + + +class ReadmoduleTests(TestCase): + + def setUp(self): + self._modules = pyclbr._modules.copy() + + def tearDown(self): + pyclbr._modules = self._modules + + + def test_dotted_name_not_a_package(self): + # test ImportError is raised when the first part of a dotted name is + # not a package. + # + # Issue #14798. + self.assertRaises(ImportError, pyclbr.readmodule_ex, 'asyncio.foo') + + def test_module_has_no_spec(self): + module_name = "doesnotexist" + assert module_name not in pyclbr._modules + with test_importlib_util.uncache(module_name): + with self.assertRaises(ModuleNotFoundError): + pyclbr.readmodule_ex(module_name) + + +if __name__ == "__main__": + unittest_main()