From 62591e469ec13be5ccb7a70a34ca03942a2262f5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 17 Jun 2019 12:25:36 +0100 Subject: [PATCH] Python: Avoid duplicate modules in points-to and resulting blow-up. --- python/ql/src/semmle/python/Module.qll | 28 +++++++++++++++++++ .../ql/src/semmle/python/objects/TObject.qll | 5 ++-- .../modules/duplicate_name/Modules.expected | 3 ++ .../modules/duplicate_name/Modules.ql | 7 +++++ .../modules/duplicate_name/options | 2 ++ .../modules/duplicate_name/test.py | 1 + .../duplicate_name/venv/sqlite3/__init__.py | 0 .../duplicate_name/venv/sqlite3/dump.py | 0 8 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 python/ql/test/library-tests/modules/duplicate_name/Modules.expected create mode 100644 python/ql/test/library-tests/modules/duplicate_name/Modules.ql create mode 100644 python/ql/test/library-tests/modules/duplicate_name/options create mode 100644 python/ql/test/library-tests/modules/duplicate_name/test.py create mode 100644 python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/__init__.py create mode 100644 python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/dump.py diff --git a/python/ql/src/semmle/python/Module.qll b/python/ql/src/semmle/python/Module.qll index d8d3a93b473e..41fd64997fcc 100644 --- a/python/ql/src/semmle/python/Module.qll +++ b/python/ql/src/semmle/python/Module.qll @@ -195,6 +195,7 @@ class Module extends Module_, Scope, AstNode { } + bindingset[name] private predicate legalDottedName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*(\\.(\\p{L}|_)(\\p{L}|\\d|_)*)*") @@ -244,3 +245,30 @@ private predicate isStubRoot(Folder f) { f.getAbsolutePath().matches("%/data/python/stubs") } + +/** Holds if the Container `c` should be the preferred file or folder for + * the given name when performing imports. + * Trivially true for any container if it is the only one with its name. + * However, if there are several modules with the same name, then + * this is the module most likely to be imported under that name. + */ +predicate isPreferredModuleForName(Container c, string name) { + exists(int p | + p = min(int x | x = priorityForName(_, name)) and + p = priorityForName(c, name) + ) +} + +private int priorityForName(Container c, string name) { + name = moduleNameFromFile(c) and + ( + // In the source + exists(c.getRelativePath()) and result = -1 + or + // On an import path + exists(c.getImportRoot(result)) + or + // Otherwise + result = 10000 + ) +} diff --git a/python/ql/src/semmle/python/objects/TObject.qll b/python/ql/src/semmle/python/objects/TObject.qll index 5dbe08e8e7fe..27f09e5f3ac1 100644 --- a/python/ql/src/semmle/python/objects/TObject.qll +++ b/python/ql/src/semmle/python/objects/TObject.qll @@ -47,12 +47,13 @@ cached newtype TObject = or /* Package objects */ TPackageObject(Folder f) { - exists(moduleNameFromFile(f)) + isPreferredModuleForName(f, _) } or /* Python module objects */ TPythonModule(Module m) { - not m.isPackage() and not exists(SyntaxError se | se.getFile() = m.getFile()) + not m.isPackage() and isPreferredModuleForName(m.getFile(), _) and + not exists(SyntaxError se | se.getFile() = m.getFile()) } or /* `True` */ diff --git a/python/ql/test/library-tests/modules/duplicate_name/Modules.expected b/python/ql/test/library-tests/modules/duplicate_name/Modules.expected new file mode 100644 index 000000000000..45bd0378c226 --- /dev/null +++ b/python/ql/test/library-tests/modules/duplicate_name/Modules.expected @@ -0,0 +1,3 @@ +| sqlite3 | 2 | 1 | +| sqlite3.__init__ | 2 | 1 | +| sqlite3.dump | 2 | 1 | diff --git a/python/ql/test/library-tests/modules/duplicate_name/Modules.ql b/python/ql/test/library-tests/modules/duplicate_name/Modules.ql new file mode 100644 index 000000000000..536689d0f907 --- /dev/null +++ b/python/ql/test/library-tests/modules/duplicate_name/Modules.ql @@ -0,0 +1,7 @@ + +import python + +from string name, int mcnt +where mcnt = strictcount(Module m | m.getName() = name) and mcnt > 1 +select name, mcnt, strictcount(ModuleValue val | val.getName() = name) + diff --git a/python/ql/test/library-tests/modules/duplicate_name/options b/python/ql/test/library-tests/modules/duplicate_name/options new file mode 100644 index 000000000000..ebe93df725a6 --- /dev/null +++ b/python/ql/test/library-tests/modules/duplicate_name/options @@ -0,0 +1,2 @@ +semmle-extractor-options: -R . +optimize: true diff --git a/python/ql/test/library-tests/modules/duplicate_name/test.py b/python/ql/test/library-tests/modules/duplicate_name/test.py new file mode 100644 index 000000000000..e0931b9009ed --- /dev/null +++ b/python/ql/test/library-tests/modules/duplicate_name/test.py @@ -0,0 +1 @@ +import sqlite3.dump diff --git a/python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/__init__.py b/python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/dump.py b/python/ql/test/library-tests/modules/duplicate_name/venv/sqlite3/dump.py new file mode 100644 index 000000000000..e69de29bb2d1