diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index be9949aad77f..3e169a0949bf 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -155,6 +155,7 @@ Python built-in support Django, Web framework Flask, Web framework Tornado, Web framework + Twisted, Web framework PyYAML, Serialization dill, Serialization simplejson, Serialization diff --git a/python/change-notes/2021-06-08-twisted-add-modeling.md b/python/change-notes/2021-06-08-twisted-add-modeling.md new file mode 100644 index 000000000000..364b8c027078 --- /dev/null +++ b/python/change-notes/2021-06-08-twisted-add-modeling.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Added modeling of sources/sinks when using `twisted` to create web servers. diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index 3115c3ffac65..ecbf4eb6c868 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -19,5 +19,6 @@ private import semmle.python.frameworks.PyMySQL private import semmle.python.frameworks.Simplejson private import semmle.python.frameworks.Stdlib private import semmle.python.frameworks.Tornado +private import semmle.python.frameworks.Twisted private import semmle.python.frameworks.Ujson private import semmle.python.frameworks.Yaml diff --git a/python/ql/src/semmle/python/frameworks/Twisted.qll b/python/ql/src/semmle/python/frameworks/Twisted.qll new file mode 100644 index 000000000000..5b014597157d --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Twisted.qll @@ -0,0 +1,250 @@ +/** + * Provides classes modeling security-relevant aspects of the `twisted` PyPI package. + * See https://twistedmatrix.com/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides models for the `twisted` PyPI package. + * See https://twistedmatrix.com/. + */ +private module Twisted { + // --------------------------------------------------------------------------- + // request handler modeling + // --------------------------------------------------------------------------- + /** + * A class that is a subclass of `twisted.web.resource.Resource`, thereby + * being able to handle HTTP requests. + * + * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.resource.Resource.html + */ + class TwistedResourceSubclass extends Class { + TwistedResourceSubclass() { + this.getABase() = + API::moduleImport("twisted") + .getMember("web") + .getMember("resource") + .getMember("Resource") + .getASubclass*() + .getAUse() + .asExpr() + } + + /** Gets a function that could handle incoming requests, if any. */ + Function getARequestHandler() { + // TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with + // points-to and `.lookup`, which would handle `post = my_post_handler` inside class def + result = this.getAMethod() and + exists(getRequestParamIndex(result.getName())) + } + } + + /** + * Gets the index the request parameter is supposed to be at for the method named + * `methodName` in a `twisted.web.resource.Resource` subclass. + */ + bindingset[methodName] + private int getRequestParamIndex(string methodName) { + methodName.matches("render_%") and result = 1 + or + methodName in ["render", "listDynamicEntities", "getChildForRequest"] and result = 1 + or + methodName = ["getDynamicEntity", "getChild", "getChildWithDefault"] and result = 2 + } + + /** A method that handles incoming requests, on a `twisted.web.resource.Resource` subclass. */ + class TwistedResourceRequestHandler extends HTTP::Server::RequestHandler::Range { + TwistedResourceRequestHandler() { this = any(TwistedResourceSubclass cls).getARequestHandler() } + + Parameter getRequestParameter() { result = this.getArg(getRequestParamIndex(this.getName())) } + + override Parameter getARoutedParameter() { none() } + + override string getFramework() { result = "twisted" } + } + + /** + * A "render" method on a `twisted.web.resource.Resource` subclass, whose return value + * is written as the body of the HTTP response. + */ + class TwistedResourceRenderMethod extends TwistedResourceRequestHandler { + TwistedResourceRenderMethod() { + this.getName() = "render" or this.getName().matches("render_%") + } + } + + // --------------------------------------------------------------------------- + // request modeling + // --------------------------------------------------------------------------- + /** + * Provides models for the `twisted.web.server.Request` class + * + * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html + */ + module Request { + /** + * A source of instances of `twisted.web.server.Request`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use `Request::instance()` predicate to get + * references to instances of `twisted.web.server.Request`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** Gets a reference to an instance of `twisted.web.server.Request`. */ + private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `twisted.web.server.Request`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + } + + /** + * A parameter that will receive a `twisted.web.server.Request` instance, + * when a twisted request handler is called. + */ + class TwistedResourceRequestHandlerRequestParam extends RemoteFlowSource::Range, + Request::InstanceSource, DataFlow::ParameterNode { + TwistedResourceRequestHandlerRequestParam() { + this.getParameter() = any(TwistedResourceRequestHandler handler).getRequestParameter() + } + + override string getSourceType() { result = "twisted.web.server.Request" } + } + + /** + * Taint propagation for `twisted.web.server.Request`. + */ + private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // Methods + // + // TODO: When we have tools that make it easy, model these properly to handle + // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach + // (since it allows us to at least capture the most common cases). + nodeFrom = Request::instance() and + exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | + // normal (non-async) methods + attr.getAttributeName() in [ + "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", + "getRequestHostname" + ] and + nodeTo.(DataFlow::CallCfgNode).getFunction() = attr + ) + or + // Attributes + nodeFrom = Request::instance() and + nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and + nodeTo.(DataFlow::AttrRead).getAttributeName() in [ + "uri", "path", "prepath", "postpath", "content", "args", "received_cookies", + "requestHeaders", "user", "password", "host" + ] + } + } + + /** + * A parameter of a request handler method (on a `twisted.web.resource.Resource` subclass) + * that is also given remote user input. (a bit like RoutedParameter). + */ + class TwistedResourceRequestHandlerExtraSources extends RemoteFlowSource::Range, + DataFlow::ParameterNode { + TwistedResourceRequestHandlerExtraSources() { + exists(TwistedResourceRequestHandler func, int i | + func.getName() in ["getChild", "getChildWithDefault"] and i = 1 + or + func.getName() = "getDynamicEntity" and i = 1 + | + this.getParameter() = func.getArg(i) + ) + } + + override string getSourceType() { result = "twisted Resource method extra parameter" } + } + + // --------------------------------------------------------------------------- + // response modeling + // --------------------------------------------------------------------------- + /** + * Implicit response from returns of render methods. + */ + private class TwistedResourceRenderMethodReturn extends HTTP::Server::HttpResponse::Range, + DataFlow::CfgNode { + TwistedResourceRenderMethodReturn() { + this.asCfgNode() = any(TwistedResourceRenderMethod meth).getAReturnValueFlowNode() + } + + override DataFlow::Node getBody() { result = this } + + override DataFlow::Node getMimetypeOrContentTypeArg() { none() } + + override string getMimetypeDefault() { result = "text/html" } + } + + /** + * A call to the `twisted.web.server.Request.write` function. + * + * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write + */ + class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode { + TwistedRequestWriteCall() { + // TODO: When we have tools that make it easy, model these properly to handle + // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach + // (since it allows us to at least capture the most common cases). + exists(DataFlow::AttrRead read | + this.getFunction() = read and + read.getObject() = Request::instance() and + read.getAttributeName() = "write" + ) + } + + override DataFlow::Node getBody() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("data")] + } + + override DataFlow::Node getMimetypeOrContentTypeArg() { none() } + + override string getMimetypeDefault() { result = "text/html" } + } + + /** + * A call to the `redirect` function on a twisted request. + * + * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect + */ + class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range, + DataFlow::CallCfgNode { + TwistedRequestRedirectCall() { + // TODO: When we have tools that make it easy, model these properly to handle + // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach + // (since it allows us to at least capture the most common cases). + exists(DataFlow::AttrRead read | + this.getFunction() = read and + read.getObject() = Request::instance() and + read.getAttributeName() = "redirect" + ) + } + + override DataFlow::Node getBody() { none() } + + override DataFlow::Node getRedirectLocation() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("url")] + } + + override DataFlow::Node getMimetypeOrContentTypeArg() { none() } + + override string getMimetypeDefault() { result = "text/html" } + } +} diff --git a/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.ql new file mode 100644 index 000000000000..1e2c1fab3eeb --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.ql @@ -0,0 +1,12 @@ +import python +import experimental.meta.ConceptsTest + +class DedicatedResponseTest extends HttpServerHttpResponseTest { + DedicatedResponseTest() { file.getShortName() = "response_test.py" } +} + +class OtherResponseTest extends HttpServerHttpResponseTest { + OtherResponseTest() { not this instanceof DedicatedResponseTest } + + override string getARelevantTag() { result = "HttpResponse" } +} diff --git a/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.expected b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.expected new file mode 100644 index 000000000000..79d760d87f42 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.ql b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.ql new file mode 100644 index 000000000000..027ad8667be6 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.ql @@ -0,0 +1 @@ +import experimental.meta.InlineTaintTest diff --git a/python/ql/test/library-tests/frameworks/twisted/response_test.py b/python/ql/test/library-tests/frameworks/twisted/response_test.py new file mode 100644 index 000000000000..f845f7f693a2 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/response_test.py @@ -0,0 +1,75 @@ +from twisted.web.server import Site, Request, NOT_DONE_YET +from twisted.web.resource import Resource +from twisted.internet import reactor, endpoints, defer + + +root = Resource() + +class Now(Resource): + def render(self, request: Request): # $ requestHandler + return b"now" # $ HttpResponse mimetype=text/html responseBody=b"now" + + +class AlsoNow(Resource): + def render(self, request: Request): # $ requestHandler + request.write(b"also now") # $ HttpResponse mimetype=text/html responseBody=b"also now" + return b"" # $ HttpResponse mimetype=text/html responseBody=b"" + + +def process_later(request: Request): + print("process_later called") + request.write(b"later") # $ MISSING: responseBody=b"later" + request.finish() + + +class Later(Resource): + def render(self, request: Request): # $ requestHandler + # process the request in 1 second + print("setting up callback for process_later") + reactor.callLater(1, process_later, request) + return NOT_DONE_YET # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=NOT_DONE_YET + + +class PlainText(Resource): + def render(self, request: Request): # $ requestHandler + request.setHeader(b"content-type", "text/plain") + return b"this is plain text" # $ HttpResponse responseBody=b"this is plain text" SPURIOUS: mimetype=text/html MISSING: mimetype=text/plain + + +class Redirect(Resource): + def render_GET(self, request: Request): # $ requestHandler + request.redirect("/new-location") # $ HttpRedirectResponse redirectLocation="/new-location" HttpResponse mimetype=text/html + # By default, this `hello` output is not returned... not even when + # requested with curl. + return b"hello" # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=b"hello" + + +class NonHttpBodyOutput(Resource): + """Examples of providing values in response that is not in the body + """ + def render_GET(self, request: Request): # $ requestHandler + request.responseHeaders.addRawHeader("key", "value") + request.setHeader("key2", "value") + + request.addCookie("key", "value") + request.cookies.append(b"key2=value") + + return b"" # $ HttpResponse mimetype=text/html responseBody=b"" + + +root.putChild(b"now", Now()) +root.putChild(b"also-now", AlsoNow()) +root.putChild(b"later", Later()) +root.putChild(b"plain-text", PlainText()) +root.putChild(b"redirect", Redirect()) +root.putChild(b"non-body", NonHttpBodyOutput()) + + +if __name__ == "__main__": + factory = Site(root) + endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880) + endpoint.listen(factory) + + print("Will run on http://localhost:8880") + + reactor.run() diff --git a/python/ql/test/library-tests/frameworks/twisted/routing_test.py b/python/ql/test/library-tests/frameworks/twisted/routing_test.py new file mode 100644 index 000000000000..bcbcc6521afc --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/routing_test.py @@ -0,0 +1,47 @@ +from twisted.web.server import Site, Request +from twisted.web.resource import Resource +from twisted.internet import reactor, endpoints + + +root = Resource() + + +class Foo(Resource): + def render(self, request: Request): # $ requestHandler + print(f"{request.content=}") + print(f"{request.cookies=}") + print(f"{request.received_cookies=}") + return b"I am Foo" # $ HttpResponse + + +root.putChild(b"foo", Foo()) + + +class Child(Resource): + def __init__(self, name): + self.name = name.decode("utf-8") + + def render_GET(self, request): # $ requestHandler + return f"Hi, I'm child '{self.name}'".encode("utf-8") # $ HttpResponse + + +class Parent(Resource): + def getChild(self, path, request): # $ requestHandler + print(path, type(path)) + return Child(path) + + def render_GET(self, request): # $ requestHandler + return b"Hi, I'm parent" # $ HttpResponse + + +root.putChild(b"parent", Parent()) + + +if __name__ == "__main__": + factory = Site(root) + endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880) + endpoint.listen(factory) + + print("Will run on http://localhost:8880") + + reactor.run() diff --git a/python/ql/test/library-tests/frameworks/twisted/taint_test.py b/python/ql/test/library-tests/frameworks/twisted/taint_test.py new file mode 100644 index 000000000000..cdc6592f90e4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/twisted/taint_test.py @@ -0,0 +1,70 @@ +from twisted.web.resource import Resource +from twisted.web.server import Request + +class MyTaintTest(Resource): + def getChild(self, path, request): # $ requestHandler + ensure_tainted(path, request) # $ tainted + + def render(self, request): # $ requestHandler + ensure_tainted(request) # $ tainted + + def render_GET(self, request: Request): # $ requestHandler + # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html + ensure_tainted( + request, # $ tainted + + request.uri, # $ tainted + request.path, # $ tainted + request.prepath, # $ tainted + request.postpath, # $ tainted + + # file-like + request.content, # $ tainted + request.content.read(), # $ MISSING: tainted + + # Dict[bytes, List[bytes]] (for query args) + request.args, # $ tainted + request.args[b"key"], # $ tainted + request.args[b"key"][0], # $ tainted + request.args.get(b"key"), # $ tainted + request.args.get(b"key")[0], # $ tainted + + request.received_cookies, # $ tainted + request.received_cookies["key"], # $ tainted + request.received_cookies.get("key"), # $ tainted + request.getCookie(b"key"), # $ tainted + + # twisted.web.http_headers.Headers + # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http_headers.Headers.html + request.requestHeaders, # $ tainted + request.requestHeaders.getRawHeaders("key"), # $ MISSING: tainted + request.requestHeaders.getRawHeaders("key")[0], # $ MISSING: tainted + request.requestHeaders.getAllRawHeaders(), # $ MISSING: tainted + list(request.requestHeaders.getAllRawHeaders()), # $ MISSING: tainted + + request.getHeader("key"), # $ tainted + request.getAllHeaders(), # $ tainted + request.getAllHeaders()["key"], # $ tainted + + request.user, # $ tainted + request.getUser(), # $ tainted + + request.password, # $ tainted + request.getPassword(), # $ tainted + + request.host, # $ tainted + request.getHost(), # $ tainted + request.getRequestHostname(), # $ tainted + ) + + # technically user-controlled, but unlikely to lead to vulnerabilities. + ensure_not_tainted( + request.method, + ) + + # not tainted at all + ensure_not_tainted( + # outgoing things + request.cookies, + request.responseHeaders, + )