OwlCyberSecurity - MANAGER
Edit File: http.py
# -*- test-case-name: txweb2.test.test_http -*- ## # Copyright (c) 2001-2004 Twisted Matrix Laboratories. # Copyright (c) 2010-2017 Apple Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # ## """HyperText Transfer Protocol implementation. The second coming. Maintainer: James Y Knight """ # import traceback; log.info(''.join(traceback.format_stack())) import json import time from twisted.internet import interfaces, error from twisted.python import components from twisted.web.template import Element, XMLString, renderer, flattenString from zope.interface import implements from twext.python.log import Logger from txweb2 import responsecode from txweb2 import http_headers from txweb2 import iweb from txweb2 import stream from txweb2.stream import IByteStream, readAndDiscard log = Logger() defaultPortForScheme = {'http': 80, 'https': 443, 'ftp': 21} def splitHostPort(scheme, hostport): """Split the host in "host:port" format into host and port fields. If port was not specified, use the default for the given scheme, if known. Returns a tuple of (hostname, portnumber).""" # Split hostport into host and port hostport = hostport.split(':', 1) try: if len(hostport) == 2: return hostport[0], int(hostport[1]) except ValueError: pass return hostport[0], defaultPortForScheme.get(scheme, 0) def parseVersion(strversion): """Parse version strings of the form Protocol '/' Major '.' Minor. E.g. 'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError on bad syntax.""" proto, strversion = strversion.split('/') major, minor = strversion.split('.') major, minor = int(major), int(minor) if major < 0 or minor < 0: raise ValueError("negative number") return (proto.lower(), major, minor) class HTTPError(Exception): def __init__(self, codeOrResponse): """An Exception for propagating HTTP Error Responses. @param codeOrResponse: The numeric HTTP code or a complete http.Response object. @type codeOrResponse: C{int} or L{http.Response} """ self.response = iweb.IResponse(codeOrResponse) Exception.__init__(self, str(self.response)) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.response) class Response(object): """An object representing an HTTP Response to be sent to the client. """ implements(iweb.IResponse) code = responsecode.OK headers = None stream = None def __init__(self, code=None, headers=None, stream=None): """ @param code: The HTTP status code for this Response @type code: C{int} @param headers: Headers to be sent to the client. @type headers: C{dict}, L{txweb2.http_headers.Headers}, or C{None} @param stream: Content body to send to the HTTP client @type stream: L{txweb2.stream.IByteStream} """ if code is not None: self.code = int(code) if headers is not None: if isinstance(headers, dict): headers = http_headers.Headers(headers) self.headers = headers else: self.headers = http_headers.Headers() if stream is not None: self.stream = IByteStream(stream) def __repr__(self): if self.stream is None: streamlen = None else: streamlen = self.stream.length return "<%s.%s code=%d, streamlen=%s>" % (self.__module__, self.__class__.__name__, self.code, streamlen) class StatusResponseElement(Element): """ Render the HTML for a L{StatusResponse} """ loader = XMLString("""<html xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1" t:render="response"><head><title><t:slot name="title" /></title></head><body><h1><t:slot name="title" /></h1><p><t:slot name="description" /></p></body></html>""") def __init__(self, title, description): super(StatusResponseElement, self).__init__() self.title = title self.description = description @renderer def response(self, request, tag): """ Top-level renderer. """ return tag.fillSlots(title=self.title, description=self.description) class StatusResponse (Response): """ A L{Response} object which simply contains a status code and a description of what happened. """ def __init__(self, code, description, title=None): """ @param code: a response code in L{responsecode.RESPONSES}. @param description: a string description. @param title: the message title. If not specified or C{None}, defaults to C{responsecode.RESPONSES[code]}. """ if title is None: title = responsecode.RESPONSES[code] element = StatusResponseElement(title, description) out = [] flattenString(None, element).addCallback(out.append) mime_params = {"charset": "utf-8"} super(StatusResponse, self).__init__(code=code, stream=out[0]) self.headers.setHeader( "content-type", http_headers.MimeType("text", "html", mime_params) ) self.description = description def __repr__(self): return "<%s %s %s>" % (self.__class__.__name__, self.code, self.description) class RedirectResponse (StatusResponse): """ A L{Response} object that contains a redirect to another network location. """ def __init__(self, location, temporary=False): """ @param location: the URI to redirect to. @param temporary: whether it's a temporary redirect or permanent """ code = responsecode.TEMPORARY_REDIRECT if temporary else responsecode.MOVED_PERMANENTLY super(RedirectResponse, self).__init__( code, "Document moved to %s." % (location,) ) self.headers.setHeader("location", location) def NotModifiedResponse(oldResponse=None): if oldResponse is not None: headers = http_headers.Headers() for header in ( # Required from sec 10.3.5: 'date', 'etag', 'content-location', 'expires', 'cache-control', 'vary', # Others: 'server', 'proxy-authenticate', 'www-authenticate', 'warning' ): value = oldResponse.headers.getRawHeaders(header) if value is not None: headers.setRawHeaders(header, value) else: headers = None return Response(code=responsecode.NOT_MODIFIED, headers=headers) def checkPreconditions(request, response=None, entityExists=True, etag=None, lastModified=None): """Check to see if this request passes the conditional checks specified by the client. May raise an HTTPError with result codes L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, as appropriate. This function is called automatically as an output filter for GET and HEAD requests. With GET/HEAD, it is not important for the precondition check to occur before doing the action, as the method is non-destructive. However, if you are implementing other request methods, like PUT for your resource, you will need to call this after determining the etag and last-modified time of the existing resource but before actually doing the requested action. In that case, This examines the appropriate request headers for conditionals, (If-Modified-Since, If-Unmodified-Since, If-Match, If-None-Match, or If-Range), compares with the etag and last and and then sets the response code as necessary. @param response: This should be provided for GET/HEAD methods. If it is specified, the etag and lastModified arguments will be retrieved automatically from the response headers and shouldn't be separately specified. Not providing the response with a GET request may cause the emitted "Not Modified" responses to be non-conformant. @param entityExists: Set to False if the entity in question doesn't yet exist. Necessary for PUT support with 'If-None-Match: *'. @param etag: The etag of the resource to check against, or None. @param lastModified: The last modified date of the resource to check against, or None. @raise: HTTPError: Raised when the preconditions fail, in order to abort processing and emit an error page. """ if response: assert etag is None and lastModified is None # if the code is some sort of error code, don't do anything if not ((response.code >= 200 and response.code <= 299) or response.code == responsecode.PRECONDITION_FAILED): return False etag = response.headers.getHeader("etag") lastModified = response.headers.getHeader("last-modified") def matchETag(tags, allowWeak): if entityExists and '*' in tags: return True if etag is None: return False return ((allowWeak or not etag.weak) and ([etagmatch for etagmatch in tags if etag.match(etagmatch, strongCompare=not allowWeak)])) # First check if-match/if-unmodified-since # If either one fails, we return PRECONDITION_FAILED match = request.headers.getHeader("if-match") if match: if not matchETag(match, False): raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource does not have a matching ETag.")) unmod_since = request.headers.getHeader("if-unmodified-since") if unmod_since: if not lastModified or lastModified > unmod_since: raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has changed.")) # Now check if-none-match/if-modified-since. # This bit is tricky, because of the requirements when both IMS and INM # are present. In that case, you can't return a failure code # unless *both* checks think it failed. # Also, if the INM check succeeds, ignore IMS, because INM is treated # as more reliable. # I hope I got the logic right here...the RFC is quite poorly written # in this area. Someone might want to verify the testcase against # RFC wording. # If IMS header is later than current time, ignore it. notModified = None ims = request.headers.getHeader('if-modified-since') if ims: notModified = (ims < time.time() and lastModified and lastModified <= ims) inm = request.headers.getHeader("if-none-match") if inm: if request.method in ("HEAD", "GET"): # If it's a range request, don't allow a weak ETag, as that # would break. canBeWeak = not request.headers.hasHeader('Range') if notModified != False and matchETag(inm, canBeWeak): raise HTTPError(NotModifiedResponse(response)) else: if notModified != False and matchETag(inm, False): raise HTTPError(StatusResponse(responsecode.PRECONDITION_FAILED, "Requested resource has a matching ETag.")) else: if notModified == True: raise HTTPError(NotModifiedResponse(response)) def checkIfRange(request, response): """Checks for the If-Range header, and if it exists, checks if the test passes. Returns true if the server should return partial data.""" ifrange = request.headers.getHeader("if-range") if ifrange is None: return True if isinstance(ifrange, http_headers.ETag): return ifrange.match(response.headers.getHeader("etag"), strongCompare=True) else: return ifrange == response.headers.getHeader("last-modified") class _NotifyingProducerStream(stream.ProducerStream): doStartReading = None def __init__(self, length=None, doStartReading=None): stream.ProducerStream.__init__(self, length=length) self.doStartReading = doStartReading def read(self): if self.doStartReading is not None: doStartReading = self.doStartReading self.doStartReading = None doStartReading() return stream.ProducerStream.read(self) def write(self, data): self.doStartReading = None stream.ProducerStream.write(self, data) def finish(self): self.doStartReading = None stream.ProducerStream.finish(self) # response codes that must have empty bodies NO_BODY_CODES = (responsecode.NO_CONTENT, responsecode.NOT_MODIFIED) class Request(object): """A HTTP request. Subclasses should override the process() method to determine how the request will be processed. @ivar method: The HTTP method that was used. @ivar uri: The full URI that was requested (includes arguments). @ivar headers: All received headers @ivar clientproto: client HTTP version @ivar stream: incoming data stream. """ implements(iweb.IRequest, interfaces.IConsumer) known_expects = ('100-continue',) def __init__(self, chanRequest, command, path, version, contentLength, headers): """ @param chanRequest: the channel request we're associated with. """ self.chanRequest = chanRequest self.method = command self.uri = path self.clientproto = version self.headers = headers if '100-continue' in self.headers.getHeader('expect', ()): doStartReading = self._sendContinue else: doStartReading = None self.stream = _NotifyingProducerStream(contentLength, doStartReading) self.stream.registerProducer(self.chanRequest, True) def checkExpect(self): """Ensure there are no expectations that cannot be met. Checks Expect header against self.known_expects.""" expects = self.headers.getHeader('expect', ()) for expect in expects: if expect not in self.known_expects: raise HTTPError(responsecode.EXPECTATION_FAILED) def process(self): """Called by channel to let you process the request. Can be overridden by a subclass to do something useful.""" pass def handleContentChunk(self, data): """Callback from channel when a piece of data has been received. Puts the data in .stream""" self.stream.write(data) def handleContentComplete(self): """Callback from channel when all data has been received. """ self.stream.unregisterProducer() self.stream.finish() def connectionLost(self, reason): """connection was lost""" pass def __repr__(self): return '<%s %s %s>' % (self.method, self.uri, self.clientproto) def _sendContinue(self): self.chanRequest.writeIntermediateResponse(responsecode.CONTINUE) def _reallyFinished(self, x): """We are finished writing data.""" self.chanRequest.finish() def _finished(self, x): """ We are finished writing data. But we need to check that we have also finished reading all data as we might have sent a, for example, 401 response before we read any data. To make sure that the stream/producer sequencing works properly we need to discard the remaining data in the request. """ if self.stream.length != 0: return readAndDiscard(self.stream).addCallback(self._reallyFinished).addErrback(self._error) else: self._reallyFinished(x) def _error(self, reason): if reason.check(error.ConnectionLost): log.info("Request error: {msg}", msg=reason.getErrorMessage()) else: log.failure("Request error", reason) # Only bother with cleanup on errors other than lost connection. self.chanRequest.abortConnection() def writeResponse(self, response): """ Write a response. """ if self.stream.doStartReading is not None: # Expect: 100-continue was requested, but 100 response has not been # sent, and there's a possibility that data is still waiting to be # sent. # # Ideally this means the remote side will not send any data. # However, because of compatibility requirements, it might timeout, # and decide to do so anyways at the same time we're sending back # this response. Thus, the read state is unknown after this. # We must close the connection. self.chanRequest.channel.setReadPersistent(False) # Nothing more will be read self.chanRequest.allContentReceived() if response.code != responsecode.NOT_MODIFIED: # Not modified response is *special* and doesn't get a content-length. if response.stream is None: response.headers.setHeader('content-length', 0) elif response.stream.length is not None: response.headers.setHeader('content-length', response.stream.length) self.chanRequest.writeHeaders(response.code, response.headers) # if this is a "HEAD" request, or a special response code, # don't return any data. if self.method == "HEAD" or response.code in NO_BODY_CODES: if response.stream is not None: response.stream.close() self._finished(None) return d = stream.StreamProducer(response.stream).beginProducing(self.chanRequest) d.addCallback(self._finished).addErrback(self._error) class XMLResponse (Response): """ XML L{Response} object. Renders itself as an XML document. """ def __init__(self, code, element): """ @param xml_responses: an iterable of davxml.Response objects. """ Response.__init__(self, code, stream=element.toxml()) self.headers.setHeader("content-type", http_headers.MimeType("text", "xml")) class JSONResponse (Response): """ JSON L{Response} object. Renders itself as an JSON document. """ def __init__(self, code, jobj, contentType="application/json", pretty=False): """ @param jobj: a Python object that can be serialized to JSON. """ kwargs = {} if pretty: kwargs["indent"] = 2 kwargs["separators"] = (',', ':') Response.__init__(self, code, stream=json.dumps(jobj, **kwargs)) self.headers.setHeader("content-type", http_headers.MimeType(*contentType.split("/"))) components.registerAdapter(Response, int, iweb.IResponse) __all__ = ['HTTPError', 'NotModifiedResponse', 'Request', 'Response', 'StatusResponse', 'RedirectResponse', 'checkIfRange', 'checkPreconditions', 'defaultPortForScheme', 'parseVersion', 'splitHostPort', "XMLResponse", "JSONResponse"]