# OwlCyberSecurity - MANAGER
# Edit File: cache.py
##
# Copyright (c) 2008-2017 Apple Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##

from twext.python.log import Logger
from txweb2.dav.util import allDataFromStream
from txweb2.http import Response
from txweb2.iweb import IResource
from txweb2.stream import MemoryStream

from twisted.internet.defer import succeed, inlineCallbacks, returnValue

from twistedcaldav.config import config
from twistedcaldav.memcachepool import CachePoolUserMixIn, defaultCachePool

from txdav.idav import IStoreNotifierFactory, IStoreNotifier

from zope.interface import implements

import cPickle
import hashlib
import urllib
import uuid

"""
The basic principles of the PROPFIND cache are these:

(1) In RootResource.locateChild we "intercept" request processing at a very
early stage (before traversing the resource hierarchy for the request URI).
If the request is a PROPFIND we check to see whether a cache entry exists and
if so immediately return that. If no cache entry exists, normal PROPFIND
processing occurs.

(2) The PropfindCacheMixin class is mixed into calendar/address book homes.
That causes all valid PROPFIND responses to be cached, and also provides a
cache invalidation api to allow signaling of changes that need to invalidate
the cache. The main and child resources need to cause that api to be called
when appropriate changes occur.

(3) The response cache entries consist of a key, derived from the request
only, and a value. The value contains the set of tokens in effect at the time
the entry was cached, together with the response that was cached. The tokens
are:

  - principalToken - a token for the authenticated user's principal
  - directoryToken - a hash of that principal's directory record
  - uriToken - a token for the request uri
  - childTokens - tokens for any child resources the request uri depends on
    (for depth:1)

  The current principalToken, uriToken and childTokens values are themselves
  stored in the cache using the key prefix 'cacheToken:'.

(4) When a request is being checked in the cache, the response cache entry key
is first computed and any value extracted. The tokens in the value are then
checked against the current set of tokens in the cache. If there is any
mismatch between tokens, the cache entry is considered invalid and the cached
response is not returned. If everything matches up, the cached response is
returned to the caller and ultimately sent directly back to the client.

(5) Because of shared calendars/address books that can affect the
calendar/address book homes of several different users at once, we need to
keep track of the separate childTokens for each child resource. The tokens for
shared resources are keyed off the sharer's uri, so sharee's homes use that
token. That way a single token for all shared instances is used and changed
just once.

(6) Principals and directory records need to be included as tokens to take
account of variations in access control based on who is making the request
(including proxy state changes etc).
"""


class DisabledCacheNotifier(object):
    """
    A do-nothing change notifier, for use when response caching is disabled.
    """

    def __init__(self, *args, **kwargs):
        # Accepts and ignores whatever a real notifier would be given.
        pass

    def changed(self, url=None):
        """
        Ignore the change.

        @return: an already-fired L{Deferred} with a C{None} result.
        """
        return succeed(None)


class DisabledCache(object):
    """
    A do-nothing response cache: never has a cached response and never
    stores one.
    """

    def getResponseForRequest(self, request):
        """
        @return: an already-fired L{Deferred} with C{None} (always a miss).
        """
        return succeed(None)

    def cacheResponseForRequest(self, request, response):
        """
        @return: an already-fired L{Deferred} with C{response}, unchanged.
        """
        return succeed(response)


class URINotFoundException(Exception):
    """
    Raised when a URI needed to compute cache tokens cannot be resolved.

    @ivar uri: the URI that could not be resolved.
    """

    def __init__(self, uri):
        # Initialise the base class so str(e) and e.args carry the URI too.
        super(URINotFoundException, self).__init__(uri)
        self.uri = uri

    def __repr__(self):
        return "%s: Could not find URI %r" % (
            self.__class__.__name__,
            self.uri)


class MemcacheChangeNotifier(CachePoolUserMixIn):
    """
    A change notifier used by resources (not store objects).
    """
    log = Logger()

    def __init__(self, resource, cachePool=None, cacheHandle="Default"):
        """
        @param resource: the resource whose C{url()} is used as the default
            cache token key in L{changed}.
        @param cachePool: optional cache pool, stored as C{_cachePool}.
        @param cacheHandle: cache pool handle name, stored as
            C{_cachePoolHandle}.
        """
        self._resource = resource
        self._cachePool = cachePool
        self._cachePoolHandle = cacheHandle

    def _newCacheToken(self):
        """
        @return: a fresh random token (string form of a version 4 UUID).
        """
        return str(uuid.uuid4())

    def changed(self, url=None):
        """
        Change the cache token for a resource

        @param url: the url whose token is being changed, or C{None} to
            determine the URL from the supplied resource
        @type: L{str}

        return: A L{Deferred} that fires when the token has been changed.
        """

        # For shared resources we use the owner URL as the cache key
        if url is None:
            url = self._resource.url()

        self.log.debug("Changing Cache Token for {url}", url=url)
        # NOTE(review): ResponseCacheTimeout is presumably in minutes (hence
        # the * 60) - confirm against the config definition.
        return self.getCachePool().set(
            'cacheToken:%s' % (url,),
            self._newCacheToken(),
            expireTime=config.ResponseCacheTimeout * 60)


class MemcacheURLPatternChangeNotifier(CachePoolUserMixIn):
    """
    A change notifier used to target arbitrary tokens.
    """
    log = Logger()

    def __init__(self, urlPattern, cachePool=None, cacheHandle="Default"):
        """
        @param urlPattern: a C{str.format} pattern containing a C{{token}}
            field; L{changed} fills it in to build the cache token key.
        @param cachePool: optional cache pool, stored as C{_cachePool}.
        @param cacheHandle: cache pool handle name, stored as
            C{_cachePoolHandle}.
        """
        self._urlPattern = urlPattern
        self._cachePool = cachePool
        self._cachePoolHandle = cacheHandle

    def _newCacheToken(self):
        """
        @return: a fresh random token (string form of a version 4 UUID).
        """
        return str(uuid.uuid4())

    def changed(self, token):
        """
        Change the cache token for a resource

        @param token: value substituted for C{{token}} in the URL pattern.

        return: A L{Deferred} that fires when the token has been changed.
        """
        url = self._urlPattern.format(token=token)
        self.log.debug("Changing Cache Token for {url}", url=url)
        return self.getCachePool().set(
            'cacheToken:{url}'.format(url=url),
            self._newCacheToken(),
            expireTime=config.ResponseCacheTimeout * 60,
        )


class BaseResponseCache(object):
    """
    A base class which provides some common operations
    """
    log = Logger()

    def _principalURI(self, principal):
        """
        @return: C{principal.principalURL()}, or the literal string
            C{"unauthenticated"} when C{principal} is C{None}.
        """
        return principal.principalURL() if principal is not None else "unauthenticated"

    def _uriNotFound(self, f, uri):
        """
        Errback: translate an L{AttributeError} failure into
        L{URINotFoundException}; any other failure is re-raised by C{trap}.
        """
        f.trap(AttributeError)
        raise URINotFoundException(uri)

    def _getRecordForURI(self, uri, request):
        """
        Return the directory record for the specified principal uri.

        @return: a L{Deferred} firing with the located resource's C{record}
            attribute, or C{None} when the resource has no such attribute.
        @raise URINotFoundException: if C{locateResource} raises
            L{AssertionError}.
        """
        def _getRecord(resrc):
            if hasattr(resrc, 'record'):
                return resrc.record

        try:
            return request.locateResource(uri).addCallback(
                _getRecord).addErrback(self._uriNotFound, uri)
        except AssertionError:
            raise URINotFoundException(uri)

    @inlineCallbacks
    def _canonicalizeURIForRequest(self, uri, request):
        """
        Always use canonicalized forms of the URIs for caching (i.e. __uids__
        paths).

        Do this without calling locateResource which may cause a query on the
        store.

        @return: a L{Deferred} firing with the canonical URI.
        @raise URINotFoundException: if the URI cannot be resolved through
            the C{locateResource} fallback.
        """

        uribits = uri.split("/")
        # Guard the indexing below: short paths such as "/principals/" do not
        # have a record type and name segment, so they go to the fallback
        # rather than raising IndexError.
        if len(uribits) > 2 and uribits[1] in ("principals", "calendars", "addressbooks"):
            if uribits[2] == "__uids__":
                returnValue(uri)
            elif len(uribits) > 3:
                recordType = uribits[2]
                recordName = uribits[3]
                directory = request.site.resource.getDirectory()
                record = yield directory.recordWithShortName(
                    directory.oldNameToRecordType(recordType),
                    recordName
                )
                if record is not None:
                    uribits[2] = "__uids__"
                    uribits[3] = record.uid.encode("utf-8")
                    returnValue("/".join(uribits))

        # Fall back to the locateResource approach
        try:
            resrc = yield request.locateResource(uri)
            if resrc is None:
                # Nothing lives at this URI; report it the same way as any
                # other unresolvable URI so callers treat it as a cache miss.
                raise URINotFoundException(uri)
            returnValue(resrc.url())
        except AssertionError:
            raise URINotFoundException(uri)

    def _getURIs(self, request):
        """
        Get principal and resource URIs from the request.

        @return: a L{Deferred} firing with a C{(principalURI, resourceURI)}
            tuple, both canonicalized.
        """
        def _getSecondURI(rURI):
            return self._canonicalizeURIForRequest(
                self._principalURI(request.authnUser),
                request).addCallback(lambda pURI: (pURI, rURI))

        d = self._canonicalizeURIForRequest(request.uri, request)
        d.addCallback(_getSecondURI)

        return d

    @inlineCallbacks
    def _requestKey(self, request):
        """
        Get a key for this request. This depends on the method, Depth: header,
        authn user principal, request uri and a hash of the request body (the
        body being normalized for property order).

        The key is also stored on the request as C{request.cacheKey}.
        """
        requestBody = (yield allDataFromStream(request.stream))
        if requestBody is not None:
            # Give it back to the request so it can be read again
            request.stream = MemoryStream(requestBody)
            request.stream.doStartReading = None

            # Normalize the property order by doing a "dumb" sort on lines
            requestLines = requestBody.splitlines()
            requestLines.sort()
            requestBody = "\n".join(requestLines)

        # NOTE(review): built-in hash() is only stable across processes while
        # string hash randomization is off - confirm before changing the
        # interpreter or its flags.
        request.cacheKey = (request.method,
                            self._principalURI(request.authnUser),
                            request.uri,
                            request.headers.getHeader('depth'),
                            hash(requestBody))

        returnValue(request.cacheKey)

    def _getResponseBody(self, key, response):
        """
        Read the whole response stream.

        @return: a L{Deferred} firing with a C{(key, responseBody)} tuple.
        """
        d1 = allDataFromStream(response.stream)
        d1.addCallback(lambda responseBody: (key, responseBody))
        return d1


class MemcacheResponseCache(BaseResponseCache, CachePoolUserMixIn):
    """
    A response cache that keeps both the cached responses and the
    invalidation tokens in a memcache pool.
    """

    def __init__(self, docroot, cachePool=None):
        self._docroot = docroot
        self._cachePool = cachePool

    @inlineCallbacks
    def _tokenForURI(self, uri, cachePoolHandle=None):
        """
        Get the current token for a particular URI.

        @param cachePoolHandle: when set, look the token up in the named
            default cache pool instead of this object's own pool.
        @return: a L{Deferred} firing with the token, or C{None} if no token
            is stored.
        """

        if isinstance(uri, unicode):
            uri = uri.encode("utf-8")
        if cachePoolHandle:
            result = (yield defaultCachePool(cachePoolHandle).get('cacheToken:%s' % (uri,)))
        else:
            result = (yield self.getCachePool().get('cacheToken:%s' % (uri,)))

        if result is not None:
            _ignore_flags, result = result

        returnValue(result)

    @inlineCallbacks
    def _tokenForRecord(self, uri, request):
        """
        Get the current token for a particular principal URI's directory
        record.

        @raise URINotFoundException: if no directory record can be found for
            C{uri}.
        """

        record = (yield self._getRecordForURI(uri, request))
        if record is None:
            # The resource exists but carries no record: there is no token to
            # compare, so signal "cannot cache" instead of failing on None.
            raise URINotFoundException(uri)
        returnValue(record.cacheToken())

    @inlineCallbacks
    def _tokensForChildren(self, rURI, request):
        """
        Create a dict of child resource tokens for any "recorded" during this
        request in the childCacheURIs attribute.

        @return: a L{Deferred} firing with a C{dict} mapping child URI to
            token; empty if the request has no C{childCacheURIs}.
        """

        tokens = {}
        if hasattr(request, "childCacheURIs"):
            # An explicit loop rather than a comprehension: yield inside a
            # comprehension is not portable across Python versions.
            for uri in request.childCacheURIs:
                tokens[uri] = (yield self._tokenForURI(uri))
        returnValue(tokens)

    @inlineCallbacks
    def _getTokens(self, request):
        """
        Tokens are a principal token, directory record token, resource token
        and list of child resource tokens. A change to any one of those will
        cause cache invalidation.

        @return: a L{Deferred} firing with the list
            C{[principalToken, directoryToken, uriToken, childTokens]}.
        """
        tokens = []
        pURI, rURI = (yield self._getURIs(request))
        tokens.append((yield self._tokenForURI(pURI, "PrincipalToken")))
        tokens.append((yield self._tokenForRecord(pURI, request)))
        tokens.append((yield self._tokenForURI(rURI)))
        tokens.append((yield self._tokensForChildren(rURI, request)))
        returnValue(tokens)

    @inlineCallbacks
    def _hashedRequestKey(self, request):
        """
        Make a key for a response cache entry. This depends on various request
        parameters (see _requestKey for details).

        The hashed key replaces C{request.cacheKey}.
        """
        oldkey = (yield self._requestKey(request))
        request.cacheKey = key = hashlib.md5(
            ':'.join([str(t) for t in oldkey])).hexdigest()
        self.log.debug("hashing key for get: {old!r} to {new!r}", old=oldkey, new=key)
        returnValue(request.cacheKey)

    @inlineCallbacks
    def getResponseForRequest(self, request):
        """
        Try to match a request and a response cache entry. We first get the
        request key and match that, then pull the cache entry and decompose it
        into tokens and response. We then compare the cached tokens with their
        current values. If all match, we can return the cached response data.

        @return: a L{Deferred} firing with a L{Response} built from the cache
            entry, or C{None} on a miss, a token mismatch or an unresolvable
            URI.
        """
        try:
            key = (yield self._hashedRequestKey(request))

            self.log.debug("Checking cache for: {key!r}", key=key)
            result = (yield self.getCachePool().get(key))

            # Tolerate a bare None result, as _tokenForURI does.
            if result is None:
                value = None
            else:
                _ignore_flags, value = result

            if value is None:
                self.log.debug("Not in cache: {key!r}", key=key)
                returnValue(None)

            # NOTE(review): unpickling is only safe while the memcache pool
            # holds nothing but entries written by cacheResponseForRequest -
            # never point this at a cache that untrusted parties can write.
            (
                principalToken, directoryToken, uriToken, childTokens,
                (code, headers, body)
            ) = cPickle.loads(value)
            self.log.debug(
                "Found in cache: {key!r} = {value!r}",
                key=key,
                value=(
                    principalToken,
                    directoryToken,
                    uriToken,
                    childTokens,
                )
            )

            currentTokens = (yield self._getTokens(request))

            if currentTokens[0] != principalToken:
                self.log.debug(
                    "Principal token doesn't match for {key!r}: {currentToken!r} != {principalToken!r}",
                    key=request.cacheKey,
                    currentToken=currentTokens[0],
                    principalToken=principalToken,
                )
                returnValue(None)

            if currentTokens[1] != directoryToken:
                self.log.debug(
                    "Directory Record Token doesn't match for {key!r}: {currentToken!r} != {directoryToken!r}",
                    key=request.cacheKey,
                    currentToken=currentTokens[1],
                    directoryToken=directoryToken,
                )
                returnValue(None)

            if currentTokens[2] != uriToken:
                self.log.debug(
                    "URI token doesn't match for {key!r}: {currentToken!r} != {uriToken!r}",
                    key=request.cacheKey,
                    currentToken=currentTokens[2],
                    uriToken=uriToken,
                )
                returnValue(None)

            # Check every child the cached entry depended on, not just the
            # children recorded so far on this request.
            for childuri, token in childTokens.items():
                currentToken = (yield self._tokenForURI(childuri))
                if currentToken != token:
                    self.log.debug(
                        "Child {uri} token doesn't match for {key!r}: {currentToken!r} != {token!r}",
                        uri=childuri,
                        key=request.cacheKey,
                        currentToken=currentToken,
                        token=token,
                    )
                    returnValue(None)

            self.log.debug("Response cache matched")
            r = Response(code, stream=MemoryStream(body))

            for headerName, rawValues in headers.iteritems():
                r.headers.setRawHeaders(headerName, rawValues)

            returnValue(r)

        except URINotFoundException as e:
            self.log.debug("Could not locate URI: {e!r}", e=e)
            returnValue(None)

    @inlineCallbacks
    def cacheResponseForRequest(self, request, response):
        """
        Given a request and its response, make a response cache entry that
        encodes the response and various cache tokens. Later, when
        getResponseForRequest is called we retrieve this entry and compare the
        old cache tokens with the current ones. If any have changed the
        response cache entry is removed.

        The response's C{date} header is removed and its stream is replaced
        by an in-memory copy of the body so it can still be sent.

        @return: a L{Deferred} firing with C{response}, whether or not it
            could be cached.
        """
        try:
            if hasattr(request, 'cacheKey'):
                key = request.cacheKey
            else:
                key = (yield self._hashedRequestKey(request))

            key, responseBody = (yield self._getResponseBody(key, response))

            response.headers.removeHeader('date')
            response.stream = MemoryStream(responseBody)
            pToken, dToken, uToken, cTokens = (yield self._getTokens(request))

            cacheEntry = cPickle.dumps((
                pToken,
                dToken,
                uToken,
                cTokens,
                (
                    response.code,
                    dict(list(response.headers.getAllRawHeaders())),
                    responseBody
                )
            ))
            self.log.debug(
                "Adding to cache: {key!r} = tokens - {tokens!r}",
                key=key,
                tokens=(
                    pToken,
                    dToken,
                    uToken,
                    cTokens,
                )
            )
            yield self.getCachePool().set(
                key, cacheEntry,
                expireTime=config.ResponseCacheTimeout * 60
            )

        except URINotFoundException as e:
            self.log.debug("Could not locate URI: {e!r}", e=e)

        returnValue(response)


class _CachedResponseResource(object):
    """
    A resource wrapper that serves a single, already-built response.
    """
    implements(IResource)

    def __init__(self, response):
        self._response = response

    def renderHTTP(self, request):
        """
        Mark the request as served from cache (C{extendedLogItems["cached"]})
        and return the stored response.
        """
        if not hasattr(request, "extendedLogItems"):
            request.extendedLogItems = {}
        request.extendedLogItems["cached"] = "1"
        return self._response

    def locateChild(self, request, segments):
        # Consume all remaining segments: this resource answers for any path.
        return self, []


class PropfindCacheMixin(object):
    """
    A mixin that causes a resource's PROPFIND response to be cached. It also
    adds an api to change the resource's uriToken - this must be used whenever
    something changes to cause the cache to be invalidated.
    """

    @inlineCallbacks
    def renderHTTP(self, request):
        """
        Render via the next class in the MRO, then offer PROPFIND responses
        to the root resource's response cache.
        """
        response = (yield super(PropfindCacheMixin, self).renderHTTP(request))

        if request.method == 'PROPFIND':
            resource = (yield request.locateResource("/"))
            # responseCache might not be present during unit tests
            if hasattr(resource, "responseCache"):
                yield resource.responseCache.cacheResponseForRequest(request, response)

        returnValue(response)


class CacheStoreNotifierFactory(CachePoolUserMixIn):
    """
    A notifier factory specifically for store object notifications. This is
    handed off to the data store object, which calls .newNotifier() each time
    a home object is created and gives the new notifier to the home. That
    object is also inherited by home child objects created from the home.

    This object uses a memcachepool for setting new cache tokens.
    """
    log = Logger()

    implements(IStoreNotifierFactory)

    def newNotifier(self, storeObject):
        """
        @return: a new L{CacheStoreNotifier} bound to this factory and
            C{storeObject}.
        """
        return CacheStoreNotifier(self, storeObject)

    def _newCacheToken(self):
        """
        @return: a fresh random token (string form of a version 4 UUID).
        """
        return str(uuid.uuid4())

    def changed(self, cache_id):
        """
        Change the cache token for a store object.

        @param cache_id: the identifier appended to C{'cacheToken:'} to form
            the cache key.

        return: A L{Deferred} that fires when the token has been changed.
        """
        self.log.debug("Changing Cache Token for {id!r}", id=cache_id)
        return self.getCachePool().set(
            'cacheToken:%s' % (cache_id,),
            self._newCacheToken(),
            expireTime=config.ResponseCacheTimeout * 60)


class CacheStoreNotifier(object):
    """
    A notifier for store objects. Store objects will call .notify() when they
    change.
    """
    implements(IStoreNotifier)

    # notifierID prefix -> URI template for the matching home/child path.
    _URI_TEMPLATES = {
        "CalDAV": "/calendars/__uids__/%s/",
        "CardDAV": "/addressbooks/__uids__/%s/",
    }

    def __init__(self, notifierFactory, storeObject):
        self._notifierFactory = notifierFactory
        self._storeObject = storeObject

    @inlineCallbacks
    def notify(self):
        """
        We need to convert the store object notifier ID into a URI, since the
        cache uses URIs. Note that for a home child resource we also need to
        change the token for the home as the sync token on the home changes
        implicitly without a direct notification.

        A notifier ID whose prefix is neither C{"CalDAV"} nor C{"CardDAV"}
        has no URI in this cache, so nothing is invalidated for it.
        """
        prefix, objectID = self._storeObject.notifierID()
        template = self._URI_TEMPLATES.get(prefix)
        if template is None:
            return

        uris = [urllib.quote(template % (objectID,))]

        # Also add home if needed
        if "/" in objectID:
            homeID = objectID.split("/")[0]
            uris.append(urllib.quote(template % (homeID,)))

        for uri in uris:
            yield self._notifierFactory.changed(uri)

    def clone(self, storeObject):
        """
        @return: a new notifier of the same class, sharing this notifier's
            factory but bound to C{storeObject}.
        """
        return self.__class__(self._notifierFactory, storeObject)