Skip to content
Snippets Groups Projects
Commit 0ed4089c authored by Kenneth Reitz's avatar Kenneth Reitz Committed by GitHub
Browse files

Merge pull request #308 from edmorley/fix-pip8-support

Switch pip-pop back to using pip 8
parents f09191ec 39f2efe2
No related branches found
No related tags found
No related merge requests found
Showing
with 21 additions and 1664 deletions
......@@ -18,6 +18,7 @@ from pip._vendor.requests import session
requests = session()
class Requirements(object):
def __init__(self, reqfile=None):
super(Requirements, self).__init__()
......@@ -31,13 +32,15 @@ class Requirements(object):
return '<Requirements \'{}\'>'.format(self.path)
def load(self, reqfile):
if not os.path.exists(reqfile):
raise ValueError('The given requirements file does not exist.')
finder = PackageFinder([], [], session=requests)
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
if requirement.req:
if not getattr(requirement.req, 'name', None):
# Prior to pip 8.1.2 the attribute `name` did not exist.
requirement.req.name = requirement.req.project_name
self.requirements.append(requirement.req)
......@@ -48,24 +51,24 @@ class Requirements(object):
# Generate fresh packages.
other_reqs = (
[r.project_name for r in r1.requirements]
[r.name for r in r1.requirements]
if ignore_versions else r1.requirements
)
for req in r2.requirements:
r = req.project_name if ignore_versions else req
r = req.name if ignore_versions else req
if r not in other_reqs and r not in excludes:
results['fresh'].append(req)
# Generate stale packages.
other_reqs = (
[r.project_name for r in r2.requirements]
[r.name for r in r2.requirements]
if ignore_versions else r2.requirements
)
for req in r1.requirements:
r = req.project_name if ignore_versions else req
r = req.name if ignore_versions else req
if r not in other_reqs and r not in excludes:
results['stale'].append(req)
......@@ -73,11 +76,7 @@ class Requirements(object):
return results
def diff(r1, r2, include_fresh=False, include_stale=False, excludes=None):
include_versions = True if include_stale else False
excludes = excludes if len(excludes) else []
......@@ -92,12 +91,11 @@ def diff(r1, r2, include_fresh=False, include_stale=False, excludes=None):
if include_fresh:
for line in results['fresh']:
print(line.project_name if include_versions else line)
print(line.name if include_versions else line)
if include_stale:
for line in results['stale']:
print(line.project_name if include_versions else line)
print(line.name if include_versions else line)
def main():
......@@ -114,6 +112,5 @@ def main():
diff(**kwargs)
if __name__ == '__main__':
main()
......@@ -15,6 +15,7 @@ from pip._vendor.requests import session
requests = session()
class Requirements(object):
def __init__(self, reqfile=None):
super(Requirements, self).__init__()
......@@ -28,38 +29,31 @@ class Requirements(object):
return '<Requirements \'{}\'>'.format(self.path)
def load(self, reqfile):
if not os.path.exists(reqfile):
raise ValueError('The given requirements file does not exist.')
finder = PackageFinder([], [], session=requests)
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
self.requirements.append(requirement)
if requirement.req:
if not getattr(requirement.req, 'name', None):
# Prior to pip 8.1.2 the attribute `name` did not exist.
requirement.req.name = requirement.req.project_name
self.requirements.append(requirement.req)
def grep(reqfile, packages, silent=False):
try:
r = Requirements(reqfile)
except ValueError:
if not silent:
print('There was a problem loading the given requirement file.')
exit(os.EX_NOINPUT)
for requirement in r.requirements:
if requirement.req:
if requirement.req.project_name in packages:
if not silent:
print('Package {} found!'.format(requirement.req.project_name))
exit(0)
for req in r.requirements:
if req.name in packages:
if not silent:
print('Package {} found!'.format(req.name))
exit(0)
if not silent:
print('Not found.')
......@@ -72,10 +66,8 @@ def main():
kwargs = {'reqfile': args['<reqfile>'], 'packages': args['<package>'], 'silent': args['-s']}
grep(**kwargs)
if __name__ == '__main__':
main()
#!/usr/bin/env python
from __future__ import absolute_import
import logging
import os
import optparse
import warnings
import sys
import re
from pip.exceptions import InstallationError, CommandError, PipError
from pip.utils import get_installed_distributions, get_prog
from pip.utils import deprecation
from pip.vcs import git, mercurial, subversion, bazaar # noqa
from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
from pip.commands import get_summaries, get_similar_commands
from pip.commands import commands_dict
from pip._vendor.requests.packages.urllib3.exceptions import (
InsecureRequestWarning,
)
# assignment for flake8 to be happy
# This fixes a peculiarity when importing via __import__ - as we are
# initialising the pip module, "from pip import cmdoptions" is recursive
# and appears not to work properly in that situation.
import pip.cmdoptions
cmdoptions = pip.cmdoptions
# The version as used in the setup.py and the docs conf.py
__version__ = "7.1.2"
logger = logging.getLogger(__name__)
# Hide the InsecureRequestWArning from urllib3
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
def autocomplete():
"""Command and option completion for the main option parser (and options)
and its subcommands (and options).
Enable by sourcing one of the completion shell scripts (bash or zsh).
"""
# Don't complete if user hasn't sourced bash_completion file.
if 'PIP_AUTO_COMPLETE' not in os.environ:
return
cwords = os.environ['COMP_WORDS'].split()[1:]
cword = int(os.environ['COMP_CWORD'])
try:
current = cwords[cword - 1]
except IndexError:
current = ''
subcommands = [cmd for cmd, summary in get_summaries()]
options = []
# subcommand
try:
subcommand_name = [w for w in cwords if w in subcommands][0]
except IndexError:
subcommand_name = None
parser = create_main_parser()
# subcommand options
if subcommand_name:
# special case: 'help' subcommand has no options
if subcommand_name == 'help':
sys.exit(1)
# special case: list locally installed dists for uninstall command
if subcommand_name == 'uninstall' and not current.startswith('-'):
installed = []
lc = current.lower()
for dist in get_installed_distributions(local_only=True):
if dist.key.startswith(lc) and dist.key not in cwords[1:]:
installed.append(dist.key)
# if there are no dists installed, fall back to option completion
if installed:
for dist in installed:
print(dist)
sys.exit(1)
subcommand = commands_dict[subcommand_name]()
options += [(opt.get_opt_string(), opt.nargs)
for opt in subcommand.parser.option_list_all
if opt.help != optparse.SUPPRESS_HELP]
# filter out previously specified options from available options
prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
options = [(x, v) for (x, v) in options if x not in prev_opts]
# filter options by current input
options = [(k, v) for k, v in options if k.startswith(current)]
for option in options:
opt_label = option[0]
# append '=' to options which require args
if option[1]:
opt_label += '='
print(opt_label)
else:
# show main parser options only when necessary
if current.startswith('-') or current.startswith('--'):
opts = [i.option_list for i in parser.option_groups]
opts.append(parser.option_list)
opts = (o for it in opts for o in it)
subcommands += [i.get_opt_string() for i in opts
if i.help != optparse.SUPPRESS_HELP]
print(' '.join([x for x in subcommands if x.startswith(current)]))
sys.exit(1)
def create_main_parser():
parser_kw = {
'usage': '\n%prog <command> [options]',
'add_help_option': False,
'formatter': UpdatingDefaultsHelpFormatter(),
'name': 'global',
'prog': get_prog(),
}
parser = ConfigOptionParser(**parser_kw)
parser.disable_interspersed_args()
pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
parser.version = 'pip %s from %s (python %s)' % (
__version__, pip_pkg_dir, sys.version[:3])
# add the general options
gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
parser.add_option_group(gen_opts)
parser.main = True # so the help formatter knows
# create command listing for description
command_summaries = get_summaries()
description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
parser.description = '\n'.join(description)
return parser
def parseopts(args):
parser = create_main_parser()
# Note: parser calls disable_interspersed_args(), so the result of this
# call is to split the initial args into the general options before the
# subcommand and everything else.
# For example:
# args: ['--timeout=5', 'install', '--user', 'INITools']
# general_options: ['--timeout==5']
# args_else: ['install', '--user', 'INITools']
general_options, args_else = parser.parse_args(args)
# --version
if general_options.version:
sys.stdout.write(parser.version)
sys.stdout.write(os.linesep)
sys.exit()
# pip || pip help -> print_help()
if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
parser.print_help()
sys.exit()
# the subcommand name
cmd_name = args_else[0]
if cmd_name not in commands_dict:
guess = get_similar_commands(cmd_name)
msg = ['unknown command "%s"' % cmd_name]
if guess:
msg.append('maybe you meant "%s"' % guess)
raise CommandError(' - '.join(msg))
# all the args without the subcommand
cmd_args = args[:]
cmd_args.remove(cmd_name)
return cmd_name, cmd_args
def check_isolated(args):
isolated = False
if "--isolated" in args:
isolated = True
return isolated
def main(args=None):
if args is None:
args = sys.argv[1:]
# Enable our Deprecation Warnings
for deprecation_warning in deprecation.DEPRECATIONS:
warnings.simplefilter("default", deprecation_warning)
# Configure our deprecation warnings to be sent through loggers
deprecation.install_warning_logger()
autocomplete()
try:
cmd_name, cmd_args = parseopts(args)
except PipError as exc:
sys.stderr.write("ERROR: %s" % exc)
sys.stderr.write(os.linesep)
sys.exit(1)
command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
return command.main(cmd_args)
# ###########################################################
# # Writing freeze files
class FrozenRequirement(object):
def __init__(self, name, req, editable, comments=()):
self.name = name
self.req = req
self.editable = editable
self.comments = comments
_rev_re = re.compile(r'-r(\d+)$')
_date_re = re.compile(r'-(20\d\d\d\d\d\d)$')
@classmethod
def from_dist(cls, dist, dependency_links, find_tags=False):
location = os.path.normcase(os.path.abspath(dist.location))
comments = []
from pip.vcs import vcs, get_src_requirement
if vcs.get_backend_name(location):
editable = True
try:
req = get_src_requirement(dist, location, find_tags)
except InstallationError as exc:
logger.warning(
"Error when trying to get requirement for VCS system %s, "
"falling back to uneditable format", exc
)
req = None
if req is None:
logger.warning(
'Could not determine repository location of %s', location
)
comments.append(
'## !! Could not determine repository location'
)
req = dist.as_requirement()
editable = False
else:
editable = False
req = dist.as_requirement()
specs = req.specs
assert len(specs) == 1 and specs[0][0] in ["==", "==="], \
'Expected 1 spec with == or ===; specs = %r; dist = %r' % \
(specs, dist)
version = specs[0][1]
ver_match = cls._rev_re.search(version)
date_match = cls._date_re.search(version)
if ver_match or date_match:
svn_backend = vcs.get_backend('svn')
if svn_backend:
svn_location = svn_backend().get_location(
dist,
dependency_links,
)
if not svn_location:
logger.warning(
'Warning: cannot find svn location for %s', req)
comments.append(
'## FIXME: could not find svn URL in dependency_links '
'for this package:'
)
else:
comments.append(
'# Installing as editable to satisfy requirement %s:' %
req
)
if ver_match:
rev = ver_match.group(1)
else:
rev = '{%s}' % date_match.group(1)
editable = True
req = '%s@%s#egg=%s' % (
svn_location,
rev,
cls.egg_name(dist)
)
return cls(dist.project_name, req, editable, comments)
@staticmethod
def egg_name(dist):
name = dist.egg_name()
match = re.search(r'-py\d\.\d$', name)
if match:
name = name[:match.start()]
return name
def __str__(self):
req = self.req
if self.editable:
req = '-e %s' % req
return '\n'.join(list(self.comments) + [str(req)]) + '\n'
if __name__ == '__main__':
sys.exit(main())
from __future__ import absolute_import
import os
import sys
# If we are running from a wheel, add the wheel to sys.path
# This allows the usage python pip-*.whl/pip install pip-*.whl
if __package__ == '':
# __file__ is pip-*.whl/pip/__main__.py
# first dirname call strips of '/__main__.py', second strips off '/pip'
# Resulting path is the name of the wheel itself
# Add that to sys.path so we can import pip
path = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, path)
import pip # noqa
if __name__ == '__main__':
sys.exit(pip.main())
Policy
======
Vendored libraries should not be modified except as required to actually
successfully vendor them.
Modifications
=============
* html5lib has been modified to import six from pip._vendor
* pkg_resources has been modified to import _markerlib from pip._vendor
* markerlib has been modified to import its API from pip._vendor
* CacheControl has been modified to import it's dependencies from pip._vendor
* progress has been modified to not use unicode literals for support for Python 3.2
_markerlib and pkg_resources
============================
_markerlib and pkg_resources has been pulled in from setuptools 18.2
Note to Downstream Distributors
===============================
Libraries are vendored/bundled inside of this directory in order to prevent
end users from needing to manually install packages if they accidently remove
something that pip depends on.
All bundled packages exist in the ``pip._vendor`` namespace, and the versions
(fetched from PyPI) that we use are located in ``vendor.txt``. If you wish
to debundle these you can do so by either deleting everything in
``pip/_vendor`` **except** for ``pip/_vendor/__init__.py`` or by running
``PIP_NO_VENDOR_FOR_DOWNSTREAM=1 setup.py install``. No other changes should
be required as the ``pip/_vendor/__init__.py`` file will alias the "real"
names (such as ``import six``) to the bundled names (such as
``import pip._vendor.six``) automatically. Alternatively if you delete the
entire ``pip._vendor`` you will need to adjust imports that import from those
locations.
"""
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
depend on something external.
Files inside of pip._vendor should be considered immutable and should only be
updated to versions from upstream.
"""
from __future__ import absolute_import
import glob
import os.path
import sys
# Downstream redistributors which have debundled our dependencies should also
# patch this value to be true. This will trigger the additional patching
# to cause things like "six" to be available as pip.
DEBUNDLED = False
# By default, look in this directory for a bunch of .whl files which we will
# add to the beginning of sys.path before attempting to import anything. This
# is done to support downstream re-distributors like Debian and Fedora who
# wish to create their own Wheels for our dependencies to aid in debundling.
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
# Define a small helper function to alias our vendored modules to the real ones
# if the vendored ones do not exist. This idea of this was taken from
# https://github.com/kennethreitz/requests/pull/2567.
def vendored(modulename):
vendored_name = "{0}.{1}".format(__name__, modulename)
try:
__import__(vendored_name, globals(), locals(), level=0)
except ImportError:
__import__(modulename, globals(), locals(), level=0)
sys.modules[vendored_name] = sys.modules[modulename]
base, head = vendored_name.rsplit(".", 1)
setattr(sys.modules[base], head, sys.modules[modulename])
# If we're operating in a debundled setup, then we want to go ahead and trigger
# the aliasing of our vendored libraries as well as looking for wheels to add
# to our sys.path. This will cause all of this code to be a no-op typically
# however downstream redistributors can enable it in a consistent way across
# all platforms.
if DEBUNDLED:
# Actually look inside of WHEEL_DIR to find .whl files and add them to the
# front of our sys.path.
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
# Actually alias all of our vendored dependencies.
vendored("cachecontrol")
vendored("colorama")
vendored("distlib")
vendored("html5lib")
vendored("lockfile")
vendored("six")
vendored("six.moves")
vendored("six.moves.urllib")
vendored("packaging")
vendored("packaging.version")
vendored("packaging.specifiers")
vendored("pkg_resources")
vendored("progress")
vendored("retrying")
vendored("requests")
try:
import ast
from pip._vendor._markerlib.markers import default_environment, compile, interpret
except ImportError:
if 'ast' in globals():
raise
def default_environment():
return {}
def compile(marker):
def marker_fn(environment=None, override=None):
# 'empty markers are True' heuristic won't install extra deps.
return not marker.strip()
marker_fn.__doc__ = marker
return marker_fn
def interpret(marker, environment=None, override=None):
return compile(marker)()
# -*- coding: utf-8 -*-
"""Interpret PEP 345 environment markers.
EXPR [in|==|!=|not in] EXPR [or|and] ...
where EXPR belongs to any of those:
python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
python_full_version = sys.version.split()[0]
os.name = os.name
sys.platform = sys.platform
platform.version = platform.version()
platform.machine = platform.machine()
platform.python_implementation = platform.python_implementation()
a free string, like '2.6', or 'win32'
"""
__all__ = ['default_environment', 'compile', 'interpret']
import ast
import os
import platform
import sys
import weakref
_builtin_compile = compile
try:
from platform import python_implementation
except ImportError:
if os.name == "java":
# Jython 2.5 has ast module, but not platform.python_implementation() function.
def python_implementation():
return "Jython"
else:
raise
# restricted set of variables
_VARS = {'sys.platform': sys.platform,
'python_version': '%s.%s' % sys.version_info[:2],
# FIXME parsing sys.platform is not reliable, but there is no other
# way to get e.g. 2.7.2+, and the PEP is defined with sys.version
'python_full_version': sys.version.split(' ', 1)[0],
'os.name': os.name,
'platform.version': platform.version(),
'platform.machine': platform.machine(),
'platform.python_implementation': python_implementation(),
'extra': None # wheel extension
}
for var in list(_VARS.keys()):
if '.' in var:
_VARS[var.replace('.', '_')] = _VARS[var]
def default_environment():
"""Return copy of default PEP 385 globals dictionary."""
return dict(_VARS)
class ASTWhitelist(ast.NodeTransformer):
def __init__(self, statement):
self.statement = statement # for error messages
ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load, ast.Str)
# Bool operations
ALLOWED += (ast.And, ast.Or)
# Comparison operations
ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)
def visit(self, node):
"""Ensure statement only contains allowed nodes."""
if not isinstance(node, self.ALLOWED):
raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
(self.statement,
(' ' * node.col_offset) + '^'))
return ast.NodeTransformer.visit(self, node)
def visit_Attribute(self, node):
"""Flatten one level of attribute access."""
new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
return ast.copy_location(new_node, node)
def parse_marker(marker):
tree = ast.parse(marker, mode='eval')
new_tree = ASTWhitelist(marker).generic_visit(tree)
return new_tree
def compile_marker(parsed_marker):
return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
dont_inherit=True)
_cache = weakref.WeakValueDictionary()
def compile(marker):
"""Return compiled marker as a function accepting an environment dict."""
try:
return _cache[marker]
except KeyError:
pass
if not marker.strip():
def marker_fn(environment=None, override=None):
""""""
return True
else:
compiled_marker = compile_marker(parse_marker(marker))
def marker_fn(environment=None, override=None):
"""override updates environment"""
if override is None:
override = {}
if environment is None:
environment = default_environment()
environment.update(override)
return eval(compiled_marker, environment)
marker_fn.__doc__ = marker
_cache[marker] = marker_fn
return _cache[marker]
def interpret(marker, environment=None):
return compile(marker)(environment)
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
__version__ = '0.11.5'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
import functools
from pip._vendor.requests.adapters import HTTPAdapter
from .controller import CacheController
from .cache import DictCache
from .filewrapper import CallbackFileWrapper
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, cache=None,
cache_etags=True,
controller_class=None,
serializer=None,
heuristic=None,
*args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.heuristic = heuristic
controller_factory = controller_class or CacheController
self.controller = controller_factory(
self.cache,
cache_etags=cache_etags,
serializer=serializer,
)
def send(self, request, **kw):
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
"""
if request.method == 'GET':
cached_response = self.controller.cached_request(request)
if cached_response:
return self.build_response(request, cached_response,
from_cache=True)
# check for etags and add headers if appropriate
request.headers.update(
self.controller.conditional_headers(request)
)
resp = super(CacheControlAdapter, self).send(request, **kw)
return resp
def build_response(self, request, response, from_cache=False):
"""
Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response
"""
if not from_cache and request.method == 'GET':
# apply any expiration heuristics
if response.status == 304:
# We must have sent an ETag request. This could mean
# that we've been expired already or that we simply
# have an etag. In either case, we want to try and
# update the cache if that is the case.
cached_response = self.controller.update_cached_response(
request, response
)
if cached_response is not response:
from_cache = True
# We are done with the server response, read a
# possible response body (compliant servers will
# not return one, but we cannot be 100% sure) and
# release the connection back to the pool.
response.read(decode_content=False)
response.release_conn()
response = cached_response
# We always cache the 301 responses
elif response.status == 301:
self.controller.cache_response(request, response)
else:
# Check for any heuristics that might update headers
# before trying to cache.
if self.heuristic:
response = self.heuristic.apply(response)
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
response._fp,
functools.partial(
self.controller.cache_response,
request,
response,
)
)
resp = super(CacheControlAdapter, self).build_response(
request, response
)
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
resp.from_cache = from_cache
return resp
def close(self):
self.cache.close()
super(CacheControlAdapter, self).close()
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
raise NotImplemented()
def set(self, key, value):
raise NotImplemented()
def delete(self, key):
raise NotImplemented()
def close(self):
pass
class DictCache(BaseCache):
def __init__(self, init_dict=None):
self.lock = Lock()
self.data = init_dict or {}
def get(self, key):
return self.data.get(key, None)
def set(self, key, value):
with self.lock:
self.data.update({key: value})
def delete(self, key):
with self.lock:
if key in self.data:
self.data.pop(key)
from textwrap import dedent
try:
from .file_cache import FileCache
except ImportError:
notice = dedent('''
NOTE: In order to use the FileCache you must have
lockfile installed. You can install it via pip:
pip install lockfile
''')
print(notice)
try:
import redis
from .redis_cache import RedisCache
except ImportError:
pass
import hashlib
import os
from pip._vendor.lockfile import LockFile
from pip._vendor.lockfile.mkdirlockfile import MkdirLockFile
from ..cache import BaseCache
from ..controller import CacheController
def _secure_open_write(filename, fmode):
# We only want to write to this file, so open it in write only mode
flags = os.O_WRONLY
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
# will open *new* files.
# We specify this because we want to ensure that the mode we pass is the
# mode of the file.
flags |= os.O_CREAT | os.O_EXCL
# Do not follow symlinks to prevent someone from making a symlink that
# we follow and insecurely open a cache file.
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
# On Windows we'll mark this file as binary
if hasattr(os, "O_BINARY"):
flags |= os.O_BINARY
# Before we open our file, we want to delete any existing file that is
# there
try:
os.remove(filename)
except (IOError, OSError):
# The file must not exist already, so we can just skip ahead to opening
pass
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
# race condition happens between the os.remove and this line, that an
# error will be raised. Because we utilize a lockfile this should only
# happen if someone is attempting to attack us.
fd = os.open(filename, flags, fmode)
try:
return os.fdopen(fd, "wb")
except:
# An error occurred wrapping our FD in a file object
os.close(fd)
raise
class FileCache(BaseCache):
def __init__(self, directory, forever=False, filemode=0o0600,
dirmode=0o0700, use_dir_lock=None, lock_class=None):
if use_dir_lock is not None and lock_class is not None:
raise ValueError("Cannot use use_dir_lock and lock_class together")
if use_dir_lock:
lock_class = MkdirLockFile
if lock_class is None:
lock_class = LockFile
self.directory = directory
self.forever = forever
self.filemode = filemode
self.dirmode = dirmode
self.lock_class = lock_class
@staticmethod
def encode(x):
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name):
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key):
name = self._fn(key)
if not os.path.exists(name):
return None
with open(name, 'rb') as fh:
return fh.read()
def set(self, key, value):
name = self._fn(key)
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(name), self.dirmode)
except (IOError, OSError):
pass
with self.lock_class(name) as lock:
# Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value)
def delete(self, key):
name = self._fn(key)
if not self.forever:
os.remove(name)
def url_to_file_path(url, filecache):
"""Return the file cache path based on the URL.
This does not ensure the file exists!
"""
key = CacheController.cache_url(url)
return filecache._fn(key)
from __future__ import division
from datetime import datetime
def total_seconds(td):
"""Python 2.6 compatability"""
if hasattr(td, 'total_seconds'):
return td.total_seconds()
ms = td.microseconds
secs = (td.seconds + td.days * 24 * 3600)
return (ms + secs * 10**6) / 10**6
class RedisCache(object):
def __init__(self, conn):
self.conn = conn
def get(self, key):
return self.conn.get(key)
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, value)
else:
expires = expires - datetime.now()
self.conn.setex(key, total_seconds(expires), value)
def delete(self, key):
self.conn.delete(key)
def clear(self):
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self):
self.conn.disconnect()
try:
from urllib.parse import urljoin
except ImportError:
from urlparse import urljoin
try:
import cPickle as pickle
except ImportError:
import pickle
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
"""
The httplib2 algorithms ported for use with requests.
"""
import re
import calendar
import time
from email.utils import parsedate_tz
from pip._vendor.requests.structures import CaseInsensitiveDict
from .cache import DictCache
from .serialize import Serializer
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
"""An interface to see if request should cached or not.
"""
def __init__(self, cache=None, cache_etags=True, serializer=None):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
@classmethod
def _urlnorm(cls, uri):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
scheme = scheme.lower()
authority = authority.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
@classmethod
def cache_url(cls, uri):
return cls._urlnorm(uri)
def parse_cache_control(self, headers):
"""
Parse the cache control headers returning a dictionary with values
for the different directives.
"""
retval = {}
cc_header = 'cache-control'
if 'Cache-Control' in headers:
cc_header = 'Cache-Control'
if cc_header in headers:
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")
]
parts_wo_args = [
(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")
]
retval = dict(parts_with_args + parts_wo_args)
return retval
def cached_request(self, request):
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
cache_url = self.cache_url(request.url)
cc = self.parse_cache_control(request.headers)
# non-caching states
no_cache = True if 'no-cache' in cc else False
if 'max-age' in cc and cc['max-age'] == 0:
no_cache = True
# Bail out if no-cache was set
if no_cache:
return False
# It is in the cache, so lets see if it is going to be
# fresh enough
resp = self.serializer.loads(request, self.cache.get(cache_url))
# Check to see if we have a cached object
if not resp:
return False
# If we have a cached 301, return it immediately. We don't
# need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if resp.status == 301:
return resp
headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers:
# With date or etag, the cached response can never be used
# and should be deleted.
if 'etag' not in headers:
self.cache.delete(cache_url)
return False
now = time.time()
date = calendar.timegm(
parsedate_tz(headers['date'])
)
current_age = max(0, now - date)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(headers)
# determine freshness
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
# If there isn't a max-age, check for an expires header
elif 'expires' in headers:
expires = parsedate_tz(headers['expires'])
if expires is not None:
expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time)
# determine if we are setting freshness limit in the req
if 'max-age' in cc:
try:
freshness_lifetime = int(cc['max-age'])
except ValueError:
freshness_lifetime = 0
if 'min-fresh' in cc:
try:
min_fresh = int(cc['min-fresh'])
except ValueError:
min_fresh = 0
# adjust our current age by our min fresh
current_age += min_fresh
# see how fresh we actually are
fresh = (freshness_lifetime > current_age)
if fresh:
return resp
# we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in headers:
self.cache.delete(cache_url)
# return the original handler
return False
def conditional_headers(self, request):
cache_url = self.cache_url(request.url)
resp = self.serializer.loads(request, self.cache.get(cache_url))
new_headers = {}
if resp:
headers = CaseInsensitiveDict(resp.headers)
if 'etag' in headers:
new_headers['If-None-Match'] = headers['ETag']
if 'last-modified' in headers:
new_headers['If-Modified-Since'] = headers['Last-Modified']
return new_headers
def cache_response(self, request, response, body=None):
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
if response.status not in [200, 203, 300, 301]:
return
response_headers = CaseInsensitiveDict(response.headers)
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
cache_url = self.cache_url(request.url)
# Delete it from the cache if we happen to have it stored there
no_store = cc.get('no-store') or cc_req.get('no-store')
if no_store and self.cache.get(cache_url):
self.cache.delete(cache_url)
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# Add to the cache any 301s. We do this before looking that
# the Date headers.
elif response.status == 301:
self.cache.set(
cache_url,
self.serializer.dumps(request, response)
)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif 'date' in response_headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if int(cc['max-age']) > 0:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# If the request can expire, it means we should cache it
# in the meantime.
elif 'expires' in response_headers:
if response_headers['expires']:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
def update_cached_response(self, request, response):
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
cache_url = self.cache_url(request.url)
cached_response = self.serializer.loads(
request,
self.cache.get(cache_url)
)
if not cached_response:
# we didn't have a cached response
return response
# Lets update our headers with the headers from the new request:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
#
# The server isn't supposed to send headers that would make
# the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know that might be problmatic due to
# typical assumptions.
excluded_headers = [
"content-length",
]
cached_response.headers.update(
dict((k, v) for k, v in response.headers.items()
if k.lower() not in excluded_headers)
)
# we want a 200 b/c we have content via the cache
cached_response.status = 200
# update our cache
self.cache.set(
cache_url,
self.serializer.dumps(request, cached_response),
)
return cached_response
from io import BytesIO
class CallbackFileWrapper(object):
"""
Small wrapper around a fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
contents of that buffer.
All attributes are proxied to the underlying file object.
This class uses members with a double underscore (__) leading prefix so as
not to accidentally shadow an attribute.
"""
def __init__(self, fp, callback):
self.__buf = BytesIO()
self.__fp = fp
self.__callback = callback
def __getattr__(self, name):
# The vaguaries of garbage collection means that self.__fp is
# not always set. By using __getattribute__ and the private
# name[0] allows looking up the attribute value and raising an
# AttributeError when it doesn't exist. This stop thigns from
# infinitely recursing calls to getattr in the case where
# self.__fp hasn't been set.
#
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
fp = self.__getattribute__('_CallbackFileWrapper__fp')
return getattr(fp, name)
def __is_fp_closed(self):
try:
return self.__fp.fp is None
except AttributeError:
pass
try:
return self.__fp.closed
except AttributeError:
pass
# We just don't cache it then.
# TODO: Add some logging here...
return False
def read(self, amt=None):
data = self.__fp.read(amt)
self.__buf.write(data)
if self.__is_fp_closed():
if self.__callback:
self.__callback(self.__buf.getvalue())
# We assign this to None here, because otherwise we can get into
# really tricky problems where the CPython interpreter dead locks
# because the callback is holding a reference to something which
# has a __del__ method. Setting this to None breaks the cycle
# and allows the garbage collector to do it's thing normally.
self.__callback = None
return data
import calendar
import time
from email.utils import formatdate, parsedate, parsedate_tz
from datetime import datetime, timedelta
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta, date=None):
date = date or datetime.now()
return date + delta
def datetime_to_header(dt):
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic(object):
def warning(self, response):
"""
Return a valid 1xx warning header value describing the cache
adjustments.
The response is provided too allow warnings like 113
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
to explicitly say response is over 24 hours old.
"""
return '110 - "Response is Stale"'
def update_headers(self, response):
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
signify that the response was cached by the client, not
by way of the provided headers.
"""
return {}
def apply(self, response):
warning_header_value = self.warning(response)
response.headers.update(self.update_headers(response))
if warning_header_value is not None:
response.headers.update({'Warning': warning_header_value})
return response
class OneDayCache(BaseHeuristic):
"""
Cache the response by providing an expires 1 day in the
future.
"""
def update_headers(self, response):
headers = {}
if 'expires' not in response.headers:
date = parsedate(response.headers['date'])
expires = expire_after(timedelta(days=1),
date=datetime(*date[:6]))
headers['expires'] = datetime_to_header(expires)
headers['cache-control'] = 'public'
return headers
class ExpiresAfter(BaseHeuristic):
"""
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw):
self.delta = timedelta(**kw)
def update_headers(self, response):
expires = expire_after(self.delta)
return {
'expires': datetime_to_header(expires),
'cache-control': 'public',
}
def warning(self, response):
tmpl = '110 - Automatically cached for %s. Response might be stale'
return tmpl % self.delta
class LastModified(BaseHeuristic):
"""
If there is no Expires header already, fall back on Last-Modified
using the heuristic from
http://tools.ietf.org/html/rfc7234#section-4.2.2
to calculate a reasonable value.
Firefox also does something like this per
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr.
"""
cacheable_by_default_statuses = set([
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
])
def update_headers(self, resp):
headers = resp.headers
if 'expires' in headers:
return {}
if 'cache-control' in headers and headers['cache-control'] != 'public':
return {}
if resp.status not in self.cacheable_by_default_statuses:
return {}
if 'date' not in headers or 'last-modified' not in headers:
return {}
date = calendar.timegm(parsedate_tz(headers['date']))
last_modified = parsedate(headers['last-modified'])
if date is None or last_modified is None:
return {}
now = time.time()
current_age = max(0, now - date)
delta = date - calendar.timegm(last_modified)
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
if freshness_lifetime <= current_age:
return {}
expires = date + freshness_lifetime
return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp):
return None
import base64
import io
import json
import zlib
from pip._vendor.requests.structures import CaseInsensitiveDict
from .compat import HTTPResponse, pickle
def _b64_encode_bytes(b):
return base64.b64encode(b).decode("ascii")
def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8"))
def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii"))
def _b64_decode_str(s):
return _b64_decode_bytes(s).decode("utf8")
class Serializer(object):
def dumps(self, request, response, body=None):
response_headers = CaseInsensitiveDict(response.headers)
if body is None:
body = response.read(decode_content=False)
# NOTE: 99% sure this is dead code. I'm only leaving it
# here b/c I don't have a test yet to prove
# it. Basically, before using
# `cachecontrol.filewrapper.CallbackFileWrapper`,
# this made an effort to reset the file handle. The
# `CallbackFileWrapper` short circuits this code by
# setting the body as the content is consumed, the
# result being a `body` argument is *always* passed
# into cache_response, and in turn,
# `Serializer.dump`.
response._fp = io.BytesIO(body)
data = {
"response": {
"body": _b64_encode_bytes(body),
"headers": dict(
(_b64_encode_str(k), _b64_encode_str(v))
for k, v in response.headers.items()
),
"status": response.status,
"version": response.version,
"reason": _b64_encode_str(response.reason),
"strict": response.strict,
"decode_content": response.decode_content,
},
}
# Construct our vary headers
data["vary"] = {}
if "vary" in response_headers:
varied_headers = response_headers['vary'].split(',')
for header in varied_headers:
header = header.strip()
data["vary"][header] = request.headers.get(header, None)
# Encode our Vary headers to ensure they can be serialized as JSON
data["vary"] = dict(
(_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
for k, v in data["vary"].items()
)
return b",".join([
b"cc=2",
zlib.compress(
json.dumps(
data, separators=(",", ":"), sort_keys=True,
).encode("utf8"),
),
])
def loads(self, request, data):
# Short circuit if we've been given an empty set of data
if not data:
return
# Determine what version of the serializer the data was serialized
# with
try:
ver, data = data.split(b",", 1)
except ValueError:
ver = b"cc=0"
# Make sure that our "ver" is actually a version and isn't a false
# positive from a , being in the data stream.
if ver[:3] != b"cc=":
data = ver + data
ver = b"cc=0"
# Get the version number out of the cc=N
ver = ver.split(b"=", 1)[-1].decode("ascii")
# Dispatch to the actual load method for the given version
try:
return getattr(self, "_loads_v{0}".format(ver))(request, data)
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
return
def prepare_response(self, request, cached):
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
if "*" in cached.get("vary", {}):
return
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return
body_raw = cached["response"].pop("body")
try:
body = io.BytesIO(body_raw)
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
# a Python 3 str (unicode string), which will cause the above to
# fail with:
#
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode('utf8'))
return HTTPResponse(
body=body,
preload_content=False,
**cached["response"]
)
def _loads_v0(self, request, data):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
def _loads_v1(self, request, data):
try:
cached = pickle.loads(data)
except ValueError:
return
return self.prepare_response(request, cached)
def _loads_v2(self, request, data):
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except ValueError:
return
# We need to decode the items that we've base64 encoded
cached["response"]["body"] = _b64_decode_bytes(
cached["response"]["body"]
)
cached["response"]["headers"] = dict(
(_b64_decode_str(k), _b64_decode_str(v))
for k, v in cached["response"]["headers"].items()
)
cached["response"]["reason"] = _b64_decode_str(
cached["response"]["reason"],
)
cached["vary"] = dict(
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached)
from .adapter import CacheControlAdapter
from .cache import DictCache
def CacheControl(sess,
cache=None,
cache_etags=True,
serializer=None,
heuristic=None):
cache = cache or DictCache()
adapter = CacheControlAdapter(
cache,
cache_etags=cache_etags,
serializer=serializer,
heuristic=heuristic,
)
sess.mount('http://', adapter)
sess.mount('https://', adapter)
return sess
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment