Source code for pypac.resolver
"""
Tools for working with a given PAC file and its return values.
"""
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
try:
from urllib.parse import quote_plus
except ImportError:
from urllib import quote_plus
from pypac.parser import parse_pac_value
[docs]class ProxyResolver(object):
"""
Handles the lookup of the proxy to use for any given URL, including proxy failover logic.
"""
def __init__(self, pac, proxy_auth=None, socks_scheme="socks5"):
"""
:param pypac.parser.PACFile pac: Parsed PAC file.
:param requests.auth.HTTPProxyAuth proxy_auth: Username and password proxy authentication.
If provided, then all proxy URLs returned will include these credentials.
:param str socks_scheme: Scheme to assume for SOCKS proxies. `socks5` by default.
"""
self.pac = pac
self._proxy_auth = proxy_auth
self.socks_scheme = socks_scheme
self._offline_proxies = set()
self._cache = {} # Cache parsed version of FindProxyForURL() return values.
@property
def proxy_auth(self):
"""Proxy authentication object."""
return self._proxy_auth
@proxy_auth.setter
def proxy_auth(self, value):
self._proxy_auth = value
self._cache.clear()
self.unban_all()
def get_proxies(self, url):
"""
Get the proxies that are applicable to a given URL, according to the PAC file.
:param str url: The URL for which to find appropriate proxies.
:return: All the proxies that apply to the given URL.
Can be empty, which means to abort the request.
:rtype: list[str]
"""
hostname = urlparse(url).hostname
if hostname is None:
# URL has no hostname, and PAC functions don't expect to receive nulls.
hostname = ""
value_from_js_func = self.pac.find_proxy_for_url(url, hostname)
if value_from_js_func in self._cache:
return self._cache[value_from_js_func]
config_values = parse_pac_value(value_from_js_func, self.socks_scheme)
if self._proxy_auth:
config_values = [add_proxy_auth(value, self._proxy_auth) for value in config_values]
self._cache[value_from_js_func] = config_values
return config_values
def get_proxy(self, url):
"""
Get a proxy to use for a given URL, excluding any banned ones.
:param str url: The URL for which to find an appropriate proxy.
:return: A proxy to use for the URL,
or the string 'DIRECT', which means a proxy is not to be used.
Can be ``None``, which means to not attempt the request.
:rtype: str|None
"""
proxies = self.get_proxies(url)
for proxy in proxies:
if proxy == "DIRECT" or proxy not in self._offline_proxies:
return proxy
def get_proxy_for_requests(self, url):
"""
Get proxy configuration for a given URL, in a form ready to use with the Requests library.
:param str url: The URL for which to obtain proxy configuration.
:returns: Proxy configuration in a form recognized by Requests, for use with the ``proxies`` parameter.
:rtype: dict
:raises ProxyConfigExhaustedError: If no proxy is configured or available,
and 'DIRECT' is not configured as a fallback.
"""
proxy = self.get_proxy(url)
if not proxy:
raise ProxyConfigExhaustedError(url)
return proxy_parameter_for_requests(proxy)
def ban_proxy(self, proxy_url):
"""
Ban a proxy such that :meth:`get_proxy` and :meth:`get_proxy_for_requests` will never return it.
:param str proxy_url: URL for the proxy to ban.
Must match a proxy URL returned by this class, including any authentication info.
"""
self._offline_proxies.add(proxy_url)
def unban_all(self):
"""Unban any banned proxies."""
self._offline_proxies.clear()
[docs]def add_proxy_auth(possible_proxy_url, proxy_auth):
"""
Add a username and password to a proxy URL, if the input value is a proxy URL.
:param str possible_proxy_url: Proxy URL or ``DIRECT``.
:param requests.auth.HTTPProxyAuth proxy_auth: Proxy authentication info.
:returns: Proxy URL with auth info added, or ``DIRECT``.
:rtype: str
"""
if possible_proxy_url == "DIRECT":
return possible_proxy_url
parsed = urlparse(possible_proxy_url)
return "{0}://{1}:{2}@{3}".format(
parsed.scheme, quote_plus(proxy_auth.username), quote_plus(proxy_auth.password), parsed.netloc
)
[docs]def proxy_parameter_for_requests(proxy_url_or_direct):
"""
:param str proxy_url_or_direct: Proxy URL, or ``DIRECT``. Cannot be empty.
:return: Value for use with the ``proxies`` parameter in Requests.
:rtype: dict
"""
if proxy_url_or_direct == "DIRECT":
# This stops Requests from inheriting environment proxy settings.
proxy_url_or_direct = None
return {
"http": proxy_url_or_direct,
"https": proxy_url_or_direct,
}
[docs]class ProxyConfigExhaustedError(Exception):
def __init__(self, for_url):
super(ProxyConfigExhaustedError, self).__init__("No proxy configured or available for '{}'".format(for_url))