Initial revision
git-svn-id: svn://localhost/trunk/ardour2@4 d708f5d6-7413-0410-9779-e7cbd77b26cf
This commit is contained in:
3
tools/bug_tool/ClientCookie/.cvsignore
Normal file
3
tools/bug_tool/ClientCookie/.cvsignore
Normal file
@@ -0,0 +1,3 @@
|
||||
*.pyc
|
||||
Makefile.in
|
||||
Makefile
|
||||
1833
tools/bug_tool/ClientCookie/_ClientCookie.py
Normal file
1833
tools/bug_tool/ClientCookie/_ClientCookie.py
Normal file
File diff suppressed because it is too large
Load Diff
9
tools/bug_tool/ClientCookie/_Debug.py
Normal file
9
tools/bug_tool/ClientCookie/_Debug.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import ClientCookie
|
||||
|
||||
def debug(text):
    """Write text to the debug stream, but only if debugging is switched on.

    Nothing happens unless ClientCookie.CLIENTCOOKIE_DEBUG is true.
    """
    if not ClientCookie.CLIENTCOOKIE_DEBUG:
        return
    _debug(text)
|
||||
|
||||
def _debug(text, *args):
    """Unconditionally write one line to ClientCookie.DEBUG_STREAM.

    If extra positional arguments are supplied, text is used as a %-format
    string and interpolated with them first.  A newline is always appended.
    """
    message = text
    if args:
        message = message % args
    ClientCookie.DEBUG_STREAM.write(message + "\n")
|
||||
224
tools/bug_tool/ClientCookie/_HeadersUtil.py
Normal file
224
tools/bug_tool/ClientCookie/_HeadersUtil.py
Normal file
@@ -0,0 +1,224 @@
|
||||
"""HTTP header value parsing utility functions.
|
||||
|
||||
from ClientCookie._HeadersUtil import split_header_words
|
||||
values = split_header_words(h.headers["Content-Type"])
|
||||
|
||||
This module provides a few functions that help parsing and construction of
|
||||
valid HTTP header values.
|
||||
|
||||
|
||||
Copyright 1997-1998, Gisle Aas
|
||||
Copyright 2002-2003, John J. Lee
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD License (see the file COPYING included with the
|
||||
distribution).
|
||||
|
||||
"""
|
||||
|
||||
import re, string
|
||||
from types import StringType
|
||||
try:
|
||||
from types import UnicodeType
|
||||
STRING_TYPES = StringType, UnicodeType
|
||||
except:
|
||||
STRING_TYPES = StringType,
|
||||
|
||||
from _Util import startswith, endswith, http2time
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
def unmatched(match):
    """Return the searched string with the matched span removed.

    match: a regular expression match object; the text before and after
    its overall match (group 0) is concatenated and returned.
    """
    begin, finish = match.span(0)
    subject = match.string
    return subject[:begin] + subject[finish:]
|
||||
|
||||
# XXX I really can't see what this =* was for (came from LWP, I guess)
|
||||
#token_re = re.compile(r"^\s*(=*[^\s=;,]+)")
|
||||
token_re = re.compile(r"^\s*([^=\s;,]+)")
|
||||
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
|
||||
value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
|
||||
escape_re = re.compile(r"\\(.)")
|
||||
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    header_values: a sequence of header value strings (NOT a single string).
    Returns a list with one list of (name, value) tuples per header.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    # A bare string would be iterated one character at a time, which is
    # never what the caller intended.
    assert type(header_values) not in STRING_TYPES
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = token_re.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = quoted_value_re.search(text)
                if m:
                    # quoted value: drop the quotes, undo backslash escapes
                    text = unmatched(m)
                    value = escape_re.sub(r"\1", m.group(1))
                else:
                    m = value_re.search(text)
                    if m:
                        # unquoted value
                        text = unmatched(m)
                        value = m.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped[:1] == ",":
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk = re.sub(r"^[=\s;]*", "", text)
                # Guard against looping forever: every pass must consume
                # something.  (Counting substitutions is no use here, since
                # the pattern also matches the empty string.)
                assert len(non_junk) < len(text), (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
|
||||
|
||||
join_escape_re = re.compile(r"([\"\\])")


def join_header_words(lists):
    """Do the inverse of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    rendered = []
    for pairs in lists:
        parts = []
        for key, val in pairs:
            if val is None:
                # a lone token: emitted as-is
                parts.append(key)
                continue
            if re.search(r"^\w+$", val) is None:
                # not a plain word: escape " and \ and wrap in quotes
                val = '"%s"' % join_escape_re.sub(r"\\\1", val)
            if key is None:
                # Netscape cookies may have no name
                parts.append(val)
            else:
                parts.append("%s=%s" % (key, val))
        if parts:
            rendered.append("; ".join(parts))
    return ", ".join(rendered)
|
||||
|
||||
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers: sequence of Set-Cookie header value strings.
    Returns a list with one list of (key, value) tuples per non-empty
    header.  Recognised attribute names are lower-cased, an "expires" value
    is converted with http2time (None if invalid), a parameter with no "="
    that is not a known attribute is returned with key None, and
    ("version", "0") is appended when the header carries no version.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for param in re.split(r";\s*", ns_header):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                if param.lower() in known_attrs:
                    k, v = param, None
                else:
                    # cookie with missing name
                    k, v = None, param
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if k is not None:
                lc = k.lower()
                # "version" is matched case-insensitively too: RFC 2109
                # servers commonly send it capitalised ("Version=1").
                if lc in known_attrs or lc == "version":
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.  Will be treated as RFC 2965
                    # cookie in rest of code.
                    # Probably it should be parsed with split_header_words, but
                    # that's too much hassle.
                    version_set = True
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    # (a bare "expires" with no value is left as None)
                    if v[:1] == '"': v = v[1:]
                    if v[-1:] == '"': v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
|
||||
|
||||
|
||||
def _test():
    """Run the doctests embedded in this module and return testmod's result."""
    import doctest
    import _HeadersUtil
    return doctest.testmod(_HeadersUtil)
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test()
|
||||
377
tools/bug_tool/ClientCookie/_MSIECookieJar.py
Normal file
377
tools/bug_tool/ClientCookie/_MSIECookieJar.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""Microsoft Internet Explorer (MSIE) cookie loading.
|
||||
|
||||
Copyright 1997-1999 Gisle Aas (libwww-perl)
|
||||
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
|
||||
Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD License (see the file COPYING included with the
|
||||
distribution).
|
||||
|
||||
"""
|
||||
|
||||
import os, re, string, time, struct
|
||||
if os.name == "nt":
|
||||
import _winreg
|
||||
|
||||
from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
|
||||
from _Util import startswith
|
||||
from _Debug import debug
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
|
||||
def regload(path, leaf):
    """Read one value from the registry under HKEY_CURRENT_USER.

    path: registry key path below HKEY_CURRENT_USER
    leaf: name of the value to read from that key

    Returns the value's data, or None if the value cannot be queried.  An
    error opening the key itself is not caught and propagates to the caller.

    """
    # Only read access is needed; asking for more can fail needlessly for
    # accounts that may read the key but not modify it.
    key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
                          _winreg.KEY_READ)
    try:
        try:
            value = _winreg.QueryValueEx(key, leaf)[0]
        except WindowsError:
            value = None
    finally:
        # always release the key handle
        _winreg.CloseKey(key)
    return value
|
||||
|
||||
WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
|
||||
|
||||
def epoch_time_offset_from_win32_filetime(filetime):
    """Convert from win32 filetime to seconds-since-epoch value.

    MSIE stores create and expire times as Win32 FILETIME, which is 64
    bits of 100 nanosecond intervals since Jan 01 1601.

    Cookies code expects time in 32-bit value expressed in seconds since
    the epoch (Jan 01 1970).

    Raises ValueError if filetime lies before the epoch.

    """
    if filetime < WIN32_EPOCH:
        raise ValueError("filetime (%d) is before epoch (%d)" %
                         (filetime, WIN32_EPOCH))

    # 10,000,000 intervals of 100ns make one second; the remainder is dropped
    ticks_since_epoch = filetime - WIN32_EPOCH
    whole_seconds, _leftover_ticks = divmod(ticks_since_epoch, 10000000)
    return whole_seconds
|
||||
|
||||
def binary_to_char(c):
    """Return the two-digit upper-case hex code of the single character c."""
    code = ord(c)
    return "%02X" % code
|
||||
def binary_to_str(d):
    """Return d with every character rendered as two upper-case hex digits."""
    return "".join(["%02X" % ord(ch) for ch in d])
|
||||
|
||||
class MSIECookieJar(CookieJar):
    """
    This class differs from CookieJar only in the format it uses to load cookies
    from a file.

    MSIECookieJar can read the cookie files of Microsoft Internet Explorer
    (MSIE) for Windows, versions 5 and 6, on Windows NT and XP respectively.
    Other configurations may also work, but are untested.  Saving cookies in
    MSIE format is NOT supported.  If you save cookies, they'll be in the usual
    Set-Cookie3 format, which you can read back in using an instance of the
    plain old CookieJar class.  Don't save using the same filename that you
    loaded cookies from, because you may succeed in clobbering your MSIE
    cookies index file!

    You should be able to have LWP share Internet Explorer's cookies like
    this (note you need to supply a username to load_from_registry if you're on
    Windows 9x):

    cookies = MSIECookieJar(delayload=1)
    # find cookies index file in registry and load cookies from it
    cookies.load_from_registry()
    opener = ClientCookie.build_opener(ClientCookie.HTTPHandler(cookies))
    response = opener.open("http://foo.bar.com/")

    Iterating over a delayloaded MSIECookieJar instance will not cause any
    cookies to be read from disk.  To force reading of all cookies from disk,
    call read_all_cookies.  Note that the following methods iterate over self:
    clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
    and as_string.

    Additional methods:

    load_from_registry(ignore_discard=False, ignore_expires=False,
                       username=None)
    load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
    read_all_cookies()

    """
    # signature expected at the start of an index.dat file
    magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
    # signature of a padding record in index.dat
    padding = "\x0d\xf0\xad\x0b"

    # splits "domain/path" as stored in a cookie data file
    msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
    cookie_re = re.compile(r"Cookie\:.+\@([\x21-\xFF]+).*?"
                           r"(.+\@[\x21-\xFF]+\.txt)")

    # path under HKEY_CURRENT_USER from which to get location of index.dat
    reg_path = r"software\microsoft\windows" \
               r"\currentversion\explorer\shell folders"
    reg_key = "Cookies"

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as CookieJar and pass them through."""
        # The arguments must be unpacked: handing over the args tuple and
        # kwargs dict as two positional values would bind them to the
        # base class's first two parameters.
        CookieJar.__init__(self, *args, **kwargs)
        # domain -> (cookie_file, ignore_discard, ignore_expires) for
        # domains whose cookie files have not been read yet
        self._delayload_domains = {}

    def set_cookie(self, cookie):
        """Store cookie, first loading any delayed cookies for its domain."""
        if self.delayload:
            self._delayload_domain(cookie.domain)
        CookieJar.set_cookie(self, cookie)

    def _cookies_for_domain(self, domain, request, unverifiable):
        """Return the cookies for domain, lazily loading them if needed."""
        debug("Checking %s for cookies to return" % domain)
        if not self.policy.domain_return_ok(domain, request, unverifiable):
            return []

        if self.delayload:
            self._delayload_domain(domain)

        return CookieJar._cookies_for_domain(
            self, domain, request, unverifiable)

    def read_all_cookies(self):
        """Eagerly read in all cookies."""
        if self.delayload:
            # iterate over a copy: _delayload_domain deletes entries
            for domain in list(self._delayload_domains.keys()):
                self._delayload_domain(domain)

    def _delayload_domain(self, domain):
        """Load the cookie file recorded for domain, if there is one.

        The domain stays registered if its file cannot be read, so a later
        call tries again.
        """
        # if necessary, lazily load cookies for this domain
        delayload_info = self._delayload_domains.get(domain)
        if delayload_info is not None:
            cookie_file, ignore_discard, ignore_expires = delayload_info
            try:
                self.load_cookie_data(cookie_file,
                                      ignore_discard, ignore_expires)
            except IOError:
                debug("error reading cookie file, skipping: %s" % cookie_file)
            else:
                del self._delayload_domains[domain]

    def _load_cookies_from_file(self, filename):
        """Parse one MSIE cookie data file into a list of record dicts.

        Each record occupies nine lines: name, value, domain/path, flags,
        low and high words of the expiry time, low and high words of the
        creation time, and a "*" separator line.  Reading stops at end of
        file or at the first truncated or malformed record; a record whose
        domain/path line cannot be split is skipped.

        Returns a list of dicts with keys KEY, VALUE, DOMAIN, PATH, FLAGS,
        HIXP, LOXP, HICREATE and LOCREATE.
        """
        cookies = []

        cookies_fh = open(filename)

        rl = cookies_fh.readline
        # rl is bound through a default argument so that the helpers do not
        # depend on nested scopes
        def getlong(rl=rl): return long(rl().rstrip())
        def getstr(rl=rl): return rl().rstrip()

        try:
            while 1:
                key = rl()
                if key == "": break
                key = key.rstrip()

                try:
                    value = getstr()
                    domain_path = getstr()
                    flags = getlong()  # 0x2000 bit is for secure I think
                    lo_expire = getlong()
                    hi_expire = getlong()
                    lo_create = getlong()
                    hi_create = getlong()
                    sep = getstr()
                except ValueError:
                    # a numeric field was missing or garbled: the record is
                    # truncated or corrupt, so stop reading
                    break

                if "" in (key, value, domain_path) or sep != "*":
                    break

                m = self.msie_domain_re.search(domain_path)
                if not m:
                    # without this the record would be stored under the
                    # previous record's domain and path
                    debug("skipping cookie with unparseable domain/path: %s" %
                          domain_path)
                    continue
                domain = m.group(1)
                path = m.group(2)

                cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
                                "PATH": path, "FLAGS": flags, "HIXP": hi_expire,
                                "LOXP": lo_expire, "HICREATE": hi_create,
                                "LOCREATE": lo_create})
        finally:
            cookies_fh.close()

        return cookies

    def load_cookie_data(self, filename,
                         ignore_discard=False, ignore_expires=False):
        r"""Load cookies from file containing actual cookie data.

        Old cookies are kept unless overwritten by newly loaded ones.

        You should not call this method if the delayload attribute is set.

        I think each of these files contain all cookies for one user, domain,
        and path.

        filename: file containing cookies -- usually found in a file like
         C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt

        """
        now = int(time.time())

        cookie_data = self._load_cookies_from_file(filename)

        for cookie in cookie_data:
            flags = cookie["FLAGS"]
            secure = ((flags & 0x2000) != 0)
            # the expiry time is stored as two 32-bit halves of a FILETIME
            filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
            expires = epoch_time_offset_from_win32_filetime(filetime)
            if expires < now:
                discard = True
            else:
                discard = False
            domain = cookie["DOMAIN"]
            initial_dot = startswith(domain, ".")
            if initial_dot:
                domain_specified = True
            else:
                # MSIE 5 does not record whether the domain cookie-attribute
                # was specified.
                # Assuming it wasn't is conservative, because with strict
                # domain matching this will match less frequently; with regular
                # Netscape tail-matching, this will match at exactly the same
                # times that domain_specified = True would.  It also means we
                # don't have to prepend a dot to achieve consistency with our
                # own & Mozilla's domain-munging scheme.
                domain_specified = False

            # assume path_specified is false
            # XXX is there other stuff in here? -- eg. comment, commentURL?
            c = Cookie(0,
                       cookie["KEY"], cookie["VALUE"],
                       None, False,
                       domain, domain_specified, initial_dot,
                       cookie["PATH"], False,
                       secure,
                       expires,
                       discard,
                       None,
                       None,
                       {"flags": flags})
            if not ignore_discard and c.discard:
                continue
            if not ignore_expires and c.is_expired(now):
                continue
            self.set_cookie(c)

    def load_from_registry(self, ignore_discard=False, ignore_expires=False,
                           username=None):
        """Find the cookies index file via the registry and load it.

        username: only required on win9x

        Raises IOError if the registry holds no cookies directory.

        """
        cookies_dir = regload(self.reg_path, self.reg_key)
        if cookies_dir is None:
            # regload reports an unreadable value as None
            raise IOError("cannot find MSIE cookies directory in registry")
        filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
        self.load(filename, ignore_discard, ignore_expires, username)

    def load(self, filename, ignore_discard=False, ignore_expires=False,
             username=None):
        """Load cookies from an MSIE 'index.dat' cookies index file.

        filename: full path to cookie index file
        username: only required on win9x

        If filename is None, self.filename is used; ValueError is raised
        when that is None as well.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        index = open(filename, "rb")

        try:
            self._really_load(index, filename, ignore_discard, ignore_expires,
                              username)
        finally:
            index.close()

    def _really_load(self, index, filename, ignore_discard, ignore_expires,
                     username):
        """Walk the records of an open index.dat and load or register cookies.

        index: open binary file object for the index file
        filename: its path, used for error messages and to locate the
         per-site cookie files in the same directory
        username: user whose cookies are wanted; taken from the USERNAME
         environment variable if None

        Without delayload each referenced cookie file is loaded at once;
        with delayload it is only recorded against its domain.

        Raises IOError if the file is too short or lacks the expected
        signature.
        """
        if username is None:
            username = os.environ['USERNAME'].lower()

        cookie_dir = os.path.dirname(filename)

        data = index.read(256)
        if len(data) != 256:
            raise IOError("%s file is too short" % filename)

        # Cookies' index.dat file starts with 32 bytes of signature
        # followed by an offset to the first record, stored as a little-
        # endian DWORD.
        sig, size, data = data[:32], data[32:36], data[36:]
        size = struct.unpack("<L", size)[0]

        # check that sig is valid
        if not self.magic_re.match(sig) or size != 0x4000:
            raise IOError("%s ['%s' %s] does not seem to contain cookies" %
                          (str(filename), sig, size))

        # skip to start of first record
        index.seek(size, 0)

        sector = 128  # size of sector in bytes

        # Built once, outside the loop.  The user name is escaped so that
        # characters such as "." or "(" in it are matched literally.
        user_pattern = re.escape(username)
        cookie_re = re.compile(
            r"Cookie\:%s\@([\x21-\xFF]+).*?" % user_pattern +
            r"(%s\@[\x21-\xFF]+\.txt)" % user_pattern,
            re.I)

        while 1:
            # Cookies are usually in two contiguous sectors, so read in two
            # sectors and adjust if not a Cookie.
            to_read = 2 * sector
            data = index.read(to_read)
            if len(data) != to_read:
                break

            # Each record starts with a 4-byte signature and a count
            # (little-endian DWORD) of sectors for the record.
            sig, size, data = data[:4], data[4:8], data[8:]
            size = struct.unpack("<L", size)[0]

            to_read = (size - 2) * sector

            if sig != "URL ":
                # NB: condition and message must not be wrapped together in
                # one pair of parentheses -- that asserts a (non-empty) tuple
                # and can never fail.
                assert sig in ("HASH", "LEAK",
                               self.padding, "\x00\x00\x00\x00"), (
                    "unrecognized MSIE index.dat record: %s" %
                    binary_to_str(sig))
                if sig == "\x00\x00\x00\x00":
                    # assume we've got all the cookies, and stop
                    break
                if sig == self.padding:
                    continue
                # skip the rest of this record
                assert to_read >= 0
                if size != 2:
                    assert to_read != 0
                    index.seek(to_read, 1)
                continue

            # read in rest of record if necessary
            if size > 2:
                more_data = index.read(to_read)
                if len(more_data) != to_read: break
                data = data + more_data

            m = cookie_re.search(data)
            if m:
                cookie_file = os.path.join(cookie_dir, m.group(2))
                if not self.delayload:
                    try:
                        self.load_cookie_data(cookie_file,
                                              ignore_discard, ignore_expires)
                    except IOError:
                        debug("error reading cookie file, skipping: %s" %
                              cookie_file)
                else:
                    # remember where this domain's cookies live; any path
                    # part after the host name is dropped
                    domain = m.group(1)
                    i = domain.find("/")
                    if i != -1:
                        domain = domain[:i]

                    self._delayload_domains[domain] = (
                        cookie_file, ignore_discard, ignore_expires)
|
||||
171
tools/bug_tool/ClientCookie/_MozillaCookieJar.py
Normal file
171
tools/bug_tool/ClientCookie/_MozillaCookieJar.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""Mozilla / Netscape cookie loading / saving.
|
||||
|
||||
Copyright 1997-1999 Gisle Aas (libwww-perl)
|
||||
Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD License (see the file COPYING included with the
|
||||
distribution).
|
||||
|
||||
"""
|
||||
|
||||
import sys, re, string, time
|
||||
|
||||
import ClientCookie
|
||||
from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
|
||||
from _Util import startswith, endswith
|
||||
from _Debug import debug
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
try: issubclass(Exception(), (Exception,))
|
||||
except TypeError:
|
||||
real_issubclass = issubclass
|
||||
from _Util import compat_issubclass
|
||||
issubclass = compat_issubclass
|
||||
del compat_issubclass
|
||||
|
||||
|
||||
class MozillaCookieJar(CookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Netscape/Mozilla
    `cookies.txt' format.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Netscape/Mozilla format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # pattern the first line of a cookies file must contain
    magic_re = "#( Netscape)? HTTP Cookie File"
    # text written at the top of every saved file
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read cookies from the open cookies.txt file object f.

        filename is used in error messages only.  Raises IOError if the
        first line lacks the expected header, or if a line cannot be parsed
        (other exceptions are re-raised unchanged only for the cases listed
        in the handler below).
        """
        now = time.time()

        first_line = f.readline()
        if re.search(self.magic_re, first_line) is None:
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        try:
            while 1:
                line = f.readline()
                if line == "":
                    break

                # last field may be absent, so keep any trailing tab
                if endswith(line, "\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (startswith(stripped, "#") or
                    startswith(stripped, "$") or
                    stripped == ""):
                    continue

                fields = line.split("\t")
                (domain, domain_specified, path, secure, expires,
                 name, value) = fields
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    name = None

                initial_dot = startswith(domain, ".")
                assert domain_specified == initial_dot

                # an empty expiry field marks a cookie to be discarded
                discard = (expires == "")
                if discard:
                    expires = None

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            # Report anything unexpected as a bad file, except IOError,
            # interpreter-exit requests and, when debugging, every
            # Exception -- those are passed on untouched.
            if ClientCookie.CLIENTCOOKIE_DEBUG:
                passthrough = (Exception,)
            else:
                passthrough = (KeyboardInterrupt, SystemExit)
            exc_class = sys.exc_info()[0]
            if (issubclass(exc_class, IOError) or
                issubclass(exc_class, passthrough)):
                raise
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar's cookies to filename in cookies.txt format.

        filename defaults to self.filename; ValueError is raised if both are
        None.  Cookies marked for discard, and expired cookies, are left out
        unless ignore_discard / ignore_expires is true.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            debug("Saving Netscape cookies.txt file")
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    debug(" Not saving %s: marked for discard" % cookie.name)
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    debug(" Not saving %s: expired" % cookie.name)
                    continue

                secure = "FALSE"
                if cookie.secure:
                    secure = "TRUE"

                initial_dot = "FALSE"
                if startswith(cookie.domain, "."):
                    initial_dot = "TRUE"

                expires = ""
                if cookie.expires is not None:
                    expires = str(cookie.expires)

                name = ""
                if cookie.name is not None:
                    name = cookie.name

                columns = [cookie.domain, initial_dot, cookie.path,
                           secure, expires, name, cookie.value]
                f.write("\t".join(columns) + "\n")
        finally:
            f.close()
|
||||
459
tools/bug_tool/ClientCookie/_Util.py
Normal file
459
tools/bug_tool/ClientCookie/_Util.py
Normal file
@@ -0,0 +1,459 @@
|
||||
"""Python backwards-compat., date/time routines, seekable file object wrapper.
|
||||
|
||||
Copyright 2002-2003 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD License (see the file COPYING included with the
|
||||
distribution).
|
||||
|
||||
"""
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
import re, string, time
|
||||
from types import TupleType
|
||||
from StringIO import StringIO
|
||||
|
||||
try:
|
||||
from exceptions import StopIteration
|
||||
except ImportError:
|
||||
from ClientCookie._ClientCookie import StopIteration
|
||||
|
||||
def startswith(string, initial):
    """Return a true value if string begins with initial, else a false one."""
    prefix_len = len(initial)
    if prefix_len > len(string):
        return False
    return string[:prefix_len] == initial
|
||||
|
||||
def endswith(string, final):
    """Return a true value if string ends with final, else a false one.

    An empty final matches every string, as with startswith.
    """
    suffix_len = len(final)
    if suffix_len > len(string):
        return False
    # Slice from an explicit start index: string[-0:] would be the whole
    # string, which made an empty suffix fail to match.
    return string[len(string) - suffix_len:] == final
|
||||
|
||||
def compat_issubclass(obj, tuple_or_class):
    """issubclass that also accepts a tuple of classes as second argument.

    With a tuple, the result is true if obj is a subclass of any member.
    """
    # for 2.1 and below
    if type(tuple_or_class) != type(()):
        return issubclass(obj, tuple_or_class)
    for candidate in tuple_or_class:
        if issubclass(obj, candidate):
            return True
    return False
|
||||
|
||||
def isstringlike(x):
    """Return True if x can be concatenated with a string, else False."""
    try:
        x + ""
    except:
        # any failure at all means x does not behave like a string
        return False
    return True
|
||||
|
||||
|
||||
try:
    from calendar import timegm
    timegm((2045, 1, 1, 22, 23, 32))  # overflows in 2.1
except:
    # Deliberately broad: whatever goes wrong with the library function
    # (missing or overflowing), fall back on the implementation below.

    # Number of days per month (except for February in leap years)
    mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    def isleap(year):
        """Return 1 for leap years, 0 for non-leap years."""
        return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

    def leapdays(y1, y2):
        """Return number of leap years in range [y1, y2).

        Assume y1 <= y2.  Century years are handled by the same rule as
        isleap, so the count stays right beyond 2100.
        """
        y1 = y1 - 1
        y2 = y2 - 1
        # divmod(...)[0] is floor division whatever the interpreter's
        # meaning of "/"
        return ((divmod(y2, 4)[0] - divmod(y1, 4)[0])
                - (divmod(y2, 100)[0] - divmod(y1, 100)[0])
                + (divmod(y2, 400)[0] - divmod(y1, 400)[0]))

    EPOCH = 1970
    def timegm(tuple):
        """Unrelated but handy function to calculate Unix timestamp from GMT.

        tuple: time tuple whose first six items are year, month, day, hour,
        minute and second.  The year must not be before 1970.
        """
        year, month, day, hour, minute, second = tuple[:6]
        assert year >= EPOCH
        assert 1 <= month <= 12
        days = 365*(year-EPOCH) + leapdays(EPOCH, year)
        for i in range(1, month):
            days = days + mdays[i]
        if month > 2 and isleap(year):
            days = days + 1
        days = days + day - 1
        hours = days*24 + hour
        minutes = hours*60 + minute
        # a long operand keeps the product from overflowing a plain int
        seconds = minutes*long(60) + second
        return seconds
|
||||
|
||||
|
||||
# Date/time conversion routines for formats used by the HTTP protocol.
|
||||
|
||||
EPOCH = 1970


def my_timegm(tt):
    """Return timegm(tt), or None if a field of tt is out of range.

    Only the first six items of tt (year, month, day of month, hour,
    minute, second) are checked.
    """
    year, month, mday, hour, minute, second = tt[:6]
    in_range = ((year >= EPOCH) and (1 <= month <= 12) and
                (1 <= mday <= 31) and (0 <= hour <= 24) and
                (0 <= minute <= 59) and (0 <= second <= 61))
    if not in_range:
        return None
    return timegm(tt)
|
||||
|
||||
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
|
||||
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
|
||||
months_lower = []
|
||||
for month in months: months_lower.append(string.lower(month))
|
||||
|
||||
|
||||
def time2isoz(t=None):
    """Format t (seconds since the epoch) as an ISO-style UTC timestamp.

    When t is omitted or None, the current time is used.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and is expressed in
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # year, month, day, hour, minute, second
    fields = tuple(time.gmtime(t)[:6])
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
|
||||
|
||||
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wdy, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
    # The comma after the weekday is part of the format documented above;
    # it used to be missing from the output.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        days[wday], mday, months[mon-1], year, hour, min, sec)
|
||||
|
||||
|
||||
# Timezone names that mean "zero offset from UTC" (only the keys matter).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric timezone: optional sign, one or two hour digits, optional colon,
# optional two minute digits.
timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")

def offset_from_tz_string(tz):
    """Return the offset from UTC, in seconds, described by tz.

    tz is either one of the names in UTC_ZONES (offset 0) or a numeric
    zone such as "+0100" or "-08:00".  None is returned when tz is neither.
    """
    if tz in UTC_ZONES:
        return 0
    match = timezone_re.search(tz)
    if not match:
        return None
    sign, hours, minutes = match.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds = seconds + 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
|
||||
|
||||
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date/time fields to seconds since the epoch.

    All arguments are strings as captured by the date regular expressions
    (hr, min, sec and tz may be None).  mon may be a month name or a month
    number.  A year below 1000 is taken as an abbreviated year and mapped
    to the century that puts it closest to the current year.

    Returns None if the month or the timezone is not recognized, or if
    my_timegm rejects the resulting fields.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = months_lower.index(mon.lower()) + 1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO 8601 input may carry fractional seconds ("29.5"), which int()
    # alone rejects; the fraction is dropped.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = my_timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
|
||||
|
||||
|
||||
# Exactly the preferred HTTP date format, e.g. "Wed, 09 Feb 1994 22:23:32 GMT".
strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
# Leading weekday name (abbreviated or full) with optional comma.
wkday_re = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
loose_http_re = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)

def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of text is unrecognized, the time is
    outside the representable range, or the timezone string is not
    recognized.  If the string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the century that makes the
    year closest to the current date is selected.

    """
    # fast exit for strictly conforming string
    m = strict_re.search(text)
    if m:
        g = m.groups()
        try:
            mon = months_lower.index(g[1].lower()) + 1
        except ValueError:
            # month-shaped but not a month (e.g. "Foo"): unrecognized date,
            # not an error for the caller to handle
            return None
        # seconds are two digits here, so int() keeps the result integral
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return my_timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = wkday_re.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = loose_http_re.search(text)
    if m is None:
        return None  # bad format
    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()

    return _str2time(day, mon, yr, hr, min, sec, tz)
|
||||
|
||||
|
||||
# Raw string: the pattern is full of regex escapes (\d, \s, \/, \.) that are
# not valid Python string escapes.
iso_re = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)

def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Returns None if text does not match any of these formats.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = iso_re.search(text)
    if m is None:
        return None  # bad format

    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    # tz is time zone specifier string
    yr, mon, day, hr, min, sec, tz, _ = m.groups()

    return _str2time(day, mon, yr, hr, min, sec, tz)
|
||||
|
||||
|
||||
|
||||
# XXX Andrew Dalke kindly sent me a similar class in response to my request on
|
||||
# comp.lang.python, which I then proceeded to lose. I wrote this class
|
||||
# instead, but I think he's released his code publicly since, could pinch the
|
||||
# tests from it, at least...
|
||||
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attribute: wrapped (the wrapped file object).

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.

    """
    # General strategy is to check that cache is full enough, then delegate
    # everything to the cache (self.__cache, which is a StringIO instance).
    # Seems to be some cStringIO.StringIO problem on 1.5.2 -- I get a
    # StringO object, with no readlines method.

    # Invariant: the end of the cache is always at the same place as the
    # end of the data read so far from the wrapped file.  Data read from
    # the wrapped file is therefore only ever appended to the cache.

    def __init__(self, wrapped):
        self.wrapped = wrapped
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()

    def __getattr__(self, name):
        # Only called for attributes not found normally: pass them through
        # to the wrapped object.  Guard "wrapped" itself so a half-built
        # instance cannot recurse forever.
        if name == "wrapped":
            raise AttributeError(name)
        return getattr(self.wrapped, name)

    def __cache_end(self):
        """Return the size of the cache, leaving its position unchanged."""
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        end = self.__cache.tell()
        self.__cache.seek(pos)
        return end

    def __append(self, data):
        """Add data at the end of the cache, leaving its position unchanged."""
        pos = self.__cache.tell()
        self.__cache.seek(0, 2)
        self.__cache.write(data)
        self.__cache.seek(pos)

    def seek(self, offset, whence=0):
        """Move the read position, reading from the wrapped file as needed.

        whence is 0 (absolute), 1 (relative to the current position) or 2
        (relative to the end of the wrapped file); anything else raises
        ValueError.  A target before the start of the data is treated as 0.
        Returns whatever the cache's own seek method returns.
        """
        pos = self.__cache.tell()
        end = self.__cache_end()
        if whence == 0:    # absolute
            target = offset
        elif whence == 1:  # relative to current position
            target = pos + offset
        elif whence == 2:  # relative to end of *wrapped* file
            # since we don't know yet where the end of that file is, we must
            # read everything
            self.__append(self.wrapped.read())
            end = self.__cache_end()
            target = end + offset
        else:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))

        # make sure we have read all data up to the point we are seeking to
        if target > end:
            self.__append(self.wrapped.read(target - end))

        # Always seek absolutely: the target is already resolved.
        return self.__cache.seek(max(target, 0))

    def read(self, size=-1):
        """Read up to size characters; a negative or None size reads all."""
        if size is None or size < 0:
            self.__append(self.wrapped.read())
            return self.__cache.read()

        # enough data already cached?
        available = self.__cache_end() - self.__cache.tell()
        if size > available:
            # no, so read sufficient data from wrapped file and cache it
            self.__append(self.wrapped.read(size - available))

        return self.__cache.read(size)

    def readline(self, size=-1):
        """Read one line, or at most size characters of it if size >= 0.

        Raises NotImplementedError if the wrapped object has no readline.
        """
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")

        # line we're about to read might not be complete in the cache, so
        # read another line first
        self.__append(self.wrapped.readline())

        pos = self.__cache.tell()
        data = self.__cache.readline()
        if size is not None and 0 <= size < len(data):
            # Only part of the line was asked for: hand back that part and
            # leave the rest to be read next.  (When the line is shorter
            # than size the cache is already positioned just after it.)
            data = data[:size]
            self.__cache.seek(pos + size)
        return data

    def readlines(self, sizehint=-1):
        """Return the remaining lines as a list (reads the whole file)."""
        self.__append(self.wrapped.read())
        try:
            return self.__cache.readlines(sizehint)
        except TypeError:  # 1.5.2 hack
            return self.__cache.readlines()

    def __iter__(self): return self

    def next(self):
        line = self.readline()
        if line == "": raise StopIteration
        return line

    # spelling of the iterator protocol used by newer Pythons
    __next__ = next

    xreadlines = __iter__

    def __repr__(self):
        return ("<%s at %s whose wrapped object = %s>" %
                (self.__class__.__name__, repr(id(self)), repr(self.wrapped)))

    def close(self):
        """Close the wrapped object and disable further reading/seeking."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.seek = None
        # "is not None" rather than truthiness: an object that happens to
        # be false must still be closed
        if self.wrapped is not None: self.wrapped.close()
        self.wrapped = None
|
||||
49
tools/bug_tool/ClientCookie/__init__.py
Normal file
49
tools/bug_tool/ClientCookie/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# Import names so that they can be imported directly from the package, like
|
||||
# this:
|
||||
#from ClientCookie import <whatever>
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
import sys
|
||||
|
||||
# don't edit these here: do eg.
|
||||
# import ClientCookie; ClientCookie.HTTP_DEBUG = 1
|
||||
DEBUG_STREAM = sys.stderr
|
||||
CLIENTCOOKIE_DEBUG = False
|
||||
REDIRECT_DEBUG = False
|
||||
HTTP_DEBUG = False
|
||||
|
||||
from _ClientCookie import VERSION, __doc__, \
|
||||
CookieJar, Cookie, \
|
||||
CookiePolicy, DefaultCookiePolicy, \
|
||||
lwp_cookie_str
|
||||
from _MozillaCookieJar import MozillaCookieJar
|
||||
from _MSIECookieJar import MSIECookieJar
|
||||
try:
|
||||
from urllib2 import AbstractHTTPHandler
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
from ClientCookie._urllib2_support import \
|
||||
HTTPHandler, build_opener, install_opener, urlopen, \
|
||||
HTTPRedirectHandler
|
||||
from ClientCookie._urllib2_support import \
|
||||
OpenerDirector, BaseProcessor, \
|
||||
HTTPRequestUpgradeProcessor, \
|
||||
HTTPEquivProcessor, SeekableProcessor, HTTPCookieProcessor, \
|
||||
HTTPRefererProcessor, HTTPStandardHeadersProcessor, \
|
||||
HTTPRefreshProcessor, HTTPErrorProcessor, \
|
||||
HTTPResponseDebugProcessor
|
||||
|
||||
import httplib
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
from ClientCookie._urllib2_support import HTTPSHandler
|
||||
del AbstractHTTPHandler, httplib
|
||||
from _Util import http2time
|
||||
str2time = http2time
|
||||
del http2time
|
||||
|
||||
del sys
|
||||
713
tools/bug_tool/ClientCookie/_urllib2_support.py
Normal file
713
tools/bug_tool/ClientCookie/_urllib2_support.py
Normal file
@@ -0,0 +1,713 @@
|
||||
"""Integration with Python standard library module urllib2.
|
||||
|
||||
Also includes a redirection bugfix, support for parsing HTML HEAD blocks for
|
||||
the META HTTP-EQUIV tag contents, and following Refresh header redirects.
|
||||
|
||||
Copyright 2002-2003 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD License (see the file COPYING included with the
|
||||
distribution).
|
||||
|
||||
"""
|
||||
|
||||
import copy, time
|
||||
|
||||
import ClientCookie
|
||||
from _ClientCookie import CookieJar, request_host
|
||||
from _Util import isstringlike
|
||||
from _Debug import _debug
|
||||
|
||||
try: True
|
||||
except NameError:
|
||||
True = 1
|
||||
False = 0
|
||||
|
||||
CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
|
||||
|
||||
try:
|
||||
from urllib2 import AbstractHTTPHandler
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
import urlparse, urllib2, urllib, httplib, htmllib, formatter, string
|
||||
from urllib2 import URLError, HTTPError
|
||||
import types, string, socket
|
||||
from cStringIO import StringIO
|
||||
from _Util import seek_wrapper
|
||||
try:
|
||||
import threading
|
||||
_threading = threading; del threading
|
||||
except ImportError:
|
||||
import dummy_threading
|
||||
_threading = dummy_threading; del dummy_threading
|
||||
|
||||
# This fixes a bug in urllib2 as of Python 2.1.3 and 2.2.2
|
||||
# (http://www.python.org/sf/549151)
|
||||
# 2.2.3 is broken here (my fault!), 2.3 is fixed.
|
||||
class HTTPRedirectHandler(urllib2.BaseHandler):
    """Follow 301, 302, 303 and 307 redirections, with loop detection."""

    # maximum number of redirections before assuming we're in a loop
    max_redirections = 10

    # Implementation notes:

    # To avoid the server sending us into an infinite loop, the request
    # object needs to track what URLs we have already seen.  Do this by
    # adding a handler-specific attribute to the Request object.  The value
    # of the dict is used to count the number of times the same url has
    # been visited.  This is needed because this isn't necessarily a loop:
    # there is more than one way to redirect (Refresh, 302, 303, 307).

    # Another handler-specific Request attribute, original_url, is used to
    # remember the URL of the original request so that it is possible to
    # decide whether or not RFC 2965 cookies should be turned on during
    # redirect.

    # Always unhandled redirection codes:
    # 300 Multiple Choices: should not handle this here.
    # 304 Not Modified: no need to handle here: only of interest to caches
    #     that do conditional GETs
    # 305 Use Proxy: probably not worth dealing with here
    # 306 Unused: what was this for in the previous versions of protocol??

    def redirect_request(self, newurl, req, fp, code, msg, headers):
        """Return a Request in response to a redirect, or raise HTTPError.

        This is called by the http_error_30x methods when a redirection
        response is received.  If a redirection should take place, a new
        Request is returned to allow http_error_30x to perform the
        redirect; otherwise HTTPError is raised.  (http_error_30x also
        accepts None from an overriding implementation, and then does not
        redirect.)

        """
        if code in (301, 302, 303) or (code == 307 and not req.has_data()):
            # Strictly (according to RFC 2616), 301 or 302 in response to
            # a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we do
            # the same.
            return Request(newurl, headers=req.headers)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirection named by the Location (or URI) header.

        Returns the response obtained by opening the new URL, or None if
        there is nothing to follow.  Raises HTTPError when a redirection
        loop is suspected.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(newurl, req, fp, code, msg, headers)
        if new is None:
            return

        # remember where we started from
        if hasattr(req, "original_url"):
            new.original_url = req.original_url
        else:
            new.original_url = req.get_full_url()

        # origin_req_host is not guaranteed to exist on req, so only copy
        # it when present instead of failing with AttributeError.
        if hasattr(req, "origin_req_host"):
            new.origin_req_host = req.origin_req_host

        # loop detection
        # .error_302_dict[(url, code)] is number of times url
        # previously visited as a result of a redirection with this
        # code (error_30x_dict would be a better name).
        if not hasattr(req, 'error_302_dict'):
            new.error_302_dict = req.error_302_dict = {(newurl, code): 1}
        else:
            ed = new.error_302_dict = req.error_302_dict
            nr_visits = ed.get((newurl, code), 0)
            # Refreshes generate fake 302s, so we can hit the same URL as
            # a result of the same redirection code twice without
            # necessarily being in a loop!  So, allow two visits to each
            # URL as a result of each redirection code.
            if len(ed) < self.max_redirections and nr_visits < 2:
                ed[(newurl, code)] = nr_visits + 1
            else:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)

        if ClientCookie.REDIRECT_DEBUG:
            _debug("redirecting to %s", newurl)

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
|
||||
|
||||
|
||||
class Request(urllib2.Request):
    """urllib2.Request with a second set of "unredirected" headers.

    Headers added with add_unredirected_header are kept apart from the
    ordinary headers in the unredirected_hdrs dictionary.
    """

    # The shared default dict is only passed on to the base class and is
    # never modified here.
    def __init__(self, url, data=None, headers={}):
        urllib2.Request.__init__(self, url, data, headers)
        self.unredirected_hdrs = {}

    def add_unredirected_header(self, key, val):
        # these headers do not persist from one request to the next in a chain
        # of requests
        self.unredirected_hdrs[key.capitalize()] = val

    def has_key(self, header_name):
        """Return true if header_name is set in either set of headers."""
        if (header_name in self.headers or
            header_name in self.unredirected_hdrs):
            return True
        return False

    def get(self, header_name, failobj=None):
        """Return the value of header_name, or failobj if it is not set.

        Ordinary headers take precedence over unredirected ones.
        """
        if header_name in self.headers:
            return self.headers[header_name]
        # The attribute is unredirected_hdrs; the name used here before
        # did not exist and made every such lookup fail.
        if header_name in self.unredirected_hdrs:
            return self.unredirected_hdrs[header_name]
        return failobj
|
||||
|
||||
|
||||
class BaseProcessor:
    """Base class for request/response processors.

    processor_order is the key used by __lt__ to order processors.
    """
    processor_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        self.parent = None

    def __lt__(self, other):
        # Objects without a processor_order always compare as greater.
        if hasattr(other, "processor_order"):
            return self.processor_order < other.processor_order
        return True
|
||||
|
||||
class HTTPRequestUpgradeProcessor(BaseProcessor):
    """Replace plain requests by Request objects.

    Request adds support for headers that don't get redirected.
    """
    processor_order = 0  # before anything else

    def http_request(self, request):
        if hasattr(request, "add_unredirected_header"):
            # already has the extended interface
            return request
        return Request(request._Request__original, request.data,
                       request.headers)

    https_request = http_request
|
||||
|
||||
class HTTPEquivProcessor(BaseProcessor):
    """Append META HTTP-EQUIV headers to regular HTTP headers."""

    def http_response(self, request, response):
        seekable = response
        if not hasattr(seekable, "seek"):
            seekable = seek_wrapper(seekable)
        # copy each header that parse_head yields into the HTTP headers
        http_headers = seekable.info()
        for name, value in parse_head(seekable):
            http_headers[name] = value
        # rewind so that the body can be read again from the start
        seekable.seek(0)
        return seekable

    https_response = http_response
|
||||
|
||||
# XXX ATM this only takes notice of http responses -- probably
|
||||
# should be independent of protocol scheme (http, ftp, etc.)
|
||||
class SeekableProcessor(BaseProcessor):
    """Make responses seekable."""

    def http_response(self, request, response):
        # responses that can already seek are passed through untouched
        if hasattr(response, "seek"):
            return response
        return seek_wrapper(response)

    https_response = http_response
|
||||
|
||||
# XXX if this gets added to urllib2, unverifiable would end up as an
|
||||
# attribute on Request.
|
||||
class HTTPCookieProcessor(BaseProcessor):
    """Handle HTTP cookies."""

    def __init__(self, cookies=None):
        # a fresh CookieJar is created when none is supplied
        if cookies is None:
            cookies = CookieJar()
        self.cookies = cookies

    def _unverifiable(self, request):
        """Return True if request is a redirect or is marked unverifiable."""
        # a non-empty error_302_dict means the request is a redirection
        if hasattr(request, "error_302_dict") and request.error_302_dict:
            return True
        if hasattr(request, "unverifiable") and request.unverifiable:
            return True
        return False

    def http_request(self, request):
        is_unverifiable = self._unverifiable(request)
        if not is_unverifiable:
            # Stuff request-host of this origin transaction into Request
            # object, because we need to know it to know whether cookies
            # should be in operation during derived requests (redirects,
            # specifically -- including refreshes).
            request.origin_req_host = request_host(request)
        self.cookies.add_cookie_header(request, is_unverifiable)
        return request

    def http_response(self, request, response):
        self.cookies.extract_cookies(response, request,
                                     self._unverifiable(request))
        return response

    https_request = http_request
    https_response = http_response
|
||||
|
||||
class HTTPRefererProcessor(BaseProcessor):
    """Add Referer header to requests.

    This only makes sense if you use each RefererProcessor for a single
    chain of requests only (so, for example, if you use a single
    HTTPRefererProcessor to fetch a series of URLs extracted from a single
    page, this will break).

    """
    def __init__(self):
        # URL of the most recent response seen, if any
        self.referer = None

    def http_request(self, request):
        # nothing to add before the first response, or when the caller
        # already supplied a Referer
        if self.referer is None or request.has_key("Referer"):
            return request
        request.add_unredirected_header("Referer", self.referer)
        return request

    def http_response(self, request, response):
        self.referer = response.geturl()
        return response

    https_request = http_request
    https_response = http_response
|
||||
|
||||
class HTTPStandardHeadersProcessor(BaseProcessor):
    """Add Content-type, Content-length, Host and the opener's default
    headers to requests that do not already have them.

    All of them are added as unredirected headers.
    """

    def http_request(self, request):
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            body = request.get_data()
            if not request.has_key('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_key('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(body))

        # a host named in the selector takes precedence over get_host()
        scheme, rest = urllib.splittype(request.get_selector())
        sel_host, sel_path = urllib.splithost(rest)
        if not request.has_key('Host'):
            request.add_unredirected_header('Host', sel_host or host)

        for name, value in self.parent.addheaders:
            header = name.capitalize()
            if request.has_key(header):
                continue
            request.add_unredirected_header(header, value)

        return request

    https_request = http_request
|
||||
|
||||
class HTTPResponseDebugProcessor(BaseProcessor):
    """Write the body of every response to the debug stream."""
    processor_order = 900  # before redirections, after everything else

    def http_response(self, request, response):
        seekable = response
        if not hasattr(seekable, "seek"):
            seekable = seek_wrapper(seekable)
        body = seekable.read()
        _debug(body)
        _debug("*****************************************************")
        # rewind so that the body can still be read by the caller
        seekable.seek(0)
        return seekable

    https_response = http_response
|
||||
|
||||
class HTTPRefreshProcessor(BaseProcessor):
    """Perform HTTP Refresh redirections.

    Note that if a non-200 HTTP code has occurred (for example, a 30x
    redirect), this processor will do nothing.

    By default, only zero-time Refresh headers are redirected.  Use the
    max_time constructor argument to allow Refresh with longer pauses.
    Use the honor_time argument to control whether the requested pause
    is honoured (with a time.sleep()) or skipped in favour of immediate
    redirection.

    A Refresh header that cannot be parsed is ignored.

    """
    processor_order = 1000

    def __init__(self, max_time=0, honor_time=True):
        self.max_time = max_time
        self.honor_time = honor_time

    def _parse_refresh(self, refresh):
        """Split a Refresh header value into (pause, newurl).

        The value is expected to look like "<seconds>; url=<newurl>".
        Returns None if it has no ";" or "=" part or if the pause is not
        a number.
        """
        i = refresh.find(";")
        if i == -1:
            return None
        pause, newurl_spec = refresh[:i], refresh[i+1:]
        j = newurl_spec.find("=")
        if j == -1:
            return None
        try:
            # float, so fractional delays are accepted too
            pause = float(pause)
        except ValueError:
            return None
        return pause, newurl_spec[j+1:].strip()

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code == 200 and hdrs.has_key("refresh"):
            parsed = self._parse_refresh(hdrs["refresh"])
            if parsed is not None:
                pause, newurl = parsed
                if pause <= self.max_time:
                    if pause > 0 and self.honor_time:
                        time.sleep(pause)
                    # fake a 302 response
                    hdrs["location"] = newurl
                    response = self.parent.error(
                        'http', request, response, 302, msg, hdrs)

        return response

    https_response = http_response
|
||||
|
||||
class HTTPErrorProcessor(BaseProcessor):
    """Process non-200 HTTP error responses.

    This just passes the job on to the Handler.<proto>_error_<code>
    methods, via the OpenerDirector.error method.

    """
    processor_order = 1000

    def http_response(self, request, response):
        status = response.code
        reason = response.msg
        headers = response.info()

        if status == 200:
            return response
        return self.parent.error(
            'http', request, response, status, reason, headers)

    https_response = http_response
|
||||
|
||||
|
||||
class OpenerDirector(urllib2.OpenerDirector):
|
||||
# XXX might be useful to have remove_processor, too (say you want to
|
||||
# set a new RefererProcessor, but keep the old CookieProcessor --
|
||||
# could always just create everything anew, though (using old
|
||||
# CookieJar object to create CookieProcessor)
|
||||
def __init__(self):
|
||||
urllib2.OpenerDirector.__init__(self)
|
||||
#self.processors = []
|
||||
self.process_response = {}
|
||||
self.process_request = {}
|
||||
|
||||
def add_handler(self, handler):
|
||||
# XXX
|
||||
# tidy me
|
||||
# the same handler could be added twice without detection
|
||||
added = 0
|
||||
for meth in dir(handler.__class__):
|
||||
if meth[-5:] == '_open':
|
||||
protocol = meth[:-5]
|
||||
if self.handle_open.has_key(protocol):
|
||||
self.handle_open[protocol].append(handler)
|
||||
self.handle_open[protocol].sort()
|
||||
else:
|
||||
self.handle_open[protocol] = [handler]
|
||||
added = 1
|
||||
continue
|
||||
i = string.find(meth, '_')
|
||||
j = string.find(meth[i+1:], '_') + i + 1
|
||||
if j != -1 and meth[i+1:j] == 'error':
|
||||
proto = meth[:i]
|
||||
kind = meth[j+1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
dict = self.handle_error.get(proto, {})
|
||||
if dict.has_key(kind):
|
||||
dict[kind].append(handler)
|
||||
dict[kind].sort()
|
||||
else:
|
||||
dict[kind] = [handler]
|
||||
self.handle_error[proto] = dict
|
||||
added = 1
|
||||
continue
|
||||
if meth[-9:] == "_response":
|
||||
protocol = meth[:-9]
|
||||
if self.process_response.has_key(protocol):
|
||||
self.process_response[protocol].append(handler)
|
||||
self.process_response[protocol].sort()
|
||||
else:
|
||||
self.process_response[protocol] = [handler]
|
||||
added = True
|
||||
continue
|
||||
elif meth[-8:] == "_request":
|
||||
protocol = meth[:-8]
|
||||
if self.process_request.has_key(protocol):
|
||||
self.process_request[protocol].append(handler)
|
||||
self.process_request[protocol].sort()
|
||||
else:
|
||||
self.process_request[protocol] = [handler]
|
||||
added = True
|
||||
continue
|
||||
if added:
|
||||
self.handlers.append(handler)
|
||||
self.handlers.sort()
|
||||
handler.add_parent(self)
|
||||
|
||||
## def add_processor(self, processor):
|
||||
## added = False
|
||||
## for meth in dir(processor):
|
||||
## if meth[-9:] == "_response":
|
||||
## protocol = meth[:-9]
|
||||
## if self.process_response.has_key(protocol):
|
||||
## self.process_response[protocol].append(processor)
|
||||
## self.process_response[protocol].sort()
|
||||
## else:
|
||||
## self.process_response[protocol] = [processor]
|
||||
## added = True
|
||||
## continue
|
||||
## elif meth[-8:] == "_request":
|
||||
## protocol = meth[:-8]
|
||||
## if self.process_request.has_key(protocol):
|
||||
## self.process_request[protocol].append(processor)
|
||||
## self.process_request[protocol].sort()
|
||||
## else:
|
||||
## self.process_request[protocol] = [processor]
|
||||
## added = True
|
||||
## continue
|
||||
## if added:
|
||||
## self.processors.append(processor)
|
||||
## # XXX base class sorts .handlers, but I have no idea why
|
||||
## #self.processors.sort()
|
||||
## processor.add_parent(self)
|
||||
|
||||
def _request(self, url_or_req, data):
    """Return a request object for url_or_req, attaching data if given.

    A string-like url_or_req is wrapped in a new Request constructed
    with data.  Anything else is taken to be an existing request object
    and is returned as-is, after data has been added to it when data is
    not None.
    """
    if not isstringlike(url_or_req):
        # already a urllib2.Request instance
        existing = url_or_req
        if data is not None:
            existing.add_data(data)
        return existing
    return Request(url_or_req, data)
|
||||
|
||||
def open(self, fullurl, data=None):
    """Open fullurl, running processors before and after the real open.

    fullurl may be a URL string or a request object; data, if not None,
    is attached to the request.  Every processor registered in
    self.process_request under the request's URL scheme has its
    "<scheme>_request" method called with the request and must return
    the request to use from then on.  The request is then opened by
    urllib2.OpenerDirector.open, and every processor registered in
    self.process_response under the same scheme has its
    "<scheme>_response" method called with (request, response) and must
    return the response to use.  The final response is returned.
    """
    req = self._request(fullurl, data)
    scheme = req.get_type()

    # pre-process request
    # XXX should we allow a Processor to change the type (URL
    #     scheme) of the request?
    request_hook = scheme+"_request"
    for processor in self.process_request.get(scheme, []):
        req = getattr(processor, request_hook)(req)

    response = urllib2.OpenerDirector.open(self, req, data)

    # post-process response; processors are looked up under the scheme
    # read before the request processors ran
    response_hook = scheme+"_response"
    for processor in self.process_response.get(scheme, []):
        response = getattr(processor, response_hook)(req, response)

    return response
|
||||
|
||||
## def close(self):
|
||||
## urllib2.OpenerDirector.close(self)
|
||||
## for processor in self.processors:
|
||||
## processor.close()
|
||||
## self.processors = []
|
||||
|
||||
|
||||
# Note the absence of redirect and header-adding code here
|
||||
# (AbstractHTTPHandler), and the lack of other clutter that would be
|
||||
# here without Processors.
|
||||
class AbstractHTTPHandler(urllib2.BaseHandler):
|
||||
def do_open(self, http_class, req):
|
||||
host = req.get_host()
|
||||
if not host:
|
||||
raise URLError('no host given')
|
||||
|
||||
h = http_class(host) # will parse host:port
|
||||
if ClientCookie.HTTP_DEBUG:
|
||||
h.set_debuglevel(1)
|
||||
|
||||
if req.has_data():
|
||||
h.putrequest('POST', req.get_selector())
|
||||
else:
|
||||
h.putrequest('GET', req.get_selector())
|
||||
|
||||
for k, v in req.headers.items():
|
||||
h.putheader(k, v)
|
||||
for k, v in req.unredirected_hdrs.items():
|
||||
h.putheader(k, v)
|
||||
|
||||
# httplib will attempt to connect() here. be prepared
|
||||
# to convert a socket error to a URLError.
|
||||
try:
|
||||
h.endheaders()
|
||||
except socket.error, err:
|
||||
raise URLError(err)
|
||||
if req.has_data():
|
||||
h.send(req.get_data())
|
||||
|
||||
code, msg, hdrs = h.getreply()
|
||||
fp = h.getfile()
|
||||
|
||||
response = urllib.addinfourl(fp, hdrs, req.get_full_url())
|
||||
response.code = code
|
||||
response.msg = msg
|
||||
|
||||
return response
|
||||
|
||||
|
||||
# XXX would self.reset() work, instead of raising this exception?
|
||||
class EndOfHeadError(Exception):
    """Raised by HeadParser to stop parsing once the HEAD section is over."""
|
||||
class HeadParser(htmllib.HTMLParser):
    """Parser that records META HTTP-EQUIV declarations from a document head.

    Every META tag that carries an http-equiv attribute adds an
    (http-equiv, content) tuple to self.http_equiv.  EndOfHeadError is
    raised by end_head() and for any tag not listed in head_elems.
    """

    # only these elements are allowed in or before HEAD of document
    head_elems = ("html", "head",
                  "title", "base",
                  "script", "style", "meta", "link", "object")

    def __init__(self):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.http_equiv = []

    def start_meta(self, attrs):
        """Record the (http-equiv, content) pair of one META tag, if any.

        attrs is a sequence of (name, value) pairs; when a name is
        repeated the last value wins.  Nothing is recorded unless an
        http-equiv attribute is present; content may be None.
        """
        seen = {"http-equiv": None, "content": None}
        for name, value in attrs:
            if seen.has_key(name):
                seen[name] = value
        if seen["http-equiv"] is not None:
            self.http_equiv.append((seen["http-equiv"], seen["content"]))

    def handle_starttag(self, tag, method, attrs):
        """Run method(attrs) for head elements; otherwise end the head."""
        if tag not in self.head_elems:
            raise EndOfHeadError()
        method(attrs)

    def handle_endtag(self, tag, method):
        """Run method() for head elements; otherwise end the head."""
        if tag not in self.head_elems:
            raise EndOfHeadError()
        method()

    def end_head(self):
        """Signal that the closing HEAD tag has been reached."""
        raise EndOfHeadError()
|
||||
|
||||
def parse_head(file):
    """Return a list of key, value pairs.

    file is read CHUNK bytes at a time and fed to a HeadParser until the
    parser raises EndOfHeadError or a read comes back with a length
    other than CHUNK.  The result is the parser's http_equiv list.
    """
    parser = HeadParser()
    more = 1
    while more:
        chunk = file.read(CHUNK)
        try:
            parser.feed(chunk)
        except EndOfHeadError:
            more = 0
        else:
            # a short read should only happen if there is no HTML body,
            # or if CHUNK is big
            more = len(chunk) == CHUNK
    return parser.http_equiv
|
||||
|
||||
|
||||
class HTTPHandler(AbstractHTTPHandler):
    """Handler for the http scheme."""

    def http_open(self, req):
        """Open req via do_open() using httplib.HTTP connections."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
|
||||
|
||||
# HTTPSHandler is only defined when this httplib provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for the https scheme."""

        def https_open(self, req):
            """Open req via do_open() using httplib.HTTPS connections."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
|
||||
|
||||
|
||||
def build_opener(*handlers):
    """Create an opener object from a list of handlers and processors.

    The opener will use several default handlers and processors, including
    support for HTTP and FTP.  If there is a ProxyHandler, it must be at the
    front of the list of handlers.  (Yuck.  This is fixed in 2.3.)

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    handlers may be handler classes, which are instantiated here with no
    arguments, or handler instances.  Only classic classes and their
    instances are recognised by the type checks below; any other object
    is handed to add_handler() unchanged and never suppresses a default.

    Returns the populated OpenerDirector.
    """
    opener = OpenerDirector()
    default_classes = [
        # handlers
        urllib2.ProxyHandler,
        urllib2.UnknownHandler,
        HTTPHandler,  # from this module (derived from new AbstractHTTPHandler)
        urllib2.HTTPDefaultErrorHandler,
        HTTPRedirectHandler,  # from this module (bugfixed)
        urllib2.FTPHandler,
        urllib2.FileHandler,
        # processors
        HTTPRequestUpgradeProcessor,
        #HTTPEquivProcessor,
        #SeekableProcessor,
        HTTPCookieProcessor,
        #HTTPRefererProcessor,
        HTTPStandardHeadersProcessor,
        #HTTPRefreshProcessor,
        HTTPErrorProcessor
        ]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Work out which defaults are overridden by the caller's handlers.
    # Each default is recorded at most once: if two supplied handlers
    # both derived from the same default, listing it twice would make
    # the second default_classes.remove() below raise ValueError.
    skip = []
    for klass in default_classes:
        for check in handlers:
            if type(check) == types.ClassType:
                overridden = issubclass(check, klass)
            elif type(check) == types.InstanceType:
                overridden = isinstance(check, klass)
            else:
                overridden = 0
            if overridden:
                skip.append(klass)
                break
    for klass in skip:
        default_classes.remove(klass)

    # Remaining defaults go first, then the caller's handlers in the
    # order given (classes are instantiated, instances used directly).
    to_add = []
    for klass in default_classes:
        to_add.append(klass())
    for h in handlers:
        if type(h) == types.ClassType:
            h = h()
        to_add.append(h)

    for instance in to_add:
        opener.add_handler(instance)

    return opener
|
||||
|
||||
|
||||
# Module-wide default opener; created by urlopen() on first use or set
# explicitly through install_opener().
_opener = None
# Held while the default opener is being created.
urlopen_lock = _threading.Lock()

def urlopen(url, data=None):
    """Open url with the module-wide opener and return its result.

    When no opener is installed yet, one is created with build_opener().
    The check for a missing opener is repeated after urlopen_lock has
    been acquired, and the lock is always released afterwards.
    """
    global _opener
    if _opener is not None:
        return _opener.open(url, data)
    urlopen_lock.acquire()
    try:
        if _opener is None:
            _opener = build_opener()
    finally:
        urlopen_lock.release()
    return _opener.open(url, data)
|
||||
|
||||
def install_opener(opener):
    """Store opener in the module-level _opener variable.

    No validation is done on opener; it simply replaces whatever value
    _opener held before.
    """
    global _opener
    _opener = opener
|
||||
Reference in New Issue
Block a user