import sys, os, string, re, md5, time, datetime, calendar, traceback
from httplib import HTTPConnection
from urlparse import urlparse, urljoin
import urllib
from HTMLParser import HTMLParser
from django.conf import settings
from django.template import Library, Node, TokenParser
register = Library()
#-----------------------------------------------------------------------------
def get_headers(fn):
    """
    load the cached http response headers of the document stored at `fn`.

    the headers are kept next to the document, in a hidden file named
    '.<basename>_headers' (the same place HTTPClient.retrieve writes them).

    returns the headers dict, or None if no headers file exists.
    """
    hfn = '.%s_headers' % (os.path.basename(fn),)
    # look in the document's own directory, not the current directory
    hfn = os.path.join(os.path.dirname(fn), hfn)
    if not os.path.exists(hfn):
        return None
    f = open(hfn)
    try:
        text = f.read()
    finally:
        f.close()
    # NOTE(review): eval() runs whatever the headers file contains. the file
    # is written by this module as a dict repr, but anyone who can write to
    # the cache directory can execute code here.
    return eval(text)
class HTTPClient:
    """
    a small http client that caches documents on disk.

    every retrieved document is written to a file; its response headers
    (lower-cased names, plus 'status' and 'reason') are written as a dict
    repr to a hidden '.<basename>_headers' file in the same directory. the
    modification time of that headers file records when the document was
    last requested.
    """

    def __init__(self):
        # refresh documents that were downloaded more than a day ago
        self._refetch = 24*60*60
        # if a forced 'reload' is requested, don't impose a minimum interval
        # of time before sending the same request again
        self._limit = 0

    def _load_headers(self, hfn):
        # read back a headers dict previously stored by retrieve()
        f = open(hfn)
        try:
            text = f.read()
        finally:
            f.close()
        # NOTE(review): eval() runs whatever the headers file contains. the
        # file is written by this class as a dict repr, but anyone who can
        # write to the cache directory can execute code here.
        return eval(text)

    def _write_file(self, path, mode, data):
        # write `data` to `path`, making sure the file gets closed
        f = open(path, mode)
        try:
            f.write(data)
        finally:
            f.close()

    def retrieve(self, url, fn, reload=False):
        """
        fetch `url` into the file `fn`, unless a recent copy is cached.

        if the headers file for `fn` is younger than the refetch interval
        (or than the reload limit, when `reload` is true), no request is
        made and the cached headers are returned. otherwise the document is
        requested, following up to 6 redirects (301/302) and sending
        If-Modified-Since when a cached copy exists.

        returns the response headers as a dict. on a 304 the cached headers
        are merged with the new ones, so things like content-type survive.

        raises IOError if the (possibly cached) status is >= 400, or after
        too many redirects.
        """
        hfn = '.%s_headers' % (os.path.basename(fn),)
        hfn = os.path.join(os.path.dirname(fn), hfn)
        old_headers = {}
        last_read = 0
        if os.path.exists(hfn):
            old_headers = self._load_headers(hfn)
            last_read = os.stat(hfn).st_mtime

        if reload:
            stale = self._limit
        else:
            stale = self._refetch
        if time.time() < last_read + stale:
            # asked recently enough: answer from the cache, errors included
            status = old_headers.get('status', None)
            reason = old_headers.get('reason', None)
            if status is not None and status >= 400:
                raise IOError('failed: %d %s' % (status, reason))
            return old_headers

        last_modified = old_headers.get('last-modified', None)
        for attempt in range(6): # maximum number of redirects
            parsed = list(urlparse(url))
            (protocol, location, path, parameters, query, fragment) = parsed
            # request target relative to the host. the separators have to
            # be put back, and the fragment is never sent to a server.
            requrl = path or '/'
            if parameters:
                requrl += ';' + parameters
            if query:
                requrl += '?' + query
            request_headers = {}
            request_headers['User-Agent'] = 'http://www.allyourpixel.com'
            if last_modified and os.path.exists(fn):
                request_headers['If-Modified-Since'] = last_modified
            h = HTTPConnection(location)
            try:
                # ask for the absolute url first; if the server answers 404
                # try again with the host-relative form
                for u in (url, requrl):
                    h.request('GET', u, None, request_headers)
                    r = h.getresponse()
                    data = r.read()
                    status = r.status
                    reason = r.reason
                    headers = {}
                    for key, value in r.getheaders():
                        headers[key.lower()] = value
                    # set these last, so a 'Status:' or 'Reason:' header
                    # sent by the server can't replace the real values
                    headers['status'] = status
                    headers['reason'] = reason
                    if status != 404: break
            finally:
                h.close()

            if status in (301,302):
                self._write_file(hfn, 'w', '%r\n' % (headers,))
                # the location may be relative to the url just requested
                url = urljoin(url, headers['location'])
                continue
            write_data = True
            if status == 304:
                # not modified: the document on disk is still good. keep
                # what was known about it and lay the new headers on top.
                merged = dict(old_headers)
                merged.update(headers)
                headers = merged
                write_data = False
            self._write_file(hfn, 'w', '%r\n' % (headers,))
            if status >= 400:
                raise IOError('failed: %d %s' % (status, reason))
            if write_data:
                self._write_file(fn, 'wb', data)
            return headers
        raise IOError('too many redirects')
# the client shared by everything in this module. both of its intervals can
# be overridden from the django settings; a missing setting leaves the
# default chosen by HTTPClient in place.
client = HTTPClient()
client._limit = getattr(settings, 'AVATAR_LIMIT', client._limit)
client._refetch = getattr(settings, 'AVATAR_REFETCH', client._refetch)
#-----------------------------------------------------------------------------
def gravatar_url(email, **kwargs):
    """
    build the gravatar request url for an email address.

    extra keyword arguments become query parameters (after gravatar_id, in
    sorted order, so the url is stable); arguments whose value is None are
    left out, and 'rating' defaults to 'PG'.

    returns a (url, gravatar_id) tuple, or None if `email` does not contain
    an '@'.
    """
    if '@' not in email: return None
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # older pythons only have the md5 module
        from md5 import md5 as new_md5
    # gravatar hashes the address with whitespace removed and lower-cased
    email = re.sub(r'\s', '', email).lower()
    try:
        raw = email.encode('utf-8')
    except UnicodeError:
        # a byte string that already holds non-ascii bytes: hash it as is
        raw = email
    gid = new_md5(raw).hexdigest()
    options = dict([(k, v) for (k, v) in kwargs.items() if v is not None])
    options.setdefault('rating', 'PG')
    params = [('gravatar_id', gid)] + sorted(options.items())
    query = '&'.join(['%s=%s' % p for p in params])
    url = 'http://www.gravatar.com/avatar.php?%s' % (query,)
    return (url, gid)
def getElementByTagName(node, name):
    """
    return the first element that node.getElementsByTagName(name) yields.

    nothing is checked: if there is no such element, indexing the empty
    result raises (IndexError for a list-like result).
    """
    return node.getElementsByTagName(name)[0]
def findInList(pairs, k):
    """
    look up `k` in a sequence of (key, value) pairs.

    returns the value of the first pair whose key equals `k`;
    raises KeyError if no pair matches.
    """
    hits = (value for (key, value) in pairs if key == k)
    for first in hits:
        return first
    raise KeyError('%r not found' % (k,))
class IconParser(HTMLParser):
    """
    html parser that picks up the href of <link> tags whose rel attribute
    mentions 'icon' (rel="icon", rel="shortcut icon", ...).

    after a document has been fed, `icon` holds the href of the last such
    link, or None if there was none.
    """

    # class-level default, so the attribute exists even when the document
    # has no icon link at all
    icon = None

    def handle_starttag(self, tag, attrs):
        if tag != 'link':
            return
        try:
            rel = findInList(attrs, 'rel')
            href = findInList(attrs, 'href')
        except KeyError:
            # a link without rel or href can't name an icon
            return
        # attributes written without a value are reported as None
        if rel is None or href is None:
            return
        if rel.strip().lower().find('icon') >= 0:
            self.icon = href
def findIconLink(data):
    """
    scan an html document for a <link> tag that points at an icon.

    returns the href collected by IconParser, or None if the document has
    no icon link. this is best effort: if the parser chokes on the markup
    the error is swallowed and None is returned.
    """
    parser = IconParser()
    try:
        parser.feed(data)
        parser.close()
    except Exception:
        # malformed markup; deliberately not fatal
        return None
    # the attribute may be missing if the parser never saw an icon link
    return getattr(parser, 'icon', None)
def favicon_url(url, **kwargs):
    """
    work out the url of the favicon that belongs to the page at `url`.

    the page is retrieved through the caching client. if it is html and
    has an icon <link>, that link is resolved against the page url.
    otherwise, or if anything goes wrong along the way, the result is
    '<scheme>://<host>/favicon.ico'.

    extra keyword arguments are accepted but not used.
    """
    storage = storage_dir()
    urlfn = make_filename(url)
    urlpath = os.path.join(storage, urlfn)
    furl = None
    try:
        doc_headers = client.retrieve(url, urlpath)
        # the content type may carry parameters ('text/html; charset=utf-8');
        # only the part before the first ';' is the media type
        doc_type = doc_headers.get('content-type', '').split(';')[0]
        if doc_type.lower().strip() == 'text/html':
            f = open(urlpath)
            try:
                html = f.read()
            finally:
                f.close()
            favicon = findIconLink(html)
            if favicon:
                furl = urljoin(url, favicon)
    except Exception:
        # best effort: fall back to the conventional location below
        furl = None
    if furl is None:
        parsed = list(urlparse(url))
        (protocol, location, path, parameters, query, fragment) = parsed
        furl = '%s://%s/favicon.ico' % (protocol, location)
    return furl
def wget(url):
    """
    testing aid for the http client: retrieve the document at `url`
    into a file named after the url (relative to the current directory)
    and return the http response headers.
    """
    return client.retrieve(url, make_filename(url))
def view_gravatar(email, **kwargs):
    """
    testing aid for gravatars, meant for the command line: retrieve the
    gravatar icon for `email` and return the http response headers.

    the (url, gravatar id) pair from gravatar_url is handed straight to
    client.retrieve, so the icon is saved in a file named after the id.
    """
    request = gravatar_url(email, **kwargs)
    return client.retrieve(*request)
# characters that go into a file name unchanged. ascii_letters, not the
# locale-dependent string.letters, so the same input always yields the
# same name whatever locale the process runs in.
fs_okay_chars = string.ascii_letters+string.digits+'_@.'

def fs_escape_char(ch):
    """
    escape a single character for use in a file name: characters in
    fs_okay_chars are returned as they are, anything else becomes '-'
    followed by its code point in (at least two digit) lower-case hex.
    """
    if ch in fs_okay_chars: return ch
    return '-%02x' % (ord(ch),)

def fs_escape(fn):
    '''
    look for "unusual" characters in the string, and turn them into
    sequences that a filesystem probably won't mind as much
    '''
    return ''.join([fs_escape_char(ch) for ch in fn])
def make_filename(*args, **kwargs):
    '''
    turn the arguments into a mostly-unique string

    positional arguments come first, then the keyword arguments as
    'name_value', all joined with '_' and escaped with fs_escape.
    '''
    data = ['%s' % (a,) for a in args]
    # sorted by name: the same arguments must always give the same file
    # name, whatever order the dict happens to iterate in
    data += ['%s_%s' % item for item in sorted(kwargs.items())]
    return fs_escape('_'.join(data))
def storage_dir():
    """
    return settings.AVATAR_STORAGE_DIR, the directory downloads are kept in.

    if the directory does not exist yet it is created (with any missing
    parents) and made world-writable.
    """
    storage = settings.AVATAR_STORAGE_DIR
    if os.path.exists(storage):
        return storage
    os.makedirs(storage)
    os.chmod(storage, int('777', 8)) # world-writable
    return storage
def download_gravatar(email, reload=0, **kwargs):
    """
    make sure the gravatar for `email` is in the storage directory.

    extra keyword arguments (e.g. size) are passed on to gravatar_url and
    are part of the cache file name. `reload` is handed to the http client.

    returns a (path, content_type) tuple; content_type is None when the
    download failed but an earlier copy of the file is still around.

    raises ValueError if `email` has no '@', and re-raises the download
    error when there is no earlier copy to fall back on.
    """
    if '@' not in email:
        # not an email address. fail.
        raise ValueError('invalid email address')
    storage = storage_dir()
    # bound before the try, so the handler can test it even when the
    # failure happened before the path was worked out
    path = None
    content_type = None
    try:
        url, gid = gravatar_url(email, **kwargs)
        fn = make_filename(gid, **kwargs)
        path = os.path.join(storage, fn)
        headers = client.retrieve(url, path, reload=reload)
        content_type = headers.get('content-type', None)
    except Exception:
        # a stale copy beats no avatar; without one, report the error
        if path is None or not os.path.exists(path):
            raise
    return (path, content_type)
def download_favatar(url, reload=0, **kwargs):
    """
    make sure the favicon for the site at `url` is in the storage directory.

    the icon location comes from favicon_url. `reload` is handed to the
    http client; extra keyword arguments are passed on to favicon_url.

    returns a (path, content_type) tuple; content_type is None when the
    download failed but an earlier copy of the file is still around.

    raises ValueError if `url` is empty or blank, and re-raises the
    download error when there is no earlier copy to fall back on.
    """
    url = url.strip()
    if not url:
        raise ValueError('no url')
    storage = storage_dir()
    furl = favicon_url(url, **kwargs)
    path = os.path.join(storage, make_filename(furl))
    content_type = None
    try:
        headers = client.retrieve(furl, path, reload=reload)
        content_type = headers.get('content-type', None)
    except Exception:
        # a stale copy beats no icon; without one, report the error.
        # (not a bare except: ctrl-c and sys.exit should still get through)
        if not os.path.exists(path):
            raise
    return (path, content_type)
def real(fn):
    """
    canonical form of a path, for comparing paths as strings: the result
    of os.path.realpath, then os.path.normcase, then os.path.normpath.
    """
    resolved = os.path.realpath(fn)
    folded = os.path.normcase(resolved)
    return os.path.normpath(folded)
def realdir(fn):
    """
    like real(), but the result always ends with a path separator, which
    makes it usable as a directory prefix in string comparisons.
    """
    fn = real(fn)
    if not fn.endswith(os.path.sep):
        fn += os.sep
    return fn
# (directory, url prefix) pairs built from settings.STATIC_DIRS: directories
# in canonical form with a trailing separator, url prefixes with a trailing
# slash. file_url uses them to map a file to the url it is served at.
sdirs = []
for (dn, url) in settings.STATIC_DIRS:
    try:
        # a url of None is meaningful (file_url turns it into a file:// url),
        # so it must be kept, and it has no trailing slash to fix up
        if url is not None and not url.endswith('/'):
            url += '/'
        sdirs.append((realdir(dn), url))
    except Exception:
        # a bad entry is reported but doesn't stop the module from loading
        traceback.print_exc()
def file_url(fn):
    """
    map a file to the url it can be fetched from.

    the first entry of sdirs whose directory is a prefix of the file's
    canonical path decides: with a url prefix, the result is the prefix
    plus the url-quoted path below that directory; with a prefix of None
    it is a file:// url of the whole path.

    returns None if the file is in none of the directories.
    """
    fn = real(fn)
    for (dn, url) in sdirs:
        if not fn.startswith(dn):
            continue
        if url is None:
            return 'file://' + urllib.quote(fn)
        below = fn[len(dn):].lstrip('/')
        return url + urllib.quote(below)
    return None
def avatar_img_tag(fn, size=None):
    """
    build the html image tag for the avatar stored in the file `fn`.

    the tag gets class="avatar" and the src that file_url gives for `fn`;
    if `size` is given (and non-zero) it is used as both width and height.
    attributes are written in sorted order so the output is stable.
    """
    params = {
        'class': 'avatar',
        'src': file_url(fn),
    }
    if size:
        params['width'] = params['height'] = size
    attrs = ' '.join(['%s="%s"' % pair for pair in sorted(params.items())])
    # NOTE(review): the template in this literal had been lost (the string
    # was empty and broken over two lines); '<img %s />' is a reconstruction
    # from the function name and the attributes -- confirm against history.
    return '<img %s />' % attrs
@register.filter
def gravatar(email, size=None):
    """
    template filter: turn an email address into an avatar image tag.

    the gravatar is downloaded (at `size`, if one is given); if that fails
    settings.AVATAR_DEFAULT_IMAGE is used instead, and if there is no such
    setting the result is an empty string.
    """
    # only ask for a size when there is one: passing size=None along would
    # put a literal 'None' into the request and the cache file name
    options = {}
    if size is not None:
        options['size'] = size
    try:
        fn, content_type = download_gravatar(email, **options)
    except Exception:
        try:
            fn = settings.AVATAR_DEFAULT_IMAGE
        except Exception:
            return ''
    return avatar_img_tag(fn)
@register.filter
def favicon(url, size=None):
    """
    template filter: turn a site url into an image tag for its favicon.

    if the download fails settings.AVATAR_DEFAULT_IMAGE is used instead;
    if that can't be read either the result is an empty string.
    `size` is accepted but not used.
    """
    try:
        fn, ctype = download_favatar(url)
    except:
        # any failure at all: try the configured stand-in image
        try:
            fn = settings.AVATAR_DEFAULT_IMAGE
        except:
            return ''
    return avatar_img_tag(fn)
class AvatarNode(Node):
    """
    template node behind the 'avatar' tag.

    `obj` is the name of a context variable whose value has `email` and
    `url` attributes; `size` is either a number or something with a
    resolve(context) method (what parser.compile_filter returns).
    """

    def __init__(self, obj, size):
        self.obj = obj
        self.size = size

    def __repr__(self):
        # NOTE(review): the original format string had been lost (it was
        # empty, so formatting two values into it raised TypeError); this
        # text is a reconstruction.
        return "<AvatarNode: obj=%r size=%r>" % \
            (self.obj, self.size)

    def render(self, context):
        """
        render the image tag: the object's gravatar if it can be had,
        else the favicon of its url, else settings.AVATAR_DEFAULT_IMAGE,
        else an empty string. a 'Pragma: no-cache' request forces a reload.
        """
        request = context['request']
        pragma = request.META.get('HTTP_PRAGMA', '').lower().strip()
        reload = pragma == 'no-cache'
        if hasattr(self.size, 'resolve'):
            size = self.size.resolve(context)
        else:
            size = self.size
        obj = context[self.obj]
        # bound up front: when both downloads fail neither gets assigned
        fn = None
        content_type = None
        try:
            fn, content_type = download_gravatar(obj.email, reload=reload, size=size)
        except Exception:
            try:
                fn, content_type = download_favatar(obj.url, reload=reload)
            except Exception:
                fn = None
        if content_type:
            # only the major type matters; splitting this way also copes
            # with a content type that has no '/' in it
            major = content_type.lower().strip().split('/')[0]
            if major != 'image':
                fn = None
        if fn is None:
            try:
                fn = settings.AVATAR_DEFAULT_IMAGE
            except Exception:
                return ''
        return avatar_img_tag(fn, size=size)
@register.tag(name="avatar")
def do_avatar(parser, token):
    """
    compile the 'avatar' template tag into an AvatarNode.

    two things are read from the tag contents: an object name and a size.
    a size made of digits only is used as a number; anything else is
    compiled as a filter expression, to be resolved when rendering.
    presumably written as {% avatar obj size %} -- confirm against
    TokenParser.
    """
    class AvatarParser(TokenParser):
        def top(self):
            return (self.tag(), self.value())

    obj, size = AvatarParser(token.contents).top()
    if size.isdigit():
        size = int(size)
    else:
        size = parser.compile_filter(size)
    return AvatarNode(obj, size)
def parse_http_date(d, local=0):
    '''
    according to http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3:
    HTTP applications have historically allowed three different
    formats for the representation of date/time stamps:
    Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
    Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
    Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
    this method parses all three.

    returns a naive datetime: the GMT date and time as written, or, if
    `local` is true, the same instant in the local time zone.
    raises ValueError if the string matches none of the formats.
    '''
    formats = (
        '%a, %d %b %Y %H:%M:%S %Z', # Sun, 06 Nov 1994 08:49:37 GMT
        '%A, %d-%b-%y %H:%M:%S %Z', # Sunday, 06-Nov-94 08:49:37 GMT
        '%a %b %d %H:%M:%S %Y', # Sun Nov 6 08:49:37 1994
    )
    for fmt in formats:
        try:
            t = time.strptime(d, fmt)
            if local:
                # the fields are GMT: timegm gives the instant,
                # fromtimestamp turns it into local time
                return datetime.datetime.fromtimestamp(calendar.timegm(t))
            # take the fields as they are. going through mktime and back
            # would shift the time by an hour whenever daylight saving
            # time is in effect locally on that date.
            return datetime.datetime(*t[:6])
        except ValueError:
            pass
    raise ValueError('failed to parse date %r' % (d,))
def test_http_date():
    """
    self-test for parse_http_date: the same moment written in each of the
    three http date formats must parse to the same datetime.
    raises Exception, naming the offending string, if one does not.
    """
    expected = datetime.datetime(1994, 11, 6, 8, 49, 37)
    date_strings = (
        'Sun, 06 Nov 1994 08:49:37 GMT', # RFC 822, updated by RFC 1123
        'Sunday, 06-Nov-94 08:49:37 GMT', # RFC 850, obsoleted by RFC 1036
        'Sun Nov 6 08:49:37 1994', # ANSI C asctime() format
    )
    for ds in date_strings:
        hd = parse_http_date(ds)
        if expected == hd:
            continue
        raise Exception('failed: %r -> %r' % (ds, hd))
if __name__ == '__main__':
    # command line hook for trying things out:
    #     <prog> <routine> [arguments...]
    # calls the named routine with the remaining arguments (as strings)
    # and prints whatever it returns.
    args = sys.argv[1:]
    if not args:
        sys.stderr.write('usage: %s routine [arguments...]\n' % (sys.argv[0],))
        sys.exit(2)
    # NOTE(review): eval() lets the routine be any expression (for example
    # client.retrieve), but it also runs arbitrary code from the command
    # line. fine for a developer's shell, not for anything untrusted.
    meth = eval(args[0])
    result = meth(*args[1:])
    print(result)