#!/usr/bin/env python
"""CGI/command-line filter that routes captioned feed images through caption.cgi.

The feed URL comes from the first command-line argument or, when there is
none, from the CGI field "f".  For every <item> of the first <channel> whose
<title> differs from the file name of its <media:content> URL, that URL is
replaced by a caption.cgi request carrying the "s800" variant of the image
and the title as the caption.  The resulting XML is written to stdout behind
a "Content-type: text/xml" header.

Runs on Python 2.6+ and Python 3.
"""

import contextlib
import os
import string  # not used below; kept so the module namespace is unchanged
import sys
import urllib
from xml.dom import minidom, Node

try:  # Python 3 locations
    from urllib.parse import parse_qs, urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2 locations
    from urllib import urlencode, urlopen
    from urlparse import parse_qs

try:
    import cgi
    import cgitb
except ImportError:  # both modules were removed from the stdlib in Python 3.13
    cgi = None
    cgitb = None

if cgitb is not None:
    cgitb.enable()

_CAPTION_SERVICE = 'http://nella.org/jra/geek/captionate/caption.cgi?'


def parse(url):
    """Fetch *url* and return its body parsed as a minidom Document.

    Raises:
        ValueError: the body does not start with '<'; the raw body is the
            exception argument.
    """
    # closing() works for both the Python 2 and Python 3 response objects.
    with contextlib.closing(urlopen(url)) as response:
        body = response.read()
    if not body.startswith(b'<'):
        raise ValueError(body)
    return minidom.parseString(body)


def showNode(node):
    """Debugging aid: print the name, attributes and ID of an element node."""
    if node.nodeType != Node.ELEMENT_NODE:
        return
    # Single-argument print() calls behave identically on Python 2 and 3.
    print('Element name: %s' % node.nodeName)
    for (name, value) in node.attributes.items():
        print(' Attr -- Name: %s Value: %s' % (name, value))
    id_attr = node.attributes.get('ID')
    if id_attr is not None:
        print(' ID: %s' % id_attr.value)


def getText(nodelist):
    """Return the concatenated data of the text nodes directly in *nodelist*."""
    return "".join(
        node.data for node in nodelist if node.nodeType == node.TEXT_NODE
    )


# http://lh5.ggpht.com/_Ny16LsAeZGk/Sz-CHos_cLI/AAAAAAAABnw/zQcD6apDKMs/dsc_04141.JPG
# to
# http://lh5.ggpht.com/_Ny16LsAeZGk/Sz-CHos_cLI/AAAAAAAABnw/zQcD6apDKMs/s800/dsc_04141.JPG
def to800(url):
    """Return *url* with an 's800' path segment inserted before the file name."""
    parts = url.split('/')
    return '/'.join(parts[:-1] + ['s800', parts[-1]])


def fn(url):
    """Return the last '/'-separated component of *url* (the file name)."""
    return url.rsplit('/', 1)[-1]


def _cgi_feed_url():
    """Return the feed URL from the CGI field "f".

    Raises:
        KeyError: the field is missing.
        ValueError: the value is not an http:// or https:// URL.
    """
    if cgi is not None:
        feed = cgi.FieldStorage()["f"].value
    else:
        # No cgi module (Python 3.13+): read the field from the query string.
        feed = parse_qs(os.environ.get('QUERY_STRING', ''))["f"][0]
    # The value is attacker-controlled.  Python 2's urlopen also opens bare
    # local paths and file: URLs, and parse() echoes a non-XML body in its
    # ValueError, so anything but http(s) could expose local files.
    if not feed.lower().startswith(('http://', 'https://')):
        raise ValueError('feed URL must use http or https: %r' % feed)
    return feed


def _caption_items(doc):
    """Rewrite, in place, the media:content URL of every captioned item."""
    channel = doc.documentElement.getElementsByTagName("channel")[0]
    for item in channel.getElementsByTagName("item"):
        caption = None
        for child in item.childNodes:
            if child.nodeName == "title":
                caption = getText(child.childNodes)

        contents = item.getElementsByTagName("media:content")
        # Leave items alone when there is nothing to caption or no URL to
        # rewrite, instead of aborting the feed or captioning with "None".
        if caption is None or not contents:
            continue
        content = contents[0]
        if not content.hasAttribute("url"):
            continue

        url = content.getAttribute("url")
        if caption == fn(url):
            # A title equal to the file name is treated as "no caption".
            continue

        # Encode explicitly: Python 2's urlencode() calls str() on each value
        # and fails on non-ASCII text.  A list of pairs keeps the parameter
        # order fixed.
        # NOTE(review): UTF-8 is assumed to be what caption.cgi expects -- confirm.
        query = urlencode([
            ("i", to800(url).encode('utf-8')),
            ("c", caption.encode('utf-8')),
        ])
        content.setAttribute("url", _CAPTION_SERVICE + query)


def main():
    """Fetch the feed, rewrite captioned image URLs and emit the XML."""
    if len(sys.argv) > 1:
        feed = sys.argv[1]
    else:
        feed = _cgi_feed_url()

    doc = parse(feed)
    _caption_items(doc)

    # Emit bytes: writing the document as text to a piped stdout fails on
    # non-ASCII content under Python 2.  Python 3 exposes the byte stream as
    # sys.stdout.buffer; Python 2's sys.stdout already accepts bytes.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(b"Content-type: text/xml\n\n")
    out.write(doc.toxml(encoding='utf-8'))
    out.flush()


if __name__ == '__main__':
    main()