A day in the life.
With much help from neysx (and plenty of arguing for colors!) I have finally gotten the GLEP page re-arranged. It still needs a bit more work, maybe some last-modified dates on some of those low-numbered ‘drafts’.
I am also pulling the failure report into MySQL. The records look like (cpv,date,filename,eid) where eid is an int representing the fetch error in a separate table. My script is below (database info scrubbed, obviously).
import sys
import urllib2
import MySQLdb
import time
from xml.dom.minidom import parse
def usage():
    """
    Print a one-line usage summary to stderr.

    The script takes a single argument, the URL of the failure report to
    import (main() reads it from sys.argv[1]).
    """
    sys.stderr.write("Usage: %s <failure-report-url>\n" % sys.argv[0])
def main():
    """
    Import the failure report at the URL given on the command line into MySQL.

    Every record returned by BuildTuples() has its error message swapped for
    the matching eid from the errors table and is then inserted into the
    failures table.

    Returns True on success and False when the URL argument is missing or no
    connection object was obtained.
    """
    if len(sys.argv) < 2:
        usage()
        return False
    url = sys.argv[1]
    tuples = BuildTuples(url, debug=False)
    conn = MySQLdb.Connection('mysql.scriptkitty.com','wrongusername','youhavewonafreeipod')
    if not conn:
        return False
    try:
        conn.select_db('yourmissionisafailure')
        for t in tuples:
            eid = findErrorId(conn, t[2])
            t[2] = eid # replace string error with int for errors table
            insertTuple(conn, t)
        # MySQLdb does not autocommit; without this, rows written to a
        # transactional table are discarded when the connection closes.
        conn.commit()
    finally:
        conn.close()
    # Report success explicitly: falling off the end returned None, which the
    # caller could not tell apart from the failure paths above.
    return True
def insertTuple(conn, t):
    """
    Insert a given tuple into the sql database

    conn -- open database connection providing cursor()
    t    -- sequence laid out as (cpv, filename, eid, date)

    The values are handed to execute() as query parameters, so the driver
    does the quoting; a quote character in a filename can neither break the
    statement nor inject SQL.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(
            "insert into failures (cpv, filename, eid, date) VALUES (%s, %s, %s, %s)",
            (t[0], t[1], t[2], t[3]))
    finally:
        cursor.close()
    return
def findErrorId(conn, errorMsg):
    """
    The errors are in a separate table (so we use a small int ptr instead of
    a large string).
    This function attempts to locate the ErrorId of a particular message; if
    that message isn't in the db, it's added and then the eid is returned.

    conn     -- open database connection providing cursor() and insert_id()
    errorMsg -- error message text to look up

    The message is passed as a query parameter rather than interpolated into
    the SQL, so quotes inside an error message cannot break or alter the
    statement.
    """
    cursor = conn.cursor()
    try:
        cursor.execute("select eid from errors where message=%s", (errorMsg,))
        row = cursor.fetchone()
        if row:
            return int(row[0])
        cursor.execute("insert into errors (message) VALUES(%s)", (errorMsg,))
        # insert_id() is evaluated before the finally clause closes the cursor
        return conn.insert_id()
    finally:
        cursor.close()
def BuildTuples(url, debug):
    """
    This function takes a URL and attempts to parse it (see comments).
    It returns a list of [cpv, filename, error, date] records; they are
    lists rather than tuples because main() overwrites the error slot with
    the error id.
    If debug=True it will print the records as it adds them.

    An empty list is returned when the fetch fails or when the document has
    no usable date element or no second section.  A date string that does
    not match the expected format raises ValueError from time.strptime.
    """
    try:
        conn = urllib2.urlopen(url)
    except urllib2.URLError:
        # HTTPError is a subclass of URLError, so this still covers HTTP
        # error statuses and additionally covers DNS/connection failures.
        return [] # Fetch failed
    try:
        doc = parse(conn)
    finally:
        conn.close()

    node = doc.getElementsByTagName('date').item(0)
    text = None
    if node is not None:
        text = node.childNodes.item(0)
    date = None
    if text is not None:
        date = text.nodeValue
    if not date:
        return [] # Date was None, odd

    # Date format is Fri Apr 20 00:41:39 2007
    # %a = short day, %b = Short month, %d = day of month, hours, minutes, seconds, year
    date = time.strptime(date,"%a %b %d %H:%M:%S %Y")
    # The first six fields are year..second; a [0:5] slice silently zeroed
    # the seconds that the format string above parses.
    date = MySQLdb.Timestamp(*date[0:6])

    # The table we want to parse is in the second section of the guideXML doc.
    # There are three columns, in order: cpv, filename, error.
    # Basically we find the second 'section' element, collect the 'ti' cells
    # below that, then we figure out which column we are processing from the
    # cell's position modulo 3.
    second_section = doc.getElementsByTagName('section').item(1)
    if second_section is None:
        return []
    tagNodes = second_section.getElementsByTagName('ti')

    ret = []
    for x, cell in enumerate(tagNodes):
        item = cell.childNodes.item(0).nodeValue
        column = x % 3
        if column == 0:
            cpv = item
        elif column == 1:
            filename = item
        else: # Third column: the row is complete, so add it to the list
            error = item
            if debug:
                print("cpv: %s, file: %s, error:%s" % (cpv, filename, error))
            ret.append([cpv,filename,error,date])
    return ret
if __name__ == "__main__":
    # Exit status is 0 when main() returns a true value and 1 otherwise.
    sys.exit(not main())
The XML parsing is really quite messy; guideXML isn’t really meant for structured data like this; it’s meant for webpages.
But I managed, dammit! Obviously the script has some work left on it, code cleanup, usage(), and wtf is % ( t[0],t[1],t[2],t[3] ); I should be able to use a tuple there but I didn’t care to fix it.
There is probably some SQL injection in there too (not sure if execute scrubs stuff or not). I can add some escape bits later.
Peace out homies.

Leave a comment
You must be logged in to post a comment.