Here’s the script I wrote to scrape links to the sound files of my radio programmes and add them to this blog.
#!/usr/bin/env python
"""Scrape the links to my radio sound files and add them to the blog.

The script polls the Radio NZ "Nine to Noon" programme page until both
sound-file links (Ogg and MP3) for the technology slot are present, then
rewrites the 'download the audio' sentence of the most recent blog post so
that it carries those links.

Written for Python 2: ``urllib.urlopen`` is the Python 2 API.
"""

import re
import time
import urllib

# the blog in question
blogaddr = "http://it.gen.nz/"
# the page with the sound file links on it
linkspage = "http://radionz.co.nz/national/programmes/ninetonoon"

# A double-quoted http URL containing "echnology" (matches both
# "Technology" and "technology").  The surrounding quotes are part of the
# match on purpose: they are reused as the href attribute quotes later on.
SOUND_LINK_RE = re.compile(r'"http\S*?echnology\S*?"')
# Extensions that identify a real sound file (line added Feb 09 to weed out
# any other matching links on the page which are not to sound files).
SOUND_EXTENSIONS = (".ogg", ".mp3")
# Text in the blog post that marks where the links are grafted in.
AUDIO_MARKER = 'download the audio'


def find_sound_links(page):
    """Return the quoted sound-file URLs found in *page*, in page order.

    Each returned string still has its surrounding double quotes, exactly
    as matched in the HTML.
    """
    candidates = SOUND_LINK_RE.findall(page)
    # Drop the closing quote before testing the extension.
    return [link for link in candidates
            if link[:-1].endswith(SOUND_EXTENSIONS)]


def wait_for_sound_links(url=linkspage, poll_seconds=60):
    """Poll *url* until at least two sound-file links are up; return them.

    If the links are not found they are not up yet, so wait
    *poll_seconds* and try again.  There is no upper bound on the number
    of attempts.
    """
    while True:
        page = urllib.urlopen(url).read()
        links = find_sound_links(page)
        if len(links) >= 2:
            return links
        time.sleep(poll_seconds)


def build_link_html(links):
    """Assemble the HTML fragment to be inserted into the blog post.

    There should be two links - Ogg then MP3.  The URLs already carry
    their own double quotes (see ``SOUND_LINK_RE``), so none are added
    around the href values here.
    """
    return (' <a href=' + links[0] + '>ogg</a> or <a href='
            + links[1] + '>mp3</a>')


def graft_links(description, linktext):
    """Return *description* with *linktext* grafted in, or None.

    Everything from the first 'download the audio' onwards is replaced by
    'download the audio as<linktext>.'.  None is returned when the marker
    is absent, i.e. the post has no 'download the audio' bit.
    """
    head, marker, _tail = description.partition(AUDIO_MARKER)
    if not marker:
        return None
    return head + 'download the audio as' + linktext + "."


def xmlrpc_endpoint(base=blogaddr):
    """Return the XML-RPC URL for the blog at *base*.

    A trailing slash on *base* is stripped first so the result does not
    contain a double slash before 'xmlrpc.php'.
    """
    return base.rstrip("/") + "/xmlrpc.php"


def update_latest_post(linktext):
    """Graft *linktext* into the most recent blog post.

    Returns True when the post was edited, and False when it had no
    'download the audio' marker and was therefore left untouched.
    """
    # Third-party dependency, imported here so that the pure helpers above
    # can be imported (and tested) without it being installed.
    import wordpresslib

    # Blog processing - set up wordpresslib blog client object.
    # NOTE: PASSWORD is not defined in this script (it was elided from the
    # published version); it must be defined before this function runs.
    blog = wordpresslib.WordPressClient(xmlrpc_endpoint(), "colin", PASSWORD)
    blog.selectBlog(0)

    # now get the most recent post
    post = blog.getLastPost()
    description = graft_links(post.description, linktext)
    if description is None:
        return False

    # post it back to the blog
    post.description = description
    # NOTE(review): the final argument is presumably the "publish" flag -
    # confirm against the wordpresslib documentation.
    blog.editPost(post.id, post, 1)
    return True


def main():
    """Wait for the sound files to appear, then link them from the blog."""
    links = wait_for_sound_links()
    update_latest_post(build_link_html(links))


if __name__ == "__main__":
    main()