"""Fetch a wx.com mini-radar page and dump its raw HTML to stdout.

The script also defines ``tomParser``, an ``htmllib``-based parser that
tracks which element is currently open and prints text found inside the
body and title. Feeding the page to the parser is currently disabled, so
only the raw page is printed.

This is Python 2 code (``urllib2``, ``htmllib`` and ``formatter`` are
Python 2 standard-library modules).
"""
from __future__ import print_function

import urllib2
import string
import htmllib
import formatter


class tomParser(htmllib.HTMLParser):
    """HTML parser that records which element is open and prints text.

    The ``start_<tag>`` / ``end_<tag>`` methods toggle integer flags
    (0 or 1) on the instance; ``handle_data`` consults those flags to
    decide what to print.
    """

    def __init__(self, formatter):
        """Initialise the base parser and reset all state flags.

        ``formatter`` is passed straight through to
        ``htmllib.HTMLParser.__init__``.
        """
        htmllib.HTMLParser.__init__(self, formatter)
        # Flag to determine if we are in an anchor tag.
        self.in_anchor = 0
        # Flags set while inside <head>, <body> and <title> respectively.
        self.headflag = 0
        self.bodyflag = 0
        self.titleflag = 0
        # Initialised here but never filled in by any method of this class.
        self.link_list = []

    def start_head(self, attrs):
        """Mark that the parser has entered <head>."""
        self.headflag = 1

    def end_head(self):
        """Mark that the parser has left <head>."""
        self.headflag = 0

    def start_body(self, attrs):
        """Mark that the parser has entered <body>."""
        self.bodyflag = 1

    def end_body(self):
        """Mark that the parser has left <body>."""
        self.bodyflag = 0

    def start_title(self, attrs):
        """Mark that the parser has entered <title>."""
        self.titleflag = 1

    def end_title(self):
        """Mark that the parser has left <title>."""
        self.titleflag = 0

    def start_a(self, attrs):
        """Signal when we get to an anchor (<a>) tag."""
        self.in_anchor = 1

    def end_a(self):
        """Signal when we are out of the anchor (<a>) tag."""
        self.in_anchor = 0

    def start_img(self, attrs):
        """Ignore <img> tags; nothing is recorded for them."""
        pass

    def handle_data(self, text):
        """Print text data (i.e. not tags) found in the body or title.

        Inside <body> the whitespace-stripped text is printed; when the
        stripped text is exactly one character long its ordinal is
        printed as well. Inside <title> the unstripped text is printed
        with a ``Title: `` prefix.
        """
        if self.bodyflag:
            # Strip once and reuse instead of re-stripping for every use.
            stripped = text.strip()
            print(stripped)
            if len(stripped) == 1:
                # Presumably a debugging aid for identifying stray
                # single characters -- TODO confirm.
                print(ord(stripped))
        if self.titleflag:
            print('Title: ', text)


page_format = formatter.NullFormatter()
page_parser = tomParser(page_format)

file_for_parse = urllib2.urlopen('http://www.wx.com/miniradar.cfm?zip=86326')
try:
    tom = file_for_parse.read()
finally:
    # The original referenced ``file_for_parse.close`` without calling it,
    # so the connection was never closed. Close it even if read() fails.
    file_for_parse.close()

print(tom)

# Parsing is currently disabled; only the raw page is printed above.
# page_parser.feed(tom)