import urllib2
import string
import htmllib
import formatter
# Open the wx.com mini-radar page (zip=86326 in the query string).  The
# returned file-like response object is read further down in this script.
file_for_parse = urllib2.urlopen('http://www.wx.com/miniradar.cfm?zip=86326')
class tomParser(htmllib.HTMLParser):
def __init__(self, formatter):
htmllib.HTMLParser.__init__(self, formatter)
# flag to determine if we are in an anchor tag
self.in_anchor = 0
self.headflag = 0
self.bodyflag = 0
self.titleflag = 0
self.link_list = []
def start_head(self, attrs):
self.headflag = 1
def end_head(self):
self.headflag = 0
def start_body(self, attrs):
self.bodyflag = 1
#self.link_list attrs
def end_body(self):
self.bodyflag = 0
def start_title(self, attrs):
self.titleflag = 1
def end_title(self):
self.titleflag = 0
#def start_(self, attrs):
#def end_(self, attrs):
def start_a(self, attrs):
# """Signal when we get to an tag.
# """
self.in_anchor = 1
#print 'Anchor: ',attrs
def end_a(self):
# """Signal when we are out of the anchor -- a tag"""
self.in_anchor = 0
def start_img(self, attrs):
#print attrs
pass
def handle_data(self, text):
#print self.headflag,self.bodyflag,self.titleflag
"""This is called everytime we get to text data (ie. not tags) """
#if self.in_anchor:
# print text
#if self.headflag:
# print text
if self.bodyflag:
print string.strip(text)
if len(string.strip(text)) == 1:
print ord(string.strip(text))
if self.titleflag:
print 'Title: ' , text
# Build the parser with a formatter that discards all formatting events.
page_format = formatter.NullFormatter()
page_parser = tomParser(page_format)

# Read the whole response, then make sure the connection is really closed.
# The original line was `file_for_parse.close` without parentheses, which
# only looked the method up and never called it.
try:
    tom = file_for_parse.read()
finally:
    file_for_parse.close()

# Dump the raw page source.
print(tom)

# NOTE: parsing is currently disabled; call page_parser.feed(tom) to run
# the downloaded page through tomParser.