# Created by Leo from: C:\Python23\Tom\leo\browser3.leo
# -*- coding: utf-8 -*-
# << parsertest declarations >>
from htmllib import HTMLParser
import urllib2
import sys
import formatter
import urlparse
import StringIO
#fakefile = StringIO.StringIO()
import wx.lib.colourdb
#import pickle
#from itertools import cycle as itercycle
# -- end -- << parsertest declarations >>


def _build_colour_table():
    """Return a dict mapping wx colour-database names to '#RRGGBB' strings.

    Each entry of getColourInfoList() is indexed as (name, red, green, blue).
    Components are zero-padded to two hex digits; building the string from
    hex(n)[2:] would turn (0, 0, 255) into '#00FF'.
    """
    table = {}
    for entry in wx.lib.colourdb.getColourInfoList():
        table[entry[0]] = '#%02X%02X%02X' % (entry[1], entry[2], entry[3])
    return table


def _scaled_width(size_text):
    """Return str(int(size_text) * 8), or u'160' when it is not an integer.

    NOTE(review): the factor 8 presumably converts characters to pixels --
    confirm against the code that consumes the generated markup.
    """
    try:
        return str(int(size_text) * 8)
    except ValueError:
        return u'160'


def _frame_percent(raw):
    """Turn an iframe WIDTH/HEIGHT attribute value into a percentage string.

    A value ending in '%' is returned without the sign; any other value is
    read as a number and expressed as a percentage of 600.  '40' is returned
    when the value is empty or not numeric.
    """
    stripped = raw.strip()
    try:
        if stripped[-1] == '%':
            return stripped[:-1]
        return str(int((float(stripped) / 600) * 100))
    except (IndexError, ValueError):
        return '40'


class _MarkupParserBase(HTMLParser):
    """State and text-flushing helpers shared by FrameParser and PyHTMLParser.

    Output is accumulated in ``self.maintxt``; character data seen between
    two tags is captured with the inherited save_bgn() / save_end() pair.
    Helper names start with an underscore so they cannot be mistaken for
    start_*/end_*/do_* tag handlers.
    """

    def __init__(self, formt, web):
        """Initialise the parser state.

        formt -- formatter object passed straight to HTMLParser.__init__.
        web   -- URL of the page being parsed; relative URLs found in the
                 page are joined against it.
        """
        HTMLParser.__init__(self, formt)
        self.originalweb = web
        self.web = web
        self.frametext = u''
        self.maintxt = u''
        #self.hiddenid = itercycle(range(10000,15000))
        self.framesetrows = []
        self.framesetcols = []
        self.frameonflag = 0          # nesting depth of open <frameset> tags
        self.framesetborder = u' '
        self.framesetborderstyle = 'wx.EXPAND | wx.NORTH | wx.WEST'
        self.scripttext = u''
        self.imagelist = []
        self.linklist = []
        self.linktextlist = []
        self.background = u''
        self.backgroundcolor = u''
        self.formflag = False
        self.selectflag = False
        self.tableflag = 0
        self.formattrib = []
        self.textarearow = 2
        self.textareacol = 2
        self.textareanam = u' '
        self.nextrow = u' '
        # NOTE(review): every template below is blank in this copy of the
        # file, yet the call sites apply '%' with three to seven arguments,
        # which a blank template cannot accept.  The template bodies have
        # presumably been lost -- restore them before running this module.
        self.form_txt_object = u""" """
        self.form_frm_object = u""" """
        self.form_opt_object = u""" """
        self.form_but_object = u""" """
        self.form_sub_object = u""" """
        self.form_res_object = u""" """
        self.form_hid_object = u""" """
        self.form_img_object = u""" """
        self.form_fil_object = u""" """
        self.form_rad_top_object = u""" """
        self.form_chk_top_object = u""" """
        self.form_chk_bottom = u""" """
        self.selectattrib = []
        self.optiondatalist = []
        self.checkflag = False        # True while a checkbox/radio awaits its label
        self.webformname = u' '
        self.webformurl = u' '
        self.optionlist = u''
        self.colordict = _build_colour_table()
        #self.colordict = pickle.load(file('colors.p'))

    def _take_saved_text(self):
        """Return the text captured since the last save_bgn(), as unicode.

        Returns u'' when save_end() raises AttributeError.
        NOTE(review): presumably that happens when no capture is active --
        confirm against htmllib.
        """
        try:
            return unicode(self.save_end(), 'utf-8', 'ignore')
        except AttributeError:
            return u''

    def _flush_text(self, close_checkbox=True):
        """Append the captured text to maintxt and return it.

        When close_checkbox is true and a checkbox/radio input is pending
        (checkflag, set by do_input), the text is formatted through
        form_chk_bottom and the flag is cleared.  Otherwise the text is
        appended unchanged and the flag is left alone.
        """
        text = self._take_saved_text()
        if close_checkbox and self.checkflag:
            self.maintxt = self.maintxt + self.form_chk_bottom % (text, u'wx.CHK_2STATE')
            self.checkflag = False
        else:
            self.maintxt = self.maintxt + text
        return text

    def _append_markup(self, markup, close_checkbox=True):
        """Flush captured text, restart capture, then append markup to maintxt.

        Returns the flushed text.
        NOTE(review): the markup literals passed by the tag handlers are
        u' ' or u'' in this copy of the file; they may have lost their
        content the same way the templates did -- confirm.
        """
        text = self._flush_text(close_checkbox)
        self.save_bgn()
        self.maintxt = self.maintxt + markup
        return text

    def start_frameset(self, attrs):
        """Open a frameset: flush text to maintxt and bump the nesting depth.

        Capture is not restarted here.
        """
        self._flush_text()
        self.frametext = self.frametext + u' '
        self.frameonflag = self.frameonflag + 1

    def do_frame(self, attrs):
        """Move the captured text into frametext and add the frame marker."""
        text = self._take_saved_text()
        self.save_bgn()
        self.frametext = self.frametext + text
        self.frametext = self.frametext + u' '


# << parsertest methods >> (1 of 3)
class FrameParser(_MarkupParserBase):
    """Parser that gathers frameset content into ``frametext``."""

    def end_frameset(self):
        """Close a frameset; on the outermost one, render it into maintxt."""
        self.frametext = self.frametext + self._take_saved_text()
        self.save_bgn()
        self.frametext = self.frametext + u''
        jointxt = u''
        if self.frameonflag == 1:
            jointxt = self.form_frm_object % (
                '100', ' ', self.frametext, self.webformname, self.webformurl)
        self.maintxt = self.maintxt + jointxt
        self.frameonflag = self.frameonflag - 1


# << parsertest methods >> (2 of 3)
class PyHTMLParser(_MarkupParserBase):
    """Parser that rewrites an HTML page into the markup kept in ``maintxt``.

    Form controls are rendered through the ``form_*_object`` templates;
    script text and comments are collected separately in ``scripttext``.
    """

    def __init__(self, formt, web):
        """Initialise shared state plus the frame-target bookkeeping lists."""
        _MarkupParserBase.__init__(self, formt, web)
        self.frametargetalist = []
        self.frametargetbase = u''
        self.frametargetarealist = []
        self.frametargetformlist = []

    # ---- comments, document skeleton, scripts ---------------------------

    def handle_comment(self, comment):
        """Append the comment, wrapped in '<-- ' / ' -->', to scripttext."""
        self.scripttext = self.scripttext + unicode(
            """<-- """ + comment + """ -->""", 'utf-8', 'ignore')

    def do_base(self, attrs):
        """Handle <base>; a pending checkbox label is not closed here."""
        self._append_markup(u' ', close_checkbox=False)

    def start_script(self, attrs):
        """Flush text to maintxt and start a script section in scripttext."""
        self._flush_text()
        self.save_bgn()
        self.scripttext = self.scripttext + u' '

    def end_script(self):
        """Move the captured script body into scripttext."""
        self.scripttext = self.scripttext + self._take_saved_text()
        self.save_bgn()
        self.scripttext = self.scripttext + u''

    def start_html(self, attrs):
        """Begin the document and start capturing text."""
        self.maintxt = self.maintxt + u''
        self.save_bgn()

    def end_html(self):
        """Append any remaining captured text; capture is not restarted."""
        self.maintxt = self.maintxt + self._take_saved_text()
        self.maintxt = self.maintxt + u''

    # ---- forms ------------------------------------------------------------

    def start_form(self, attrs):
        """Record the form's attributes and action URL.

        webformname becomes every attribute rendered as 'name|value', joined
        with '|'.  webformurl becomes the ACTION joined against self.web.
        TARGET values are appended to frametargetformlist, as are HREF values
        when any attribute name contains 'TARGET'.
        """
        self._flush_text()
        self.save_bgn()
        has_target = False
        for attr in attrs:
            if 'TARGET' in attr[0].upper():
                has_target = True
        pairs = []
        for attr in attrs:
            upper_name = attr[0].upper()
            if upper_name == u'TARGET':
                self.frametargetformlist.append(attr[1])
            elif upper_name == u'HREF' and has_target:
                self.frametargetformlist.append(
                    urlparse.urljoin(self.web, attr[1]))
            pairs.append('|'.join(attr))
            if attr[0].lower() == 'action':
                self.webformurl = str(urlparse.urljoin(self.web, attr[1]))
        self.webformname = '|'.join(pairs)

    def end_form(self):
        """Flush text and forget the current form name."""
        self._flush_text()
        self.save_bgn()
        self.webformname = ''

    def start_select(self, attrs):
        """Remember the <select> attributes until end_select renders it."""
        self._flush_text()
        self.save_bgn()
        self.selectflag = True
        self.selectattrib = attrs

    def end_select(self):
        """Render the collected options through form_opt_object."""
        text = self._take_saved_text()
        self.optionlist = self.optionlist + '%s|' % (text)
        self.save_bgn()
        self.selectflag = False
        # '' when the tag had no attributes at all, ' ' when it had some but
        # no NAME.  A NAME that was found is kept even when other attributes
        # follow it.
        namestr = ''
        if self.selectattrib:
            namestr = ' '
        for attr in self.selectattrib:
            if attr[0].lower() == 'name':
                namestr = attr[1]
        self.selectattrib = []
        optdata = '|'.join(self.optiondatalist)
        self.maintxt = self.maintxt + self.form_opt_object % (
            u'50', namestr, u'wx.CB_DROPDOWN', self.optionlist, optdata,
            self.webformname, self.webformurl)
        self.optionlist = ' , '
        self.optiondatalist = []

    def do_option(self, value):
        """Collect one <option>.

        The captured text is the label of the previous option, so it is
        skipped for the first option after <select> (selectflag still set).
        NOTE(review): the first option's value is skipped along with it, so
        optiondatalist ends up one entry shorter than optionlist.  Only the
        first attribute is read, whatever its name.  Confirm both are
        intended.
        """
        text = self._take_saved_text()
        if self.checkflag:
            self.maintxt = self.maintxt + self.form_chk_bottom % (text, u'wx.CHK_2STATE')
            self.checkflag = False
        self.save_bgn()
        if not self.selectflag:
            try:
                self.optiondatalist.append(value[0][1])
            except (IndexError, TypeError):
                pass    # option without attributes: no value to record
            self.optionlist = self.optionlist + '%s|' % (text)
        self.selectflag = False

    def do_isindex(self, value):
        """Render <isindex> as a text field posting to its ACTION URL."""
        self._flush_text()
        self.save_bgn()
        webformurl = u' '
        for attr in value:
            if attr[0].lower() == 'action':
                webformurl = str(urlparse.urljoin(self.web, attr[1]))
        self.maintxt = self.maintxt + self.form_txt_object % (
            u'160', u'isindex', u'wx.TE_LEFT', '', u'#FFFFFF',
            self.webformname, webformurl)

    def do_input(self, keyworda):
        """Render an <input> element according to its TYPE.

        keyworda -- list of (name, value) attribute pairs.

        checkbox and radio set checkflag so that the text that follows is
        formatted through form_chk_bottom by the next handler.  Unknown or
        missing types are rendered as a plain text field.  TYPE is compared
        case-insensitively.
        """
        self._flush_text(close_checkbox=False)
        self.save_bgn()

        fields = {'type': u'', 'size': u'', 'name': u'', 'value': u'', 'src': u''}
        inpstyle = u'#FFFFFF'
        for attr in keyworda:
            key = attr[0]
            if key in fields:
                fields[key] = str(attr[1])
            elif key == 'style':
                # Expects 'property: COLOUR NAME'; anything else keeps white.
                try:
                    inpstyle = self.colordict[attr[1].split(':')[1].upper().strip()]
                except (IndexError, KeyError, AttributeError):
                    inpstyle = u'#FFFFFF'

        inptyp = fields['type'].lower() or u' '
        inpname = fields['name'] or u' '
        inpvalue = fields['value'] or u' '
        inpsrc = fields['src']
        inpsize = fields['size']
        if inpsize == '':
            inpsize = u' '
        elif inpsize.endswith('%'):
            inpsize = inpsize[:-1]

        form = (self.webformname, self.webformurl)
        if inptyp == 'button':
            markup = self.form_but_object % (
                (str(len(inpvalue) * 10), inpname, u'wx.BU_LEFT', inpvalue, inpstyle) + form)
        elif inptyp == 'checkbox':
            markup = self.form_chk_top_object % ((u'40', inpname, inpstyle) + form)
            self.checkflag = True
        elif inptyp == 'radio':
            markup = self.form_rad_top_object % ((u'40', inpname, inpstyle) + form)
            self.checkflag = True
        elif inptyp == 'file':
            if not inpsize.endswith('%'):
                inpsize = _scaled_width(inpsize)
            markup = self.form_fil_object % (
                (inpsize, inpname, u"wx.BU_LEFT", inpvalue, inpstyle) + form)
        elif inptyp == 'image':
            if not inpsize.endswith('%'):
                inpsize = str(len(inpvalue) * 8)
            markup = self.form_img_object % (
                (inpsize, inpname, u"wx.BU_LEFT",
                 urlparse.urljoin(self.web, inpsrc), inpstyle) + form)
        elif inptyp == 'password':
            markup = self.form_txt_object % (
                (_scaled_width(inpsize), inpname, u'wx.TE_PASSWORD', inpvalue, inpstyle) + form)
        elif inptyp == 'submit':
            markup = self.form_sub_object % (
                (str(len(inpvalue) * 8 + 10), inpname, u'wx.BU_LEFT', inpvalue, inpstyle) + form)
        elif inptyp == 'reset':
            markup = self.form_res_object % (
                (u'40', inpname, u'wx.BU_LEFT', u"Reset", inpstyle) + form)
        elif inptyp == 'hidden':
            # Every attribute travels along as 'name|value|name|value...'.
            hiddenattrs = u'|'.join([u'|'.join(attr) for attr in keyworda])
            markup = self.form_hid_object % ((hiddenattrs,) + form)
        else:
            # 'text' and every unrecognised type.
            markup = self.form_txt_object % (
                (_scaled_width(inpsize), inpname, u'wx.TE_LEFT', inpvalue, inpstyle) + form)
        self.maintxt = self.maintxt + markup

    def start_textarea(self, value):
        """Remember ROWS, COLS and NAME for end_textarea.

        A non-integer ROWS or COLS falls back to 2.  Values not given on
        this tag keep whatever an earlier textarea set.
        """
        self._flush_text()
        self.save_bgn()
        for attr in value:
            upper_name = attr[0].upper()
            if upper_name == 'ROWS':
                try:
                    self.textarearow = int(attr[1])
                except ValueError:
                    self.textarearow = 2
            if upper_name == 'COLS':
                try:
                    self.textareacol = int(attr[1])
                except ValueError:
                    self.textareacol = 2
            if upper_name == 'NAME':
                self.textareanam = attr[1]

    def end_textarea(self):
        """Render the textarea as a multi-line text field.

        The captured text is padded with newlines up to ROWS lines, counting
        len(text) // COLS lines as already used.
        """
        text = self._take_saved_text()
        self.save_bgn()
        inpsize = str(self.textareacol * 8)
        if self.textareacol:
            used_rows = len(text) // self.textareacol
        else:
            used_rows = 0       # COLS="0" would otherwise divide by zero
        inpvalue = text
        for _unused in range(self.textarearow - used_rows):
            inpvalue = inpvalue + '\n'
        self.maintxt = self.maintxt + self.form_txt_object % (
            inpsize, self.textareanam, u'wx.TE_MULTILINE', inpvalue,
            u'#FFFFFF', self.webformname, self.webformurl)

    # ---- plain container tags ---------------------------------------------
    # Each handler flushes the captured text (closing a pending checkbox
    # label), restarts capture and appends its marker to maintxt.

    def do_br(self, attrs):
        """Handle <br>."""
        self._append_markup(u' ')

    def start_tr(self, attrs):
        """Handle <tr>."""
        self._append_markup(u' ')

    def end_tr(self):
        """Handle </tr>."""
        self._append_markup(u'')

    def start_a(self, attrs):
        """Handle <a>."""
        self._append_markup(u' ')

    def end_a(self):
        """Handle </a> and record the link text in linktextlist."""
        text = self._append_markup(u'')
        self.linktextlist.append(text)

    def start_address(self, attrs):
        """Handle <address>."""
        self._append_markup(u' ')

    def end_address(self):
        """Handle </address>."""
        self._append_markup(u'')

    def start_area(self, attrs):
        """Handle <area>."""
        self._append_markup(u' ')

    def end_area(self):
        """Handle </area>."""
        self._append_markup(u'')

    def start_b(self, attrs):
        """Handle <b>."""
        self._append_markup(u' ')

    def end_b(self):
        """Handle </b>."""
        self._append_markup(u'')

    def start_big(self, attrs):
        """Handle <big>."""
        self._append_markup(u' ')

    def end_big(self):
        """Handle </big>."""
        self._append_markup(u'')

    def start_blockquote(self, attrs):
        """Handle <blockquote>."""
        self._append_markup(u' ')

    def end_blockquote(self):
        """Handle </blockquote>."""
        self._append_markup(u'')

    def start_body(self, attrs):
        """Handle <body>; a pending checkbox label is not closed here."""
        self._append_markup(u' ', close_checkbox=False)

    def end_body(self):
        """Handle </body>."""
        self._append_markup(u'')

    def start_center(self, attrs):
        """Handle <center>."""
        self._append_markup(u' ')

    def end_center(self):
        """Handle </center>."""
        self._append_markup(u'')

    def start_cite(self, attrs):
        """Handle <cite>."""
        self._append_markup(u' ')

    def end_cite(self):
        """Handle </cite>."""
        self._append_markup(u'')

    def start_code(self, attrs):
        """Handle <code>."""
        self._append_markup(u' ')

    def end_code(self):
        """Handle </code>."""
        self._append_markup(u'')

    def start_dd(self, attrs):
        """Handle <dd>."""
        self._append_markup(u' ')

    def end_dd(self):
        """Handle </dd>."""
        self._append_markup(u'')

    def start_div(self, attrs):
        """Handle <div>."""
        self._append_markup(u' ')

    def end_div(self):
        """Handle </div>."""
        self._append_markup(u'')

    def start_dl(self, attrs):
        """Handle <dl>."""
        self._append_markup(u' ')

    def end_dl(self):
        """Handle </dl>."""
        self._append_markup(u'')

    def start_dt(self, attrs):
        """Handle <dt>."""
        self._append_markup(u' ')

    def end_dt(self):
        """Handle </dt>."""
        self._append_markup(u'')

    def start_em(self, attrs):
        """Handle <em>."""
        self._append_markup(u' ')

    def end_em(self):
        """Handle </em>."""
        self._append_markup(u'')

    # ---- frames -----------------------------------------------------------
    # start_frameset and do_frame are inherited from _MarkupParserBase.

    def end_frameset(self):
        """Close a frameset; on the outermost one, render it into maintxt."""
        self.frametext = self.frametext + self._take_saved_text()
        self.save_bgn()
        self.frametext = self.frametext + u''
        jointxt = u''
        if self.frameonflag == 1:
            # NOTE(review): in this copy the literal is a bare line break
            # inside single quotes; it may have held markup -- confirm.
            breaks = '\n'
            jointxt = self.form_frm_object % (
                '100', '800', ' ', self.frametext,
                self.webformname, self.webformurl) + breaks
        self.maintxt = self.maintxt + jointxt
        self.frameonflag = self.frameonflag - 1

    def start_iframe(self, attrs):
        """Render an <iframe> through form_frm_object.

        WIDTH and HEIGHT are converted by _frame_percent.  SRC is joined
        against the original page URL and defaults to u'' when absent.
        """
        self._flush_text()
        self.save_bgn()
        framewidth = u''
        frameheight = u''
        frameborder = u''
        src = u''
        for attr in attrs:
            upper_name = attr[0].upper()
            if upper_name == 'HEIGHT':
                frameheight = _frame_percent(attr[1])
            elif upper_name == 'WIDTH':
                framewidth = _frame_percent(attr[1])
            elif upper_name == 'FRAMEBORDER':
                frameborder = attr[1]
            elif upper_name == 'SRC':
                src = urlparse.urljoin(self.originalweb, attr[1])
        # NOTE(review): this format string is a single space in this copy of
        # the file but receives four arguments, so it raises TypeError as it
        # stands.  The original markup is presumably lost -- restore it.
        frametext = u' ' % (framewidth, frameheight, frameborder, src)
        jointxt = self.form_frm_object % (
            '72', '300', ' ', frametext, self.webformname, self.webformurl)
        self.maintxt = self.maintxt + jointxt

    def end_iframe(self):
        """Handle </iframe>: flush text and restart capture, nothing else."""
        self._flush_text()
        self.save_bgn()

    # ---- headings ---------------------------------------------------------

    def start_h1(self, attrs):
        """Handle <h1>."""
        self._append_markup(u' ')

    def end_h1(self):
        """Handle </h1>."""
        self._append_markup(u'')

    def start_h2(self, attrs):
        """Handle <h2>."""
        self._append_markup(u' ')

    def end_h2(self):
        """Handle </h2>."""
        self._append_markup(u'')

    def start_h3(self, attrs):
        """Handle <h3>."""
        self._append_markup(u' ')

    def end_h3(self):
        """Handle </h3>."""
        self._append_markup(u'')

    def start_h4(self, attrs):
        """Handle <h4>."""
        self._append_markup(u' ')

    def end_h4(self):
        """Handle </h4>."""
        self._append_markup(u'')

    def start_h5(self, attrs):
        """Handle <h5>."""
        self._append_markup(u' ')

    def end_h5(self):
        """Handle </h5>."""
        self._append_markup(u'')

    def start_h6(self, attrs):
        """Handle <h6>."""
        self._append_markup(u' ')

    def end_h6(self):
        """Handle </h6>."""
        self._append_markup(u'')

    # << class PyHTMLParser methods >> (67 of
109) def start_i(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (68 of 109) def end_i(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (69 of 109) def start_hr(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (70 of 109) def start_p(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (71 of 109) def start_kbd(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (72 of 109) def end_kbd(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = 
self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (73 of 109) def start_li(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (74 of 109) def end_li(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt + u'' # << class PyHTMLParser methods >> (75 of 109) def start_ol(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (76 of 109) def end_ol(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (77 of 109) def start_pre(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt 
+jointxt # << class PyHTMLParser methods >> (78 of 109) def end_pre(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt + u'' # << class PyHTMLParser methods >> (79 of 109) def start_samp(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (80 of 109) def end_samp(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (81 of 109) def start_small(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (82 of 109) def end_small(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt + u'' # << class PyHTMLParser methods >> (83 of 109) def start_strike(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: 
self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (84 of 109) def end_strike(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (85 of 109) def start_strong(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (86 of 109) def end_strong(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (87 of 109) def start_title(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (88 of 109) def end_title(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (89 of 109) def start_u(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t 
self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (90 of 109) def end_u(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (91 of 109) def start_tt(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (92 of 109) def end_tt(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (93 of 109) def start_ul(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (94 of 109) def end_ul(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (95 of 109) def start_map(self,attrs): try: t = unicode(self.save_end(), 
'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (96 of 109) def end_map(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (97 of 109) def start_meta(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.scripttext = self.scripttext + t self.save_bgn( ) jointxt = u' ' self.scripttext = self.scripttext +jointxt # << class PyHTMLParser methods >> (98 of 109) def end_meta(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.scripttext = self.scripttext + t self.save_bgn( ) self.scripttext = self.scripttext +u'' # << class PyHTMLParser methods >> (99 of 109) def start_tr(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) if self.tableflag > 0: jointxt = u' ' self.maintxt = self.maintxt + jointxt # << class PyHTMLParser methods >> (100 of 109) def end_td(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) if self.tableflag > 0: self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (101 of 109) def start_td(self,attrs): try: t = 
unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) if self.tableflag > 0: jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (102 of 109) def end_td(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) if self.tableflag > 0: self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (103 of 109) def start_th(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (104 of 109) def end_th(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt +u'' # << class PyHTMLParser methods >> (105 of 109) def do_img(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt +self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (106 of 109) def start_font(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = 
self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt # << class PyHTMLParser methods >> (107 of 109) def end_font(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) self.maintxt = self.maintxt + u'' # << class PyHTMLParser methods >> (108 of 109) def start_table(self,attrs): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' if self.checkflag: self.maintxt = self.maintxt + self.form_chk_bottom % (t,u'wx.CHK_2STATE') self.checkflag = False else: self.maintxt = self.maintxt +t self.save_bgn( ) jointxt = u' ' self.maintxt = self.maintxt +jointxt self.tableflag = self.tableflag + 1 # << class PyHTMLParser methods >> (109 of 109) def end_table(self): try: t = unicode(self.save_end(), 'utf-8','ignore') except AttributeError: t = u'' self.maintxt = self.maintxt + t self.save_bgn( ) self.maintxt = self.maintxt + u'' self.tableflag = self.tableflag - 1 # -- end -- << class PyHTMLParser methods >> # << parsertest methods >> (3 of 3) class TomFilter(formatter.NullWriter): # << class TomFilter methods >> (1 of 3) def __init__(self): writrtom = formatter.NullWriter() self.formt = formatter.AbstractFormatter(writrtom) self.scripttext = '' self.imagelist = [] self.linklist = [] self.noframe = '' self.pretext = '' self.linktextlist = [] self.background = '' self.backgroundcolor = '' self.frametargetalist = [] self.frametargetbase = u'' self.frametargetarealist = [] self.frametargetformlist = [] # << class TomFilter methods >> (2 of 3) def findform(self,tomtxt): findnum = 0 newnum = 0 newlist = [] while newnum !=-1: newnum = tomtxt.find('',newnum)+7],[newnum, tomtxt.find('',newnum)+7]]) findnum = newnum + 7 
return newlist # << class TomFilter methods >> (3 of 3) def formfilter(self,html_text,url): #safelist = ('A','ADDRESS','AREA', 'B','BIG','BLOCKQUOTE','BODY','BR' ,'CENTER','CITE','CODE','DD','DIV', 'DL','DT','EM','FONT' ,'HR', 'H1','H2','H3','H4','H5','H6','I','IMG','KBD','LI','MAP','META','OL','P','PRE','SAMP','SMALL','STRIKE','STRONG','TABLE','TD','TH','TITLE','TR','TT','U','UL','FORM','OPTION','INPUT','SELECT','HTML') if html_text.upper().find('')+11] tomtxt = html_text[:html_text.upper().find('')+11:] else: tomtxt = html_text tom = PyHTMLParser(self.formt,url) cstr = '' tom.feed(tomtxt) self.imagelist = tom.imagelist self.linklist = tom.linklist self.scripttext = tom.scripttext self.linktextlist = tom.linktextlist self.background = tom.background self.backgroundcolor = tom.backgroundcolor self.frametargetalist = tom.frametargetalist self.frametargetbase = tom.frametargetbase self.frametargetarealist = tom.frametargetarealist self.frametargetformlist = tom.frametargetformlist newstr = tom.maintxt tom.reset() return newstr # -- end -- << class TomFilter methods >> # -- end -- << parsertest methods >> #web = 'http://www.yahoo.com #web = 'http://www.mountaindragon.com/html/button.htm' #tom = file('c:\\RBRanch\\out\\rbranch_strangness4.htm') #tom = urllib2.urlopen(web) #tomlines = tom.readlines() #tfle = file('c:\\python23\\yaho.html','w+') #tfle.write(tomtxt) #tfle.close #tfle.close()