"""Collect Yahoo search-result links and save keyword context from each page.

The script runs in three steps:

1. Fetch the first result page for ``query_string`` and read the total
   number of matches from the line that follows the "Web Page Matches"
   marker.
2. Walk the result pages 20 hits at a time and write every link that goes
   through the ``srd.yahoo.com`` redirector to ``LINKS_PATH``.
3. Fetch each saved link and write the text surrounding every occurrence
   of the first query term to ``SNIPPETS_PATH``.

The code runs unchanged on Python 2 and Python 3.
"""
import string
import urllib
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

query_string = 'Cottonwood+arizona+geology+kids'

SEARCH_URL = 'http://google.yahoo.com/bin/query?p='
RESULTS_MARKER = 'Web Page Matches'
HREF_PREFIX = 'a href="'
REDIRECT_HOST = 'srd.yahoo.com'
RESULTS_PER_PAGE = 20
LINKS_PATH = 'c:\\links.txt'
SNIPPETS_PATH = 'c:\\links2.txt'

# Output file used by write_links() when no explicit ``out`` is passed.
# main() assigns it while the script is running.
links = None


def _open_text(path, mode):
    """Open *path* as text, forcing a lossless-enough encoding on Python 3."""
    try:
        # Python 3: never fail on characters the locale encoding lacks.
        return open(path, mode, encoding='utf-8', errors='replace')
    except TypeError:
        # Python 2: the built-in open() takes no encoding arguments.
        return open(path, mode)


def fetch_text(url):
    """Return the body of *url* as a native string.

    Raises IOError (OSError on Python 3) when the URL cannot be opened.
    The response is always closed, even if reading fails.
    """
    with closing(urlopen(url)) as response:
        data = response.read()
    if not isinstance(data, str):
        # Python 3 returns bytes; the page encoding is unknown here, so
        # undecodable bytes are replaced instead of aborting the run.
        data = data.decode('utf-8', 'replace')
    return data


def first_query_term(query):
    """Return the part of *query* before the first ``+``.

    A query without any ``+`` is returned whole (the previous slice-based
    code silently dropped its last character in that case).
    """
    return query.split('+', 1)[0]


def page_offsets(total_links):
    """Return the 1-based ``b=`` start offsets needed to cover all results.

    The upper bound is inclusive so that the final page is still requested
    when ``total_links`` is one more than a multiple of the page size.
    """
    return range(1, total_links + 1, RESULTS_PER_PAGE)


def parse_total_links(page_list):
    """Return the match count announced on a result page, or 0 if absent.

    The count is read from the line immediately following the last line
    that contains ``RESULTS_MARKER``.  Every digit on that line is joined
    into one number (so ``1,234`` reads as 1234); this presumes the line
    carries nothing numeric besides the total -- confirm against the real
    page layout.
    """
    number_line = ''
    take_next = False
    for line in page_list:
        if RESULTS_MARKER in line:
            take_next = True
            continue
        if take_next:
            number_line = line
            take_next = False
    digits = ''.join(ch for ch in number_line if ch in string.digits)
    return int(digits) if digits else 0


def extract_links(page_list):
    """Return the redirect targets found on a result page.

    Lines are only considered from the first one containing
    ``RESULTS_MARKER`` onwards.  For each such line holding an
    ``a href="`` attribute, the text up to the last double quote on the
    line is taken as the reference; references that go through
    ``REDIRECT_HOST`` contribute whatever follows their first ``*``.
    """
    found = []
    in_results = False
    for line in page_list:
        if RESULTS_MARKER in line:
            in_results = True
        if not in_results:
            continue
        start = line.find(HREF_PREFIX)
        if start == -1:
            continue
        reference = line[start + len(HREF_PREFIX):line.rfind('"')]
        if REDIRECT_HOST in reference:
            # When there is no '*', find() gives -1 and the whole
            # reference is kept.
            found.append(reference[reference.find('*') + 1:])
    return found


def write_links(page_list, out=None):
    """Write each link from *page_list* to *out*, one per line.

    *out* is any object with a ``write`` method; when omitted, the
    module-level ``links`` file is used.

    Raises ValueError if neither *out* nor ``links`` is available.
    """
    target = links if out is None else out
    if target is None:
        raise ValueError('write_links: no output file is open')
    for url in extract_links(page_list):
        target.write(url + '\n')


def extract_snippets(text, term):
    """Return one context window per occurrence of *term* in *text*.

    The window reaches ``10 * hits`` characters to either side of the
    start of each occurrence, where ``hits`` is the number of occurrences.
    The left edge is clamped to the start of the text so an early hit does
    not turn into a wrap-around (and usually empty) slice.
    """
    if not term:
        return []
    hits = text.count(term)
    radius = hits * 10
    snippets = []
    position = 0
    for _ in range(hits):
        index = text.find(term, position)
        snippets.append(text[max(0, index - radius):index + radius])
        # Step past this hit, matching count()'s non-overlapping scan.
        position = index + len(term)
    return snippets


def main():
    """Run the crawl; return 0 on success, 1 if the first page is unreachable."""
    global links

    term = first_query_term(query_string)
    base_url = SEARCH_URL + query_string
    try:
        first_page = fetch_text(base_url).splitlines()
    except IOError:
        print('Error: ' + base_url)
        return 1
    total_links = parse_total_links(first_page)

    # 'w+' creates the file if needed and discards data from earlier runs,
    # which would otherwise be re-read below as if it were fresh links.
    links = _open_text(LINKS_PATH, 'w+')
    try:
        for offset in page_offsets(total_links):
            page_url = base_url + '&b=' + str(offset)
            try:
                page_list = fetch_text(page_url).splitlines()
            except IOError:
                print('Error: ' + page_url)
                continue  # skip this page instead of reusing stale data
            write_links(page_list, links)

        links.seek(0)
        snippets_out = _open_text(SNIPPETS_PATH, 'w')
        try:
            for line in links:
                link_url = line.strip()
                if not link_url:
                    continue
                print('')
                try:
                    page_text = fetch_text(link_url)
                except IOError:
                    print('Error: ' + link_url)
                    continue
                for snippet in extract_snippets(page_text, term):
                    snippets_out.write(snippet)
        finally:
            snippets_out.close()
    finally:
        links.close()
    return 0


if __name__ == '__main__':
    raise SystemExit(main())