TChan/Notebook/2007-4-15
From OpenWetWare
Jump to navigation | Jump to search
Revised Plan
- Search eMedicine with given search-term
- Parse HTML returned
- Output Treatment section of the HTML
Code So Far
# eMedicine lookup script.
#
# Searches eMedicine for a disease name, saves the returned HTML to disk, picks
# a result line out of it and reports either the URL of the exactly-matching
# article or the URL of the search-results page.
#
# Originally written for Python 2. The import shim below and the use of
# single-argument print(...) calls keep it runnable on Python 3 as well.
import contextlib
import io
import string
import sys
import urllib

try:  # Python 3 module layout
    from urllib.parse import quote as _quote
    from urllib.request import urlopen as _urlopen
except ImportError:  # Python 2 module layout
    from urllib import quote as _quote
    from urllib import urlopen as _urlopen

# Search URL template; "%s" receives the already-encoded query term.
_SEARCH_URL = ("http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/"
               "searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s")
# Everything in the search URL that precedes the query term.
_SEARCH_URL_PREFIX = _SEARCH_URL % ''

# A line of the search page is treated as a hit when it starts with this text.
_RESULT_ROW_PREFIX = ' <table class="smalltext"><tr>\t<td width="325">'
# Fixed column offsets into a hit line.
# NOTE(review): these presumably mirror the eMedicine HTML layout at the time
# the script was written -- confirm against a live page before relying on them.
_URL_START = 60
_TITLE_START = 211

# File the downloaded search page is saved to.
_CACHE_FILE = 'emed_trial.txt'

# Module-level state read by the functions below.
search_term = 'tay sachs'   # disease name to search for
search_term_line = ''       # kept for backward compatibility; see main()
search_page_check = False   # True forces the "multiple entries" message


def parse_search_term(search_term):
    """Return ``search_term`` percent-encoded for use as the query value.

    Spaces become ``%20`` exactly as before; other characters that are not
    safe inside a query string (``&``, ``#``, ``%``, ``+``, ``/`` ...) are now
    escaped too, so they can no longer break the search URL.

    INPUT: search_term = disease name which we will search for.
    """
    return _quote(search_term, safe='')


def get_eMed_search_page(word):
    """Fetch the eMedicine search page for the encoded term ``word``.

    Returns whatever ``read()`` on the opened URL returns (a byte string).
    The connection is closed before returning.
    """
    url = _SEARCH_URL % word
    with contextlib.closing(_urlopen(url)) as response:
        return response.read()


def _find_result_line(page):
    """Return the last line of ``page`` that starts with the hit marker, or ''.

    ``page`` is any iterable of lines (an open file works).
    """
    # NOTE(review): the last matching line wins, as in the original loop;
    # confirm whether the first hit was actually intended.
    result_line = ''
    for line in page:
        if line.startswith(_RESULT_ROW_PREFIX):
            result_line = line
    return result_line


def _title_from_result_line(result_line):
    """Slice the article title out of a hit line; '' when there is no line."""
    if not result_line:
        return ''
    return result_line[_TITLE_START:result_line.find('</a')]


def find_article_title_in_search_page(page, search_term_line):
    """Return the article title found in ``page``, or '' if there is no hit.

    page             -- iterable of lines of the search-result HTML. Every
                        line is examined, including the first one (the
                        original loop discarded it unread).
    search_term_line -- unused; kept so existing callers keep working. Python
                        cannot hand the matched line back through this
                        argument, which is why main() locates the line itself.
    """
    return _title_from_result_line(_find_result_line(page))


def return_treatment_page_or_search_page(article_title, search_term_line, search_page_check):
    """Pick the URL to report for the module-level ``search_term``.

    article_title     -- title extracted from the search page ('' if none).
    search_term_line  -- the hit line the title came from; the article URL is
                         sliced out of it.
    search_page_check -- unused; kept for backward compatibility.

    Returns '' when there is no title, the article URL when the title equals
    ``search_term`` ignoring case, and otherwise the search-page URL.

    Every branch now returns a plain string. The fallback branch used to
    return a ``(url, False)`` tuple, which no caller could handle uniformly
    with the string results of the other branches.
    """
    if article_title == '':
        return ''
    if search_term.lower() == article_title.lower():
        return search_term_line[_URL_START:search_term_line.find('" title="')]
    return _SEARCH_URL % parse_search_term(search_term)


def return_URL_of_interest(url_of_interest):
    """Print a one-line summary for ``url_of_interest`` and return the URL.

    Returns None (after printing a message) when the URL is ''.
    """
    if url_of_interest == '':
        print('No results were found.')
        return None
    # The module flag is still honoured, but a search-page URL is also
    # recognised directly: nothing ever set the flag, so this branch used to
    # be unreachable.
    if search_page_check or url_of_interest.startswith(_SEARCH_URL_PREFIX):
        print("Your search has found multiple entries.")
    else:
        print("Your search has found an exact match.")
    return url_of_interest


def main():
    """Run the lookup for the module-level ``search_term`` and print the result."""
    page = get_eMed_search_page(parse_search_term(search_term))

    # Save the raw page, as the original script did. Binary mode accepts the
    # byte string returned by read() on both Python 2 and 3.
    with open(_CACHE_FILE, 'wb') as cache:
        cache.write(page)

    # latin-1 maps every byte to a character, so decoding cannot fail; the
    # markers searched for are plain ASCII.
    with io.open(_CACHE_FILE, 'r', encoding='latin-1') as cache:
        result_line = _find_result_line(cache)

    article_title = _title_from_result_line(result_line)
    print(article_title)

    url_of_interest = return_treatment_page_or_search_page(
        article_title, result_line, search_page_check)
    print(url_of_interest)

    return_URL_of_interest(url_of_interest)


if __name__ == '__main__':
    main()