TChan/Notebook/2007-4-15

Revised Plan

 * Search eMedicine with given search-term
 * Parse HTML returned
 * Output Treatment section of the HTML

Code So Far
import sys import string import urllib


 * INPUT: search_term = the disease name to search for

def parse_search_term(search_term):
    """Percent-encode a search term so it can be appended to a query URL.

    Spaces become '%20', exactly as before. Other characters that have a
    special meaning inside a URL ('&', '#', '?', '+', '/', ...) are now
    escaped as well, so a term containing them no longer corrupts the
    query string it is appended to.

    Args:
        search_term: The raw text typed by the user, e.g. 'tay sachs'.

    Returns:
        The escaped text, e.g. 'tay%20sachs'.
    """
    # Imported locally so the helper works on both Python 2 (urllib.quote)
    # and Python 3 (urllib.parse.quote) without touching file-level imports.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    # safe='' so that '/' is escaped too: the value is a query parameter,
    # not a path.
    return quote(search_term, safe='')
 * Makes sure that the search_term passed can be appended to the end of the eMedicine search URL.

def get_eMed_search_page(word):
    """Download the eMedicine search-results page for a query.

    Args:
        word: The query text, inserted verbatim into the search URL. The
            script passes the output of parse_search_term() here, so it is
            expected to be URL-escaped already.

    Returns:
        The body of the HTTP response, as returned by read().
    """
    url = "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % word
    fo = urllib.urlopen(url)
    try:
        # read must be *called*; the earlier code returned the bound method
        # itself, which cannot be written to a file.
        return fo.read()
    finally:
        # Release the connection even if reading fails.
        fo.close()

def find_article_title_in_search_page(page, search_term_line):
    """Extract the article title from a saved eMedicine search page.

    A result line is any line starting with four spaces, a tab and a space.
    If several lines match, the last one is used. The title is taken from
    column 211 of that line up to the first '</a'.

    Args:
        page: An open file-like object providing readline(), positioned at
            the start of the search-results HTML.
        search_term_line: Unused. Kept so existing calls still work; strings
            are immutable, so rebinding it here could never update the
            caller's variable.

    Returns:
        The title text, or '' when no result line is found or the line has
        no closing '</a'.
    """
    result_line = ''
    # iter(readline, '') calls readline() until it returns '' (end of file),
    # so every line is examined -- including the first one.
    for line in iter(page.readline, ''):
        if line.startswith('    \t '):
            result_line = line
    if result_line == '':
        return ''
    title_end = result_line.find('</a')
    if title_end == -1:
        # Without the closing tag a slice ending at -1 would return garbage.
        return ''
    return result_line[211:title_end]


def return_treatment_page_or_search_page(article_title, search_term_line, search_page_check):
    """Choose between the article's own URL and the generic search URL.

    Reads the module-level names ``search_term`` and ``parse_search_term``.

    Args:
        article_title: Title found in the search page, or '' if none.
        search_term_line: The result line the title came from; the article
            URL is taken from column 60 up to the first '" title="'.
        search_page_check: Unused. Assigning to it here cannot change the
            caller's flag, so callers must work out for themselves whether
            the result is a search page.

    Returns:
        '' when there is no article title; the article URL when the title
        equals the search term (ignoring case) and the URL can be located in
        search_term_line; otherwise the search-results URL for the term.
        The result is always a string.
    """
    if article_title == '':
        return ''
    # lower must be *called*: comparing the bound methods themselves is
    # never true for two different strings.
    if search_term.lower() == article_title.lower():
        url_end = search_term_line.find('" title="')
        if url_end != -1:
            return search_term_line[60:url_end]
        # Exact title but no extractable link: fall through to the search
        # page rather than returning a meaningless slice.
    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parse_search_term(search_term)

def return_URL_of_interest(url_of_interest):
    """Print a one-line summary of the search outcome and return the URL.

    Reads the module-level flag ``search_page_check`` to decide which
    message to print for a non-empty URL.

    Args:
        url_of_interest: The URL chosen for the search, or '' if nothing
            was found.

    Returns:
        None when url_of_interest is ''; otherwise url_of_interest unchanged.
    """
    if url_of_interest == '':
        print('No results were found.')
        return None
    # Both remaining cases return the same value; only the message differs.
    if search_page_check:
        print("Your search has found multiple entries.")
    else:
        print("Your search has found an exact match.")
    return url_of_interest

# Driver: search eMedicine for one disease name, cache the results page on
# disk, then report the article title and the URL to follow.
search_term = 'tay sachs'
search_term_line = ''
search_page_check = False

# Save the results page. The with-block guarantees the file is flushed and
# closed before it is reopened for reading (the old `f.close` without
# parentheses never actually closed it).
with open('emed_trial.txt', 'w') as f:
    f.write(get_eMed_search_page(parse_search_term(search_term)))

with open('emed_trial.txt', 'r') as f:
    article_title = find_article_title_in_search_page(f, search_term_line)
    # The helper cannot hand the matching line back through its string
    # argument, so locate it here: the last line starting with the
    # result-line prefix is the one the URL is sliced from below.
    f.seek(0)
    for line in f:
        if line.startswith('    \t '):
            search_term_line = line
print(article_title)

# A flag passed as an argument cannot be flipped by the callee, so decide
# here: a title that differs from the search term means we are pointing at
# the search page rather than at one exact article.
search_page_check = (article_title != ''
                     and search_term.lower() != article_title.lower())

url_of_interest = return_treatment_page_or_search_page(article_title, search_term_line, search_page_check)
print(url_of_interest)
return_URL_of_interest(url_of_interest)