TChan/Notebook/2007-4-15
From OpenWetWare
Jump to navigation | Jump to search
Revised Plan
- Search eMedicine with the given search term
- Parse HTML returned
- Output Treatment section of the HTML
Code So Far
import sys
import string
import urllib
# INPUT: search_term = disease name which we will search for
def parse_search_term(search_term):
    """Percent-encode search_term for use as the eMedicine query value.

    Makes sure that the search_term passed will be able to be tagged
    onto the end of the eMedicine search-URL.  Spaces become '%20' and
    every other character that is unsafe in a URL (for example '&',
    '#', '=', '?', '+', '%') is percent-encoded as well, so it cannot
    be mistaken for part of the query-string syntax.

    The input must be a raw, not-yet-encoded term: an already encoded
    term would be encoded a second time.

    Returns the encoded string ('' for an empty search_term).
    """
    # Local import so this works on both Python 2 (urllib.quote) and
    # Python 3 (urllib.parse.quote) without touching the file's imports.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    # Python 2 `unicode` objects are not `str`; encode them to UTF-8 bytes
    # first so quote() does not choke on non-ASCII characters.
    if not isinstance(search_term, str) and hasattr(search_term, 'encode'):
        search_term = search_term.encode('utf-8')
    return quote(search_term)
def get_eMed_search_page(word):
    """Download the eMedicine search-results page for ``word``.

    word -- query text, inserted verbatim into the search URL; it is
            expected to be URL-safe already (see parse_search_term).

    Returns the full body of the response as returned by ``read()``.
    Any error raised by ``urllib.urlopen`` or ``read()`` propagates to
    the caller.
    """
    url = "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % word
    # urllib.urlopen is the Python 2 API this file is written against.
    fo = urllib.urlopen(url)
    try:
        page = fo.read()
    finally:
        # Always release the connection, even if read() fails.
        fo.close()
    return page
def find_article_title_in_search_page(page, search_term_line):
    """Extract an article title from a saved eMedicine search page.

    page             -- open, text-mode file-like object; only its
                        ``readline()`` method is used.
    search_term_line -- ignored: it is reset to '' before scanning.
                        Rebinding it here does not change the caller's
                        variable of the same name.

    Every line of ``page`` is examined (including the first one).  A
    result line is one that starts with the search-result table markup;
    if several lines match, the last one wins.  The title is the text
    from fixed column 211 of that line up to the first '</a'.

    Returns the title string, or '' when no result line is present.
    """
    result_marker = ' <table class="smalltext"><tr>\t<td width="325">'
    search_term_line = ''
    # iter(callable, sentinel) keeps calling readline() until it returns ''
    # (end of file), so no line is skipped.
    for line in iter(page.readline, ''):
        if line.startswith(result_marker):
            search_term_line = line
    if search_term_line == '':
        return ''
    # 211 is the hard-coded offset at which the title text starts in a
    # result line.  NOTE(review): find() returns -1 when '</a' is absent,
    # in which case the slice simply drops the final character.
    return search_term_line[211:search_term_line.find('</a')]
def return_treatment_page_or_search_page(article_title, search_term_line, search_page_check):
    """Pick the URL to follow for the current search.

    article_title     -- title extracted from the search page ('' if none).
    search_term_line  -- the search-result line the title came from; the
                         article URL is sliced out of it.
    search_page_check -- accepted for compatibility but not used:
                         rebinding a parameter would not be visible to
                         the caller.

    Reads the module-level ``search_term``.

    Returns, always as a string:
      * ''                     when there is no article title;
      * the article URL        when the title equals the search term
                               (case-insensitive);
      * the search-results URL otherwise.
    """
    if article_title == '':
        return ''
    if search_term.lower() == article_title.lower():
        # The URL starts at fixed column 60 of the result line and ends
        # just before the anchor's title attribute.
        return search_term_line[60:search_term_line.find('" title="')]
    # No exact match: fall back to the search-results page itself.
    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parse_search_term(search_term)
def return_URL_of_interest(url_of_interest):
    """Report the outcome of the search and hand back the URL.

    url_of_interest -- URL chosen for the search, or '' when nothing
                       was found.

    Reads the module-level ``search_page_check`` flag to decide which
    message to print.  Returns None when url_of_interest is '',
    otherwise returns url_of_interest unchanged.
    """
    # Guard clause: nothing to report on.
    if url_of_interest == '':
        print('No results were found.')
        return None
    if search_page_check == True:
        message = "Your search has found multiple entries."
    else:
        message = "Your search has found an exact match."
    print(message)
    return url_of_interest
# ---- Script: look up one hard-coded disease name on eMedicine ----
# These names are module-level on purpose: the functions above read
# ``search_term`` and ``search_page_check`` as globals.
search_term = 'tay sachs'
search_term_line = ''
search_page_check = False

# Save the downloaded search page to disk so it can be re-read line by line.
f = open('emed_trial.txt', 'w')
try:
    f.write(get_eMed_search_page(parse_search_term(search_term)))
finally:
    # Close the file even if the download or the write fails.
    f.close()

f = open('emed_trial.txt', 'r')
try:
    article_title = find_article_title_in_search_page(f, search_term_line)
    print(article_title)
    url_of_interest = return_treatment_page_or_search_page(article_title, search_term_line, search_page_check)
    print(url_of_interest)
    return_URL_of_interest(url_of_interest)
finally:
    f.close()