TChan/Notebook/2007-4-15

From OpenWetWare
Jump to navigation · Jump to search

Revised Plan

  • Search eMedicine with the given search term
  • Parse the returned HTML
  • Output the Treatment section of the HTML


Code So Far

import sys
import string
import urllib


# INPUT: search_term = disease name which we will search for

def parse_search_term(search_term):
    """Return search_term with every space replaced by '%20'.

    The result is meant to be appended to the eMedicine search URL as
    the query value. Only the space character is rewritten; every other
    character is passed through unchanged.
    """
    return ''.join('%20' if ch == ' ' else ch for ch in search_term)


def get_eMed_search_page(word):
    """Fetch the eMedicine search-results page for a query.

    word -- query text, inserted verbatim at the end of the search URL,
            so it must already be safe to place in a URL (for example
            with spaces written as '%20').

    Returns whatever read() yields for the opened URL. Errors raised by
    urllib.urlopen() or read() propagate to the caller.
    """
    url = "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % word
    fo = urllib.urlopen(url)
    try:
        return fo.read()
    finally:
        # Always release the connection, even when read() fails.
        fo.close()

def find_article_title_in_search_page(page, search_term_line):
    """Extract an article title from an eMedicine search-results page.

    page -- open file-like object; it is consumed through readline()
            until readline() returns ''.
    search_term_line -- ignored. It is kept so existing call sites keep
            working; rebinding a string parameter inside this function
            can never be observed by the caller, so the matched line is
            not reported back through it.

    Returns the title sliced out of the last matching result row, or ''
    when the page contains no matching row.
    """
    # Result rows begin with exactly this markup (leading spaces and the
    # tab character included).
    result_row_prefix = '     <table class="smalltext"><tr>\t<td width="325">'
    # Fixed offset at which the title is sliced from a result row.
    # NOTE(review): presumably the length of the markup preceding the
    # title; it is tied to the page layout -- confirm against a live page.
    title_start = 211

    matched_line = ''
    # iter(readline, '') yields every line, the first one included, and
    # stops at end of input. The previous read-ahead loop skipped line 1.
    for line in iter(page.readline, ''):
        if line.startswith(result_row_prefix):
            # Later rows overwrite earlier ones: the last match wins.
            matched_line = line

    if not matched_line:
        return ''
    # If '</a' is absent, find() returns -1 and the slice simply stops
    # one character before the end of the line.
    return matched_line[title_start:matched_line.find('</a')]
    
def return_treatment_page_or_search_page(article_title, search_term_line, search_page_check):
    """Choose the URL to report for the current search.

    article_title -- title scraped from the results page ('' if none).
    search_term_line -- raw result row; the article URL is sliced out of
            it when the title matches.
    search_page_check -- unused. Assigning to this parameter inside the
            function could never reach the caller, so it is accepted
            only to keep existing call sites working.

    Reads the module-level name search_term.

    Returns a string in every case:
      ''                      when article_title is empty,
      the article URL         when article_title equals search_term
                              (compared case-insensitively),
      the search-results URL  for search_term otherwise.
    """
    if article_title == '':
        return ''
    if search_term.lower() == article_title.lower():
        # The URL runs from offset 60 up to the title attribute of the row.
        return search_term_line[60:search_term_line.find('" title="')]
    # No exact match: fall back to the search-results page itself. This
    # branch used to return a (url, False) tuple because of a stray
    # ", False"; it now returns the URL string like the other branches.
    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parse_search_term(search_term)

def return_URL_of_interest(url_of_interest):
    """Print a one-line status for the search and hand back the URL.

    Returns None when url_of_interest is ''; otherwise returns
    url_of_interest unchanged. Which of the two "found" messages is
    printed depends on the module-level search_page_check flag.
    """
    if url_of_interest == '':
        print('No results were found.')
        return None

    # Both remaining outcomes return the same value; only the message differs.
    if search_page_check == True:
        status = "Your search has found multiple entries."
    else:
        status = "Your search has found an exact match."
    print(status)
    return url_of_interest
    

# Module-level state read by the functions above.
search_term = 'tay sachs'
# Passed to the helpers below; strings are immutable, so no callee can
# update this value on the caller's behalf.
search_term_line = ''
search_page_check = False

# Download first, so a failed request does not truncate an existing
# cache file, then store the page on disk.
search_page = get_eMed_search_page(parse_search_term(search_term))
f = open('emed_trial.txt', 'w')
try:
    f.write(search_page)
finally:
    f.close()

# Re-read the cached page line by line and report the result.
f = open('emed_trial.txt', 'r')
try:
    article_title = find_article_title_in_search_page(f, search_term_line)
    print(article_title)
    url_of_interest = return_treatment_page_or_search_page(article_title, search_term_line, search_page_check)
    print(url_of_interest)
    return_URL_of_interest(url_of_interest)
finally:
    # Close the handle even if parsing or reporting raises.
    f.close()