TChan/Notebook/2007-4-23

Goals Completed

 * (Temporary?) URL-parsing to get OUTPUT: MedStory searchpage using INPUT: search_term passed from other parts of the program
 * HTML-parsing:
 * Parse out from MedStory's HTML searchpage return:
 * Drugs
 * Procedures
 * Experts
 * Drugs in Clinical Trials
 * (More) URL-parsing:
 * MedStory's search on Clinical Trials
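 * Got today's new code working (i.e., everything not commented out below)
 * But when I tried to add last week's URL-parsing code (which worked before), I got "Token Error: EOF in multi-line statement"
 * What does that mean? (Python reports this when the file ends while a bracket, parenthesis, or other multi-line construct is still open, e.g. a list literal that is missing its closing "]".)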

Continuing Goals

 * XML-Parsing
 * Get Clinical Trials in an RSS form out, so that it can be read in an XML reader on the page we present to the patient (? is this the goal ?)

Code
import urllib


 * 1) Definitions of functions

def parse_for_MedStory_genl(search_term):
    """Build the URL of MedStory's general (Web) search page for a term.

    The term is lower-cased, apostrophes are encoded as ``%27`` and spaces
    become ``+``. No other characters are escaped.

    Args:
        search_term: Free-text search string.

    Returns:
        The MedStory search URL as a string.
    """
    # lower() must actually be called; the bare attribute has no .replace().
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term

def get_MedStory_search_file(search_term, genl_search_file):
    """Download MedStory's general search page and write it to a file.

    Args:
        search_term: Free-text search string; turned into a URL by
            parse_for_MedStory_genl.
        genl_search_file: An already-open, writable file object. It is
            written to but not closed here.
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        # read() and close() must be called, not merely referenced.
        page = URL_stream_genl.read()
    finally:
        URL_stream_genl.close()
    genl_search_file.write(page)


def get_drug_names(drug_list, search_file_name):
    """Append the drug names found in a saved MedStory page to drug_list.

    A line is considered a hit when it contains the text ``_Drug">``; the
    name is the text between that marker and the closing delimiter. Only
    the first hit on each line is taken.

    NOTE(review): the closing delimiter was lost from the notebook text;
    ``</a>`` is assumed here -- confirm against a real MedStory page.

    Args:
        drug_list: List that the extracted names are appended to.
        search_file_name: Path of the saved HTML search page.

    Returns:
        The same drug_list object, after appending.
    """
    start_marker = '_Drug">'
    end_marker = '</a>'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            marker_pos = line.find(start_marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(start_marker)
            # Search for the end only after the marker so that an earlier
            # closing tag on the same line cannot produce an empty slice.
            name_end = line.find(end_marker, name_start)
            if name_end == -1:
                drug_list.append(line[name_start:].strip())
            else:
                drug_list.append(line[name_start:name_end])
    return drug_list

def get_procedure_names(procedure_list, search_file_name):
    """Append the procedure names found in a saved MedStory page.

    A line is considered a hit when it contains the text ``_Therapy">``;
    the name is the text between that marker and the closing delimiter.
    Only the first hit on each line is taken.

    NOTE(review): the closing delimiter was lost from the notebook text;
    ``</a>`` is assumed here -- confirm against a real MedStory page.

    Args:
        procedure_list: List that the extracted names are appended to.
        search_file_name: Path of the saved HTML search page.

    Returns:
        The same procedure_list object, after appending.
    """
    start_marker = '_Therapy">'
    end_marker = '</a>'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            marker_pos = line.find(start_marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(start_marker)
            # Search for the end only after the marker so that an earlier
            # closing tag on the same line cannot produce an empty slice.
            name_end = line.find(end_marker, name_start)
            if name_end == -1:
                procedure_list.append(line[name_start:].strip())
            else:
                procedure_list.append(line[name_start:name_end])
    return procedure_list

def get_experts_names(experts_list, search_file_name):
    """Append the expert names found in a saved MedStory page.

    A line is considered a hit when it contains the text ``_Person">``;
    the name is the text between that marker and the closing delimiter.
    Only the first hit on each line is taken.

    NOTE(review): the closing delimiter was lost from the notebook text;
    ``</a>`` is assumed here -- confirm against a real MedStory page.

    Args:
        experts_list: List that the extracted names are appended to.
        search_file_name: Path of the saved HTML search page.

    Returns:
        The same experts_list object, after appending.
    """
    start_marker = '_Person">'
    end_marker = '</a>'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            marker_pos = line.find(start_marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(start_marker)
            # Search for the end only after the marker so that an earlier
            # closing tag on the same line cannot produce an empty slice.
            name_end = line.find(end_marker, name_start)
            if name_end == -1:
                experts_list.append(line[name_start:].strip())
            else:
                experts_list.append(line[name_start:name_end])
    return experts_list

def get_clinical_names(clinical_list, search_file_name):
    """Append the names of drugs in clinical trials from a saved page.

    A line is considered a hit when it contains the text
    ``_ExperimentalDrug">``; the name is the text between that marker and
    the closing delimiter. Only the first hit on each line is taken.

    NOTE(review): the closing delimiter was lost from the notebook text;
    ``</a>`` is assumed here -- confirm against a real MedStory page.

    Args:
        clinical_list: List that the extracted names are appended to.
        search_file_name: Path of the saved HTML search page.

    Returns:
        The same clinical_list object, after appending.
    """
    start_marker = '_ExperimentalDrug">'
    end_marker = '</a>'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            marker_pos = line.find(start_marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(start_marker)
            # Search for the end only after the marker so that an earlier
            # closing tag on the same line cannot produce an empty slice.
            name_end = line.find(end_marker, name_start)
            if name_end == -1:
                clinical_list.append(line[name_start:].strip())
            else:
                clinical_list.append(line[name_start:name_end])
    return clinical_list


 * 1) Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Build the URL of MedStory's Clinical Trials search for a term.

    The term is lower-cased, apostrophes are encoded as ``%27`` and spaces
    become ``+``. No other characters are escaped.

    Args:
        search_term: Free-text search string.

    Returns:
        The MedStory clinical-trials search URL as a string.
    """
    # lower() must actually be called; the bare attribute has no .replace().
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.medstory.com/app?service=external&tc=c1&page=Search&q=%s&s=ClinicalTrial&c=true&i=" % parsed_term


 * 1) def parse_for_eMed(search_term):
 * 2)    parsed_term = search_term.lower.replace(' ', '%20')
 * 3)    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parsed_term
 * 4) def parse_for_Google_genl(search_term):
 * 5)    parsed_term = search_term.lower.replace("'", '%27').replace(' ', '+')
 * 6)    return "http://www.google.com/search?hl=en&q=%s&btnG=Search" % parsed_term
 * 7) def parse_for_Google_treatment(search_term):
 * 8)    parsed_term = search_term.lower.replace("'", '%27').replace(' ', '+')
 * 9)    return "http://www.google.com/search?hl=en&q=%s+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1" % parsed_term
 * 10) def parse_for_Wikipedia(search_term):
 * 11)    parsed_term = search_term.lower.capitalize.replace("'", '%27').replace(' ', '_')
 * 12)    return "http://en.wikipedia.org/wiki/%s" % parsed_term
 * 13) def parse_for_WHO(search_term):
 * 14)    parsed_term = search_term.lower.replace("'", '%27').replace(' ', '+')
 * 15)    return "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=%s&Search=Search" % parsed_term
 * 16) def parse_for_GeneCards(search_term):
 * 17)    parsed_term = search_term.lower.replace(" ", '+')
 * 18)    # NB: This only gives a functionally correct search if the search_term is a name of a disease
 * 19)    # because there are other formats for different inputs and different forms of the input
 * 20)    return "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=%s#MICROCARDS" % parsed_term
 * 21) def return_site_list_for_disease(search_term):
 * 22)    # Currently returns site-name and URL list
 * 23)    # ex. "eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"
 * 24)    return [["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
 * 25)            ["eMedicine", parse_for_eMed(search_term)],
 * 26)            ["Google, general search", parse_for_Google_genl(search_term)],
 * 27)            ["Google, Treatment search", parse_for_Google_treatment(search_term)],
 * 28)            ["Wikipedia", parse_for_Wikipedia(search_term)],
 * 29)            ["WHO", parse_for_WHO(search_term)],
 * 30)            ["GeneCards", parse_for_GeneCards(search_term)]
 * 1)            ["eMedicine", parse_for_eMed(search_term)],
 * 2)            ["Google, general search", parse_for_Google_genl(search_term)],
 * 3)            ["Google, Treatment search", parse_for_Google_treatment(search_term)],
 * 4)            ["Wikipedia", parse_for_Wikipedia(search_term)],
 * 5)            ["WHO", parse_for_WHO(search_term)],
 * 6)            ["GeneCards", parse_for_GeneCards(search_term)]


 * 1) Stuff actually happening in the program

# Ask for the term to look up. (This is Python 2 code: raw_input here and
# urllib.urlopen in the helpers.)
search_term = raw_input("Please enter in the search_term (will be returned in real version): ")
search_file_name = "%s_medstory.html" % search_term

# Save MedStory's search page locally; the `with` block guarantees the file
# is flushed and closed before the parsers below reopen it for reading.
with open(search_file_name, 'w') as q:
    get_MedStory_search_file(search_term, q)

drug_list = []
procedure_list = []
experts_list = []
clinical_list = []     # refers to drugs currently in clinical trials

# Each parser re-reads the saved page and appends its matches in place.
get_drug_names(drug_list, search_file_name)
get_procedure_names(procedure_list, search_file_name)
get_experts_names(experts_list, search_file_name)
get_clinical_names(clinical_list, search_file_name)

# Single-argument print(...) produces the same output as the Python 2
# print statement while also being valid Python 3 syntax.
print(drug_list)
print(procedure_list)
print(experts_list)
print(clinical_list)

print(parse_for_MedStory_clinical(search_term))


 * 1) final_list = return_site_list_for_disease(search_term)
 * 2) print final_list