TChan/Notebook/2007-4-24

=Continuing Goals=
 * XML-parse MedStory's "Clinical Trials" feed
 * UPDATE: No need, since Deniz's code already does this for the "News" feed, and the only thing we'd need to change would be the URL the XML comes from.


 * Reorganize and document the Project page
 * Should wait until 4.26 to talk to the class, ask about the necessary details, and make sure no one else is already documenting this (or something like it)

=Update=
 * Reviewed code to figure out EOF error; found missing end-bracket, fixed

Working Code
import urllib


 * 1) Definitions of functions

def parse_for_MedStory_genl(search_term):
    """Return the MedStory general ("Web") search URL for search_term.

    The term is lower-cased, apostrophes are replaced with %27 and spaces
    with '+' before being substituted into the query string.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term

def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory general search page for search_term.

    The page body is written to genl_search_file, an already-open writable
    file object; the caller remains responsible for closing that file.
    Uses urllib.urlopen, i.e. the Python 2 urllib API.
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = URL_stream_genl.read()
    finally:
        # Always release the connection, even if the read fails.
        URL_stream_genl.close()
    genl_search_file.write(page)


def get_drug_names(drug_list, search_file_name):
    """Append the drug names found in a saved search page to drug_list.

    Each line of the file named search_file_name is scanned for the marker
    '_Drug">'; the text that follows the first marker on a line is taken
    as one name. drug_list is modified in place and also returned.
    """
    marker = '_Drug">'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            if start == -1:
                continue
            start += len(marker)
            # NOTE(review): the original end delimiter was lost when this code
            # was pasted into the wiki (it read `line.find()`). Assumed here:
            # the name runs up to the next '<' after the marker -- confirm
            # against a real MedStory page.
            end = line.find('<', start)
            if end == -1:
                # No tag follows on this line: keep the rest, minus the newline.
                drug_list.append(line[start:].rstrip('\r\n'))
            else:
                drug_list.append(line[start:end])
    return drug_list

def get_procedure_names(procedure_list, search_file_name):
    """Append the procedure names found in a saved search page to procedure_list.

    Each line of the file named search_file_name is scanned for the marker
    '_Therapy">'; the text that follows the first marker on a line is taken
    as one name. procedure_list is modified in place and also returned.
    """
    marker = '_Therapy">'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            if start == -1:
                continue
            start += len(marker)
            # NOTE(review): the original end delimiter was lost when this code
            # was pasted into the wiki (it read `line.find()`). Assumed here:
            # the name runs up to the next '<' after the marker -- confirm
            # against a real MedStory page.
            end = line.find('<', start)
            if end == -1:
                # No tag follows on this line: keep the rest, minus the newline.
                procedure_list.append(line[start:].rstrip('\r\n'))
            else:
                procedure_list.append(line[start:end])
    return procedure_list

def get_experts_names(experts_list, search_file_name):
    """Append the expert names found in a saved search page to experts_list.

    Each line of the file named search_file_name is scanned for the marker
    '_Person">'; the text that follows the first marker on a line is taken
    as one name. experts_list is modified in place and also returned.
    """
    marker = '_Person">'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            if start == -1:
                continue
            start += len(marker)
            # NOTE(review): the original end delimiter was lost when this code
            # was pasted into the wiki (it read `line.find()`). Assumed here:
            # the name runs up to the next '<' after the marker -- confirm
            # against a real MedStory page.
            end = line.find('<', start)
            if end == -1:
                # No tag follows on this line: keep the rest, minus the newline.
                experts_list.append(line[start:].rstrip('\r\n'))
            else:
                experts_list.append(line[start:end])
    return experts_list

def get_clinical_names(clinical_list, search_file_name):
    """Append the experimental-drug names found in a saved search page to clinical_list.

    Each line of the file named search_file_name is scanned for the marker
    '_ExperimentalDrug">'; the text that follows the first marker on a line
    is taken as one name. clinical_list is modified in place and also
    returned.
    """
    marker = '_ExperimentalDrug">'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            if start == -1:
                continue
            start += len(marker)
            # NOTE(review): the original end delimiter was lost when this code
            # was pasted into the wiki (it read `line.find()`). Assumed here:
            # the name runs up to the next '<' after the marker -- confirm
            # against a real MedStory page.
            end = line.find('<', start)
            if end == -1:
                # No tag follows on this line: keep the rest, minus the newline.
                clinical_list.append(line[start:].rstrip('\r\n'))
            else:
                clinical_list.append(line[start:end])
    return clinical_list


 * 2) Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Return the MedStory "ClinicalTrial" search URL for search_term.

    The term is lower-cased, apostrophes are replaced with %27 and spaces
    with '+' before being substituted into the query string.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.medstory.com/app?service=external&tc=c1&page=Search&q=%s&s=ClinicalTrial&c=true&i=" % parsed_term

def parse_for_eMed(search_term):
    """Return the eMedicine search URL for search_term.

    The term is lower-cased and spaces are replaced with %20; no other
    characters (apostrophes included) are escaped.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace(' ', '%20')
    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parsed_term

def parse_for_Google_genl(search_term):
    """Return the general Google search URL for search_term.

    The term is lower-cased, apostrophes are replaced with %27 and spaces
    with '+' before being substituted into the query string.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.google.com/search?hl=en&q=%s&btnG=Search" % parsed_term

def parse_for_Google_treatment(search_term):
    """Return the Google "condition_treatment" refined search URL for search_term.

    The term is lower-cased, apostrophes are replaced with %27 and spaces
    with '+' before being substituted into the query string.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.google.com/search?hl=en&q=%s+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1" % parsed_term

def parse_for_Wikipedia(search_term):
    """Return the English Wikipedia article URL for search_term.

    The term is lower-cased and then capitalized (first character upper
    case, the rest lower case); apostrophes are replaced with %27 and
    spaces with '_'.
    """
    # lower() and capitalize() must be *called*; the bare method attributes
    # have no further string methods to chain on.
    parsed_term = search_term.lower().capitalize()
    parsed_term = parsed_term.replace("'", '%27').replace(' ', '_')
    return "http://en.wikipedia.org/wiki/%s" % parsed_term

def parse_for_WHO(search_term):
    """Return the WHO site-search URL for search_term.

    The term is lower-cased, apostrophes are replaced with %27 and spaces
    with '+' before being substituted into the query string.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=%s&Search=Search" % parsed_term

def parse_for_GeneCards(search_term):
    """Return the GeneCards keyword-search URL for search_term.

    The term is lower-cased and spaces are replaced with '+'; apostrophes
    are left unescaped.
    """
    # str.lower must be *called*; the bare attribute has no .replace method.
    parsed_term = search_term.lower().replace(" ", '+')
    # NB: This only gives a functionally correct search if the search_term is
    # the name of a disease, because there are other formats for different
    # inputs and different forms of the input.
    return "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=%s#MICROCARDS" % parsed_term

def return_site_list_for_disease(search_term):
    """Return a list of [site-name, URL] pairs for looking up search_term.

    Each URL is produced by the corresponding parse_for_* function, e.g.
    ["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"].
    """
    # The outer "[[" / "]]" of this literal were lost in the wiki rendering
    # (they are wiki-link syntax); restored here so a list of pairs is returned.
    return [
        ["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
        ["eMedicine", parse_for_eMed(search_term)],
        ["Google, general search", parse_for_Google_genl(search_term)],
        ["Google, Treatment search", parse_for_Google_treatment(search_term)],
        ["Wikipedia", parse_for_Wikipedia(search_term)],
        ["WHO", parse_for_WHO(search_term)],
        ["GeneCards", parse_for_GeneCards(search_term)],
    ]


 * 3) Stuff actually happening in the program

# Example search_term for now; will be returned by the rest of the program
# when finished.
search_term = """Hashimoto's Thyroiditis"""
search_file_name = "%s_medstory.html" % search_term

# Save the MedStory results page locally so the extractors below can scan it.
# The `with` block guarantees the file is flushed and closed before it is
# reopened for reading (the original `q.close` never actually called close).
with open(search_file_name, 'w') as q:
    get_MedStory_search_file(search_term, q)

drug_list = []
procedure_list = []
experts_list = []
clinical_list = []     # refers to drugs currently in clinical trials

# Each extractor appends to the list it is given (and also returns it).
get_drug_names(drug_list, search_file_name)
get_procedure_names(procedure_list, search_file_name)
get_experts_names(experts_list, search_file_name)
get_clinical_names(clinical_list, search_file_name)

# Single-argument print(...) behaves the same as the Python 2 print statement.
print(drug_list)
print(procedure_list)
print(experts_list)
print(clinical_list)

final_list = return_site_list_for_disease(search_term)
print(final_list)