TChan/Notebook/2007-4-24
From OpenWetWare
Jump to navigation | Jump to search
Continuing Goals
- XML-parse MedStory's "Clinical Trials" feed
- UPDATE: No need, since Deniz's code already does this for the "News" feed, and the only thing we'd need to change would be the URL the XML comes from.
- Reorganize and document the Project page
- Should wait till 4.26 to talk to the class, ask about the necessary details, and make sure no one else is already doing this (or something like it) with the documentation
Update
- Reviewed code to figure out EOF error; found missing end-bracket, fixed
Working Code
import urllib


# Definitions of functions

def _plus_encode(search_term):
    """Lower-case search_term and escape it for a '+'-separated query.

    Only apostrophes (-> %27) and spaces (-> +) are rewritten.
    NOTE(review): other reserved URL characters (&, #, ?, ...) are passed
    through unchanged, exactly as in the original hand-rolled escaping.
    """
    return search_term.lower().replace("'", '%27').replace(' ', '+')


def parse_for_MedStory_genl(search_term):
    """Return the MedStory general (Web) search URL for search_term."""
    parsed_term = _plus_encode(search_term)
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term


def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory general-search page and write it to a file.

    search_term      -- the term to search MedStory for.
    genl_search_file -- an already-open, writable file object; the caller
                        stays responsible for closing it.
    """
    # urllib.urlopen is the Python 2 API this script was written against.
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = URL_stream_genl.read()
    finally:
        # Release the connection even if the read fails.
        URL_stream_genl.close()
    genl_search_file.write(page)


def _collect_link_texts(names, search_file_name, marker):
    """Append to names the text following each marker up to the next '</a>'.

    names            -- list that is extended in place and also returned.
    search_file_name -- path of the saved search page, read line by line.
    marker           -- the text that ends the opening tag, e.g. '_Drug">'.

    Every occurrence of marker on a line is handled, and the closing tag is
    looked for only after the marker, so earlier links on the same line
    cannot truncate the result. If a line has no '</a>' after the marker,
    the remainder of the line (without its line ending) is kept.
    """
    closing_tag = '</a>'
    with open(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            while start != -1:
                start += len(marker)
                end = line.find(closing_tag, start)
                if end == -1:
                    names.append(line[start:].rstrip('\r\n'))
                    break
                names.append(line[start:end])
                start = line.find(marker, end)
    return names


def get_drug_names(drug_list, search_file_name):
    """Append the link texts tagged '_Drug">' to drug_list and return it."""
    return _collect_link_texts(drug_list, search_file_name, '''_Drug">''')


def get_procedure_names(procedure_list, search_file_name):
    """Append the link texts tagged '_Therapy">' to procedure_list and return it."""
    return _collect_link_texts(procedure_list, search_file_name, '''_Therapy">''')


def get_experts_names(experts_list, search_file_name):
    """Append the link texts tagged '_Person">' to experts_list and return it."""
    return _collect_link_texts(experts_list, search_file_name, '''_Person">''')


def get_clinical_names(clinical_list, search_file_name):
    """Append the link texts tagged '_ExperimentalDrug">' to clinical_list and return it."""
    return _collect_link_texts(clinical_list, search_file_name, '''_ExperimentalDrug">''')


# Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Return the MedStory clinical-trials search URL for search_term."""
    parsed_term = _plus_encode(search_term)
    return "http://www.medstory.com/app?service=external&tc=c1&page=Search&q=%s&s=ClinicalTrial&c=true&i=" % parsed_term


def parse_for_eMed(search_term):
    """Return the eMedicine search URL for search_term (spaces become %20)."""
    parsed_term = search_term.lower().replace(' ', '%20')
    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parsed_term


def parse_for_Google_genl(search_term):
    """Return the Google general-search URL for search_term."""
    parsed_term = _plus_encode(search_term)
    return "http://www.google.com/search?hl=en&q=%s&btnG=Search" % parsed_term


def parse_for_Google_treatment(search_term):
    """Return the Google URL that adds 'more:condition_treatment' to the query."""
    parsed_term = _plus_encode(search_term)
    return "http://www.google.com/search?hl=en&q=%s+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1" % parsed_term


def parse_for_Wikipedia(search_term):
    """Return the English Wikipedia article URL for search_term.

    The term is lower-cased with only its first character capitalised, and
    spaces become underscores.
    """
    parsed_term = search_term.lower().capitalize().replace("'", '%27').replace(' ', '_')
    return "http://en.wikipedia.org/wiki/%s" % parsed_term


def parse_for_WHO(search_term):
    """Return the WHO site-search URL for search_term."""
    parsed_term = _plus_encode(search_term)
    return "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=%s&Search=Search" % parsed_term


def parse_for_GeneCards(search_term):
    """Return the GeneCards keyword-search URL for search_term.

    NB: This only gives a functionally correct search if the search_term is
    a name of a disease, because there are other formats for different
    inputs and different forms of the input.
    """
    parsed_term = search_term.lower().replace(" ", '+')
    return "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=%s#MICROCARDS" % parsed_term


def return_site_list_for_disease(search_term):
    """Return a list of [site-name, URL] pairs for search_term.

    ex. [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"]]
    """
    return [
        ["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
        ["eMedicine", parse_for_eMed(search_term)],
        ["Google, general search", parse_for_Google_genl(search_term)],
        ["Google, Treatment search", parse_for_Google_treatment(search_term)],
        ["Wikipedia", parse_for_Wikipedia(search_term)],
        ["WHO", parse_for_WHO(search_term)],
        ["GeneCards", parse_for_GeneCards(search_term)],
    ]


# Stuff actually happening in the program.
# Guarded so that importing this module does not hit the network or write
# files; running it as a script behaves as before.
if __name__ == "__main__":
    # example search_term for now; will be returned by rest of program when finished
    search_term = """Hashimoto's Thyroiditis"""
    search_file_name = "%s_medstory.html" % search_term

    with open(search_file_name, 'w') as q:
        get_MedStory_search_file(search_term, q)

    drug_list = []
    procedure_list = []
    experts_list = []
    clinical_list = []  # refers to drugs currently in clinical trials

    get_drug_names(drug_list, search_file_name)
    get_procedure_names(procedure_list, search_file_name)
    get_experts_names(experts_list, search_file_name)
    get_clinical_names(clinical_list, search_file_name)

    # Single-argument print(...) prints the same under the Python 2 print statement.
    print(drug_list)
    print(procedure_list)
    print(experts_list)
    print(clinical_list)

    final_list = return_site_list_for_disease(search_term)
    print(final_list)