TChan/Notebook/2007-5-2
From OpenWetWare
Jump to navigationJump to search
Revision of 'Output Useful Patient Info' code
Input
SAMPLE INPUT: a disease name, as a string, e.g. """osteosarcoma""". Note: the code will handle disease names with spaces, as well as apostrophes (e.g. """Hashimoto's Thyroiditis""").
Code
# CODE (revised W 5.2.07 for efficiency)
#
# Fetches the MedStory result page for a disease name, pulls the drug,
# procedure, expert and experimental-drug names out of it, and builds a list
# of search URLs for the same term on several other sites.
#
# Written for Python 2: the page fetch relies on urllib.urlopen.

import urllib


# Definitions of functions

def _plus_encode(search_term):
    """Lower-case the term, escape apostrophes as %27 and turn spaces into '+'."""
    return search_term.lower().replace("'", '%27').replace(' ', '+')


def _extract_link_text(line, marker):
    """Return the text between `marker` and the following '</a>' in `line`.

    Returns None when `marker` does not occur in `line`.  The closing tag is
    searched for only *after* the marker, so an earlier link on the same line
    cannot truncate the result.  If no closing tag follows, the rest of the
    line (without its line ending) is returned.
    """
    marker_pos = line.find(marker)
    if marker_pos == -1:
        return None
    start = marker_pos + len(marker)
    end = line.find('</a>', start)
    if end == -1:
        return line[start:].rstrip('\r\n')
    return line[start:end]


def parse_for_MedStory_genl(search_term):
    """Return the MedStory general (Web) search URL for `search_term`."""
    return ("http://www.medstory.com/app?service=external&page=Search"
            "&c=true&s=Web&tc=h1&q=%s" % _plus_encode(search_term))


def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory general search page and write it to a file.

    search_term      -- disease name used to build the MedStory URL
    genl_search_file -- writable file object that receives the page content

    The URL stream is closed even if reading from it fails.
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = URL_stream_genl.read()
    finally:
        URL_stream_genl.close()
    genl_search_file.write(page)


def get_MedStory_data(drug_list, procedure_list, experts_list, clinical_list,
                      search_file):
    """Collect link texts from a saved MedStory page into the four lists.

    Each line of `search_file` (any iterable of strings) is checked for the
    markers below, in this order; only the first marker found on a line is
    used, and the text between it and the next '</a>' is appended to the
    corresponding list:

        _Drug">              -> drug_list
        _Therapy">           -> procedure_list
        _Person">            -> experts_list
        _ExperimentalDrug">  -> clinical_list

    The lists passed in are appended to in place and also returned as a
    tuple (drug_list, procedure_list, experts_list, clinical_list).
    """
    targets = (
        ('_Drug">', drug_list),
        ('_Therapy">', procedure_list),
        ('_Person">', experts_list),
        ('_ExperimentalDrug">', clinical_list),
    )
    for line in search_file:
        for marker, target in targets:
            text = _extract_link_text(line, marker)
            if text is not None:
                target.append(text)
                break  # first matching marker wins, like the original elif chain
    return drug_list, procedure_list, experts_list, clinical_list


# Functions that parse the search_term to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Return the MedStory clinical-trial search URL for `search_term`."""
    return ("http://www.medstory.com/app?service=external&tc=c1&page=Search"
            "&q=%s&s=ClinicalTrial&c=true&i=" % _plus_encode(search_term))


def parse_for_eMed(search_term):
    """Return the eMedicine search URL for `search_term`.

    Spaces become %20; apostrophes are passed through unchanged.
    """
    parsed_term = search_term.lower().replace(' ', '%20')
    return ("http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/"
            "searchengine?boolean=and&book=all&maxhits=40&HiddenURL="
            "&query=%s" % parsed_term)


def parse_for_Google_genl(search_term):
    """Return the Google general search URL for `search_term`."""
    return ("http://www.google.com/search?hl=en&q=%s&btnG=Search"
            % _plus_encode(search_term))


def parse_for_Google_treatment(search_term):
    """Return the Google 'condition_treatment' search URL for `search_term`."""
    return ("http://www.google.com/search?hl=en&q=%s+more:condition_treatment"
            "&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1"
            % _plus_encode(search_term))


def parse_for_Wikipedia(search_term):
    """Return the English Wikipedia article URL for `search_term`.

    Only the first letter of the whole term is capitalised; spaces become
    underscores and apostrophes become %27.
    """
    parsed_term = (search_term.lower().capitalize()
                   .replace("'", '%27').replace(' ', '_'))
    return "http://en.wikipedia.org/wiki/%s" % parsed_term


def parse_for_WHO(search_term):
    """Return the WHO site-search URL for `search_term`."""
    return ("http://search.who.int/search?ie=utf8&site=default_collection"
            "&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8"
            "&q=%s&Search=Search" % _plus_encode(search_term))


def parse_for_GeneCards(search_term):
    """Return the GeneCards keyword-search URL for `search_term`.

    Spaces become '+'; apostrophes are passed through unchanged.
    """
    parsed_term = search_term.lower().replace(" ", '+')
    # NB: This only gives a functionally correct search if the search_term is
    # a name of a disease because there are other formats for different inputs
    # and different forms of the input
    return ("http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd"
            "&speed=fast&search=%s#MICROCARDS" % parsed_term)


def return_site_list_for_disease(search_term):
    """Return a list of [site-name, URL] pairs for `search_term`.

    ex. [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/
    searchengine@/em/searchengine?boolean=and&book=all&maxhits=40
    &HiddenURL=&query=parsed-term"]]
    """
    # Currently returns site-name and URL list
    return [
        ["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
        ["eMedicine", parse_for_eMed(search_term)],
        ["Google, general search", parse_for_Google_genl(search_term)],
        ["Google, Treatment search", parse_for_Google_treatment(search_term)],
        ["Wikipedia", parse_for_Wikipedia(search_term)],
        ["WHO", parse_for_WHO(search_term)],
        ["GeneCards", parse_for_GeneCards(search_term)],
    ]


# Stuff actually happening in the program
# (guarded so that importing this file does not hit the network or write files)
if __name__ == "__main__":
    # sample search_term for now, but will be disease name in complete program
    search_term = """osteosarcoma"""

    search_file_name = "%s_medstory.html" % search_term
    medstory_file = open(search_file_name, 'w')
    try:
        get_MedStory_search_file(search_term, medstory_file)
    finally:
        medstory_file.close()

    drug_list = []
    procedure_list = []
    experts_list = []
    clinical_list = []  # refers to drugs currently in clinical trials

    # Build lists of MedStory data
    search_file = open(search_file_name, 'r')
    try:
        drug_list, procedure_list, experts_list, clinical_list = \
            get_MedStory_data(drug_list, procedure_list, experts_list,
                              clinical_list, search_file)
    finally:
        search_file.close()

    # Build list of relevant URLs
    final_list = return_site_list_for_disease(search_term)

    # Print all data
    # (a parenthesised single value prints the same as the Python 2 statement
    # form and also parses on Python 3)
    print(drug_list)
    print(procedure_list)
    print(experts_list)
    print(clinical_list)
    print(final_list)
Output
SAMPLE OUTPUT: ['Ifex', 'Adriamycin', 'Methotrexate', 'Platinol', 'Neutrexin'] ['Chemotherapy', 'Neoadjuvant Chemo...', 'Adjuvant Chemothe...', 'Radiation Therapy', 'Biopsy'] ['Meyers, Paul A', 'Gorlick, Richard', 'Daw, Najat C', 'Gearen, Peter F', 'Geoerger, Birgit'] ['Slit Cisplatin', 'Ap23573', 'Trabectedin', 'VP-16', 'Dasatinib'] [['MedStory, clinical trials', 'http://www.medstory.com/app?service=external&tc=c1&page=Search&q=osteosarcoma&s=ClinicalTrial&c=true&i='], ['eMedicine', 'http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=osteosarcoma'], ['Google, general search', 'http://www.google.com/search?hl=en&q=osteosarcoma&btnG=Search'], ['Google, Treatment search', 'http://www.google.com/search?hl=en&q=osteosarcoma+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1'], ['Wikipedia', 'http://en.wikipedia.org/wiki/Osteosarcoma'], ['WHO', 'http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=osteosarcoma&Search=Search'], ['GeneCards', 'http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=osteosarcoma#MICROCARDS']]