TChan/Notebook/2007-5-2
From OpenWetWare
Jump to navigationJump to search
Revision of 'Output Useful Patient Info' code
Input
SAMPLE INPUT: a disease name, given as a string, e.g. """osteosarcoma""". Note: the code handles disease names that contain spaces as well as apostrophes (e.g. """Hashimoto's Thyroiditis""").
Code
CODE (revised W 5.2.07 for efficiency):
import urllib
# Definitions of functions
def parse_for_MedStory_genl(search_term):
    """Return the MedStory general (Web) search URL for ``search_term``.

    The term is lower-cased, apostrophes are replaced with ``%27`` and
    spaces with ``+``.  No other characters are escaped.
    """
    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term


def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory results page and write it to a file object.

    search_term      -- disease name to search for
    genl_search_file -- writable file-like object; the caller opens and
                        closes it

    The page is fetched from the URL built by parse_for_MedStory_genl().
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    # try/finally so the connection is released even when read() raises.
    try:
        page = URL_stream_genl.read()
    finally:
        URL_stream_genl.close()
    genl_search_file.write(page)
def get_MedStory_data(drug_list, procedure_list, experts_list, clinical_list, search_file):
    """Scan a saved MedStory results page and collect the link texts.

    drug_list, procedure_list, experts_list, clinical_list
        -- lists that are appended to in place
    search_file
        -- iterable of text lines (e.g. an open file)

    Each line is tested for the markers ``_Drug">``, ``_Therapy">``,
    ``_Person">`` and ``_ExperimentalDrug">`` in that order.  For the first
    marker found, the text between the marker and the next ``</a>`` is
    appended to the matching list.  At most one item is taken per line.

    Returns the four lists as a tuple, in the order they were passed in.
    """
    # (marker, destination) pairs in priority order; the first hit wins.
    targets = (
        ('''_Drug">''', drug_list),
        ('''_Therapy">''', procedure_list),
        ('''_Person">''', experts_list),
        ('''_ExperimentalDrug">''', clinical_list),
    )
    closing_tag = '''</a>'''

    for line in search_file:
        for marker, destination in targets:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            start = marker_pos + len(marker)
            # Look for the closing tag only AFTER the marker, so an earlier
            # anchor on the same line cannot produce an empty or bogus slice.
            end = line.find(closing_tag, start)
            if end == -1:
                # No closing tag: keep the rest of the line (minus the line
                # terminator) instead of silently dropping its last character.
                end = len(line.rstrip('\r\n'))
            destination.append(line[start:end])
            break
    return drug_list, procedure_list, experts_list, clinical_list
# Functions that parse the search_term to display relevant URLs
def parse_for_MedStory_clinical(search_term):
    """Return the MedStory clinical-trials search URL for ``search_term``.

    The term is lower-cased; apostrophes become ``%27`` and spaces become
    ``+``.  Nothing else is escaped.
    """
    query = search_term.lower()
    query = query.replace("'", '%27')
    query = query.replace(' ', '+')
    url_head = "http://www.medstory.com/app?service=external&tc=c1&page=Search&q="
    url_tail = "&s=ClinicalTrial&c=true&i="
    return url_head + query + url_tail
def parse_for_eMed(search_term):
    """Return the eMedicine search URL for ``search_term``.

    The term is lower-cased and each space is written as ``%20``.
    Apostrophes and other characters are passed through unchanged.
    """
    words = search_term.lower().split(' ')
    query = '%20'.join(words)
    return ("http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine"
            "?boolean=and&book=all&maxhits=40&HiddenURL=&query=" + query)
def parse_for_Google_genl(search_term):
    """Return the general Google search URL for ``search_term``.

    The term is lower-cased; apostrophes become ``%27`` and spaces become
    ``+``.  Nothing else is escaped.
    """
    query = search_term.lower()
    for raw, escaped in (("'", '%27'), (' ', '+')):
        query = query.replace(raw, escaped)
    return "http://www.google.com/search?hl=en&q=" + query + "&btnG=Search"
def parse_for_Google_treatment(search_term):
    """Return the Google "condition treatment" search URL for ``search_term``.

    The term is lower-cased; apostrophes become ``%27`` and spaces become
    ``+``.  Nothing else is escaped.
    """
    lowered = search_term.lower()
    query = '+'.join(lowered.replace("'", '%27').split(' '))
    url_head = "http://www.google.com/search?hl=en&q="
    url_tail = "+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1"
    return url_head + query + url_tail
def parse_for_Wikipedia(search_term):
    """Return the English Wikipedia article URL for ``search_term``.

    The term is lower-cased and then capitalized (first character upper
    case only); apostrophes become ``%27`` and spaces become ``_``.
    """
    title = search_term.lower().capitalize()
    for raw, escaped in (("'", '%27'), (' ', '_')):
        title = title.replace(raw, escaped)
    return "http://en.wikipedia.org/wiki/" + title
def parse_for_WHO(search_term):
    """Return the WHO site-search URL for ``search_term``.

    The term is lower-cased; apostrophes become ``%27`` and spaces become
    ``+``.  Nothing else is escaped.
    """
    query = search_term.lower()
    query = query.replace(' ', '+')
    query = query.replace("'", '%27')
    url_head = ("http://search.who.int/search?ie=utf8&site=default_collection"
                "&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=")
    return url_head + query + "&Search=Search"
def parse_for_GeneCards(search_term):
    """Return the GeneCards keyword-search URL for ``search_term``.

    The term is lower-cased and spaces become ``+``; apostrophes are left
    as they are.
    """
    # NB: This only gives a functionally correct search if the search_term
    # is the name of a disease, because other kinds of input (and other
    # forms of the input) use different URL formats.
    query = '+'.join(search_term.lower().split(" "))
    url_head = "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search="
    return url_head + query + "#MICROCARDS"
def return_site_list_for_disease(search_term):
    """Return ``[site-name, URL]`` pairs of search links for ``search_term``.

    Currently returns a list of two-element lists, e.g.
    [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"]]
    """
    # Site label paired with the function that builds its URL, in output order.
    site_builders = (
        ("MedStory, clinical trials", parse_for_MedStory_clinical),
        ("eMedicine", parse_for_eMed),
        ("Google, general search", parse_for_Google_genl),
        ("Google, Treatment search", parse_for_Google_treatment),
        ("Wikipedia", parse_for_Wikipedia),
        ("WHO", parse_for_WHO),
        ("GeneCards", parse_for_GeneCards),
    )
    site_list = []
    for site_name, build_url in site_builders:
        site_list.append([site_name, build_url(search_term)])
    return site_list
# Stuff actually happening in the program
search_term = """osteosarcoma"""  # sample search_term for now, but will be disease name in complete program
search_file_name = "%s_medstory.html" % search_term

# Save the MedStory results page to disk.  try/finally guarantees the file
# is closed even if the download fails part-way.
medstory_file = open(search_file_name, 'w')
try:
    get_MedStory_search_file(search_term, medstory_file)
finally:
    medstory_file.close()

drug_list = []
procedure_list = []
experts_list = []
clinical_list = []  # refers to drugs currently in clinical trials

# Build lists of MedStory data by re-reading the saved page line by line
search_file = open(search_file_name, 'r')
try:
    drug_list, procedure_list, experts_list, clinical_list = get_MedStory_data(
        drug_list, procedure_list, experts_list, clinical_list, search_file)
finally:
    search_file.close()

# Build list of relevant URLs
final_list = return_site_list_for_disease(search_term)

# Print all data.  With a single argument, print(x) produces the same output
# as the print statement on Python 2.
print(drug_list)
print(procedure_list)
print(experts_list)
print(clinical_list)
print(final_list)
Output
SAMPLE OUTPUT: ['Ifex', 'Adriamycin', 'Methotrexate', 'Platinol', 'Neutrexin'] ['Chemotherapy', 'Neoadjuvant Chemo...', 'Adjuvant Chemothe...', 'Radiation Therapy', 'Biopsy'] ['Meyers, Paul A', 'Gorlick, Richard', 'Daw, Najat C', 'Gearen, Peter F', 'Geoerger, Birgit'] ['Slit Cisplatin', 'Ap23573', 'Trabectedin', 'VP-16', 'Dasatinib'] [['MedStory, clinical trials', 'http://www.medstory.com/app?service=external&tc=c1&page=Search&q=osteosarcoma&s=ClinicalTrial&c=true&i='], ['eMedicine', 'http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=osteosarcoma'], ['Google, general search', 'http://www.google.com/search?hl=en&q=osteosarcoma&btnG=Search'], ['Google, Treatment search', 'http://www.google.com/search?hl=en&q=osteosarcoma+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1'], ['Wikipedia', 'http://en.wikipedia.org/wiki/Osteosarcoma'], ['WHO', 'http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=osteosarcoma&Search=Search'], ['GeneCards', 'http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=osteosarcoma#MICROCARDS']]