Wikiomics:Ensembl local install: Difference between revisions
Darek Kedra (talk | contribs) (→Step 7) |
Darek Kedra (talk | contribs) |
||
(8 intermediate revisions by the same user not shown) | |||
Line 130: | Line 130: | ||
<pre> | <pre> | ||
export PATH=/home/ensembl/local/bin/:$PATH | |||
cd mod_perl-2.0.4 | cd mod_perl-2.0.4 | ||
perl Makefile.PL MP_APXS=/home/ensembl/local/apache2/bin/apxs | perl Makefile.PL MP_APXS=/home/ensembl/local/apache2/bin/apxs | ||
Line 156: | Line 157: | ||
===CPAN Shell=== | ===CPAN Shell=== | ||
Easy things first: | Easy things first: | ||
#editing modules 10/06/02 dk, bad formating | |||
<pre> | <pre> | ||
Line 164: | Line 167: | ||
#<snip> | #<snip> | ||
#commit: wrote '/home/ensembl/local/lib/perl5/5.12.0/CPAN/Config.pm' | #commit: wrote '/home/ensembl/local/lib/perl5/5.12.0/CPAN/Config.pm' | ||
install Cache::Memcached | install Cache::Memcached | ||
# Cache-Memcached-1.28.tar.gz | # Cache-Memcached-1.28.tar.gz | ||
Line 172: | Line 176: | ||
install CGI::Session | install CGI::Session | ||
# CGI-Session-4.42.tar.gz | # CGI-Session-4.42.tar.gz | ||
install Class::Accessor | install Class::Accessor | ||
# Class::Accessor is up to date (0.34). // checked after installation of all modules | # Class::Accessor is up to date (0.34). // checked after installation of all modules | ||
Line 178: | Line 183: | ||
install Class::Std | install Class::Std | ||
# Class-Std-0.011.tar.gz | # Class-Std-0.011.tar.gz | ||
install Class::Std::Utils | install Class::Std::Utils | ||
# Class-Std-Utils-v0.0.3.tar.gz | # Class-Std-Utils-v0.0.3.tar.gz | ||
install Compress::Zlib | install Compress::Zlib | ||
# Compress::Zlib is up to date (2.027). | # Compress::Zlib is up to date (2.027). | ||
install Compress::Raw::Zlib | install Compress::Raw::Zlib | ||
# Compress-Raw-Zlib-2.027.tar.gz | # Compress-Raw-Zlib-2.027.tar.gz | ||
install | install Compress::Bzip2 | ||
# Compress-Bzip2-2.09.tar.gz | |||
# | |||
install Devel::StackTrace | install Devel::StackTrace | ||
# Devel-StackTrace-1.22.tar.gz | # Devel-StackTrace-1.22.tar.gz | ||
install Data::UUID | install Data::UUID | ||
#Data-UUID-1.203.tar.gz | #Data-UUID-1.203.tar.gz | ||
Update 2010-05-28: Data-UUID-1.215.tar.gz | |||
install Digest::MD5 | install Digest::MD5 | ||
#Digest::MD5 is up to date (2.39). | #Digest::MD5 is up to date (2.39). | ||
install Exception::Class | install Exception::Class | ||
# Exception-Class-1.30.tar.gz | # Exception-Class-1.30.tar.gz | ||
install File::Temp | |||
# File::Temp is up to date (0.22) | |||
install Hash::Merge | install Hash::Merge | ||
# Hash-Merge-0.12.tar.gz | # Hash-Merge-0.12.tar.gz | ||
install Storable | install Storable | ||
#Storable is up to date (2.22). | #Storable is up to date (2.22). | ||
install PDF::API2 | install PDF::API2 | ||
# PDF-API2-0.73.tar.gz | # PDF-API2-0.73.tar.gz | ||
Line 220: | Line 218: | ||
install OLE::Storage_Lite | install OLE::Storage_Lite | ||
# OLE::Storage_Lite is up to date (0.19) | # OLE::Storage_Lite is up to date (0.19) | ||
install Mail::Mailer | install Mail::Mailer | ||
# MailTools-2.06.tar.gz | # MailTools-2.06.tar.gz | ||
Line 232: | Line 228: | ||
install Image::Size | install Image::Size | ||
# Image-Size-3.221.tar.gz | # Image-Size-3.221.tar.gz | ||
install List::MoreUtils | |||
#List-MoreUtils-0.22.tar.gz | |||
install Number::Format | |||
# Number-Format-1.73.tar.gz | |||
install Time::HiRes | |||
# Time-HiRes-1.9721.tar.gz | |||
##### | |||
install SOAP::Lite | |||
# SOAP-Lite-0.711.tar.gz | |||
install XML::Parser | |||
# XML::Parser is up to date (2.36). | |||
install XML::Simple | |||
# XML-Simple-2.18.tar.gz | |||
install Parse::RecDescent | |||
# Parse-RecDescent-1.965001.tar.gz | |||
install HTML::Template | |||
# HTML-Template-2.9.tar.gz | |||
install K/KM/KMACLEOD/libxml-perl-0.08.tar.gz | install K/KM/KMACLEOD/libxml-perl-0.08.tar.gz | ||
# // OK | # // OK | ||
install Log::Log4perl | install Log::Log4perl | ||
# Log-Log4perl-1.28.tar.gz | # Log-Log4perl-1.28.tar.gz | ||
install | ###### | ||
#### | |||
install SHLOMIF/Config-IniFiles-2.57.tar.gz | |||
#// i /Config::Inifiles/ lists 3 modules, this one is the only one which version is close to the one listed on 1000genomes.org web page (see above) | |||
Update 2010-05-28: install SHLOMIF/Config-IniFiles-2.58.tar.gz | |||
install DCONWAY/Parse-RecDescent-1.965001.tar.gz | install DCONWAY/Parse-RecDescent-1.965001.tar.gz | ||
# // closest with ver. number from 1000genomes.org list | # // closest with ver. number from 1000genomes.org list | ||
# // i /Parse::RecDescent/ | |||
install S/SM/SMUELLER/PathTools-3.31.tar.gz | install S/SM/SMUELLER/PathTools-3.31.tar.gz | ||
# // closest with ver. number from 1000genomes.org list | # // closest with ver. number from 1000genomes.org list | ||
# // i /PathTools/ | |||
#### | |||
install Template::Plugin::Number::Format | install Template::Plugin::Number::Format | ||
# Template-Plugin-Number-Format-1.02.tar.gz | # Template-Plugin-Number-Format-1.02.tar.gz | ||
install XML::DOM | install XML::DOM | ||
# XML-DOM-1.44.tar.gz | # XML-DOM-1.44.tar.gz | ||
install XML::RSS | install XML::RSS | ||
# XML-RSS-1.48.tar.gz | # XML-RSS-1.48.tar.gz | ||
install | |||
# | ######### | ||
install DBI | |||
#DBI-1.611.tar.gz | |||
install DB_File | |||
# DB_File is up to date (1.820). // day 2, after installation of other modules | |||
install Class::DBI::Sweet | |||
# Class-DBI-Sweet-0.10.tar.gz #needs few other classes, install last? | |||
install GD | |||
#GD-2.45.tar.gz | |||
</pre> | </pre> | ||
Line 265: | Line 307: | ||
install Sys::Hostname::Long | install Sys::Hostname::Long | ||
# Sys-Hostname-Long-1.4.tar.gz | # Sys-Hostname-Long-1.4.tar.gz | ||
install JSON | install JSON | ||
# JSON-2.21.tar.gz | # JSON-2.21.tar.gz | ||
Line 374: | Line 418: | ||
# <snip> | # <snip> | ||
# Failed 1/33 test programs. 0/833 subtests failed. | # Failed 1/33 test programs. 0/833 subtests failed. | ||
make install | |||
</pre> | </pre> | ||
Line 899: | Line 944: | ||
saccharomyces_cerevisiae_otherfeatures_57_1j/ | saccharomyces_cerevisiae_otherfeatures_57_1j/ | ||
A functional site needs just the core database, so let's restrict the ambition of doing everything at once on the first go: | |||
Get | Get it keeping dir structure: | ||
<pre> | <pre> | ||
wget -x ftp://ftp.ensembl.org/pub/current_mysql/saccharomyces_cerevisiae_core_57_1j/* | wget -x ftp://ftp.ensembl.org/pub/current_mysql/saccharomyces_cerevisiae_core_57_1j/* | ||
</pre> | </pre> | ||
Line 912: | Line 956: | ||
* check it in the browser | * check it in the browser | ||
While we still have (mostly) working pages, some content simply vanished. Looking at the ./logs/*.error_log gives: "Could not connect to database ensembl_compara_57" | While we still have (mostly) working pages, some content simply vanished. Looking at the ./logs/*.error_log gives: "Could not connect to database ensembl_compara_57" | ||
Instead of downloading everything from: | |||
ftp://ftp.ensembl.org/pub/current_mysql/ensembl_compara_57/ | ftp://ftp.ensembl.org/pub/current_mysql/ensembl_compara_57/ | ||
and | we cut another corner and resign from accessing COMPARA for time being. | ||
This is configured in MULTI.ini file. To make things more complicated there are several such files in the default install: | |||
<pre> | |||
./public-plugins/admin/conf/ini-files/MULTI.ini | |||
./public-plugins/mart/conf/ini-files/MULTI.ini | |||
./public-plugins/ensembl/conf/ini-files/MULTI.ini | |||
./conf/ini-files/MULTI.ini | |||
</pre> | |||
To pick the right one we have to look at conf/Plugins.pm | |||
Whichever one is on the top (in our case: | |||
'EnsEMBL::Mirror' => $SiteDefs::ENSEMBL_SERVERROOT.'/public-plugins/mirror', | |||
) this one determines the final values. | |||
So because it is safer to leave /public-plugins/ensembl/ unmodified, we create | |||
./public-plugins/mirror/conf/ini-files/MULTI.ini | |||
with just two lines: | |||
<pre> | |||
[databases] | |||
DATABASE_COMPARA = | |||
</pre> | |||
Yes, the value after the "=" sign is left empty. | |||
==Step 10== | |||
Important: | |||
Ensembl caches its configuration using pack files. There are two places: | |||
<pre> | |||
conf/config.packed #one file | |||
conf/packed/ #multiple files | |||
</pre> | |||
So whenever we change configuration these files need to be deleted (in ensembl directory): | |||
<pre> | |||
rm -i ./conf/config.packed | |||
rm -i ./conf/packed/* | |||
</pre> | |||
===Adding existing "non-animal" species=== | |||
There is a long list of species fully compatible with default setup of Ensembl 57. You can see them in: | |||
public-plugins/ensembl/conf/SiteDefs.pm | |||
Having a local mirror of these is straightforward (follow procedure for yeast above). | |||
Problems start with non-listed species. I ran it just for A. thaliana (I expect that it will be similar for other plants), but things may differ for other branches (check the SQL schema for each of them). | |||
* Downloading core DB: | |||
<pre> | |||
wget -x ftp://ftp.ensemblgenomes.org/pub/plants/release-4/mysql/arabidopsis_thaliana_core_4_56_9/* | |||
</pre> | |||
* Creating database | |||
Following http://www.ensembl.org/info/docs/webcode/install/ensembl-data.html | |||
Before we start: the crucial number in arabidopsis_thaliana_core_4_56_9 is "56". It means that | |||
the files follow Ensembl_56 schema, and accessing them using Ensembl 57 will give us schema mismatch. | |||
There is a path of changing schema from one Ensembl release to another (see ensembl/sql/ patch files). | |||
So in the end we will try: | |||
* use arabidopsis_thaliana_core_4_56_9 files | |||
* create database and populate it with data | |||
* patch the schema | |||
* rename the database to make clear which schema it follows | |||
<pre> | |||
#unpacking | |||
cd /where/you/downloaded/ftp.ensemblgenomes.org/pub/plants/release-4/mysql/arabidopsis_thaliana_core_4_56_9/ | |||
gunzip *gz | |||
#creating db | |||
mysql -u ensembl -p | |||
create database arabidopsis_thaliana_core_4_56_9; | |||
quit | |||
#creating schema + loading data | |||
mysql -u ensembl -p arabidopsis_thaliana_core_4_56_9 < arabidopsis_thaliana_core_4_56_9.sql | |||
mysqlimport -u ensembl -p arabidopsis_thaliana_core_4_56_9 -L *.txt | |||
#patching schema: watch for all files for a given patch! | |||
# run patches by hand in case of any doubt | |||
cd /home/ensembl/local/ensembl/ | |||
cd ensembl/sql/ | |||
for file in patch_56_57_?.sql; do mysql -u ensembl -p arabidopsis_thaliana_core_4_56_9 < $file; done | |||
#renaming the database | |||
cd /var/lib/mysql | |||
sudo /etc/init.d/mysql stop | |||
sudo mv arabidopsis_thaliana_core_4_56_9 arabidopsis_thaliana_core_4_57_9 | |||
sudo /etc/init.d/mysql start | |||
#checking it | |||
mysql -u ensembl -p | |||
show databases; | |||
use arabidopsis_thaliana_core_4_57_9; | |||
show tables; | |||
</pre> | |||
TBC on 2010-05- | TBC on 2010-05-13 | ||
<--(mopping up / adding other species)--> | <!--(mopping up / adding other species)--> |
Latest revision as of 05:09, 7 June 2010
ENSEMBL Install
At this moment (2010-05-08) this is not a validated procedure guaranteed to give you a fully functional local ENSEMBL mirror / instance. These are observations made during an ongoing attempt to get such a working installation.
There is another page dealing with errors during ENSEMBL installation (in Japanese) here: [1]
Use translate.google.com / find a speaker.
Hardware
- 4 cores Intel(R) Xeon(R) CPU E5405 @ 2.00GHz
- 16GB RAM
This is not a prerequisite, but compilation on a slower machine will take longer.
Software
- Fedora 8 64-bit Linux
- gcc 4.1.2
Requirements
- source directories (without any databases): < 1 GB,
- destination directory (apache2, mysql, perl, perl modules, ensembl + bioperl code, etc.): ca. 600 MB
General remarks
This is an attempt to get a working local ENSEMBL in a step-by-step fashion. Most likely one can get away with a somewhat smaller number of Perl modules, no brand new copy of Perl, using distribution-specific apache2, etc. On the other hand, for an ENSEMBL newbie the safest path is to stay, at the beginning, as close to the provided configuration as possible. Instead of performing 100 steps in one go (software component installation, MySQL database creation, data loading, and final editing of ENSEMBL configuration files), it may be safer to treat each of the steps separately and use error messages as checkpoints.
Web server installation
ENSEMBL relies on mod_perl.
Perl
http://www.perl.org/get.html got perl-5.12.0
cd perl-5.12.0 cd perl-5.12.0/ CFLAGS='-m64 -mtune=nocona' ./Configure -des -A ccflags=-fPIC -Dprefix=/home/ensembl/local/ make make test make install
The "CFLAGS" line is required on 64-bit Linux system to compile mod_perl. See relevant section below
Apache httpd
http://httpd.apache.org/download.cgi got httpd-2.2.15.tar.gz
installation:
./configure --enable--deflate --enable--headers --enable--expires --prefix=/home/ensembl/local/apache2 make make install
checking what is built in:
/home/ensembl/local/apache2/bin/apachectl -t -D DUMP_MODULES
my output:
core_module (static) authn_file_module (static) authn_default_module (static) authz_host_module (static) authz_groupfile_module (static) authz_user_module (static) authz_default_module (static) auth_basic_module (static) include_module (static) filter_module (static) log_config_module (static) env_module (static) setenvif_module (static) version_module (static) mpm_prefork_module (static) http_module (static) mime_module (static) status_module (static) autoindex_module (static) asis_module (static) cgi_module (static) negotiation_module (static) dir_module (static) actions_module (static) userdir_module (static) alias_module (static) so_module (static) perl_module (shared)
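I do not get these modules here: deflate, headers, expires. (Likely cause: the configure options above are written with a doubled dash; the standard spelling is --enable-deflate, --enable-headers, --enable-expires.)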
What works (in httpd-2.2.15 directory):
cd ./modules/filters/ /home/ensembl/local/apache2/bin/apxs -c mod_deflate.c cp -i .libs/mod_deflate.so /home/ensembl/local/apache2/modules/ cd ../metadata/ /home/ensembl/local/apache2/bin/apxs -c mod_headers.c cp -i .libs/mod_headers.so /home/ensembl/local/apache2/modules/ /home/ensembl/local/apache2/bin/apxs -c mod_expires.c cp -i .libs/mod_expires.so /home/ensembl/local/apache2/modules/ cd /home/ensembl/local/ensembl/modules ln -s /home/ensembl/local/apache2/modules/*.so .
Bug fix (Ubuntu): mod_deflate.so is broken if compiled as above. Run: grep "^LDFLAGS" /usr/bin/apr-config — if the output does not show LDFLAGS="-lz", edit that file accordingly, then redo the section devoted to mod_deflate.so. Check that all went OK:
ldd /home/ensembl/local/apache2/modules/mod_deflate.so linux-vdso.so.1 => (0x00007fff233f3000) libz.so.1 => /lib/libz.so.1 (0x00007fca9a3e0000) libc.so.6 => /lib/libc.so.6 (0x00007fca9a071000) /lib64/ld-linux-x86-64.so.2 (0x00007fca9a81e000)
Based on: http://prefetch.net/blog/index.php/2005/12/15/unmangling-apxs-and-mod_deflate/
mod_perl for apache2.x
http://perl.apache.org/download/index.html got mod_perl-2.0.4
export PATH=/home/ensembl/local/bin/:$PATH cd mod_perl-2.0.4 perl Makefile.PL MP_APXS=/home/ensembl/local/apache2/bin/apxs make make test #t/modules/apache_status.t ............... 1/15 # Failed test 14 in t/modules/apache_status.t at line 47 # Failed test 15 in t/modules/apache_status.t at line 47 fail #2 #t/modules/apache_status.t ............... Failed 2/15 subtests make install
Perl modules required by ENSEMBL
Assumes that you installed Perl in /home/ensembl/local/ and got perl binary in /home/ensembl/local/bin/
Check the list of modules here i.e: http://browser.1000genomes.org/info/docs/webcode/install/non-ensembl-code.html
There are several versions of this list of modules, but ultimately you may be missing several unlisted modules, and you will get their names (one by one) after trying to start your ENSEMBL site.
Despite the advice to always install the newest module versions, there is one important exception: LWP. LWP version 5.812 is required by the latest (2.57) ParallelUserAgent. This will be covered in a separate section of this page.
Also, some modules do not install (at least on my machine) from Perl's CPAN shell. These may require installation by hand from sources (described later).
CPAN Shell
Easy things first:
- editing modules 10/06/02 dk, bad formatting
export PATH=/home/ensembl/local/bin/:$PATH which perl # ~/local/bin/perl perl -MCPAN -e shell #<snip> #commit: wrote '/home/ensembl/local/lib/perl5/5.12.0/CPAN/Config.pm' install Cache::Memcached # Cache-Memcached-1.28.tar.gz install CGI # CGI.pm-3.49.tar.gz install CGI::Ajax # CGI-Ajax-0.707.tar.gz install CGI::Session # CGI-Session-4.42.tar.gz install Class::Accessor # Class::Accessor is up to date (0.34). // checked after installation of all modules install Class::Data::Inheritable # Class::Data::Inheritable is up to date (0.08). // checked after inst. of all modules install Class::Std # Class-Std-0.011.tar.gz install Class::Std::Utils # Class-Std-Utils-v0.0.3.tar.gz install Compress::Zlib # Compress::Zlib is up to date (2.027). install Compress::Raw::Zlib # Compress-Raw-Zlib-2.027.tar.gz install Compress::Bzip2 # Compress-Bzip2-2.09.tar.gz install Devel::StackTrace # Devel-StackTrace-1.22.tar.gz install Data::UUID #Data-UUID-1.203.tar.gz Update 2010-05-28: Data-UUID-1.215.tar.gz install Digest::MD5 #Digest::MD5 is up to date (2.39). install Exception::Class # Exception-Class-1.30.tar.gz install File::Temp # File::Temp is up to date (0.22) install Hash::Merge # Hash-Merge-0.12.tar.gz install Storable #Storable is up to date (2.22). install PDF::API2 # PDF-API2-0.73.tar.gz install Spreadsheet::WriteExcel # Spreadsheet-WriteExcel-2.37.tar.gz install OLE::Storage_Lite # OLE::Storage_Lite is up to date (0.19) install Mail::Mailer # MailTools-2.06.tar.gz install Math::Bezier # Math-Bezier-0.01.tar.gz install IO::String # IO-String-1.08.tar.gz install Image::Size # Image-Size-3.221.tar.gz install List::MoreUtils #List-MoreUtils-0.22.tar.gz install Number::Format # Number-Format-1.73.tar.gz install Time::HiRes # Time-HiRes-1.9721.tar.gz ##### install SOAP::Lite # SOAP-Lite-0.711.tar.gz install XML::Parser # XML::Parser is up to date (2.36). 
install XML::Simple # XML-Simple-2.18.tar.gz install Parse::RecDescent # Parse-RecDescent-1.965001.tar.gz install HTML::Template # HTML-Template-2.9.tar.gz install K/KM/KMACLEOD/libxml-perl-0.08.tar.gz # // OK install Log::Log4perl # Log-Log4perl-1.28.tar.gz ###### #### install SHLOMIF/Config-IniFiles-2.57.tar.gz #// i /Config::Inifiles/ lists 3 modules, this one is the only one which version is close to the one listed on 1000genomes.org web page (see above) Update 2010-05-28: install SHLOMIF/Config-IniFiles-2.58.tar.gz install DCONWAY/Parse-RecDescent-1.965001.tar.gz # // closest with ver. number from 1000genomes.org list # // i /Parse::RecDescent/ install S/SM/SMUELLER/PathTools-3.31.tar.gz # // closest with ver. number from 1000genomes.org list # // i /PathTools/ #### install Template::Plugin::Number::Format # Template-Plugin-Number-Format-1.02.tar.gz install XML::DOM # XML-DOM-1.44.tar.gz install XML::RSS # XML-RSS-1.48.tar.gz ######### install DBI #DBI-1.611.tar.gz install DB_File # DB_File is up to date (1.820). // day 2, after installation of other modules install Class::DBI::Sweet # Class-DBI-Sweet-0.10.tar.gz #needs few other classes, install last? install GD #GD-2.45.tar.gz
These modules were missing when starting httpd with ENSEMBL's httpd.conf:
install BSD::Resource # BSD-Resource-1.2904.tar.gz install Sys::Hostname::Long # Sys-Hostname-Long-1.4.tar.gz install JSON # JSON-2.21.tar.gz
From sources
- DBD::mysql
Problem: requires MySQL (at least mysql client) on the same machine
- install MySQL
mysql-5.1.46
./configure --prefix=/home/ensembl/local/ make make check make test
this is a long compile even on a fast machine.
- DBD::mysql itself
cd DBD-mysql-4.014 perl Makefile.PL --with-mysql=/home/ensembl/local/mysql/ make make install
Not tested: for proper testing you need access to a running MySQL instance
- Bio::Das::Lite
===> problem in CPAN shell: install Bio::Das::Lite
depends on WWW::Curl , reports SZBALINT/WWW-Curl-4.11.tar.gz Running make install
make test had returned bad status, won't install without force
going for source install
- WWW::Curl
got: WWW-Curl-4.11
cd WWW-Curl-4.11/ perl Makefile.PL PREFIX=/home/ensembl/local/ make make test #Failed 1/20 test programs. 3/177 subtests failed. #make: *** [test_dynamic] Error 255 make install
- Bio::Das::Lite
cd Bio-Das-Lite-2.03/ perl Makefile.PL PREFIX=/home/ensembl/local/ make make test #Failed 18/22 test programs. 0/1 subtests failed. make install
This one does not look good. A number of errors like "Bareword "CURLOPT_PROXYUSERNAME" not allowed while "strict subs" in use at lib/Bio/Das/Lite.pm line 861.". No solution at this point.
- Template::Toolkit
Used by BioMart got Template-Toolkit-2.22
cd Template-Toolkit-2.22 perl Makefile.PL PREFIX=/home/ensembl/local/ make make test #All tests successful. make install
ParallelUserAgent
The most current version (2.57) has not been updated in years, and requires an old (5.812) version of LWP.
- git (in case you do not have it)
http://git-scm.com/ got git-1.7.1
./configure --prefix=/home/ensembl/local/ make make test make install
The two steps below are a kind contribution of Dr K.B from England. (I will give her full credit once I get her permission to do so).
- libwww ver. 5.812
mkdir libwww_old cd libwww_old git clone git://gitorious.org/libwww-perl/mainline.git cd mainline git checkout R5.812 perl Makefile.PL PREFIX=/home/ensembl/local/ make make test #local/httpsub.t (Wstat: 512 Tests: 0 Failed: 0) # Non-zero exit status: 2 # Parse errors: No plan found in TAP output # <snip> # Failed 1/33 test programs. 0/833 subtests failed. make install
- ParallelUserAgent (itself)
export PERL5LIB=/home/ensembl/local/lib/perl5/ wget http://search.cpan.org/CPAN/authors/id/M/MA/MARCLANG/ParallelUserAgent-2.57.tar.gz tar xfvz ParallelUserAgent-2.57.tar.gz cd ParallelUserAgent-2.57 perl Makefile.PL PREFIX=/home/ensembl/local/ make make test # All tests successful. make install
Summary of this stage
Most of the components install OK to this point. Broken stuff:
- Bio::Das::Lite (possibly older version of Perl may have been more tolerant?)
ENSEMBL code
prepare the directory
cd /home/ensembl/local/ mkdir ensembl_2010.05.08 ln -s ensembl_2010.05.08 ensembl cd ensembl
API + web-code
in: /home/ensembl/local/ensembl
cvs passwd: CVSUSER
cvs -d :pserver:cvsuser@cvs.sanger.ac.uk:/cvsroot/ensembl login cvs -d :pserver:cvsuser@cvs.sanger.ac.uk:/cvsroot/ensembl co -r branch-ensembl-57 ensembl-api ensembl-website
BioMart
in: /home/ensembl/local/ensembl cvs passwd: CVSUSER
cvs -d :pserver:cvsuser@cvs.sanger.ac.uk:/cvsroot/biomart login cvs -d :pserver:cvsuser@cvs.sanger.ac.uk:/cvsroot/biomart co -r release-0_6 biomart-perl
BioPerl
The required version for parsing BLAST results in ENSEMBL is BioPerl 1.2.3. According to some users posting at dev-ensembl mailing list, you may try to go with newest BioPerl. BLAST searches will not work, but ensembl-api should be OK. Since I am not sure what will give us better overall functionality, we can fork here and get both branches for future testing.
UNRESOLVED 2010-05-08: "normal" installation of BioPerl requires building it (perl Makefile.PL etc.). So far none of the pages describing ENSEMBL install mentions this step. For the time being I stop at unpacking BioPerl.
- BioPerl 1.2.3
CVS repository does not exist anymore, we have to get the whole thing from: http://search.cpan.org/~birney/bioperl-1.2.3/
wget http://search.cpan.org/CPAN/authors/id/B/BI/BIRNEY/bioperl-1.2.3.tar.gz tar xfvz bioperl-1.2.3.tar.gz mv bioperl-1.2.3/ /home/ensembl/local/ensembl_2010.05.08 cd /home/ensembl/local/ensembl_2010.05.08 ln -s bioperl-1.2.3 bioperl-live/
- Latest bioperl-live
Requires that you have Subversion installed (not covered here in case you do not) in: some place you unpack sources
svn co svn://code.open-bio.org/bioperl/bioperl-live/trunk bioperl-live mv bioperl-live bioperl-live.2010.05.08 mv bioperl-live.2010.05.08 /home/ensembl/local/ensembl_2010.05.08/ cd /home/ensembl/local/ensembl_2010.05.08 ln -s ./bioperl-live.2010.05.08 ./bioperl-live
This is where we should have (almost, see below) all that is needed for running the site.
ENSEMBL configuration
I change the configuration in an incremental fashion, starting with the small changes.
Step 1
- create default config files:
cd /home/ensembl/local/ensembl/conf/ cp Plugins.pm-dist Plugins.pm cd /home/ensembl/local/ensembl/public-plugins/mirror/conf/ cp SiteDefs.pm-dist SiteDefs.pm
- Edit: /home/ensembl/local/ensembl/conf/httpd.conf
change:
<Perl> #!/usr/local/bin/perl -w
to:
<Perl> #!/home/ensembl/local/bin/perl -w
- loadable apache modules
in /home/ensembl/local/ensembl/conf/httpd.conf a few modules are required. To check this, simply run:
/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/
If you get any complaints about mod_perl, mod_deflate, mod_headers, mod_expires, one possibility is to link these from our apache2/modules to the ensembl/modules directory:
cd /home/ensembl/local/ensembl/modules/ ln -s /home/ensembl/local/apache2/modules/*.so .
when starting "/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/" you should have:
Starting up Ensembl server on host: myhost.mydomain.org defined(%hash) is deprecated at /home/ensembl /local/lib/perl5/site_perl/5.12.0/LWP/Parallel/UserAgent.pm line 1327. (Maybe you should just omit the defined()?) defined(%hash) is deprecated at /home/ensembl/local/lib/perl5/site_perl/5.12.0/SOAP/Lite.pm line 465. (Maybe you should just omit the defined()?) defined(%hash) is deprecated at /home/ensembl/local/lib/perl5/site_perl/5.12.0/SOAP/Lite.pm line 2203. (Maybe you should just omit the defined()?) ------------------------------------------------------------------------------ - INFO : 0.000 : 0.000 : Parser >> Starting to parse tree ------------------------------------------------------------------------------ - INFO : 0.010 : 0.010 : Parser >> Child objects attached - INFO : 0.011 : 0.001 : Filesystem >> Trawled web tree ------------------------------------------------------------------------------ - INFO : 0.011 : 0.000 : Parser >> Parsing ini files and munging dbs ------------------------------------------------------------------------------ - INFO : 0.018 : 0.006 : Parsing >> DEFAULTS ini file - INFO : 0.019 : 0.001 : Parsing >> Anolis_carolinensis ini file <snip> [WARN] Can't connect to DATABASE_WEBSITE [WARN] DBI connect('database=ensembl_website_57;host=mysql.mydomain.org; DB server hostname/IP address;port=3306','mysqluser',...) failed: Unknown MySQL server host 'mysql.mydomain.org' (2) at /home/dkedra/local/ensembl/modules/EnsEMBL/Web/ConfigPacker_base.pm line 91 <snip>
It does not complain about missing modules here, so let's hope everything up to the point of getting a MySQL database connection looks OK.
Step 2
- go to the ensembl directory
There is info about mysql access to ENSEMBL databases @ensembl.org: http://www.ensembl.org/info/data/mysql.html The "mysql.mydomain.org" is defined in just one place:
find . -type f | xargs grep -s mysql.mydomain.org # ./conf/ini-files/DEFAULTS.ini:
- edit ./conf/ini-files/DEFAULTS.ini:
comment out original lines:
; DATABASE_HOST = mysql.mydomain.org; DB server hostname/IP address ; DATABASE_HOST_PORT = 3306 ;DB server TCP/IP port ; DATABASE_DBUSER = mysqluser ;DB read-only user ; DATABASE_DBPASS = ;DB read-only password
paste this & save
#modified on 2010-05-08 by some-user DATABASE_HOST = ensembldb.ensembl.org ; DB server hostname/IP address DATABASE_HOST_PORT = 5306 ;DB server TCP/IP port DATABASE_DBUSER = anonymous ;DB read-only user DATABASE_DBPASS = ;DB read-only password
- check if it works: "/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/":
Nope: "Can't locate MIME/Types.pm in @INC"
- Install missing module:
perl -MCPAN -e shell install MIME::Types #MIME-Types-1.29.tar.gz
- check it again: "/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/":
ENSEMBL_BLAST_METHODS config unavailable at /home/ensembl/local/ensembl/modules/EnsEMBL/Web/BlastView/BlastDefs.pm line 66. (13)Permission denied: make_sock: could not bind to address [::]:80 (13)Permission denied: make_sock: could not bind to address 0.0.0.0:80 no listening sockets available, shutting down Unable to open logs
This is a good result: the things to be tweaked are the default port for apache2 and the permissions of the ./logs directory. ENSEMBL_BLAST_METHODS is just icing on the cake, so we will leave it for later.
Step 3
still in /home/ensembl/local/ensembl
- log permissions
This is a temporary change. In the end if things go right we will not be running apache2 under our user name on port 8080. So these are temporary fixes in a testing phase.
mkdir logs
- changing port (from 80 to 8080)
edit ./conf/SiteDefs.pm
change line:
ENSEMBL_PORT = 80;
to:
ENSEMBL_PORT = 8080;
- check if it works:
/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/
gives:
============================================================================== Server information: ============================================================================== Ensembl version: 57 External address: http://www.mydomain.org:8080/ Real address: http://your.workstation.org:8080 Cache namespace: http://www.mydomain.org:8080 Server root: /home/ensembl/local/ensembl PID file: /home/ensembl/local/ensembl/logs/your_machine.httpd.pid Access logs: /home/ensembl/local/ensembl/logs/your_machine.access_log ensembl_extended Error logs: /home/ensembl/local/ensembl/logs/your_machine.error_log CSS file: http://www.mydomain.org:8080/minified/b6ca3ca4ce26fe395240a66f0e285ee4.css JavaScript file: http://www.mydomain.org:8080/minified/ba1ee50aa5b6d1e173d240a25b2f560c.js Plugins installed: EnsEMBL::Mirror EnsEMBL::Ensembl ============================================================================== ENSEMBL_BLAST_METHODS config unavailable at /home/ensembl/local/ensembl/modules/EnsEMBL/Web/BlastView/BlastDefs.pm line 66.
Not perfect (www.mydomain.org etc.) but we can check how it looks in the browser. http://localhost:8080
OK, we got something ENSEMBL-like, but there are no species to look at. When clicking on the human icon (or going to: http://localhost:8080/Homo_sapiens/Info/Index ) we get a nice error page with: "Can't locate GD/Text.pm". Back to Perl modules.
- install missing GD::Text
perl -MCPAN -e shell install GD::Text exit killall -9 /home/ensembl/local/apache2/bin/httpd
- Check the progress again:
"/home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/" browser to: http://localhost:8080 then http://localhost:8080/Homo_sapiens/Info/Index
Novel error:
DBI connect('ensembl_web_user_db:ensembldb.ensembl.org:5306','',...) failed: Access denied for user ... 'your_user_name'@'your_IP_number' (using password: NO) at /home/ensembl/local/lib/perl5/site_perl/5.12.0/Ima/DBI.pm
So while "anonymous" in ./conf/ini-files/DEFAULTS.ini did the job for accessing database=ensembl_website_57 @ensembldb.ensembl.org:5306 during startup this will not do here.
Step 4
Lets look at log files.
more ./logs/*.error_log # [Sun May 09 13:24:31 2010] [warn] pid file /home/ensembl/local/ensembl/logs/your_machine.httpd.pid overwritten -- Unclean shutdown of previous Apache run? # [Sun May 09 13:24:31 2010] [notice] Apache/2.2.15 (Unix) mod_perl/2.0.4 Perl/v5.12.0 configured -- resuming normal operations
Looks OK, but after reloading http://localhost:8080/index.html in the browser:
EnsEMBL::Web::Root: Can't locate XML/Atom/Feed.pm in @INC
Fixing it:
killall -9 /home/ensembl/local/apache2/bin/httpd perl -MCPAN -e shell install XML::Atom::Feed #XML-Atom-0.37.tar.gz // + few other packages automatically exit /home/ensembl/local/apache2/bin/httpd -d /home/ensembl/local/ensembl/
After reloading http://localhost:8080/index.html in the browser the only thing fixed is the news feed from http://ensembl.blogspot.com/, a rather small step.
Step 5
There is no fully functional connection to MySQL server @ensembldb.ensembl.org:5306 In my browser @http://localhost:8080/index.html I can not see any other species in drop down "Search: All Species".
- revert changes to ./conf/ini-files/DEFAULTS.ini to the default values
comment out:
; #modified on 2010-05-08 by some-user ; DATABASE_HOST = ensembldb.ensembl.org ; DB server hostname/IP address ; DATABASE_HOST_PORT = 5306 ;DB server TCP/IP port ; DATABASE_DBUSER = anonymous ;DB read-only user ; DATABASE_DBPASS = ;DB read-only password
uncomment:
DATABASE_HOST = mysql.mydomain.org; DB server hostname/IP address DATABASE_HOST_PORT = 3306 ;DB server TCP/IP port DATABASE_DBUSER = mysqluser ;DB read-only user DATABASE_DBPASS = ;DB read-only password
- edit public-plugins/mirror/conf/SiteDefs.pm
kate public-plugins/mirror/conf/SiteDefs.pm
change following lines (two blocks):
$SiteDefs::ENSEMBL_USER = 'anonymous'; # 'my_user'; $SiteDefs::ENSEMBL_GROUP = 'apache'; #'my_group'; $SiteDefs::ENSEMBL_USERDB_USER = 'anonymous'; # 'my_mysql_write_user'; $SiteDefs::ENSEMBL_USERDB_HOST = 'ensembldb.ensembl.org'; # 'my_mysql.my_domain.org' $SiteDefs::ENSEMBL_USERDB_PORT = 5306; # 3306;
Check out that you have "anonymous" in /etc/passwd (I do not)
At this point it is impossible to continue without root access to the machine (i.e. creation of new user).
UPDATE (2010-05-12): this was due to the lack of writable access to a user session database. It is possible to use SQLite for that purpose. Look into: http://browser.1000genomes.org/info/docs/webcode/install/non-ensembl-code.html#sqlite ./public-plugins/sqlite/conf/SiteDefs.pm
Step 6
Moving to Ubuntu 9.10 (old workstation) for testing. Following the installation steps except:
- no MySQL install (got one on the system)
- despite following all the steps with apache2 install I am getting (during httpd startup with ensembl httpd.conf) error:
mod_deflate.so: undefined symbol: inflateEnd
For the time being I commented out the mod_deflate import in httpd.conf, but then found a fix (see compiling httpd at the top of the page).
- tried to automate installation of Perl modules by creating a file with statements:
/home/ensembl/local/bin/perl -MCPAN -e 'install GD' <snip>
some success but: => Ubuntu does not install e.g. libxml-dev by default (installed it with Synaptic). => problem with GD module, downloaded libgd-2.0.28, ==> to be sure installed (in Synaptic) Freetype, Fontconfig and Xpm (reported during install of libgd). ==> compiled and installed it in /home/ensembl/local
==> DBD::mysql will not get properly tested if there is no MySQL test database. Created test, and run:
perl Makefile.PL --testuser=mysqlusr --testpassword=mysqlusrpass PREFIX=/home/ensembl/local/ make make test make install
=> WWW::Curl does not install with Ubuntu curl installed via Synaptic. Downloaded curl-7.20.1, compiled and installed it in /home/ensembl/local, ==> WWW::Curl installed => Bio::Das::Lite from Sourceforge
svn co https://bio-das-lite.svn.sourceforge.net/svnroot/bio-das-lite bio-das-lite cd ./bio-das-lite/trunk/ perl Build.PL PREFIX=/home/ensembl/local/ ./Build ./Build test ./Build install
=> libwww (LWP): got the newest version installed during "automated" installation of Perl modules, as well as (I think) during WWW::Curl install. Without proper version ParallelUserAgent will not work, so reinstalled 5.812 from source as a last thing.
- edited public-plugins/ensembl/conf/SiteDefs.pm
commenting out all species ($SiteDefs::__species_aliases) except: Caenorhabditis_elegans Saccharomyces_cerevisiae
and changing two lines:
$SiteDefs::ENSEMBL_PRIMARY_SPECIES ='Saccharomyces_cerevisiae'; # Default species $SiteDefs::ENSEMBL_SECONDARY_SPECIES = 'Caenorhabditis_elegans'; # Secondary species
This speeds up Ensembl startup times.
I got the proper Ensembl start page with my two test species available as pull down menus. "Popular species" with human & mouse icons must be predefined elsewhere. Still the error:
DBI connect('ensembl_web_user_db:localhost:3305','',...) failed: Access denied for user 'ensembl'@'localhost' ... (using password: NO) at /home/ensembl/local/lib/perl5/site_perl/5.12.0/Ima/DBI.pm line 328
Step 7
The ensembl_web_user_db is a DB for which Ensembl needs write access. So anonymous@some_place will not do.
In ftp://ftp.ensembl.org/pub/current_mysql/ we can find directories:
- ensembl_web_user_db/
- ensembl_website_57/
wget -x ftp://ftp.ensembl.org/pub/current_mysql/ensembl_web_user_db/* cd ftp.ensembl.org/pub/current_mysql/ensembl_web_user_db/ gunzip *gz mysql -u ensembl -p create database ensembl_web_user_db; quit mysql -u ensembl -p ensembl_web_user_db < ensembl_web_user_db.sql
wget -x ftp://ftp.ensembl.org/pub/current_mysql/ensembl_website_57/* cd ftp.ensembl.org/pub/current_mysql/ensembl_website_57/ gunzip *gz mysql -u ensembl -p create database ensembl_website_57; quit; mysql -u ensembl -p ensembl_website_57 < ensembl_website_57.sql mysqlimport -u ensembl -p ensembl_website_57 -L *.txt
Step 8 (success!)
Temporary fix for MySQL vs Ensembl web code mismatch:
- by default Ubuntu's mysql locks out any network connections in /etc/my.cnf , line:
bind-address = 127.0.0.1
Commenting this line out makes MySQL more permissive: it then accepts connections on all network interfaces, not only from localhost.
- grant all privileges to user accessing MySQL via ENSEMBL web code (say "ensembl-mysql" user).
- if one can not find what's wrong with a password a short term "solution" is to remove passwd for "ensembl-mysql".
- unsecured MySQL account: I am behind a firewall, on a machine with a few accounts for the guys from my group, so I am not that exposed during the testing phase. One way of improving the security is to replace an obvious user name (like "ensembl") with one derived from an md5sum (e.g. of any text file you created), which gives something like "1a8d1d9ced6ec9132446cc1180c41dcf". There is a limit of 16 chars for User, but I think it should be harder to exploit the hole with a username like "HA1a8D1d9cEd6ec".
- once you get connection (check it in the browser) there are two missing perl modules, but installing them through CPAN shell is trivial.
export PATH=/home/ensembl/local/bin/:$PATH which perl # ~/local/bin/perl perl -MCPAN -e shell install IPC::Run install RTF::Writer
Step 9
Up to this point my Ensembl was working in a dual mode:
- using local MySQL to store session info in ensembl_web_user_db database
- using remote MySQL database @ensembl.org
Cutting the cord
- Comment out in conf/ini-files/DEFAULTS.ini lines:
#DATABASE_HOST = ensembldb.ensembl.org ; DB server hostname/IP address #DATABASE_HOST_PORT = 5306 ;DB server TCP/IP port #DATABASE_DBUSER = anonymous ;DB read-only user #DATABASE_DBPASS = ;DB read-only password
- set up the connection to the local server:
DATABASE_HOST = 123.24.35.678 ; DB server hostname/IP address DATABASE_HOST_PORT = 3306 ;DB server TCP/IP port DATABASE_DBUSER = ensembl ;DB read-only user DATABASE_DBPASS = hardpass ;DB read-only password
- getting data files for local databases
(going a little bit ahead: not a comprehensive install, just for illustration / small testing)
Smallest DBs for testing: yeast
We already have ensembl_web_user_db ensembl_website_57
There are following directories with yeast data: saccharomyces_cerevisiae_core_57_1j/ saccharomyces_cerevisiae_funcgen_57_1j/ saccharomyces_cerevisiae_otherfeatures_57_1j/
A functional site needs just the core database, so let's not try to do everything at once on the first go:
Get it keeping dir structure:
wget -x ftp://ftp.ensembl.org/pub/current_mysql/saccharomyces_cerevisiae_core_57_1j/*
- follow procedures from Step 7 to create MySQL databases, create db schemas and load the data.
- restart the ensembl server
- check it in the browser
While we still have (mostly) working pages, some content simply vanished. Looking at the ./logs/*.error_log gives: "Could not connect to database ensembl_compara_57"
Instead of downloading everything from: ftp://ftp.ensembl.org/pub/current_mysql/ensembl_compara_57/
we cut another corner and give up on accessing COMPARA for the time being. This is configured in the MULTI.ini file. To make things more complicated there are several such files in the default install:
./public-plugins/admin/conf/ini-files/MULTI.ini ./public-plugins/mart/conf/ini-files/MULTI.ini ./public-plugins/ensembl/conf/ini-files/MULTI.ini ./conf/ini-files/MULTI.ini
To pick the right one we have to look at conf/Plugins.pm Whichever one is on the top (in our case:
'EnsEMBL::Mirror' => $SiteDefs::ENSEMBL_SERVERROOT.'/public-plugins/mirror',
) this one determines the final values. So because it is safer to leave /public-plugins/ensembl/ unmodified, we create ./public-plugins/mirror/conf/ini-files/MULTI.ini
with just two lines:
[databases] DATABASE_COMPARA =
Yes, the value after the "=" sign is left empty.
Step 10
Important: Ensembl caches its configuration using pack files. There are two places:
conf/config.packed #one file conf/packed/ #multiple files
So whenever we change configuration these files need to be deleted (in ensembl directory):
rm -i ./conf/config.packed rm -i ./conf/packed/*
Adding existing "non-animal" species
There is a long list of species fully compatible with default setup of Ensembl 57. You can see them in: public-plugins/ensembl/conf/SiteDefs.pm
Having a local mirror of these is straightforward (follow the procedure for yeast above). Problems start with non-listed species. I ran it just for A. thaliana (I expect that for other plants it will be similar), but things may differ for other branches (check the SQL schema for each of them).
- Downloading core DB:
wget -x ftp://ftp.ensemblgenomes.org/pub/plants/release-4/mysql/arabidopsis_thaliana_core_4_56_9/*
- Creating database
Following http://www.ensembl.org/info/docs/webcode/install/ensembl-data.html
Before we start: the crucial number in arabidopsis_thaliana_core_4_56_9 is "56". It means that the files follow the Ensembl 56 schema, and accessing them using Ensembl 57 will give us a schema mismatch. There is a way to upgrade the schema from one Ensembl release to the next (see the patch files in ensembl/sql/). So in the end we will try to:
- use arabidopsis_thaliana_core_4_56_9 files
- create database and populate it with data
- patch the schema
- rename the database to make clear which schema it follows
#unpacking cd /where/you/downloaded/ftp.ensemblgenomes.org/pub/plants/release-4/mysql/arabidopsis_thaliana_core_4_56_9/ gunzip *gz #creating db mysql -u ensembl -p create database arabidopsis_thaliana_core_4_56_9; quit #creating schema + loading data mysql -u ensembl -p arabidopsis_thaliana_core_4_56_9 < arabidopsis_thaliana_core_4_56_9.sql mysqlimport -u ensembl -p arabidopsis_thaliana_core_4_56_9 -L *.txt #patching schema: watch for all files for a given patch! # run patches by hand in case of any doubt cd /home/ensembl/local/ensembl/ cd ensembl/sql/ for file in patch_56_57_?.sql; do mysql -u ensembl -p arabidopsis_thaliana_core_4_56_9 < $file; done #renaming the database cd /var/lib/mysql sudo /etc/init.d/mysql stop sudo mv arabidopsis_thaliana_core_4_56_9 arabidopsis_thaliana_core_4_57_9 sudo /etc/init.d/mysql start #checking it mysql -u ensembl -p show databases; use arabidopsis_thaliana_core_4_57_9; show tables;
TBC on 2010-05-13