# Server build steps for the arxivwiki host: base packages, the arxivwiki
# user, Apache modules/sites, and the web and samba directories.
#
# NOTE: this is a runbook rather than a fully unattended script. It contains
# a manual re-login step and at least one interactive package prompt (the
# Java license), so run it section by section.

# make sure we can ssh in!
sudo apt-get install --assume-yes openssh-server

# create the arxivwiki user
# NOTE(review): the password hash is hard-coded in this file. Anyone who can
# read the file can attack the hash offline -- consider setting the password
# interactively (passwd arxivwiki) and rotating this one.
sudo useradd -p "\$1\$tiAZODpE\$zpBSj7s87VaILoJsL/w.M/" --shell /bin/bash --groups admin,adm arxivwiki

# now logout, and log back in as arxivwiki
sudo deluser ubuntu

# Ensure apt-get isn't trying to read from a cdrom:
sudo sed -i '/deb cdrom:/d' /etc/apt/sources.list

# Start installing, via apt-get
## First update, in case we're on an old virtual machine
sudo apt-get update --assume-yes
sudo apt-get dist-upgrade --assume-yes
## Apache
sudo apt-get install --assume-yes apache2 libapache2-svn
## subversion
sudo apt-get install --assume-yes subversion subversion-tools
## bittorrent
sudo apt-get install --assume-yes bittorrent
## zip
sudo apt-get install --assume-yes zip unzip
## java (prompts for license agreement!)
sudo apt-get install --assume-yes sun-java6-jdk sun-java6-source
## Samba drivers
sudo apt-get install --assume-yes smbfs
## lynx and curl
sudo apt-get install --assume-yes lynx curl
## patch, dos2unix
sudo apt-get install --assume-yes patch tofrodos
## some things we need to compile Virtuoso
sudo apt-get install --assume-yes autoconf automake libtool flex bison gperf gawk m4 make openssl libssl-dev
## PHP (needed for Danny Ayers' sparql-demo)
# The pattern is quoted so that apt-get receives it verbatim; unquoted, the
# shell would expand it against files in the current directory first.
sudo apt-get install --assume-yes php5 php5-cli php-xsl 'php-xml*'

# Install some more things, which we perhaps don't really need?
## MySQL (TODO: this requires user interaction, when configuring packages!)
# sudo apt-get install --assume-yes mysql-client-5.0 mysql-server-5.0 phpmyadmin
## latex
# sudo apt-get install --assume-yes texlive-latex-base imagemagick
## sendmail
# sudo apt-get install --assume-yes sendmail sasl2-bin
## DAV support for apache (not available in 7.10)
# sudo apt-get install --assume-yes libapache-mod-dav
## something needed for jungledisk? (not available in 7.10)
# sudo apt-get install --assume-yes ia32-libs ia32-libs-gtk

# make mod_rewrite and mod_proxy available
# (-f replaces an existing link, so re-running this section does not fail)
sudo ln -sf /etc/apache2/mods-available/rewrite.load /etc/apache2/mods-enabled/rewrite.load
sudo ln -sf /etc/apache2/mods-available/proxy.load /etc/apache2/mods-enabled/proxy.load
sudo ln -sf /etc/apache2/mods-available/proxy_http.load /etc/apache2/mods-enabled/proxy_http.load

# setup the sites
# Stop if the directory is missing: the rm/wget below use relative paths and
# must not run in whatever directory we happened to be in.
cd /etc/apache2/sites-available/ || exit 1
# Remove any stale copy first so wget does not save the download under a
# ".1" suffix; -f keeps a first-time run (no file yet) quiet.
sudo rm -f arxivwiki.org.conf
sudo wget http://katlas.org/svn/arxivwiki/trunk/server-build/arxivwiki.org.conf
sudo ln -sf /etc/apache2/sites-available/arxivwiki.org.conf /etc/apache2/sites-enabled/arxivwiki.org.conf
sudo rm -f proxy.conf
sudo wget http://katlas.org/svn/arxivwiki/trunk/server-build/proxy.conf
sudo ln -sf /etc/apache2/sites-available/proxy.conf /etc/apache2/sites-enabled/proxy.conf
sudo apache2ctl restart

# web root, owned by arxivwiki
sudo mkdir -p /www/arxivwiki.org
sudo chown arxivwiki /www/arxivwiki.org/
sudo chgrp arxivwiki /www/arxivwiki.org/
cd /www/arxivwiki.org/ || exit 1
svn checkout http://katlas.org/svn/arxivwiki/trunk/snorql/
svn checkout http://katlas.org/svn/arxivwiki/trunk/sparql-form/

# home and samba directories, owned by arxivwiki
sudo mkdir -p /home/arxivwiki/samba/
sudo chown arxivwiki /home/arxivwiki/
sudo chgrp arxivwiki /home/arxivwiki/
sudo chown arxivwiki /home/arxivwiki/samba/
sudo chgrp arxivwiki /home/arxivwiki/samba/

# @deprecated, but a useful wget example!
# cd /www/arxivwiki.org/
# wget -nH http://katlas.org/svn/arxivwiki/trunk/server-build/html/ -r --cut-dirs=5 -np

# We should download the torrent at this point! At present it doesn't exist,
# so that's a bit ambitious.
# Unpack the mirrored data, install Sesame and Tomcat under the home
# directory, set up the shell profile, install rc.local, then build and
# start Virtuoso.

# now unpack the tar files from the torrent
mkdir -p ~/samba/mirror/
cd ~/samba/mirror/ || exit 1
for file in ../torrent/*.tar; do
  # If nothing matches, the glob stays literal; skip instead of handing tar
  # a non-existent "*.tar" name.
  [ -e "$file" ] || continue
  # k: keep files that already exist instead of overwriting them
  tar xvfk "$file"
done

# download sesame2
cd ~ || exit 1
wget http://internap.dl.sourceforge.net/sourceforge/sesame/openrdf-sesame-2.0-sdk.zip
unzip openrdf-sesame-2.0-sdk.zip
rm openrdf-sesame-2.0-sdk.zip
chmod u+x ~/openrdf-sesame-2.0/bin/start-console.sh

# download tomcat
cd ~ || exit 1
wget http://www.ibiblio.org/pub/mirrors/apache/tomcat/tomcat-5/v5.5.26/bin/apache-tomcat-5.5.26.zip
unzip apache-tomcat-5.5.26.zip
rm apache-tomcat-5.5.26.zip

# now patch server.xml, to enable the proxy connector on port 8082
# cd ~/apache-tomcat-5.5.26/conf
# wget http://katlas.org/svn/arxivwiki/trunk/server-build/patches/patch.server.xml
# patch server.xml patch.server.xml

# install the webapps in tomcat
sudo cp /home/arxivwiki/openrdf-sesame-2.0/war/*.war ~/apache-tomcat-5.5.26/webapps/

## .bashrc doesn't work the way I expect, lame.
# Login shells read ~/.bash_profile, not ~/.bashrc, so make the profile
# source .bashrc. (Writing only the path would make the login shell try to
# *execute* ~/.bashrc as a command, which fails.)
echo ". ~/.bashrc" > ~/.bash_profile
echo "export PATH=~/apache-maven-2.0.8/bin:\$PATH" >> ~/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-6-sun/jre/" >> ~/.bashrc

chmod a+x ~/apache-tomcat-5.5.26/bin/*.sh

# download and install rc.local, and run it right now!
cd /etc/ || exit 1
sudo rm -f /etc/rc.local
sudo wget http://katlas.org/svn/arxivwiki/trunk/server-build/rc.local
# strip DOS line endings from the downloaded file before executing it
sudo dos2unix rc.local
sudo chmod u+x rc.local
sudo ./rc.local

# Download and build virtuoso. This takes hours. :-(
cd ~ || exit 1
wget http://superb-east.dl.sourceforge.net/sourceforge/virtuoso/virtuoso-opensource-5.0.5.tar.gz
tar xzvf virtuoso-opensource-5.0.5.tar.gz
rm virtuoso-opensource-5.0.5.tar.gz
cd virtuoso-opensource-5.0.5 || exit 1
# Chain the steps so a failed configure/make never reaches "make install".
./configure && make && sudo make install

# start virtuoso
cd /usr/local/virtuoso-opensource/var/lib/virtuoso/db || exit 1
# remove a leftover lock file, if any (-f: no error when there is none)
sudo rm -f virtuoso.lck
sudo ../../../../bin/virtuoso-t -f &

## You'll have to go do some virtuoso configuring at this point, which it
## seems we can't do from the commandline. Sorry, lame.
## Login, dba, dba.
## Turn on the SPARQL (and iSPARQL) packages.
## Set up rdf_sinks to put things in the right contexts
### create DAV directories with properties virt:rdf_graph='' and virt:rdf_sponger='on'
## Load RDF data, via curl -- this can be automated below.

# Download Maven, build the arxivwiki tools from svn, then scrape the OAI
# interfaces and prepare the RDF under /www/arxivwiki.org/.

# download and unpackage maven
cd ~ || exit 1
wget http://tqft.net/files/apache-maven-2.0.8-bin.zip
unzip apache-maven-2.0.8-bin.zip
rm apache-maven-2.0.8-bin.zip

# checkout the svn repository, and build:
# NOTE(review): the mvn calls below need ~/apache-maven-2.0.8/bin on PATH
# and presumably JAVA_HOME set -- confirm the current shell has them.
cd ~ || exit 1
svn checkout http://katlas.org/svn/arxivwiki/
cd ~/arxivwiki/trunk/metadata || exit 1
mvn install
cd ~/arxivwiki/trunk/arxiv-oai2rdf || exit 1
mvn install

# replace oai2rdf's Main.java with the patched copy before building it
cd ~/arxivwiki/trunk/oai2rdf/src/main/java/edu/mit/simile/rdfizer/oai || exit 1
rm -f Main.java
wget http://katlas.org/svn/arxivwiki/trunk/server-build/patches/Main.java
cd ~/arxivwiki/trunk/oai2rdf || exit 1
mvn install

# download everything from the arxiv OAI interface, and the citebase OAI interface
cd ~/arxivwiki/trunk/metadata || exit 1
./scrape-oai.sh /www/arxivwiki.org/

# scrape the arxiv!
cd ~/arxivwiki/trunk/metadata || exit 1
# drop the old identifier list, if there is one, before building the mirror
rm -f /www/arxivwiki.org/mirror/lists/identifiers.list
./build-mirror.sh /www/arxivwiki.org/

mkdir -p \
  /www/arxivwiki.org/rdf/arxiv/info/ \
  /www/arxivwiki.org/rdf/versions/info/ \
  /www/arxivwiki.org/rdf/citations/info/ \
  /www/arxivwiki.org/rdf/crawler-anomalies/info/ \
  /www/arxivwiki.org/rdf/citebase/info/

# prepare all the rdf!
./prepare-rdf.sh /www/arxivwiki.org/

# put all the RDF somewhere useful :-)
#./create-arxivwiki-repository.sh
#./load-all-rdf.sh http://localhost:8080/openrdf-sesame ~/samba/

### errr
# Turn every "./<digits>/<digits>/<id>/index.html" path listed in
# all-files.txt into "http://arxiv.org/abs/<id>" and write the sorted result
# to index-files.txt. ('|' is the sed delimiter so the slashes need no
# escaping.)
grep 'index\.html' < all-files.txt \
  | sed 's|^\./[0-9]*/[0-9]*/\(.*\)/index\.html|http://arxiv.org/abs/\1|g' \
  | sort > index-files.txt