Difference between revisions of "2017 Haneul Lab note"

From Crop Genomics Lab.
Jump to: navigation, search
(Mungbean pacbio assembly)
(Mungbean pacbio assembly)
 
(29 intermediate revisions by one user not shown)
Line 369: Line 369:
 
Repeat masking progress is based on these sites: http://weatherby.genetics.utah.edu/MAKER/wiki/index.php/Repeat_Library_Construction-Advanced, http://www.repeatmasker.org/
 
Repeat masking progress is based on these sites: http://weatherby.genetics.utah.edu/MAKER/wiki/index.php/Repeat_Library_Construction-Advanced, http://www.repeatmasker.org/
  
<big>Repeat masking program installation</big>
+
 
 +
<big>'''Repeat masking program installation'''</big>
 +
 
 +
 
 +
Repbase - for RepeatMasker
 +
(63:/data/skyts0401/program/)
 +
should register http://www.girinst.org/
 +
download RepBaseRepeatMaskerEdition
 +
cp RepBaseRepeatMaskerEdition-20170127\ \(1\).tar.gz RepeatMasker/.
 +
cd RepeatMasker/
 +
tar -xvzf RepBaseRepeatMaskerEdition-20170127\ \(1\).tar.gz
 +
(Libraries/ directory will be created and all files will be copied to RepeatMasker/Libraries/)
 +
 
 +
rmblast - for RepeatMasker (ver 2.6.0 has problem with install, so I installed v. 2.2.28)
 +
(63:/data/skyts0401/program/)
 +
download from http://www.repeatmasker.org/RMBlast.html
 +
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/rmblast/2.2.28/ncbi-rmblastn-2.2.28-x64-linux.tar.gz
 +
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.28/ncbi-blast-2.2.28+-x64-linux.tar.gz
 +
tar zxvf ncbi-blast-2.2.28+-x64-linux.tar.gz
 +
tar zxvf ncbi-rmblastn-2.2.28-x64-linux.tar.gz
 +
cp -R ncbi-rmblastn-2.2.28/* ncbi-blast-2.2.28+/
 +
rm -rf ncbi-rmblastn-2.2.28
 +
mv ncbi-blast-2.2.28+ rmblast-2.2.28
 +
 
 +
trf - for RepeatMasker
 +
(63:/data/skyts0401/program/)
 +
download from http://tandem.bu.edu/trf/trf.html
 +
chmod a+x trf409.linux64
 +
ln -s trf409.linux64 RepeatMasker/trf
 +
 
 +
RepeatMasker
 +
(63:/data/skyts0401/program/)
 +
wget http://www.repeatmasker.org/RepeatMasker-open-4-0-7.tar.gz
 +
tar -xzf RepeatMasker-open-4-0-7.tar.gz
 +
cd RepeatMasker/
 +
(move the Repbase library to RepeatMasker/Libraries/)
 +
perl ./configure
 +
configure directory of trf, rmblast
 +
 
 +
muscle - for MITE-Hunter
 +
(63:/data/skyts0401/program/)
 +
check version on http://www.drive5.com/muscle/
 +
wget http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86linux64.tar.gz
 +
tar -xvzf muscle3.8.31_i86linux64.tar.gz
 +
mkdir muscle
 +
mv muscle3.8.31_i86linux64 muscle/
 +
 
 +
mdust - for MITE-Hunter
 +
(63:/data/skyts0401/program/)
 +
wget ftp://occams.dfci.harvard.edu/pub/bio/tgi/software//seqclean/mdust.tar.gz
 +
tar -xvzf mdust.tar.gz
 +
 
 +
MITE-Hunter
 +
(63:/data/skyts0401/program/)
 +
check version on http://target.iplantcollaborative.org/mite_hunter.html
 +
wget http://target.iplantcollaborative.org/mite_hunter/MITE%20Hunter-11-2011.zip
 +
unzip MITE\ Hunter-11-2011.zip
 +
mv MITE\ Hunter/ MITE_Hunter
 +
cd MITE_Hunter/
 +
perl MITE_Hunter_Installer.pl -d /data/skyts0401/program/MITE_Hunter -f formatdb -b blastall -m /data/skyts0401/program/mdust -M /data/skyts0401/program/muscle
 +
 
 +
GenomeTools
 +
(63:/data/skyts0401/program/)
 +
check version on http://genometools.org/
 +
wget http://genometools.org/pub/genometools-1.5.9.tar.gz
 +
tar -xvzf genometools-1.5.9.tar.gz
 +
cd genometools-1.5.9/
 +
make
 +
sudo make install
 +
- if have a problem with dependency, please check this -
 +
sudo apt-get install libcairo2-dev
 +
sudo apt-get install libpango1.0-dev
 +
 
 +
Genome tRNA database
 +
(63:/home/skyts0401/bin/)
 +
check version on http://gtrnadb.ucsc.edu
 +
wget http://gtrnadb2009.ucsc.edu/download/tRNAs/eukaryotic-tRNAs.fa.gz
 +
gunzip eukaryotic-tRNAs.fa.gz
 +
 
 +
CRL scripts
 +
(63:/home/skyts0401/bin/)
 +
wget http://www.hrt.msu.edu/uploads/535/78637/CRL_Scripts1.0.tar.gz
 +
tar -xvzf CRL_Scripts1.0.tar.gz
 +
 
 +
transposons protein database
 +
(63:/home/skyts0401/bin/)
 +
wget http://www.hrt.msu.edu/uploads/535/78637/Tpases020812DNA.gz
 +
gunzip Tpases020812DNA.gz
 +
wget http://www.hrt.msu.edu/uploads/535/78637/Tpases020812.gz
 +
gunzip Tpases020812.gz
 +
 
 +
plant protein database
 +
(63:/home/skyts0401/bin/)
 +
wget http://www.hrt.msu.edu/uploads/535/78637/alluniRefprexp070416.gz
 +
gunzip alluniRefprexp070416.gz
 +
 
 +
RECON - for RepeatModeler
 +
(63:/data/skyts0401/program/)
 +
wget http://www.repeatmasker.org/RepeatModeler/RECON-1.08.tar.gz
 +
tar -xvzf RECON-1.08.tar.gz
 +
cd RECON-1.08/src/
 +
make
 +
make install
 +
cd ../scripts/
 +
nano recon.pl (added /data/skyts0401/program/RECON-1.08/bin to PATH = "" (third line))
 +
 
 +
RepeatScout - for RepeatModeler
 +
(63:/data/skyts0401/program/)
 +
wget http://www.repeatmasker.org/RepeatScout-1.0.5.tar.gz
 +
tar -xvzf RepeatScout-1.0.5.tar.gz
 +
cd RepeatScout-1/
 +
make
 +
sudo make install
 +
 
 +
nseg - for RepeatModeler
 +
(63:/data/skyts0401/program/)
 +
mkdir nseg
 +
cd nseg
 +
wget ftp://ftp.ncbi.nih.gov/pub/seg/nseg/* .
 +
make
 +
 
 +
RepeatModeler
 +
(63:/data/skyts0401/program/)
 +
wget http://www.repeatmasker.org/RepeatModeler/RepeatModeler-open-1.0.9.tar.gz
 +
tar -xvzf RepeatModeler-open-1.0.9.tar.gz
 +
cd RepeatModeler-open-1.0.9/
 +
perl ./configure
 +
configure directory of RECON, RepeatScout, nseg, trf, rmblast
 +
 
 +
hmmer - for ProtExcluder
 +
(63:/data/skyts0401/program/)
 +
wget http://eddylab.org/software/hmmer3/3.1b2/hmmer-3.1b2-linux-intel-x86_64.tar.gz
 +
tar -xvzf hmmer-3.1b2-linux-intel-x86_64.tar.gz
 +
cd hmmer-3.1b2-linux-intel-x86_64/
 +
./configure
 +
make
 +
sudo make install
 +
 
 +
ProtExcluder
 +
wget http://www.hrt.msu.edu/uploads/535/78637/ProtExcluder1.2.tar.gz
 +
tar -xvzf ProtExcluder1.2.tar.gz
 +
cd ProtExcluder1.2/
 +
./Installer.pl -m /data/skyts0401/program/hmmer-3.1b2-linux-intel-x86_64/binaries/ -p /data/skyts0401/program/ProtExcluder1.2/
 +
 
 +
 
 +
<big>'''Repeat masking progress'''</big>
 +
 
 +
Basic command which I used is based on http://weatherby.genetics.utah.edu/MAKER/wiki/index.php/Repeat_Library_Construction-Advanced
 +
 
 +
 
 +
Move Mungbean genome assembly final version
 +
scp skyts0401@147.46.250.244:/kev8305/SK3/anchoring/gapfilled_assembly_final/standard_output.gapfilled.final.fa .
 +
 
 +
MITE library
 +
(63:/data/skyts0401/program/MITE_Hunter/)
 +
perl MITE_Hunter_manager.pl -i /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa -g Mungbean -c 10 -S 12345678
 +
mv Mungbean* /data/skyts0401/Mungbean/repeatmask/MITE/.
 +
cd /data/skyts0401/Mungbean/repeatmask/MITE/
 +
cat Mungbean_Step8_*.fa > ../MITE.lib
 +
 
 +
LTR library
 +
(63:/data/skyts0401/Mungbean/repeatmask/LTR/99/)
 +
ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
 +
gt suffixerator -db standard_output.gapfilled.final.fa -indexname Mungbean_LTR -tis -suf -lcp -des -ssp -dna
 +
gt ltrharvest -index Mungbean_LTR -out Mungbean.out99 -outinner Mungbean.outinner99 -gff3 Mungbean.gff99 -minlenltr 100 -maxlenltr 6000 -mindistltr 1500 -maxdistltr 25000 -mintsd 5 -maxtsd 5 -motif tgca -similar 99 -vic 10 > Mungbean.result99
 +
gt gff3 -sort Mungbean.gff99 > Mungbean.gff99.sort
 +
gt ltrdigest -trnas ~/bin/eukaryotic-tRNAs.fa Mungbean.gff99.sort Mungbean_LTR > Mungbean.gff99.dgt
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step1.pl --gff Mungbean.gff99.dgt
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step2.pl --step1 CRL_Step1_Passed_Elements.txt --repeatfile Mungbean.out99 --resultfile Mungbean.result99 --sequencefile standard_output.gapfilled.final.fa --removed_repeats CRL_Step2_Passed_Elements.fasta
 +
mkdir fasta_files
 +
mv Repeat_*.fasta fasta_files/\
 +
mv Repeat_*.fasta fasta_files/
 +
mv CRL_Step2_Passed_Elements.fasta fasta_files/
 +
cd fasta_files/
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step3.pl --directory . --step2 CRL_Step2_Passed_Elements.fasta --pidentity 60 --seq_c 25
 +
mv CRL_Step3_Passed_Elements.fasta ..
 +
cd ..
 +
perl ~/bin/CRL_Scripts1.0/ltr_library.pl --resultfile Mungbean.result99 --step3 CRL_Step3_Passed_Elements.fasta --sequencefile standard_output.gapfilled.final.fa
 +
cp lLTR_Only.lib ../lLTR_Only_99.lib
 +
cat lLTR_Only.lib ../../MITE/MITE.lib > repeats_to_mask_LTR99.fasta
 +
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib repeats_to_mask_LTR99.fasta -nolow -dir . Mungbean.outinner99
 +
perl ~/bin/CRL_Scripts1.0/cleanRM.pl Mungbean.outinner99.out Mungbean.outinner99.masked > Mungbean.outinner99.unmasked
 +
perl ~/bin/CRL_Scripts1.0/rmshortinner.pl Mungbean.outinner99.unmasked 50 > Mungbean.outinner99.clean
 +
makeblastdb -in ~/bin/Tpases020812DNA -dbtype prot
 +
blastx -query Mungbean.outinner99.clean -db ~/bin/Tpases020812DNA -evalue 1e-10 -num_descriptions 10 -out Mungbean.outinner99.clean_blastx.out.txt
 +
perl ~/bin/CRL_Scripts1.0/outinner_blastx_parse.pl --blastx Mungbean.outinner99.clean_blastx.out.txt --outinner Mungbean.outinner99
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step4.pl --step3 CRL_Step3_Passed_Elements.fasta --resultfile Mungbean.result99 --innerfile passed_outinner_sequence.fasta --sequencefile standard_output.gapfilled.final.fa
 +
makeblastdb -in lLTRs_Seq_For_BLAST.fasta -dbtype nucl
 +
blastn -query lLTRs_Seq_For_BLAST.fasta -db lLTRs_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out lLTRs_Seq_For_BLAST.fasta.out
 +
makeblastdb -in Inner_Seq_For_BLAST.fasta -dbtype nucl
 +
blastn -query Inner_Seq_For_BLAST.fasta -db Inner_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out Inner_Seq_For_BLAST.fasta.out
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step5.pl --LTR_blast lLTRs_Seq_For_BLAST.fasta.out --inner_blast Inner_Seq_For_BLAST.fasta.out --step3 CRL_Step3_Passed_Elements.fasta --final LTR99.lib --pcoverage 90 --pidentity 80
 +
 +
relatively old LTR (same commands as above, but for relatively old LTRs)
 +
(63:/data/skyts0401/Mungbean/repeatmask/LTR/85)
 +
(to avoid confusing the LTR_99 results with these results, make directories 99 and 85 in the LTR directory)
 +
ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
 +
gt suffixerator -db standard_output.gapfilled.final.fa -indexname Mungbean_LTR -tis -suf -lcp -des -ssp -dna
 +
gt ltrharvest -index Mungbean_LTR -out Mungbean.out85 -outinner Mungbean.outinner85 -gff3 Mungbean.gff85 -minlenltr 100 -maxlenltr 6000 -mindistltr 1500 -maxdistltr 25000 -mintsd 5 -maxtsd 5 -vic 10  > Mungbean.result85
 +
cp ../99/CRL_Step1_Passed_Elements.txt .
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step2.pl --step1 CRL_Step1_Passed_Elements.txt --repeatfile Mungbean.out85 --resultfile Mungbean.result85 --sequencefile standard_output.gapfilled.final.fa --removed_repeats CRL_Step2_Passed_Elements.fasta
 +
mkdir fasta_files
 +
mv Repeat_*.fasta fasta_files/
 +
mv CRL_Step2_Passed_Elements.fasta fasta_files/
 +
cd fasta_files/
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step3.pl --directory . --step2 CRL_Step2_Passed_Elements.fasta --pidentity 60 --seq_c 25
 +
mv CRL_Step3_Passed_Elements.fasta ..
 +
cd ..
 +
perl ~/bin/CRL_Scripts1.0/ltr_library.pl --resultfile Mungbean.result85 --step3 CRL_Step3_Passed_Elements.fasta --sequencefile standard_output.gapfilled.final.fa
 +
cp lLTR_Only.lib ../lLTR_Only_85.lib
 +
cat lLTR_Only.lib ../../MITE/MITE.lib > repeats_to_mask_LTR85.fasta
 +
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib repeats_to_mask_LTR85.fasta -nolow -dir . Mungbean.outinner85
 +
perl ~/bin/CRL_Scripts1.0/cleanRM.pl Mungbean.outinner85.out Mungbean.outinner85.masked > Mungbean.outinner85.unmasked
 +
perl ~/bin/CRL_Scripts1.0/rmshortinner.pl Mungbean.outinner85.unmasked 50 > Mungbean.outinner85.clean
 +
makeblastdb -in ~/bin/Tpases020812DNA -dbtype prot
 +
blastx -query Mungbean.outinner85.clean -db ~/bin/Tpases020812DNA -evalue 1e-10 -num_descriptions 10 -out Mungbean.outinner85.clean_blastx.out.txt
 +
perl ~/bin/CRL_Scripts1.0/outinner_blastx_parse.pl --blastx Mungbean.outinner85.clean_blastx.out.txt --outinner Mungbean.outinner85
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step4.pl --step3 CRL_Step3_Passed_Elements.fasta --resultfile Mungbean.result85 --innerfile passed_outinner_sequence.fasta --sequencefile standard_output.gapfilled.final.fa
 +
makeblastdb -in lLTRs_Seq_For_BLAST.fasta -dbtype nucl
 +
blastn -query lLTRs_Seq_For_BLAST.fasta -db lLTRs_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out lLTRs_Seq_For_BLAST.fasta.out
 +
makeblastdb -in Inner_Seq_For_BLAST.fasta -dbtype nucl
 +
blastn -query Inner_Seq_For_BLAST.fasta -db Inner_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out Inner_Seq_For_BLAST.fasta.out
 +
perl ~/bin/CRL_Scripts1.0/CRL_Step5.pl --LTR_blast lLTRs_Seq_For_BLAST.fasta.out --inner_blast Inner_Seq_For_BLAST.fasta.out --step3 CRL_Step3_Passed_Elements.fasta --final LTR85.lib --pcoverage 90 --pidentity 80
 +
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib ../99/LTR99.lib -dir . LTR85.lib
 +
perl ~/bin/CRL_Scripts1.0/remove_masked_sequence.pl --masked_elements LTR85.lib.masked --outfile FinalLTR85.lib
 +
cd ..
 +
cat 99/LTR99.lib 85/FinalLTR85.lib > allLTR.lib
 +
 
 +
Collecting repetitive sequences
 +
ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
 +
nano fasta_devide.py
 +
python fasta_devide.py standard_output.gapfilled.final.fa
 +
nano repeatmask_combine.sh
 +
chmod a+x repeatmask_combine.sh
 +
./repeatmask_combine.sh
 +
cat standard_output.gapfilled.final_devide*.fa.masked > standard_output.gapfilled.final.fa.masked
 +
perl ~/bin/CRL_Scripts1.0/rmaskedpart.pl standard_output.gapfilled.final.fa.masked 50 > umseqfile
 +
/data/skyts0401/program/RepeatModeler-open-1.0.9/BuildDatabase -name umseqfiledb -engine ncbi umseqfile
 +
nohup /data/skyts0401/program/RepeatModeler-open-1.0.9/RepeatModeler -database umseqfiledb >& umseqfile.out
 +
perl ~/bin/CRL_Scripts1.0/repeatmodeler_parse.pl --fastafile consensi.fa.classified --unknowns repeatmodeler_unknowns.fasta --identities repeatmodeler_identities.fasta
 +
makeblastdb -in ~/bin/Tpases020812 -dbtype prot
 +
blastx -query repeatmodeler_unknowns.fasta -db ~/bin/Tpases020812 -evalue 1e-10 -num_descriptions 10 -out modelerunknown_blast_result.txt
 +
~/bin/CRL_Scripts1.0/transposon_blast_parse.pl --blastx modelerunknown_blast_result.txt --modelerunknown repeatmodeler_unknowns.fasta
 +
mv unknown_elements.txt ModelerUnknown.lib
 +
cat identified_elements.txt repeatmodeler_identities.fasta > ModelerID.lib
 +
 
 +
Exclusion of gene fragments
 +
makeblastdb -in ~/bin/alluniRefprexp070416 -dbtype prot
 +
blastx -query ModelerUnknown.lib -db ~/bin/alluniRefprexp070416 -evalue 1e-10 -num_descriptions 10 -out ModelerUnknown.lib_blast_result.txt
 +
cd LTR/
 +
python headerforamt.py allLTR.lib > allLTR.lib.reformed (the LTR library headers contain the '(' symbol, which causes a ProtExcluder error, so the header format is changed)
 +
cd ..
 +
mkdir ProtExclude
 +
cd ProtExclude/
 +
cp ../MITE/MITE.lib .
 +
cp ../LTR/allLTR.lib.reformed .
 +
cp ../ModelerID.lib .
 +
cp ../ModelerUnknown.lib .
 +
cat allLTR.lib.reformed MITE.lib ModelerID.lib > KnownRepeats.lib
 +
cat KnownRepeats.lib ModelerUnknown.lib > allRepeats.lib
 +
blastx -query allRepeats.lib -db ~/bin/alluniRefprexp070416 -evalue 1e-10 -num_descriptions 10 -out allRepeats.lib_blast_results.txt
 +
/data/skyts0401/program/ProtExcluder1.2/ProtExcluder.pl allRepeats.lib_blast_results.txt allRepeats.lib
 +
 
 +
== 5/26 ==
 +
=== Mungbean pacbio assembly ===
 +
For assessment of assembly, run CEGMA and BUSCO
 +
 
 +
 
 +
<big>'''Install'''</big>
 +
 
 +
CEGMA
 +
(63:/data/skyts0401/program/)
 +
sudo apt-get install wise (dependency)
 +
wget ftp://genome.crg.es/pub/software/geneid/geneid_v1.4.4.Jan_13_2011.tar.gz (dependency)
 +
tar -xvzf geneid_v1.4.4.Jan_13_2011.tar.gz
 +
cd geneid
 +
make
 +
make install
 +
nano ~/.profile (add $PATH:/data/skyts0401/program/geneid/bin)
 +
cd ..
 +
git clone https://github.com/KorfLab/CEGMA_v2.git
 +
cd CEGMA_v2/
 +
make
 +
 
 +
 
 +
BUSCO
 +
(63:/data/skyts0401/program/)
 +
wget http://bioinf.uni-greifswald.de/augustus/binaries/augustus-3.2.3.tar.gz (dependency)
 +
tar -xvzf augustus-3.2.3.tar.gz
 +
cd augustus-3.2.3/
 +
make (dependency error)
 +
sudo apt-get install bamtools libbamtools-dev
 +
make
 +
sudo make install
 +
cd ..
 +
git clone https://gitlab.com/ezlab/busco.git
 +
cd busco
 +
sudo python setup.py install
 +
cp config/config.ini.default config/config.ini
 +
nano config/config.ini (change the augustus path (path = /data/skyts0401/program/augustus-3.2.3/scripts/, bin/))
 +
 
 +
 
 +
<big>'''Running'''</big>
 +
 
 +
CEGMA
 +
(63:/data/skyts0401/Mungbean/cegma/)
 +
export CEGMA="/data/skyts0401/program/CEGMA_v2"
 +
export PERL5LIB="$PERL5LIB:$CEGMA/lib"
 +
export PERL5LIB=$CEGMA/lib:$PERL5LIB
 +
source ~/.profile
 +
/data/skyts0401/program/CEGMA_v2/bin/cegma --genome standard_output.gapfilled.final.fa -threads 5
 +
 
 +
 
 +
BUSCO
 +
(63:/data/skyts0401/Mungbean/busco/)
 +
wget http://busco.ezlab.org/datasets/eukaryota_odb9.tar.gz (dataset)
 +
wget http://busco.ezlab.org/datasets/embryophyta_odb9.tar.gz (dataset)
 +
ln -s ../assembly/standard_output.gapfilled.final.fa .
 +
export AUGUSTUS_CONFIG_PATH="/data/skyts0401/program/augustus-3.2.3/config/"
 +
python /data/skyts0401/program/busco/scripts/run_BUSCO.py -i standard_output.gapfilled.final.fa -o Mungbean_busco -c 20 -l eukaryota_odb9/ -m geno
 +
python /data/skyts0401/program/busco/scripts/run_BUSCO.py -i standard_output.gapfilled.final.fa -o Mungbean_plant_busco -c 20 -l embryophyta_odb9/ -m geno
 +
 
 +
== 5/29 ==
 +
=== Mungbean pacbio assembly ===
 +
Maker
 +
 
 +
 
 +
<big>'''Install'''</big>
 +
 
 +
ncbi-blast+
 +
(63:/data/skyts0401/program/)
 +
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.6.0/ncbi-blast-2.6.0+-x64-linux.tar.gz
 +
tar -xvzf ncbi-blast-2.6.0+-x64-linux.tar.gz
 +
 
 +
 
 +
exonerate
 +
(63:/data/skyts0401/program/)
 +
git clone https://github.com/nathanweeks/exonerate.git
 +
cd exonerate/
 +
git checkout v2.4.0
 +
autoreconf -i
 +
./configure
 +
make
 +
sudo make install
 +
 
 +
 
 +
Maker
 +
(63:/data/skyts0401/program/)
 +
download from http://www.yandell-lab.org/software/maker.html
 +
cd maker/
 +
cd src/
 +
nano ~/.profile (add the RepeatMasker directory to $PATH)
 +
source ~/.profile
 +
perl Build.PL
 +
./Build install
 +
 
 +
 
 +
<big>'''Running'''</big>
 +
 
 +
Preparation
 +
(63:/data/skyts0401/Mungbean/maker/)
 +
(Add PATH(/data/skyts0401/program/maker/bin) to ~/.profile)
 +
ln -s ../assembly/Vradi.pacbio.gapfilled.final.fa .
 +
mkdir ../transcriptome
 +
cd ../transcriptome/
 +
scp skyts0401@147.46.250.244:/data/KangYJ/Mungbean/Transcriptome/merge/mungbean_merge.fa.cdhit.fa .
 +
cd ../maker/
 +
ln -s ../transcriptome/mungbean_merge.fa.cdhit.fa .
 +
mkdir ref
 +
cd ref/
 +
(download Fvesca annotation file from phytozome)
 +
unzip Fvesca_download.zip
 +
cd Fvesca/v1.1/annotation/
 +
gunzip Fvesca_226_v1.1.protein.fa.gz
 +
gunzip Fvesca_226_v1.1.transcript.fa.gz
 +
cd ../../..
 +
cp Fvesca/v1.1/annotation/Fvesca_226_v1.1.protein.fa .
 +
cp Fvesca/v1.1/annotation/Fvesca_226_v1.1.transcript.fa .
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Athaliana_167_TAIR10*.fa
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Athaliana_167_TAIR10*.fa .
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Gmax*.fa .
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Ptrichocarpa*.fa .
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Vvinifera*.fa .
 +
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Osativa*.fa .
 +
cd ..
 +
ln -s ../repeatmask/ProtExclude/allRepeats.libnoProtFinal
 +
mkdir tmp
 +
 
 +
 
 +
Running
 +
(63:/data/skyts0401/Mungbean/maker/)
 +
maker -CTL
 +
nano maker_bopts.ctl (default, check blast_type=ncbi+)
 +
nano maker_exe.ctl (change the path ncbi-blast+, RepeatMasker, exonerate, augustus)
 +
nano maker_opts.ctl (change the path genome, evidence(transcriptome, protein), repeat library, temporary directory)
 +
mpiexec -n 30 maker -fix_nucleotides maker_opts.ctl maker_bopts.ctl maker_exe.ctl >& maker_opts.ctl.log
 +
 
 +
== 6/26 ==
 +
=== Mungbean pacbio assembly ===
 +
checking synteny block for chromosome split, combine
 +
 
 +
 
 +
blast
 +
(NICEM:~/data/Mungbean/blast)
 +
makeblastdb -in Vradi.ver6.cor.pep.fa -dbtype 'prot'
 +
blastall -i adzuki.ver3.pep.fa.tr.cor.fa -d Vradi.ver6.cor.pep.fa -p blastp -e 1e-10 -b 5 -v 5 -m 8 -o mcscanx/old_Va.blast
 +
# same procedure for other organism protein
 +
 
 +
 +
MCScanX
 +
(NICEM:~/data/Mungbean/blast/mcscanx)
 +
python gffcombine.py Vradi_ver6.gff.sorted.by.TY.gff adzuki.ver3.gene.gff.cor.gff > old_Va.gff
 +
~/data/program/MCScanX/MCScanX old_Va
 +
# same procedure for other organism protein, just change the species name in gffcombine.py and command
 +
 
 +
 
 +
Circos
 +
(193:/data2/skyts0401/Mungbean/synteny/circos/)
 +
/data2/skyts0401/program/circos-0.69-4/bin/circos -conf synteny_Va_gene.conf -outputfile synteny_Va_gene.png
 +
# same procedure for other organism, change configuration file

Latest revision as of 06:28, 4 July 2017

Contents

1 / 9

Minyoung_UV_QTL

parsing genotype data(Joinmap) and phenotype data to ICImapping format(bip format)
using lgcombine.py(63:/data/skyts0401/Mungbean/MY_UV/)
found that the linkage group 2 map is wrong, so the map is constructed anew


Mungbean synchronous QTL

make loc file(244:/home/skyts0401/reseq/chr/Mungbean_chr_coseq_parse_seg_dist.loc)
markers with missing > 10%, hetero > 10, or depth < 3 are filtered out
while grouping them, found that vr03 and vr04 are combined into one group and vr05 is split into 2 groups; check it.


Mungbean pacbio assembly

moving SAM data(align reseq data on pacbio-scaffold) from NICEM server to 244 server (244:/kev8305/SK3/)

1 / 10

Minyoung_UV_QTL

QTL analysis by using IciMapping


Mungbean synchronous QTL

construct genetic map (JoinMap 4.1), just using the linkage groups where chr 3 and 4 are combined and chr 5 is split.

ML method, Haldane algorithm


Mungbean pacbio assembly

convert SAM format to BAM format (244:/kev8305/SK3/)

./convertbam.sh

1/ 11

Mungbean synchronous QTL

QTL analysis by using RQTL(desktop:/Users/sky/desktop/Mungbean_syn_RQTL.csv)
just for checking locus

1/16

Mungbean pacbio assembly

copying sorted.bam file from 244 server to 63 server

variant calling (244:/kev8305/SK3/, 63:/data/skyts0401/Mungbean/mapping/resequencing/)

samtools mpileup -f falcon_500_sspace.final.scaffolds.fasta -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -b bam_list | bcftools call -v -m -O v > variants.vcf
samtools mpileup -f falcon_500_sspace.final.scaffolds.fasta -I -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -b bam_list | bcftools call -v -m -O v > variants_snp.vcf

1/18

Mungbean pacbio assembly

variant calling Kyoungki Jarae #5 with pacbio falcon scaffold (63:/data/skyts0401/Mungbean/mapping/resequencing/)

bwa index falcon_500_sspace.final.scaffolds.fasta
bwa mem -t 10 falcon_500_sspace.final.scaffolds.fasta KJ-C_1.fastq.gz KJ-C_2.fastq.gz > KJ-pe_falcon_scaffold.sam

1/19

Mungbean pacbio assembly

variant calling Kyoungki Jarae #5 with pacbio falcon scaffold (63:/data/skyts0401/Mungbean/mapping/resequencing/)

samtools view -Sb KJ-pe_falcon_scaffold.sam > KJ-pe_falcon_scaffold.bam
samtools sort KJ-pe_falcon_scaffold.bam -o KJ-pe_falcon_scaffold.sorted.bam
samtools index KJ-pe_falcon_scaffold.sorted.bam
samtools mpileup -f falcon_500_sspace.final.scaffolds.fasta -I -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u KJ-pe_falcon_scaffold.sorted.bam | bcftools call -v -m -O v > KJ_falcon_scaffold_variants_snp.vcf

1/24

Jatropha assembly

make svg file for superscaffold - linkage group marker location (63:/home/skyts0401/svg/)

python make_chr_lg_svg.py standard_output.final.scaffolds.fasta.tr.JM_out.fa standard_output.final.scaffolds.fasta LG.total.txt.reformed standard_output.final.scaffolds.fasta.tr.JM_out.fa.log > chr_lg.svg

1/31

Mungbean Chloroplast assembly

pairing Illumina PE read (63:/home/skyts0401/)

sudo python PE-pairing.py /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq

2/2

Mungbean Chloroplast assembly

(63:/data/skyts0401/Mungbean/chloroplast/)

gmap_build -D gmap_db -d v.radiata v.radiata.fasta
gmap --nosplicing -D gmap_db -n 1 -d v.radiata -f samse scaf_cp_20k.fasta -t 12 | samtools view -Sb > Vr-cp_scaf-cp-20k.bam
samtools sort Vr-cp_scaf-cp-20k.bam -o Vr-cp_scaf-cp-20k.sorted.bam
samtools index Vr-cp_scaf-cp-20k.sorted.bam

2/3 ~ 2/6

Mungbean Chloroplast assembly

falcon - path : 63:/home/skyts0401/Falcon_RE/rere/

before run, copy fc_env folder (63:/data/skyts0401/Falcon/)

cp -r ~/FALCON_RE/rere/FALCON-integrate/fc_env YOUR_FOLDER

and configure file is on /home/skyts0401/fc_run.cfg


align canu contig_cp file to canu contig_cp assembly (63:/data/skyts0401/Mungbean/chloroplast/)

~/bowtie2-2.2.9/bowtie2-build Vr_cp_canu.contigs.for.mapping.fasta Vr_cp_canu.contigs.for.mapping.fasta
~/bowtie2-2.2.9/bowtie2 -x Vr_cp_canu.contigs.for.mapping.fasta -f canu_ctg_cp.fasta --end-to-end --very-fast -p 20 -S cp-assembly_canu_ctg_revised.sam
samtools view -Sb cp-assembly_canu-ctg.sam > cp-assembly_canu-ctg.bam
samtools sort cp-assembly_canu-ctg.bam -o cp-assembly_canu-ctg.sorted.bam
samtools index cp-assembly_canu-ctg.sorted.bam
samtools faidx canu_ctg_cp.fasta
....
~/bowtie2-2.2.9/bowtie2 -x Vr_cp_canu.contigs.for.mapping.fasta -f pb.cp.fasta --end-to-end --very-fast -p 20 -S cp-assembly_pb-cp.sam
....
~/bowtie2-2.2.9/bowtie2 -x Vr_cp_canu.contigs.for.mapping.fasta -1 SunhwaN_1_cont.fq.pairing.fq -2 SunhwaN_2_cont.fq.pairing.fq --end-to-end --very-fast -p 20 -S cp-assembly_PE-cp.sam

2/6

Mungbean Chloroplast assembly

assembly (canu) mungbean pacbio corrected read for chloroplast, parameter changed (63:/data/skyts0401/Mungbean/chloroplast/)

~/canu/Linux-amd64/bin/canu -assemble -p cp_read5 -d assembly/cp_read5 genomeSize=154k contigFilter="5 1000 0.75 0.75 2" -pacbio-corrected pb.cp.fasta
~/canu/Linux-amd64/bin/canu -assemble -p cp_read10 -d assembly/cp_read10 genomeSize=154k contigFilter="10 1000 0.75 0.75 2" -pacbio-corrected pb.cp.fasta

and we have 2 contigs (one contig have LSC+IR, and other contig have SSC+IR)

just assemble them (cp_1.fa, cp_2.fa, cp_3.fa)

2/9

Mungbean Chloroplast assembly

quiver(GenomicConsensus) install(63:/data/kev8305/skyts0401/program)

--- boost (ConsensusCore dependency) ---
wget https://sourceforge.net/projects/boost/files/boost/1.63.0/boost_1_63_0.tar.gz
tar -xf boost_1_63_0.tar.gz
cd boost_1_63_0/
./bootstrap.sh
sudo apt-get install python-dev (solution for error-pyconfig.h)
sudo ./b2 install
--- swig (ConsensusCore dependency) ---
wget https://downloads.sourceforge.net/project/swig/swig/swig-3.0.12/swig-3.0.12.tar.gz
tar -xf swig-3.0.12.tar.gz 
cd swig-3.0.12/
./configure 
make
sudo make install
--- ConsensusCore (GenomicConsensus dependency) ---
git clone https://github.com/PacificBiosciences/ConsensusCore.git
cd ConsensusCore/
sudo python setup.py install
--- GenomicConsensus ---
git clone https://github.com/PacificBiosciences/GenomicConsensus.git
sudo apt-get install libhdf5-serial-dev (solution for error-hdf5.h)
sudo make


Align PacBio_chloroplast read to vr.pb.cp.fasta(PacBio cp assembly) (63:/kev8305/Mungbean_assembly/chloroplast/)

bowtie2 -x vr.pb.cp.fasta -f pb.cp.fasta --end-to-end --very-fast -p 4 -S cp-assembly_pb-cp.sam
samtools view -Sb cp-assembly_pb-cp.sam > cp-assembly_pb-cp.bam

Align Illumina Paired-End read to vr.pb.cp.fasta(PacBio cp assembly) (63:/kev8305/Mungbean_assembly/chloroplast/)

bowtie2 -x vr.pb.cp.fasta -1 SunhwaN_1_cont.fq.pairing.fq -2 SunhwaN_2_cont.fq.pairing.fq --end-to-end --very-fast -p 4 -S cp-assembly_PE-cp.sam
samtools view -Sb cp-assembly_PE-cp.sam > cp-assembly_PE-cp.bam

Polishing by Quiver

2/10

Mungbean Chloroplast assembly

For Quiver, aligning the PacBio chloroplast reads to vr.pb.cp.fasta needs to be done with pbalign, not bowtie or some other program.

pbalign install (63:/kev8305/skyts0401/program)

--- blasr (pbalign dependency) ---
https://github.com/PacificBiosciences/blasr/blob/master/doc/INSTALL_MAKE.md
--- pbcommand (quiver dependency) ---
git clone https://github.com/PacificBiosciences/pbcommand.git
cd pbcommand
sudo python setup.py install
--- pbalign ---
git clone https://github.com/PacificBiosciences/pbalign.git
cd pbalign/
sudo pip install .

pbalign (tried to align by using blasr algorithm , but sam or bam is no longer supported in blasr, so just use bowtie algorithm) (63:/kev8305/Mungbean_assembly/chloroplast/)

pbalign --noSplitSubreads --nproc 4 --algorithm bowtie pb.cp.fasta vr.pb.cp.fasta cp-assembly-pb-cp.for.quiver.sam

2/13

Mungbean Chloroplast assembly

An error occurred while running pbalign, so blasr was re-installed (probably a library error)

2/14

Mungbean Chloroplast assembly

variant calling with PE and PB read on chloroplast assembly genome (63:/kev8305/Mungbean_assembly/chloroplast/)

samtools mpileup -f vr.pb.cp.fasta -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u cp-assembly_pb-cp.sorted.bam | bcftools call -v -m -O v > vr.cp_pb_variants.vcf
samtools mpileup -f vr.pb.cp.fasta -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u cp-assembly_PE-cp.sorted.bam | bcftools call -v -m -O v > vr.cp_PE_variants.vcf

2/16

Mungbean Chloroplast assembly

Align PE reads to vr.pb.cp.fasta by using bwa (244:/kev8305/Mungbean_assembly/chloroplast/)

bwa index vr.pb.cp.fasta
bwa mem -t 4 vr.pb.cp.fasta SunhwaN_1_cont.fq.pairing.fq SunhwaN_2_cont.fq.pairing.fq > vr.pb.cp_PE.sam
samtools view -Sb vr.pb.cp_PE.sam > vr.pb.cp_PE.bam
samtools sort vr.pb.cp_PE.bam -o vr.pb.cp_PE.sorted.bam
samtools index vr.pb.cp_PE.sorted.bam
samtools mpileup -f vr.pb.cp.fasta -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u vr.pb.cp_PE.sorted.bam | bcftools call -v -m -O v > variants_PE_bwa.vcf
....
bwa mem -t 4 vr.pb.cp.fasta pb.cp.fasta > vr.pb.cp_PB.sam
samtools view -Sb vr.pb.cp_PB.sam > vr.pb.cp_PB.bam
samtools sort vr.pb.cp_PB.bam -o vr.pb.cp_PB.sorted.bam
samtools index vr.pb.cp_PB.sorted.bam
....
samtools mpileup -f vr.pb.cp.fasta -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u vr.pb.cp_PE.sorted.bam > variants_PE_bwa_all.vcf
python vcf_filtering.py variants_PE_bwa_all.vcf > variants_PE_bwa_all_0.15.vcf

2/21

Mungbean Chloroplast assembly

write a script that reads a fasta file and an annotation file (gff or gb) and makes a fasta file with gene CDS sequences (63:/kev8305/Mungbean_assembly/chloroplast/)

python getCDS.py vr.pb.cp.fasta vr.pb.cp.gff > vr.pb.cp.gene.fasta
python getCDS.py v.radiata.fasta v.radiata.gb > v.radiata.gene.fasta

2/22

Mungbean pacbio assembly

snp calling done, snp filtering for genetic map construction (244:/kev8305/SK3/)

python ~/reseq/vcfparse_parent.py variants_snp.vcf KJ_falcon_scaffold_variants_snp.vcf
python ~/reseq/vcfparse.py variants_snp_compare_parents.vcf (dp >= 5, missing < 13, hetero < 10)
python ~/reseq/vcfparse_coseg.py variants_snp_compare_parents_filtered.vcf Mungbean_pacbio_scaffold_3.loc
python locparse.py Mungbean_pacbio_scaffold_3_seg_dist.loc > Mungbean_pacbio_scaffold_3_seg_dist_format.loc (scaffold name is too long, eliminate '|')

2/23

Mungbean pacbio assembly

too many snp for joinmap, so filtering missing < 12

python ~/reseq/vcfparse.py variants_snp_compare_parents.vcf
python ~/reseq/vcfparse_coseg.py variants_snp_compare_parents_filtered.vcf Mungbean_pacbio_scaffold_4.loc
python ~/reseq/cal_seg_dist.py Mungbean_pacbio_scaffold_4.loc 
9110
python locparse.py Mungbean_pacbio_scaffold_4_seg_dist.loc > Mungbean_pacbio_scaffold_4_seg_dist_format.loc

2/27

Mungbean pacbio assembly

Mungbean_pacbio_scaffold_7_seg_dist_format.loc : no hetero, missing < 18


ALLMAPS install (244:/kev8305/skyts0401/program)

easy_install biopython numpy deap networkx matplotlib jcvi
wget https://dl.dropboxusercontent.com/u/15937715/Data/ALLMAPS/ALLMAPS-install.sh
sh ALLMAPS-install.sh

and add the directory containing the ALLMAPS binaries (concorde, faSize, liftOver) to $PATH in ~/.profile


ALLMAPS (244:/kev8305/SK3/anchoring)

python ~/reseq/allmaps_format.py Mungbean_pacbio_5_joinmap.result > Mungbean_pacbio_5_joinmap.for.allmaps
python ~/reseq/allmaps_format.py Mungbean_pacbio_7_joinmap.result > Mungbean_pacbio_7_joinmap.for.allmaps
python -m jcvi.assembly.allmaps merge Mungbean_pacbio_5_joinmap.for.allmaps Mungbean_pacbio_7_joinmap.for.allmaps -o JM-2.bed
python -m jcvi.assembly.allmaps path JM-2.bed falcon_500_sspace.final.scaffolds.fasta.header.fasta

3/2

Mungbean pacbio assembly

MUMmer install, for dot plot between pacbio and previous ref

wget https://downloads.sourceforge.net/project/mummer/mummer/3.23/MUMmer3.23.tar.gz
tar -xvf MUMmer3.23.tar.gz
cd MUMmer3.23
make check
make install
MUMmer3.23/mummer -mum -b -c Vradi.ver6.cor.fa.chr.fa JM-2.chr.fasta > ref_qry.mums

3/3

Mungbean pacbio assembly

lastz install (244:/kev8305/skyts0401/program/)

download from http://www.bx.psu.edu/~rsharris/lastz/
tar -xvzf lastz-1.02.00.tar.gz
cd lastz-distrib-1.02.00/src/
----------------------------------
problem with Makefile, so delete -Werror in line 31 of Makefile, save.
----------------------------------
make
make install

add path /home/skyts0401/lastz-distrib/bin in .profile


lastz (244:/kev8305/SK3/anchoring/)

lastz JM-2.chr.fasta[multiple] Vradi.ver6.cor.fa --notransition --step=20 --gfextend --chain --gapped --format=sam > old_new.sam

3/7

Mungbean pacbio assembly

MUMmer ran into a memory problem, so it was rebuilt with the 64-bit compile flag (-DSIXTYFOURBITS)

make clean
make CPPFLAGS="-O3 -DSIXTYFOURBITS"
make install


and use nucmer to align pacbio assembly and previous reference

MUMmer3.23/nucmer -maxmatch -c 100 -p ref_qry JM-2.chr.fasta Vradi.ver6.cor.fa
MUMmer3.23/nucmer --noextend -c 100 -p ref_qry_noextend JM-2.chr.fasta Vradi.ver6.cor.fa


and draw a dot plot using mummerplot

mummerplot --fat -l -png ref_qry_noextend.delta

but it fails with an error like

set mouse clipboardformat "[%.0f, %.0f]"
          ^
"out.gp", line 2594: wrong option

It seems gnuplot was updated and no longer supports that option, which mummerplot writes into out.gp. Just edit out.gp to delete that line.

/kev8305/skyts0401/program/last-842/scripts/last-dotplot -2 'Vr*' -2 'scaffold_?' -x 1920 -y 1920 ref_qry.maf plot.png

3/16 ~

Mungbean pacbio assembly

compare between pacbio assembly and previous reference

1. Map 50 reseq markers per LG from the previous reference onto the pacbio super scaffolds, to check that the same markers fall on the same chromosomes. (244:/kev8305/SK3/anchoring/check)

python SNP_marker_pos.py Vradi_ver6.fa Mungbean_chr_coseg_parse_seg_dist.loc > Vradi.ver6.reseq.marker.fasta
makeblastdb -in JM-2.chr.fasta -dbtype 'nucl' -out Mungbean_pacbio
blastn -db Mungbean_pacbio -query Vradi.ver6.reseq.marker.fasta -outfmt 6 -out reseq_marker.blast -num_threads 2 -evalue 1e-5 -word_size 100
python blastparse.py reseq_marker.blast > reseq_marker_for_svg.result
python chr_compare_svg.py fasta.size reseq_marker_for_svg.result > chr_compare_3.svg (output can be changed based on option in python code)
/data2/skyts0401/program/circos-0.69-4/bin/circos -conf chr_compare.conf (193:/data2/skyts0401/check/circos)

2. contig compare. (63:/data/skyts0401/Mungbean/assembly/)

scp assembly@147.46.250.181:/home/assembly/data/Mungbean/mapping/p_ctg.longest.fa .
scp skyts0401@147.46.250.244:/kev8305/SK3/anchoring/final.contigs.longest100.fa .
gmap_build -d pacbio_contig_new p_ctg.longest.fa -D ./
gmap -d pacbio_contig_new -D pacbio_contig_new/ final.contigs.longest100.fa -t 12 -f 1 > pacbio_contig_compare.psl
--------------------------------------------------------------------------------------
(NICEM:/home/assembly/check/)
../bwa-0.7.15/bwa mem -t 30 p_ctg.longest.longest3.fa SunhwaN_1.fastq.gz SunhwaN_2.fastq.gz > newcontig_illumina.sam
 
(244:/kev8305/SK3/anchoring/check/)
ln -s /NGS/NGS/VignaRadiata/DNA/Sunhwa_pacbio/filtered_subreads.fasta .
bwa index p_ctg.longest.longest1.fa
bwa mem -t 8 p_ctg.longest.longest1.fa filtered_subreads.fasta > newcontig_pacbio.sam
 
samtools view -Sb newcontig_pacbio.sam > newcontig_pacbio.bam
samtools sort newcontig_pacbio.bam -o newcontig_pacbio.sorted.bam
samtools index newcontig_pacbio.sorted.bam
 ~ same samtools command with newcontig_illumina.sam ~

Found what looked like split mappings, so re-aligned with the end-to-end mode of bowtie2

(NICEM:~/check/)
~/bowtie2-2.2.9/bowtie2-build p_ctg.longest.longest1.fa p_ctg.longest.longest1.fa
~/bowtie2-2.2.9/bowtie2 -x p_ctg.longest.longest1.fa -1 SunhwaN_1.fastq.gz -2 SunhwaN_2.fastq.gz --end-to-end --very-fast -p 30 -S newcontig_illumina_endtoend.sam
~/bowtie2-2.2.9/bowtie2 -x p_ctg.longest.longest1.fa -f filtered_subreads.fasta --end-to-end --very-fast -p 30 -S newcontig_pacbio_endtoend.sam

!!!bowtie2-2.3.0 version has a bug!!!
(244:/kev8305/SK3/anchoring/check/)
scp assembly@147.46.250.181:/home/assembly/check/newcontig_pacbio_endtoend.sam .
scp assembly@147.46.250.181:/home/assembly/check/newcontig_illumina_endtoend.sam .
~ same samtools command, view, sort, index ~
samtools depth -a -q 0 -Q 0 -r 000000F:2000000-4000000 newcontig_illumina_endtoend.sorted.bam > newcontig_illumina_endtoend.mapping.depth
samtools depth -a -q 0 -Q 0 -r 000000F:2000000-4000000 newcontig_pacbio_endtoend.sorted.bam > newcontig_pacbio_endtoend.mapping.depth

blat for comparing contig (NICEM:/home/assembly/check/, 244:/kev8305/SK3/anchoring/check/)

------------------------------
(contig_compare.sh)
#!/bin/bash
# contig_compare.sh: run blat on each of the 20 split FASTA chunks
# (final.contigs_devide0.fa .. final.contigs_devide19.fa) against
# p_ctg.longest.longest3.fa, one background job per chunk, writing
# contig_compare_all_<i>.psl for chunk <i>.
# Exits non-zero if any blat job fails.

pids=()
for i in {0..19}; do
        ../blat p_ctg.longest.longest3.fa "final.contigs_devide${i}.fa" "contig_compare_all_${i}.psl" &
        pids+=("$!")
done

# A bare `wait` always returns 0 and would hide failed jobs, so wait on each
# PID individually; array index i corresponds to chunk i.
status=0
for i in "${!pids[@]}"; do
        if ! wait "${pids[$i]}"; then
                printf 'blat failed for final.contigs_devide%s.fa\n' "$i" >&2
                status=1
        fi
done

exit "$status"
------------------------------
(NICEM)
python fasta_devide.py final.contigs.reformed.fasta 
chmod a+x contig_compare.sh 
./contig_compare.sh 
ls contig_compare_all_*.psl > psl.list
nano pslfilter.py 
python pslfilter.py psl.list > contig_compare_all.result
python pslfilter2.py contig_compare_all.result > contig_compare_all_filtered.result

4/18

Jatropha assembly

make Jatropha figure(chr - lg) for new version(allmaps) (244:/kev8305/skyts0401/Jatropha)

scp skyts0401@147.46.250.63:/home/skyts0401/svg/make_chr_lg_svg.py make_chr_lg_svg_revised_for_allmaps.py
python make_chr_lg_svg_revised_for_allmaps.py Jatropha_map1.result Jatropha.allmaps.agp > Jatropha_chr_lg.svg

4/26

Mungbean pacbio assembly

mungbean super scaffold (JM-2.fasta) was gap filled. Final assembly Fasta is in /kev8305/SK3/anchoring/gapfilled_assembly_final/

5/1

Mungbean pacbio assembly

Repeat masking progress is based on these sites: http://weatherby.genetics.utah.edu/MAKER/wiki/index.php/Repeat_Library_Construction-Advanced, http://www.repeatmasker.org/


Repeat masking program installation


Repbase - for RepeatMasker

(63:/data/skyts0401/program/)
should register http://www.girinst.org/
download RepBaseRepeatMaskerEdition
cp RepBaseRepeatMaskerEdition-20170127\ \(1\).tar.gz RepeatMasker/.
cd RepeatMasker/
tar -xvzf RepBaseRepeatMaskerEdition-20170127\ \(1\).tar.gz
(the Libraries/ directory will be created and all files will be copied to RepeatMasker/Libraries/)

rmblast - for RepeatMasker (ver 2.6.0 has problem with install, so I installed v. 2.2.28)

(63:/data/skyts0401/program/)
download from http://www.repeatmasker.org/RMBlast.html
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/rmblast/2.2.28/ncbi-rmblastn-2.2.28-x64-linux.tar.gz
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.28/ncbi-blast-2.2.28+-x64-linux.tar.gz
tar zxvf ncbi-blast-2.2.28+-x64-linux.tar.gz 
tar zxvf ncbi-rmblastn-2.2.28-x64-linux.tar.gz 
cp -R ncbi-rmblastn-2.2.28/* ncbi-blast-2.2.28+/
rm -rf ncbi-rmblastn-2.2.28
mv ncbi-blast-2.2.28+ rmblast-2.2.28

trf - for RepeatMasker

(63:/data/skyts0401/program/)
download from http://tandem.bu.edu/trf/trf.html
chmod a+x trf409.linux64
ln -s trf409.linux64 RepeatMasker/trf

RepeatMasker

(63:/data/skyts0401/program/)
wget http://www.repeatmasker.org/RepeatMasker-open-4-0-7.tar.gz
tar -xzf RepeatMasker-open-4-0-7.tar.gz
cd RepeatMasker/
(move the Repbase library to RepeatMasker/Libraries/)
perl ./configure
configure directory of trf, rmblast

muscle - for MITE-Hunter

(63:/data/skyts0401/program/)
check version on http://www.drive5.com/muscle/
wget http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86linux64.tar.gz
tar -xvzf muscle3.8.31_i86linux64.tar.gz
mkdir muscle
mv muscle3.8.31_i86linux64 muscle/

mdust - for MITE-Hunter

(63:/data/skyts0401/program/)
wget ftp://occams.dfci.harvard.edu/pub/bio/tgi/software//seqclean/mdust.tar.gz
tar -xvzf mdust.tar.gz

MITE-Hunter

(63:/data/skyts0401/program/)
check version on http://target.iplantcollaborative.org/mite_hunter.html
wget http://target.iplantcollaborative.org/mite_hunter/MITE%20Hunter-11-2011.zip
unzip MITE\ Hunter-11-2011.zip
mv MITE\ Hunter/ MITE_Hunter
cd MITE_Hunter/
perl MITE_Hunter_Installer.pl -d /data/skyts0401/program/MITE_Hunter -f formatdb -b blastall -m /data/skyts0401/program/mdust -M /data/skyts0401/program/muscle

GenomeTools

(63:/data/skyts0401/program/)
check version on http://genometools.org/
wget http://genometools.org/pub/genometools-1.5.9.tar.gz
tar -xvzf genometools-1.5.9.tar.gz
cd genometools-1.5.9/
make
sudo make install
- if have a problem with dependency, please check this -
sudo apt-get install libcairo2-dev
sudo apt-get install libpango1.0-dev

Genome tRNA database

(63:/home/skyts0401/bin/)
check version on http://gtrnadb.ucsc.edu
wget http://gtrnadb2009.ucsc.edu/download/tRNAs/eukaryotic-tRNAs.fa.gz
gunzip eukaryotic-tRNAs.fa.gz

CRL scripts

(63:/home/skyts0401/bin/)
wget http://www.hrt.msu.edu/uploads/535/78637/CRL_Scripts1.0.tar.gz
tar -xvzf CRL_Scripts1.0.tar.gz

transposons protein database

(63:/home/skyts0401/bin/)
wget http://www.hrt.msu.edu/uploads/535/78637/Tpases020812DNA.gz
gunzip Tpases020812DNA.gz
wget http://www.hrt.msu.edu/uploads/535/78637/Tpases020812.gz
gunzip Tpases020812.gz

plant protein database

(63:/home/skyts0401/bin/)
wget http://www.hrt.msu.edu/uploads/535/78637/alluniRefprexp070416.gz
gunzip alluniRefprexp070416.gz

RECON - for RepeatModeler

(63:/data/skyts0401/program/)
wget http://www.repeatmasker.org/RepeatModeler/RECON-1.08.tar.gz
tar -xvzf RECON-1.08.tar.gz
cd RECON-1.08/src/
make
make install
cd ../scripts/
nano recon.pl (added /data/skyts0401/program/RECON-1.08/bin to PATH = "" (third line))

RepeatScout - for RepeatModeler

(63:/data/skyts0401/program/)
wget http://www.repeatmasker.org/RepeatScout-1.0.5.tar.gz
tar -xvzf RepeatScout-1.0.5.tar.gz
cd RepeatScout-1/
make
sudo make install

nseg - for RepeatModeler

(63:/data/skyts0401/program/)
mkdir nseg
cd nseg
wget ftp://ftp.ncbi.nih.gov/pub/seg/nseg/* .
make

RepeatModeler

(63:/data/skyts0401/program/)
wget http://www.repeatmasker.org/RepeatModeler/RepeatModeler-open-1.0.9.tar.gz
tar -xvzf RepeatModeler-open-1.0.9.tar.gz 
cd RepeatModeler-open-1.0.9/
perl ./configure
configure directory of RECON, RepeatScout, nseg, trf, rmblast

hmmer - for ProtExcluder

(63:/data/skyts0401/program/)
wget http://eddylab.org/software/hmmer3/3.1b2/hmmer-3.1b2-linux-intel-x86_64.tar.gz
tar -xvzf hmmer-3.1b2-linux-intel-x86_64.tar.gz 
cd hmmer-3.1b2-linux-intel-x86_64/
./configure 
make
sudo make install

ProtExcluder

wget http://www.hrt.msu.edu/uploads/535/78637/ProtExcluder1.2.tar.gz
tar -xvzf ProtExcluder1.2.tar.gz 
cd ProtExcluder1.2/
./Installer.pl -m /data/skyts0401/program/hmmer-3.1b2-linux-intel-x86_64/binaries/ -p /data/skyts0401/program/ProtExcluder1.2/


Repeat masking progress

Basic command which I used is based on http://weatherby.genetics.utah.edu/MAKER/wiki/index.php/Repeat_Library_Construction-Advanced


Move Mungbean genome assembly final version

scp skyts0401@147.46.250.244:/kev8305/SK3/anchoring/gapfilled_assembly_final/standard_output.gapfilled.final.fa .

MITE library

(63:/data/skyts0401/program/MITE_Hunter/)
perl MITE_Hunter_manager.pl -i /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa -g Mungbean -c 10 -S 12345678
mv Mungbean* /data/skyts0401/Mungbean/repeatmask/MITE/.
cd /data/skyts0401/Mungbean/repeatmask/MITE/
cat Mungbean_Step8_*.fa > ../MITE.lib

LTR library

(63:/data/skyts0401/Mungbean/repeatmask/LTR/99/)
ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
gt suffixerator -db standard_output.gapfilled.final.fa -indexname Mungbean_LTR -tis -suf -lcp -des -ssp -dna
gt ltrharvest -index Mungbean_LTR -out Mungbean.out99 -outinner Mungbean.outinner99 -gff3 Mungbean.gff99 -minlenltr 100 -maxlenltr 6000 -mindistltr 1500 -maxdistltr 25000 -mintsd 5 -maxtsd 5 -motif tgca -similar 99 -vic 10 > Mungbean.result99
gt gff3 -sort Mungbean.gff99 > Mungbean.gff99.sort
gt ltrdigest -trnas ~/bin/eukaryotic-tRNAs.fa Mungbean.gff99.sort Mungbean_LTR > Mungbean.gff99.dgt
perl ~/bin/CRL_Scripts1.0/CRL_Step1.pl --gff Mungbean.gff99.dgt 
perl ~/bin/CRL_Scripts1.0/CRL_Step2.pl --step1 CRL_Step1_Passed_Elements.txt --repeatfile Mungbean.out99 --resultfile Mungbean.result99 --sequencefile standard_output.gapfilled.final.fa --removed_repeats CRL_Step2_Passed_Elements.fasta
mkdir fasta_files
mv Repeat_*.fasta fasta_files/
mv CRL_Step2_Passed_Elements.fasta fasta_files/
cd fasta_files/
perl ~/bin/CRL_Scripts1.0/CRL_Step3.pl --directory . --step2 CRL_Step2_Passed_Elements.fasta --pidentity 60 --seq_c 25
mv CRL_Step3_Passed_Elements.fasta ..
cd ..
perl ~/bin/CRL_Scripts1.0/ltr_library.pl --resultfile Mungbean.result99 --step3 CRL_Step3_Passed_Elements.fasta --sequencefile standard_output.gapfilled.final.fa 
cp lLTR_Only.lib ../lLTR_Only_99.lib
cat lLTR_Only.lib ../../MITE/MITE.lib > repeats_to_mask_LTR99.fasta
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib repeats_to_mask_LTR99.fasta -nolow -dir . Mungbean.outinner99
perl ~/bin/CRL_Scripts1.0/cleanRM.pl Mungbean.outinner99.out Mungbean.outinner99.masked > Mungbean.outinner99.unmasked
perl ~/bin/CRL_Scripts1.0/rmshortinner.pl Mungbean.outinner99.unmasked 50 > Mungbean.outinner99.clean
makeblastdb -in ~/bin/Tpases020812DNA -dbtype prot
blastx -query Mungbean.outinner99.clean -db ~/bin/Tpases020812DNA -evalue 1e-10 -num_descriptions 10 -out Mungbean.outinner99.clean_blastx.out.txt
perl ~/bin/CRL_Scripts1.0/outinner_blastx_parse.pl --blastx Mungbean.outinner99.clean_blastx.out.txt --outinner Mungbean.outinner99
perl ~/bin/CRL_Scripts1.0/CRL_Step4.pl --step3 CRL_Step3_Passed_Elements.fasta --resultfile Mungbean.result99 --innerfile passed_outinner_sequence.fasta --sequencefile standard_output.gapfilled.final.fa 
makeblastdb -in lLTRs_Seq_For_BLAST.fasta -dbtype nucl
blastn -query lLTRs_Seq_For_BLAST.fasta -db lLTRs_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out lLTRs_Seq_For_BLAST.fasta.out
makeblastdb -in Inner_Seq_For_BLAST.fasta -dbtype nucl
blastn -query Inner_Seq_For_BLAST.fasta -db Inner_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out Inner_Seq_For_BLAST.fasta.out
perl ~/bin/CRL_Scripts1.0/CRL_Step5.pl --LTR_blast lLTRs_Seq_For_BLAST.fasta.out --inner_blast Inner_Seq_For_BLAST.fasta.out --step3 CRL_Step3_Passed_Elements.fasta --final LTR99.lib --pcoverage 90 --pidentity 80

relatively old LTR (Same command with above one, but for relatively old LTR)

(63:/data/skyts0401/Mungbean/repeatmask/LTR/85)
(to avoid confusing the LTR_99 results with these, make separate 99 and 85 directories in the LTR directory)
ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
gt suffixerator -db standard_output.gapfilled.final.fa -indexname Mungbean_LTR -tis -suf -lcp -des -ssp -dna
gt ltrharvest -index Mungbean_LTR -out Mungbean.out85 -outinner Mungbean.outinner85 -gff3 Mungbean.gff85 -minlenltr 100 -maxlenltr 6000 -mindistltr 1500 -maxdistltr 25000 -mintsd 5 -maxtsd 5 -vic 10  > Mungbean.result85
cp ../99/CRL_Step1_Passed_Elements.txt .
perl ~/bin/CRL_Scripts1.0/CRL_Step2.pl --step1 CRL_Step1_Passed_Elements.txt --repeatfile Mungbean.out85 --resultfile Mungbean.result85 --sequencefile standard_output.gapfilled.final.fa --removed_repeats CRL_Step2_Passed_Elements.fasta
mkdir fasta_files
mv Repeat_*.fasta fasta_files/
mv CRL_Step2_Passed_Elements.fasta fasta_files/
cd fasta_files/
perl ~/bin/CRL_Scripts1.0/CRL_Step3.pl --directory . --step2 CRL_Step2_Passed_Elements.fasta --pidentity 60 --seq_c 25
mv CRL_Step3_Passed_Elements.fasta ..
cd ..
perl ~/bin/CRL_Scripts1.0/ltr_library.pl --resultfile Mungbean.result85 --step3 CRL_Step3_Passed_Elements.fasta --sequencefile standard_output.gapfilled.final.fa 
cp lLTR_Only.lib ../lLTR_Only_85.lib
cat lLTR_Only.lib ../../MITE/MITE.lib > repeats_to_mask_LTR85.fasta
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib repeats_to_mask_LTR85.fasta -nolow -dir . Mungbean.outinner85
perl ~/bin/CRL_Scripts1.0/cleanRM.pl Mungbean.outinner85.out Mungbean.outinner85.masked > Mungbean.outinner85.unmasked
perl ~/bin/CRL_Scripts1.0/rmshortinner.pl Mungbean.outinner85.unmasked 50 > Mungbean.outinner85.clean
makeblastdb -in ~/bin/Tpases020812DNA -dbtype prot
blastx -query Mungbean.outinner85.clean -db ~/bin/Tpases020812DNA -evalue 1e-10 -num_descriptions 10 -out Mungbean.outinner85.clean_blastx.out.txt
perl ~/bin/CRL_Scripts1.0/outinner_blastx_parse.pl --blastx Mungbean.outinner85.clean_blastx.out.txt --outinner Mungbean.outinner85
perl ~/bin/CRL_Scripts1.0/CRL_Step4.pl --step3 CRL_Step3_Passed_Elements.fasta --resultfile Mungbean.result85 --innerfile passed_outinner_sequence.fasta --sequencefile standard_output.gapfilled.final.fa 
makeblastdb -in lLTRs_Seq_For_BLAST.fasta -dbtype nucl
blastn -query lLTRs_Seq_For_BLAST.fasta -db lLTRs_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out lLTRs_Seq_For_BLAST.fasta.out
makeblastdb -in Inner_Seq_For_BLAST.fasta -dbtype nucl
blastn -query Inner_Seq_For_BLAST.fasta -db Inner_Seq_For_BLAST.fasta -evalue 1e-10 -num_descriptions 1000 -out Inner_Seq_For_BLAST.fasta.out
perl ~/bin/CRL_Scripts1.0/CRL_Step5.pl --LTR_blast lLTRs_Seq_For_BLAST.fasta.out --inner_blast Inner_Seq_For_BLAST.fasta.out --step3 CRL_Step3_Passed_Elements.fasta --final LTR85.lib --pcoverage 90 --pidentity 80
/data/skyts0401/program/RepeatMasker/RepeatMasker -lib ../99/LTR99.lib -dir . LTR85.lib
perl ~/bin/CRL_Scripts1.0/remove_masked_sequence.pl --masked_elements LTR85.lib.masked --outfile FinalLTR85.lib
cd ..
cat 99/LTR99.lib 85/FinalLTR85.lib > allLTR.lib

Collecting repetitive sequences

ln -s /data/skyts0401/Mungbean/assembly/standard_output.gapfilled.final.fa .
nano fasta_devide.py
python fasta_devide.py standard_output.gapfilled.final.fa
nano repeatmask_combine.sh
chmod a+x repeatmask_combine.sh 
./repeatmask_combine.sh 
cat standard_output.gapfilled.final_devide*.fa.masked > standard_output.gapfilled.final.fa.masked
perl ~/bin/CRL_Scripts1.0/rmaskedpart.pl standard_output.gapfilled.final.fa.masked 50 > umseqfile
/data/skyts0401/program/RepeatModeler-open-1.0.9/BuildDatabase -name umseqfiledb -engine ncbi umseqfile
nohup /data/skyts0401/program/RepeatModeler-open-1.0.9/RepeatModeler -database umseqfiledb >& umseqfile.out
perl ~/bin/CRL_Scripts1.0/repeatmodeler_parse.pl --fastafile consensi.fa.classified --unknowns repeatmodeler_unknowns.fasta --identities repeatmodeler_identities.fasta
makeblastdb -in ~/bin/Tpases020812 -dbtype prot
blastx -query repeatmodeler_unknowns.fasta -db ~/bin/Tpases020812 -evalue 1e-10 -num_descriptions 10 -out modelerunknown_blast_result.txt
~/bin/CRL_Scripts1.0/transposon_blast_parse.pl --blastx modelerunknown_blast_result.txt --modelerunknown repeatmodeler_unknowns.fasta
mv unknown_elements.txt ModelerUnknown.lib
cat identified_elements.txt repeatmodeler_identities.fasta > ModelerID.lib

Exclusion of gene fragments

makeblastdb -in ~/bin/alluniRefprexp070416 -dbtype prot
blastx -query ModelerUnknown.lib -db ~/bin/alluniRefprexp070416 -evalue 1e-10 -num_descriptions 10 -out ModelerUnknown.lib_blast_result.txt
cd LTR/
python headerforamt.py allLTR.lib > allLTR.lib.reformed (LTR library has '(' symbol, resulting in ProtExcluder error, so change the format)
cd ..
mkdir ProtExclude
cd ProtExclude/
cp ../MITE/MITE.lib .
cp ../LTR/allLTR.lib.reformed .
cp ../ModelerID.lib .
cp ../ModelerUnknown.lib .
cat allLTR.lib.reformed MITE.lib ModelerID.lib > KnownRepeats.lib
cat KnownRepeats.lib ModelerUnknown.lib > allRepeats.lib
blastx -query allRepeats.lib -db ~/bin/alluniRefprexp070416 -evalue 1e-10 -num_descriptions 10 -out allRepeats.lib_blast_results.txt
/data/skyts0401/program/ProtExcluder1.2/ProtExcluder.pl allRepeats.lib_blast_results.txt allRepeats.lib

5/26

Mungbean pacbio assembly

For assessment of assembly, run CEGMA and BUSCO


Install

CEGMA

(63:/data/skyts0401/program/)
sudo apt-get install wise (dependency)
wget ftp://genome.crg.es/pub/software/geneid/geneid_v1.4.4.Jan_13_2011.tar.gz (dependency)
tar -xvzf geneid_v1.4.4.Jan_13_2011.tar.gz
cd geneid
make
make install
nano ~/.profile (add $PATH:/data/skyts0401/program/geneid/bin)
cd ..
git clone https://github.com/KorfLab/CEGMA_v2.git
cd CEGMA_v2/
make


BUSCO

(63:/data/skyts0401/program/)
wget http://bioinf.uni-greifswald.de/augustus/binaries/augustus-3.2.3.tar.gz (dependency)
tar -xvzf augustus-3.2.3.tar.gz 
cd augustus-3.2.3/
make (dependency error)
sudo apt-get install bamtools libbamtools-dev
make
sudo make install
cd ..
git clone https://gitlab.com/ezlab/busco.git
cd busco
sudo python setup.py install
cp config/config.ini.default config/config.ini
nano config/config.ini (change the augustus path (path = /data/skyts0401/program/augustus-3.2.3/scripts/, bin/))


Running

CEGMA

(63:/data/skyts0401/Mungbean/cegma/)
export CEGMA="/data/skyts0401/program/CEGMA_v2"
export PERL5LIB="$PERL5LIB:$CEGMA/lib"
export PERL5LIB=$CEGMA/lib:$PERL5LIB
source ~/.profile 
/data/skyts0401/program/CEGMA_v2/bin/cegma --genome standard_output.gapfilled.final.fa -threads 5


BUSCO

(63:/data/skyts0401/Mungbean/busco/)
wget http://busco.ezlab.org/datasets/eukaryota_odb9.tar.gz (dataset)
wget http://busco.ezlab.org/datasets/embryophyta_odb9.tar.gz (dataset)
ln -s ../assembly/standard_output.gapfilled.final.fa .
export AUGUSTUS_CONFIG_PATH="/data/skyts0401/program/augustus-3.2.3/config/"
python /data/skyts0401/program/busco/scripts/run_BUSCO.py -i standard_output.gapfilled.final.fa -o Mungbean_busco -c 20 -l eukaryota_odb9/ -m geno
python /data/skyts0401/program/busco/scripts/run_BUSCO.py -i standard_output.gapfilled.final.fa -o Mungbean_plant_busco -c 20 -l embryophyta_odb9/ -m geno

5/29

Mungbean pacbio assembly

Maker


Install

ncbi-blast+

(63:/data/skyts0401/program/)
wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.6.0/ncbi-blast-2.6.0+-x64-linux.tar.gz
tar -xvzf ncbi-blast-2.6.0+-x64-linux.tar.gz


exonerate

(63:/data/skyts0401/program/)
git clone https://github.com/nathanweeks/exonerate.git
cd exonerate/
git checkout v2.4.0
autoreconf -i
./configure
make
sudo make install


Maker

(63:/data/skyts0401/program/)
download from http://www.yandell-lab.org/software/maker.html
cd maker/
cd src/
nano ~/.profile (add the RepeatMasker directory to $PATH)
source ~/.profile
perl Build.PL
./Build install


Running

Preparation

(63:/data/skyts0401/Mungbean/maker/)
(Add PATH(/data/skyts0401/program/maker/bin) to ~/.profile)
ln -s ../assembly/Vradi.pacbio.gapfilled.final.fa .
mkdir ../transcriptome
cd ../transcriptome/
scp skyts0401@147.46.250.244:/data/KangYJ/Mungbean/Transcriptome/merge/mungbean_merge.fa.cdhit.fa .
cd ../maker/
ln -s ../transcriptome/mungbean_merge.fa.cdhit.fa .
mkdir ref
cd ref/
(download Fvesca annotation file from phytozome)
unzip Fvesca_download.zip 
cd Fvesca/v1.1/annotation/
gunzip Fvesca_226_v1.1.protein.fa.gz 
gunzip Fvesca_226_v1.1.transcript.fa.gz 
cd ../../..
cp Fvesca/v1.1/annotation/Fvesca_226_v1.1.protein.fa .
cp Fvesca/v1.1/annotation/Fvesca_226_v1.1.transcript.fa .
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Athaliana_167_TAIR10*.fa .
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Gmax*.fa .
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Ptrichocarpa*.fa .
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Vvinifera*.fa .
scp skyts0401@147.46.250.244:/alima9002/ref/forJat/Osativa*.fa .
cd ..
ln -s ../repeatmask/ProtExclude/allRepeats.libnoProtFinal
mkdir tmp


Running

(63:/data/skyts0401/Mungbean/maker/)
maker -CTL
nano maker_bopts.ctl (default, check blast_type=ncbi+)
nano maker_exe.ctl (change the path ncbi-blast+, RepeatMasker, exonerate, augustus)
nano maker_opts.ctl (change the path genome, evidence(transcriptome, protein), repeat library, temporary directory)
mpiexec -n 30 maker -fix_nucleotides maker_opts.ctl maker_bopts.ctl maker_exe.ctl >& maker_opts.ctl.log

6/26

Mungbean pacbio assembly

checking synteny block for chromosome split, combine


blast

(NICEM:~/data/Mungbean/blast)
makeblastdb -in Vradi.ver6.cor.pep.fa -dbtype 'prot'
blastall -i adzuki.ver3.pep.fa.tr.cor.fa -d Vradi.ver6.cor.pep.fa -p blastp -e 1e-10 -b 5 -v 5 -m 8 -o mcscanx/old_Va.blast
# same procedure for other organism protein


MCScanX

(NICEM:~/data/Mungbean/blast/mcscanx)
python gffcombine.py Vradi_ver6.gff.sorted.by.TY.gff adzuki.ver3.gene.gff.cor.gff > old_Va.gff
~/data/program/MCScanX/MCScanX old_Va
# same procedure for other organism protein, just change the species name in gffcombine.py and command


Circos

(193:/data2/skyts0401/Mungbean/synteny/circos/)
/data2/skyts0401/program/circos-0.69-4/bin/circos -conf synteny_Va_gene.conf -outputfile synteny_Va_gene.png
# same procedure for other organism, change configuration file