Difference between revisions of "2017 Jungmin Lab note"
(→1.11) |
(→3.31) |
||
(48 intermediate revisions by one user not shown) | |||
Line 1: | Line 1: | ||
+ | =Jatropha= | ||
== 1.9 == | == 1.9 == | ||
− | + | ===Illumina PE trimming=== | |
− | + | ||
− | + | ||
nohup ./IlluQC.pl -pe /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_1.fastq /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_2.fastq 2 A -p 8 & | nohup ./IlluQC.pl -pe /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_1.fastq /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_2.fastq 2 A -p 8 & | ||
− | + | ===Gapfiller=== | |
− | + | ||
https://www.baseclear.com/base/download/39GapFiller_v1-10_linux-x86_64.tar.gz | https://www.baseclear.com/base/download/39GapFiller_v1-10_linux-x86_64.tar.gz | ||
Line 16: | Line 14: | ||
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s /home/jungminh/jatropha/sspace/standard_output/standard_output.final.scaffolds.fasta -T 8 & | nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s /home/jungminh/jatropha/sspace/standard_output/standard_output.final.scaffolds.fasta -T 8 & | ||
+ | |||
+ | |||
+ | |||
== 1.10 == | == 1.10 == | ||
− | scaffold anchoring results | + | ===scaffold anchoring=== |
+ | results | ||
− | cat standard_output.final.scaffolds.fasta.tr.JM_out.fa standard_output.final.scaffolds.fasta.tr.JM_out.fa.unanchored.fa > | + | cat standard_output.final.scaffolds.fasta.tr.JM_out.fa standard_output.final.scaffolds.fasta.tr.JM_out.fa.unanchored.fa > |
/data/Jatropha.a2/JM_newscaf/superscaffold.scaffold.fasta | /data/Jatropha.a2/JM_newscaf/superscaffold.scaffold.fasta | ||
Line 27: | Line 29: | ||
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 24 & | nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 24 & | ||
− | |||
== 1.11 == | == 1.11 == | ||
− | Gap filling on 63 didn't work. try again on 244 | + | ===Gap filling=== |
+ | on 63 didn't work. try again on 244 | ||
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 8 & | nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 8 & | ||
+ | |||
+ | |||
+ | == 1.13 == | ||
+ | ===jatropha final assembly. === | ||
+ | contig scaffold anchoring gapfilling | ||
+ | /home/jungminh/jatropha/sspace/standard_output/gapfiller/superscaffold_gapfilled/standard_output/superscaffold.gapfilled.final.fa | ||
+ | |||
+ | |||
+ | ==3.28== | ||
+ | scaffolds (/home/jungminh/jatropha/final/standard_output.final.scaffolds.fasta) were anchored using ALLMAPs by 하늘 /kev8305/skyts0401/Jatropha/Jatropha.allmaps.fasta | ||
+ | |||
+ | Go gapfilling! | ||
+ | |||
+ | ===GAPfiller=== | ||
+ | @244 | ||
+ | |||
+ | /kev8305/skyts0401/Jatropha/gapfiller/sudo nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s ../Jatropha.allmaps.fasta -T 10 & | ||
+ | |||
+ | |||
+ | ==3.31== | ||
+ | ===Gapfiller=== | ||
+ | stopped in the middle of the process. | ||
+ | |||
+ | mv the new superscaffold to /home/jungminh/jatropha/sspace/standard_output/gapfiller/superscaffold_allmaps_gapfilled | ||
+ | and run again | ||
+ | |||
+ | |||
+ | ==6.7== | ||
+ | ===RNA raw reads mapping=== | ||
+ | build index for Jcu_new_first.all.maker.transcripts.fasta | ||
+ | 244:/home/jungminh/jatropha/RNArawreadMapping/bowtie2-build -f Jcu_new_first.all.maker.transcripts.fasta Jcu_new_first.all.maker.transcripts | ||
+ | |||
+ | nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/Stem_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/Stem_2.fastq.gz --very-fast -S J.c.stem & | ||
+ | nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/1_2_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/1_2_2.fastq.gz --very-fast -S J.c.leaf.out & | ||
+ | nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/male2_5_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/male2_5_2.fastq.gz --very-fast -S J.c.mf2_5.out & | ||
+ | nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/Root_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/Root_2.fastq.gz --very-fast -S J.c.root.out & | ||
+ | |||
+ | =jatropha chloroplast= | ||
+ | paste - - - - < /data/JATROPHA/Jatropha/Jatropha.trimmedReads.fastq | cut -f 1,2 | sed 's/^@/>/' | tr "\t" "\n" > Jatropha.trimmedReads.fasta | ||
+ | awk 'BEGIN {n_seq=0;} /^>/ {if(n_seq%100000==0){file=sprintf("myseq%d.fa",n_seq);} print >> file; n_seq++; next;} { print >> file; }' < Jatropha.trimmedReads.fasta | ||
+ | ==deconseq filtering== | ||
+ | @63 | ||
+ | nohup ~/bin/deconseq-standalone-0.4.3/deconseq.pl -f /data/JATROPHA/Jatropha/Jatropha.consensus.fasta -dbs chloro -i 90 -c 90 & | ||
+ | output 1487640568 | ||
+ | |||
+ | nohup ~/bin/deconseq-standalone-0.4.3/deconseq.pl -f /data/jungminh/jatropha/contig_primary.fa -dbs chloro -i 90 -c 90 & | ||
+ | output 1487641000 | ||
+ | |||
+ | ==pb only assembly== | ||
+ | for contigFilter option, version is critical. | ||
+ | depth 10 | ||
+ | nohup /home/skyts0401/canu/Linux-amd64/bin/canu -assemble -d /data/jungminh/jatropha/cp/canu_pb_only/ -p pb_only genomeSize=151000 contigFilter="10 1000 0.75 0.75 2" -pacbio-corrected ./pb_cp.fasta & | ||
+ | depth 5 | ||
+ | nohup /home/skyts0401/canu/Linux-amd64/bin/canu -assemble -d /data/jungminh/jatropha/cp/canu_pb_only_5/ -p pb_only genomeSize=151000 contigFilter="5 1000 0.75 0.75 2" -pacbio-corrected ./pb_cp.fasta & | ||
+ | |||
+ | =L.indica= | ||
+ | == 1.16 == | ||
+ | ===L.indica SSR analysis from TY=== | ||
+ | /alima9002/LIndica/L.Trinity.fasta.cdhit.statistics | ||
+ | |||
+ | = vigna = | ||
+ | chloroplast total length 151271 bp | ||
+ | LSC 1..80898 | ||
+ | IRa 80899..107379 | ||
+ | SSC 107380..124791 | ||
+ | IRb 124792..151271 | ||
+ | |||
+ | |||
+ | == 1.19 == | ||
+ | ===cp database download=== | ||
+ | download chloroplast db from ncbi. | ||
+ | list.txt | ||
+ | NC_005086.1 | ||
+ | NC_000932.1 | ||
+ | NC_015139.1 | ||
+ | NC_027693.1 | ||
+ | NC_010323.1 | ||
+ | NC_032008.1 | ||
+ | NC_008334.1 | ||
+ | NC_015983.1 | ||
+ | NC_007942.1 | ||
+ | NC_014570.1 | ||
+ | NC_018766.1 | ||
+ | NC_002694.1 | ||
+ | NC_010433.1 | ||
+ | NC_003119.6 | ||
+ | NC_031333.1 | ||
+ | NC_009259.1 | ||
+ | NC_009143.1 | ||
+ | NC_014697.1 | ||
+ | NC_016736.1 | ||
+ | NC_007898.3 | ||
+ | NC_008096.2 | ||
+ | NC_014676.2 | ||
+ | NC_021091.1 | ||
+ | NC_013843.1 | ||
+ | NC_018051.1 | ||
+ | NC_007957.1 | ||
+ | NC_001666.2 | ||
+ | |||
+ | ===deconseq=== | ||
+ | ~/bin/deconseq-standalone-0.4.3/bwa64 index ~/db/cp/db.fasta -p db | ||
+ | |||
+ | nohup ./deconseq.pl -f ~/cp/Sunhwa.trimmedReads.fasta -dbs chloro -i 90 -c 90 & | ||
+ | |||
+ | DeconSeqConfig.pm | ||
+ | use constant DB_DIR => '/home/jungminh/db/cp/'; | ||
+ | use constant TMP_DIR => '/home/jungminh/db/cp/tmp/'; | ||
+ | use constant OUTPUT_DIR => '/home/jungminh/cp/trimmedPB/'; | ||
+ | |||
+ | use constant PROG_NAME => 'bwa64'; # should be either bwa64 or bwaMAC (based on your system architecture) | ||
+ | use constant PROG_DIR => './'; # should be the location of the PROG_NAME file (use './' if in the same location at the perl script) | ||
+ | |||
+ | use constant DBS => {chloro => {name => 'plant cp', #database name used for display and used as input for -dbs and -dbs_retain | ||
+ | |||
+ | db => 'db'}, #database name as defined with -p for "bwa index -p ..." (specify multiple database chunks separated with commas without space; e.g. hs_ref_s1,hs_ref_s2,hs_ref_s3) | ||
+ | # bact => {name => 'Bacterial genomes', | ||
+ | # db => 'bactDB'}, | ||
+ | # vir => {name => 'Viral genomes', | ||
+ | # db => 'virDB'} | ||
+ | }; | ||
+ | use constant DB_DEFAULT => 'chloro'; | ||
+ | |||
+ | |||
+ | |||
+ | Deconseq filtering | ||
+ | 63:/data/jungminh/mungbean | ||
+ | pb.cp.fasta error corrected PB by canu | ||
+ | canu_ctg_cp.fasta canu ctg assembled from error corrected PB | ||
+ | ctg_cp.fasta falcon ctg assembled from error corrected PB | ||
+ | scaf_cp.fasta sspace scaf assembled from falcon ctg | ||
+ | |||
+ | 63:/data/jungminh/mungbean/PE | ||
+ | SunhwaN_1_cont.fq.pairing.fq | ||
+ | SunhwaN_2_cont.fq.pairing.fq | ||
+ | |||
+ | ==1.23== | ||
+ | PB deconseq had error. | ||
+ | ERROR: system call "./bwa64 bwasw -A -f /home/jungminh/bin/deconseq-standalone-0.4.3/1484807078_chloro_db.tsv /home/jungminh/db/cp/db /data2/jungminh/cp/pb/Sunhwa.trimmedReads.fasta" failed: 9. | ||
+ | maybe storage problem??? | ||
+ | |||
+ | run again. | ||
+ | |||
+ | |||
+ | ==1.24== | ||
+ | ===bowtie2 mapping=== | ||
+ | at 244 | ||
+ | |||
+ | nohup bowtie2 -x ~/db/v.radiata -f scaf_cp.fasta --end-to-end --very-fast -p 4 -S scaf_cp.SAM & | ||
+ | |||
+ | 198 reads; of these: | ||
+ | 198 (100.00%) were unpaired; of these: | ||
+ | 0 (0.00%) aligned 0 times | ||
+ | 140 (70.71%) aligned exactly 1 time | ||
+ | 58 (29.29%) aligned >1 times | ||
+ | 100.00% overall alignment rate | ||
+ | |||
+ | 546 reads; of these: | ||
+ | 546 (100.00%) were unpaired; of these: | ||
+ | 0 (0.00%) aligned 0 times | ||
+ | 391 (71.61%) aligned exactly 1 time | ||
+ | 155 (28.39%) aligned >1 times | ||
+ | 100.00% overall alignment rate | ||
+ | |||
+ | 6035 reads; of these: | ||
+ | 6035 (100.00%) were unpaired; of these: | ||
+ | 0 (0.00%) aligned 0 times | ||
+ | 3583 (59.37%) aligned exactly 1 time | ||
+ | 2452 (40.63%) aligned >1 times | ||
+ | 100.00% overall alignment rate | ||
+ | |||
+ | nohup bowtie2 -x ~/db/v.radiata -f canu_ctg_cp.fasta --end-to-end --very-fast -p 6 -S canu_ctg_cp.SAM & | ||
+ | |||
+ | nohup bowtie2 -x ~/db/v.radiata -f ctg_cp.fasta --end-to-end --very-fast -p 2 -S ctg_cp.SAM & | ||
+ | samtools view -bS ctg_cp.SAM > ctg_cp.BAM | ||
+ | samtools sort ctg_cp.BAM -o ctg_cp_sort | ||
+ | samtools index ctg_cp_sort ctg_cp_sort.index | ||
+ | |||
+ | ===deconseq PB=== | ||
+ | split error-corrected reads | ||
+ | |||
+ | awk 'BEGIN {n_seq=0;} /^>/ {if(n_seq%1000==0){file=sprintf("myseq%d.fa",n_seq);} print >> file; n_seq++; next;} { print >> file; }' < Sunhwa.trimmedReads.fasta | ||
+ | |||
+ | nohup ./deconseq.pl -f /data2/jungminh/cp/pb/myseq1700000.fa -dbs chloro -i 90 -c 90 & | ||
+ | |||
+ | myseq0-1700000.fa | ||
+ | |||
+ | |||
+ | ==1.25== | ||
+ | ===deconseq PB=== | ||
+ | |||
+ | nohup ./deconseq.pl -f /data2/jungminh/cp/pb/myseq1700000.fa -dbs chloro -i 90 -c 90 & | ||
+ | |||
+ | myseq1800000-900000 | ||
+ | |||
+ | ===filter by length=== | ||
+ | |||
+ | python \[fasta\]filterbylen.py canu_ctg_cp.fasta 20000 > canu_ctg_cp_20k.fasta | ||
+ | |||
+ | |||
+ | ===pb read mapping against mungbean cp=== | ||
+ | nohup bowtie2 -x /home/jungminh/db/cp/v.radiata -f pb.cp.fasta --end-to-end --very-fast -p 6 -S pb_cp.sam & | ||
+ | |||
+ | 31462 reads; of these: | ||
+ | 31462 (100.00%) were unpaired; of these: | ||
+ | 1 (0.00%) aligned 0 times | ||
+ | 22594 (71.81%) aligned exactly 1 time | ||
+ | 8867 (28.18%) aligned >1 times | ||
+ | 100.00% overall alignment rate | ||
+ | |||
+ | ==1.26== | ||
+ | ===PB only assembly by CANU=== | ||
+ | /data/asseblyProgram/canu-1.0/Linux-amd64/bin/canu -assemble -d /data/jungminh/mungbean/assembly/ -p pb_only genomeSize=151271 -pacbio-corrected ../pb.cp.fasta | ||
+ | |||
+ | |||
+ | Sunhwa PE deconseq | ||
+ | 244:/hayasen/Workspace/Vigna/Chloro/split/ | ||
+ | pairing filter by IlluQC | ||
+ | IlluQC -pe A B 2 A | ||
+ | |||
+ | ===canu_ctg only assembly by CANU=== | ||
+ | /data/skyts0401/Mungbean/chloroplast/assembly/Vr_cp/canu_ctg/Vr_cp_canu.contigs.fasta | ||
+ | |||
+ | ==1.31== | ||
+ | ===PB PE ctg assembly=== | ||
+ | @63 | ||
+ | /data/jungminh/mungbean/assembly/spades/PE_PB | ||
+ | nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --only-assembler -t 12 -o /data/jungminh/mungbean/assembly/spades/PE_PB & | ||
+ | |||
+ | /data/jungminh/mungbean/assembly/spades/PE_PB_ctg | ||
+ | nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --trusted-contigs /data/jungminh/mungbean/ctg_cp_20k.fasta --only-assembler -t 6 -o /data/jungminh/mungbean/assembly/spades/PE_PB_ctg & | ||
+ | |||
+ | /data/jungminh/mungbean/assembly/spades/PE_PB_canu_ctg | ||
+ | nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --trusted-contigs /data/jungminh/mungbean/canu_ctg_cp_20k.fasta --only-assembler -t 6 -o /data/jungminh/mungbean/assembly/spades/PE_PB_canu_ctg & | ||
+ | |||
+ | |||
+ | ==2.3== | ||
+ | ===PB canu_ctg assembly=== | ||
+ | @63 | ||
+ | nohup /data/asseblyProgram/canu-1.0/Linux-amd64/bin/canu -assemble -d /data/jungminh/mungbean/assembly/pb_plus_canu_ctg -p pb_plus_canu_ctg genomeSize=151271 -pacbio-corrected ../../pb_plus_canu_ctg_cp.fasta & | ||
+ | |||
+ | ==3.2== | ||
+ | ===Sunhwa PE mapping against new v.radiata cp=== | ||
+ | |||
+ | bwa index V.radiata_cp_PB_new.fasta | ||
+ | |||
+ | bwa mem -t 8 V.radiata_cp_PB_new.fasta /hayasen/Workspace/Chloro/SunhwaN_1_cont.fq.pairing.fq /hayasen/Workspace/Chloro/SunhwaN_2_cont.fq.pairing.fq > pe_bwa_new_cp.sam | ||
+ | |||
+ | samtools view -bS pe_bwa_new_cp.sam > pe_bwa_new_cp.bam | ||
+ | |||
+ | samtools sort pe_bwa_new_cp.bam -o pe_bwa_new_cp.sort.bam | ||
+ | |||
+ | samtools index pe_bwa_new_cp.sort.bam pe_bwa_new_cp.sort.bam.bai | ||
+ | |||
+ | |||
+ | ===Sunhwa PB mapping against new v.radiata cp=== | ||
+ | bwa mem -t 2 V.radiata_cp_PB_new.fasta /kev8305/Mungbean_assembly/chloroplast/pb.cp.fasta > pb_bwa_new_cp.sam | ||
+ | |||
+ | samtools view -bS pb_bwa_new_cp.sam > pb_bwa_new_cp.bam | ||
+ | |||
+ | samtools sort pb_bwa_new_cp.bam -o pb_bwa_new_cp.sort.bam | ||
+ | |||
+ | samtools index pb_bwa_new_cp.sort.bam pb_bwa_new_cp.sort.bam.bai | ||
+ | |||
+ | samtools faidx V.radiata_cp_PB_new.fasta | ||
+ | |||
+ | samtools tview pe_bwa_new_cp.sort.bam V.radiata_cp_PB_new.fasta | ||
+ | |||
+ | |||
+ | ==4.19== | ||
+ | ===gap filling super scaffold=== | ||
+ | super scaffold 244:/kev8305/SK3/anchoring/JM-2.fasta | ||
+ | PE 244:/NGS/NGS/VignaRadiata/DNA/SunhwaN_1.fastq.gz | ||
+ | /NGS/NGS/VignaRadiata/DNA/SunhwaN_2.fastq.gz | ||
+ | |||
+ | library file: /home/jungminh/mungbean/gapfilled_superscaffold/pe_lib.txt | ||
+ | lib1 bwa /NGS/NGS/VignaRadiata/DNA/SunhwaN_1.fastq.gz /NGS/NGS/VignaRadiata/DNA/SunhwaN_2.fastq.gz 200 0.2 FR | ||
+ | |||
+ | nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s /kev8305/SK3/anchoring/JM-2.fasta -T 8 & | ||
+ | |||
+ | =vigna TF= | ||
+ | ==database== | ||
+ | 5.10.2017 | ||
+ | download At Gm Vr TF database from http://planttfdb.cbi.pku.edu.cn/index.php 193:/data2/jungminh/plantTFDB | ||
+ | |||
+ | 193:/data2/jungminh/mungbean/cds vigna species cds transferred from 244:/NGS/NGS/Vigna/RNA/transcriptome | ||
+ | |||
+ | blastn all cds against Vr tf database /data2/jungminh/plantTFDB/Vra_cds.fas | ||
+ | /data2/jungminh/mungbean/tf/*.out | ||
+ | blastn 1e-10 num_alignment 1 outfmt 6 |
Latest revision as of 08:39, 7 June 2017
Contents |
Jatropha
1.9
Illumina PE trimming
nohup ./IlluQC.pl -pe /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_1.fastq /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_2.fastq 2 A -p 8 &
Gapfiller
https://www.baseclear.com/base/download/39GapFiller_v1-10_linux-x86_64.tar.gz
test with untrimmed PE to Jatropha scaf
library pe_lib.txt lib200 bwa /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_1.fastq /NGS/NGS/JatrophaCurcas/DNA/Jatropha_1_PE_200bp_2.fastq 200 0.2 FR
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s /home/jungminh/jatropha/sspace/standard_output/standard_output.final.scaffolds.fasta -T 8 &
1.10
scaffold anchoring
results
cat standard_output.final.scaffolds.fasta.tr.JM_out.fa standard_output.final.scaffolds.fasta.tr.JM_out.fa.unanchored.fa > /data/Jatropha.a2/JM_newscaf/superscaffold.scaffold.fasta
LG anchored super scaffold Gap filling on 63
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 24 &
1.11
Gap filling
on 63 didn't work. try again on 244
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s superscaffold.scaffold.fasta -T 8 &
1.13
jatropha final assembly.
contig scaffold anchoring gapfilling /home/jungminh/jatropha/sspace/standard_output/gapfiller/superscaffold_gapfilled/standard_output/superscaffold.gapfilled.final.fa
3.28
scaffolds (/home/jungminh/jatropha/final/standard_output.final.scaffolds.fasta) were anchored using ALLMAPs by 하늘 /kev8305/skyts0401/Jatropha/Jatropha.allmaps.fasta
Go gapfilling!
GAPfiller
@244
/kev8305/skyts0401/Jatropha/gapfiller/sudo nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s ../Jatropha.allmaps.fasta -T 10 &
3.31
Gapfiller
stopped in the middle of the process.
mv the new superscaffold to /home/jungminh/jatropha/sspace/standard_output/gapfiller/superscaffold_allmaps_gapfilled and run again
6.7
RNA raw reads mapping
build index for Jcu_new_first.all.maker.transcripts.fasta 244:/home/jungminh/jatropha/RNArawreadMapping/bowtie2-build -f Jcu_new_first.all.maker.transcripts.fasta Jcu_new_first.all.maker.transcripts
nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/Stem_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/Stem_2.fastq.gz --very-fast -S J.c.stem & nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/1_2_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/1_2_2.fastq.gz --very-fast -S J.c.leaf.out & nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/male2_5_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/male2_5_2.fastq.gz --very-fast -S J.c.mf2_5.out & nohup bowtie2 -x ../Jcu_new_first.all.maker.transcripts -1 /data2/NGS/JatrophaCurcas/RNA/Root_1.fastq.gz -2 /data2/NGS/JatrophaCurcas/RNA/Root_2.fastq.gz --very-fast -S J.c.root.out &
jatropha chloroplast
paste - - - - < /data/JATROPHA/Jatropha/Jatropha.trimmedReads.fastq | cut -f 1,2 | sed 's/^@/>/' | tr "\t" "\n" > Jatropha.trimmedReads.fasta awk 'BEGIN {n_seq=0;} /^>/ {if(n_seq%100000==0){file=sprintf("myseq%d.fa",n_seq);} print >> file; n_seq++; next;} { print >> file; }' < Jatropha.trimmedReads.fasta
deconseq filtering
@63 nohup ~/bin/deconseq-standalone-0.4.3/deconseq.pl -f /data/JATROPHA/Jatropha/Jatropha.consensus.fasta -dbs chloro -i 90 -c 90 & output 1487640568
nohup ~/bin/deconseq-standalone-0.4.3/deconseq.pl -f /data/jungminh/jatropha/contig_primary.fa -dbs chloro -i 90 -c 90 & output 1487641000
pb only assembly
for contigFilter option, version is critical. depth 10 nohup /home/skyts0401/canu/Linux-amd64/bin/canu -assemble -d /data/jungminh/jatropha/cp/canu_pb_only/ -p pb_only genomeSize=151000 contigFilter="10 1000 0.75 0.75 2" -pacbio-corrected ./pb_cp.fasta & depth 5 nohup /home/skyts0401/canu/Linux-amd64/bin/canu -assemble -d /data/jungminh/jatropha/cp/canu_pb_only_5/ -p pb_only genomeSize=151000 contigFilter="5 1000 0.75 0.75 2" -pacbio-corrected ./pb_cp.fasta &
L.indica
1.16
L.indica SSR analysis from TY
/alima9002/LIndica/L.Trinity.fasta.cdhit.statistics
vigna
chloroplast total length 151271 bp LSC 1..80898 IRa 80899..107379 SSC 107380..124791 IRb 124792..151271
1.19
cp database download
download chloroplast db from ncbi. list.txt NC_005086.1 NC_000932.1 NC_015139.1 NC_027693.1 NC_010323.1 NC_032008.1 NC_008334.1 NC_015983.1 NC_007942.1 NC_014570.1 NC_018766.1 NC_002694.1 NC_010433.1 NC_003119.6 NC_031333.1 NC_009259.1 NC_009143.1 NC_014697.1 NC_016736.1 NC_007898.3 NC_008096.2 NC_014676.2 NC_021091.1 NC_013843.1 NC_018051.1 NC_007957.1 NC_001666.2
deconseq
~/bin/deconseq-standalone-0.4.3/bwa64 index ~/db/cp/db.fasta -p db
nohup ./deconseq.pl -f ~/cp/Sunhwa.trimmedReads.fasta -dbs chloro -i 90 -c 90 &
DeconSeqConfig.pm use constant DB_DIR => '/home/jungminh/db/cp/'; use constant TMP_DIR => '/home/jungminh/db/cp/tmp/'; use constant OUTPUT_DIR => '/home/jungminh/cp/trimmedPB/';
use constant PROG_NAME => 'bwa64'; # should be either bwa64 or bwaMAC (based on your system architecture) use constant PROG_DIR => './'; # should be the location of the PROG_NAME file (use './' if in the same location at the perl script)
use constant DBS => {chloro => {name => 'plant cp', #database name used for display and used as input for -dbs and -dbs_retain
db => 'db'}, #database name as defined with -p for "bwa index -p ..." (specify multiple database chunks separated with commas without space; e.g. hs_ref_s1,hs_ref_s2,hs_ref_s3)
- bact => {name => 'Bacterial genomes',
- db => 'bactDB'},
- vir => {name => 'Viral genomes',
- db => 'virDB'}
}; use constant DB_DEFAULT => 'chloro';
Deconseq filtering 63:/data/jungminh/mungbean pb.cp.fasta error corrected PB by canu canu_ctg_cp.fasta canu ctg assembled from error corrected PB ctg_cp.fasta falcon ctg assembled from error corrected PB scaf_cp.fasta sspace scaf assembled from falcon ctg
63:/data/jungminh/mungbean/PE SunhwaN_1_cont.fq.pairing.fq SunhwaN_2_cont.fq.pairing.fq
1.23
PB deconseq had error. ERROR: system call "./bwa64 bwasw -A -f /home/jungminh/bin/deconseq-standalone-0.4.3/1484807078_chloro_db.tsv /home/jungminh/db/cp/db /data2/jungminh/cp/pb/Sunhwa.trimmedReads.fasta" failed: 9. maybe storage problem???
run again.
1.24
bowtie2 mapping
at 244
nohup bowtie2 -x ~/db/v.radiata -f scaf_cp.fasta --end-to-end --very-fast -p 4 -S scaf_cp.SAM &
198 reads; of these:
198 (100.00%) were unpaired; of these: 0 (0.00%) aligned 0 times 140 (70.71%) aligned exactly 1 time 58 (29.29%) aligned >1 times
100.00% overall alignment rate
546 reads; of these:
546 (100.00%) were unpaired; of these: 0 (0.00%) aligned 0 times 391 (71.61%) aligned exactly 1 time 155 (28.39%) aligned >1 times
100.00% overall alignment rate
6035 reads; of these:
6035 (100.00%) were unpaired; of these: 0 (0.00%) aligned 0 times 3583 (59.37%) aligned exactly 1 time 2452 (40.63%) aligned >1 times
100.00% overall alignment rate
nohup bowtie2 -x ~/db/v.radiata -f canu_ctg_cp.fasta --end-to-end --very-fast -p 6 -S canu_ctg_cp.SAM &
nohup bowtie2 -x ~/db/v.radiata -f ctg_cp.fasta --end-to-end --very-fast -p 2 -S ctg_cp.SAM & samtools view -bS ctg_cp.SAM > ctg_cp.BAM samtools sort ctg_cp.BAM -o ctg_cp_sort samtools index ctg_cp_sort ctg_cp_sort.index
deconseq PB
split error-corrected reads
awk 'BEGIN {n_seq=0;} /^>/ {if(n_seq%1000==0){file=sprintf("myseq%d.fa",n_seq);} print >> file; n_seq++; next;} { print >> file; }' < Sunhwa.trimmedReads.fasta
nohup ./deconseq.pl -f /data2/jungminh/cp/pb/myseq1700000.fa -dbs chloro -i 90 -c 90 &
myseq0-1700000.fa
1.25
deconseq PB
nohup ./deconseq.pl -f /data2/jungminh/cp/pb/myseq1700000.fa -dbs chloro -i 90 -c 90 &
myseq1800000-900000
filter by length
python \[fasta\]filterbylen.py canu_ctg_cp.fasta 20000 > canu_ctg_cp_20k.fasta
pb read mapping against mungbean cp
nohup bowtie2 -x /home/jungminh/db/cp/v.radiata -f pb.cp.fasta --end-to-end --very-fast -p 6 -S pb_cp.sam &
31462 reads; of these:
31462 (100.00%) were unpaired; of these: 1 (0.00%) aligned 0 times 22594 (71.81%) aligned exactly 1 time 8867 (28.18%) aligned >1 times
100.00% overall alignment rate
1.26
PB only assembly by CANU
/data/asseblyProgram/canu-1.0/Linux-amd64/bin/canu -assemble -d /data/jungminh/mungbean/assembly/ -p pb_only genomeSize=151271 -pacbio-corrected ../pb.cp.fasta
Sunhwa PE deconseq
244:/hayasen/Workspace/Vigna/Chloro/split/
pairing filter by IlluQC
IlluQC -pe A B 2 A
canu_ctg only assembly by CANU
/data/skyts0401/Mungbean/chloroplast/assembly/Vr_cp/canu_ctg/Vr_cp_canu.contigs.fasta
1.31
PB PE ctg assembly
@63 /data/jungminh/mungbean/assembly/spades/PE_PB nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --only-assembler -t 12 -o /data/jungminh/mungbean/assembly/spades/PE_PB &
/data/jungminh/mungbean/assembly/spades/PE_PB_ctg nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --trusted-contigs /data/jungminh/mungbean/ctg_cp_20k.fasta --only-assembler -t 6 -o /data/jungminh/mungbean/assembly/spades/PE_PB_ctg &
/data/jungminh/mungbean/assembly/spades/PE_PB_canu_ctg nohup ~/bin/SPAdes-3.10.0-Linux/bin/spades.py -1 /data/jungminh/mungbean/PE/SunhwaN_1_cont.fq.pairing.fq -2 /data/jungminh/mungbean/PE/SunhwaN_2_cont.fq.pairing.fq --pacbio /data/jungminh/mungbean/pb.cp.fasta --trusted-contigs /data/jungminh/mungbean/canu_ctg_cp_20k.fasta --only-assembler -t 6 -o /data/jungminh/mungbean/assembly/spades/PE_PB_canu_ctg &
2.3
PB canu_ctg assembly
@63 nohup /data/asseblyProgram/canu-1.0/Linux-amd64/bin/canu -assemble -d /data/jungminh/mungbean/assembly/pb_plus_canu_ctg -p pb_plus_canu_ctg genomeSize=151271 -pacbio-corrected ../../pb_plus_canu_ctg_cp.fasta &
3.2
Sunhwa PE mapping against new v.radiata cp
bwa index V.radiata_cp_PB_new.fasta
bwa mem -t 8 V.radiata_cp_PB_new.fasta /hayasen/Workspace/Chloro/SunhwaN_1_cont.fq.pairing.fq /hayasen/Workspace/Chloro/SunhwaN_2_cont.fq.pairing.fq > pe_bwa_new_cp.sam
samtools view -bS pe_bwa_new_cp.sam > pe_bwa_new_cp.bam
samtools sort pe_bwa_new_cp.bam -o pe_bwa_new_cp.sort.bam
samtools index pe_bwa_new_cp.sort.bam pe_bwa_new_cp.sort.bam.bai
Sunhwa PB mapping against new v.radiata cp
bwa mem -t 2 V.radiata_cp_PB_new.fasta /kev8305/Mungbean_assembly/chloroplast/pb.cp.fasta > pb_bwa_new_cp.sam
samtools view -bS pb_bwa_new_cp.sam > pb_bwa_new_cp.bam
samtools sort pb_bwa_new_cp.bam -o pb_bwa_new_cp.sort.bam
samtools index pb_bwa_new_cp.sort.bam pb_bwa_new_cp.sort.bam.bai
samtools faidx V.radiata_cp_PB_new.fasta
samtools tview pe_bwa_new_cp.sort.bam V.radiata_cp_PB_new.fasta
4.19
gap filling super scaffold
super scaffold 244:/kev8305/SK3/anchoring/JM-2.fasta PE 244:/NGS/NGS/VignaRadiata/DNA/SunhwaN_1.fastq.gz
/NGS/NGS/VignaRadiata/DNA/SunhwaN_2.fastq.gz
library file: /home/jungminh/mungbean/gapfilled_superscaffold/pe_lib.txt lib1 bwa /NGS/NGS/VignaRadiata/DNA/SunhwaN_1.fastq.gz /NGS/NGS/VignaRadiata/DNA/SunhwaN_2.fastq.gz 200 0.2 FR
nohup perl ~/bin/GapFiller_v1-10_linux-x86_64/GapFiller.pl -l pe_lib.txt -s /kev8305/SK3/anchoring/JM-2.fasta -T 8 &
vigna TF
database
5.10.2017 download At Gm Vr TF database from http://planttfdb.cbi.pku.edu.cn/index.php 193:/data2/jungminh/plantTFDB
193:/data2/jungminh/mungbean/cds vigna species cds transferred from 244:/NGS/NGS/Vigna/RNA/transcriptome
blastn all cds against Vr tf database /data2/jungminh/plantTFDB/Vra_cds.fas /data2/jungminh/mungbean/tf/*.out blastn 1e-10 num_alignment 1 outfmt 6