Difference between revisions of "SK met"
From Crop Genomics Lab.
(→process) |
(→process) |
||
Line 43: | Line 43: | ||
# vcftools --vcf SK_met_variant.vcf --minQ 30 --minDP 5 --recode --out variant.vcf.gz.SNPs --remove-indels & --keep-only-indels | # vcftools --vcf SK_met_variant.vcf --minQ 30 --minDP 5 --recode --out variant.vcf.gz.SNPs --remove-indels & --keep-only-indels | ||
# python testt.py | wc -l | # python testt.py | wc -l | ||
− | file = open(" | + | GNU nano 2.9.3 testt.py |
+ | |||
+ | import sys | ||
+ | file = open("variant.vcf.gz.SNPs.recode.vcf",'r').readlines() | ||
for line in file : | for line in file : | ||
if '#' not in line : | if '#' not in line : | ||
i = 0 | i = 0 | ||
inf = line.split() | inf = line.split() | ||
− | geninfo = inf[ | + | geninfo = inf[int(sys.argv[1])] |
− | + | if geninfo.split(':')[0] != '0/0' and geninfo.split(':')[0] != './.' : | |
− | + | print inf[1]+inf[3]+inf[4], | |
− | + | print geninfo | |
− | + |
Revision as of 04:10, 28 January 2019
materials
raw_data : /NGS/NGS/VignaRadiata/DNA/SK/ /mount_test/sdd1/Mungbean_assemble/ver6/SNP/Vradi_ver6.reseqKJ.sorted.bam
SK010 | TN1509D0657--TCCGGAGA-AGGCTATA |
SK068 | TN1509D0710--TAATGCGC-TAAGATTA |
SK186 | TN1510D0932--TCTCGCGC-TCAGAGCC |
SK049 | TN1509D0695--GAATTCGT-ACGTCCTG |
SK094 | TN1509D0736--TCTCGCGC-GTCAGTAC |
SK152 | TN1510D0898--CTGAAGCT-GCCTCTAT |
SK176 | TN1510D0922--TCCGCGAA-GCCTCTAT |
SK156 | TN1510D0902--CTGAAGCT-TAAGATTA |
- 244 > 63 /data/haggui/raw_data/ 에 sample#.fq.gz 로 저장함
ref : 244 /mount_test/sdd1/Mungbean_assemble/ver6/SNP/Vradi_ver6.fa
- 244 > 63 data/haggui/Vradi_ver6.fa
process
- bwa index <ref.fa>
- cat SK_met.list | parallel --gnu -j 3 "bwa mem Vradi_ver6.fa ./raw_data/{}_1.fq.gz ./raw_data/{}_2.fq.gz | samtools view -bS -q 30 -> {}.bam" (1.5일)
- samtools sort/index/ {bam} samtools faidx ref
- /data/haggui$ /data/haggui/samtools-1.3.1/samtools mpileup -f Vradi_ver6.fa -v -t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -u -b {bamfile.list}| bcftools call -v -m -O v > SK_met_variant.vcf
- samtools depth {sorted.bam} > {.dep}
- wc -l *.dep
- cat {sort.bam.dep} |awk '{sum += $3} END { print sum}'
- vcftools --vcf SK_met_variant.vcf --minQ 30 --minDP 5 --recode --out variant.vcf.gz.SNPs --remove-indels & --keep-only-indels
- python testt.py | wc -l
GNU nano 2.9.3 testt.py
import sys
file = open("variant.vcf.gz.SNPs.recode.vcf",'r').readlines() for line in file : if '#' not in line : i = 0 inf = line.split() geninfo = inf[int(sys.argv[1])] if geninfo.split(':')[0] != '0/0' and geninfo.split(':')[0] != './.' : print inf[1]+inf[3]+inf[4], print geninfo