BioMedInfo 板


LINE

小的是perl新手 以下是参考用perl找到CDS的程式码 将start_hash的location改为end end_hash的location改为start 想请问如何修改才能得到所需的intergenic region 谢谢~ #!/usr/bin/perl use strict; use warnings; use Getopt::Long; use File::Basename; use Bio::SeqIO; my $in_file; my $out_dir; my $id_tag; my $print_aa; my $print_nt; my $print_gg; my $print_loc; my $verbose; my $debug; GetOptions( "in_file=s" => \$in_file, "out_dir=s" => \$out_dir, "id_tag=s" => \$id_tag, "print_aa=i" => \$print_aa, "print_nt=i" => \$print_nt, "print_gg=i" => \$print_gg, "print_loc=i" => \$print_loc, "verbose=i" => \$verbose, "debug=i" => \$debug ); $id_tag = $id_tag ? $id_tag : 'protein_id'; system "mkdir -p $out_dir" unless -e $out_dir; my $file_id; if ( $in_file =~ m/.*\/(\S+?)\./ ) { $file_id = $1; } else { die "error parsing file_id from $in_file\n"; } my $out_file = $out_dir . $file_id . '.out'; open OUT, ">$out_file" or die "Can't open output file $out_file\n"; my $err_file = $out_dir . $file_id . '.err'; open ERR, ">$err_file" or die "Can't open output file $err_file\n"; if ($print_aa) { my $aa_out = $out_dir . $file_id . '.aa.fasta'; open AA, ">$aa_out" or die "Can't open output file $aa_out\n"; } if ($print_nt) { my $nt_out = $out_dir . $file_id . '.nt.fasta'; open NT, ">$nt_out" or die "Can't open output file $nt_out\n"; } if ($print_gg) { my $gg_out = $out_dir . $file_id . '.gg'; open GG, ">$gg_out" or die "Can't open output file $gg_out\n"; } if ($print_loc) { my $loc_out = $out_dir . $file_id . '.igr.location'; open LOC, ">$loc_out" or die "Can't open output file $loc_out\n"; } my $seqio_object = Bio::SeqIO->new( -format => "genbank", -file => "$in_file" ); my $seq_name; my $count_seq = 0; my $count_cds_total = 0; my $count_non_coding_total = 0; print GG "$file_id\: "; while ( my $seq_object = $seqio_object->next_seq ) { my $accession = $seq_object->accession_number; $count_seq++; my %nt_seq_hash; # key = seq_name, value = nt_seq my %aa_seq_hash; # key = seq_name, value = aa_seq my %start_hash; # key = seq_name, value = start my %end_hash; # key = seq_name, value = end my %strand_hash; # key = seq_name, value = strand my $count_igr_in = 0; my $count_igr_out = 0; for my $feat_object ( $seq_object->get_SeqFeatures ) { if ( $feat_object->primary_tag eq 'CDS' ) { # ignore pseudo genes next if ( $feat_object->has_tag("pseudo") ); $count_igr_in++; if ( $feat_object->has_tag($id_tag) ) { for my $value ( $feat_object->get_tag_values($id_tag) ) { $seq_name = $value; } if ( $id_tag eq 'protein_id' && $seq_name =~ m/(\S+)\./ ) { $seq_name = $1; } $count_igr_out++; $start_hash{$seq_name} = $feat_object->location->end; $end_hash{$seq_name} = $feat_object->location->start; $strand_hash{$seq_name} = $feat_object->location->strand; $nt_seq_hash{$seq_name} = $feat_object->spliced_seq->seq; if ( $feat_object->has_tag("translation") ) { for my $value ( $feat_object->get_tag_values("translation") ) { $aa_seq_hash{$seq_name} = $value; } } else { print ERR "$file_id: $accession: $seq_name: no translation!\ n"; } } else { print ERR "$file_id: $accession: no $id_tag!\n"; } } } my $count_order = 0; foreach $seq_name ( sort { $start_hash{$a} $start_hash{$b} } keys %start_hash ) { $count_order++; print AA ">$seq_name\n$aa_seq_hash{$seq_name}\n" if ($print_aa); print NT ">$seq_name\n$nt_seq_hash{$seq_name}\n" if ($print_nt); print GG " $seq_name" if ($print_gg); print LOC "$seq_name\t$accession\t$count_order\t", "$start_hash{$seq_name}\t$end_hash{$seq_name}\t", "$strand_hash{$seq_name}\n" if ($print_loc); } print OUT "$file_id: $accession: count_igr_out = $count_igr_out\n"; if ( $count_igr_in != $count_igr_out ) { print ERR "$file_id: $accession: count_igr_in = $count_igr_in ", "!= count_igr_out = $count_igr_out!\n"; } $count_igr_total += $count_igr_out; } # while $seq_object print OUT "$file_id: count_seq = $count_seq, count_igr_total = $count_igr_total\ n"; print GG "\n" if ($print_gg); close OUT; close ERR; close AA if ($print_aa); close NT if ($print_nt); close GG if ($print_gg); close LOC if ($print_loc); exit(0); --



※ 发信站: 批踢踢实业坊(ptt.cc)
◆ From: 125.226.136.153
1F:→ jjhgk:意思是$nt_seq_hash{$seq_name}不是你要的IGR吗? 07/31 10:26
2F:→ lambking:恩恩 想请问$feat_object->primary_tag eq 'CDS' 07/31 19:07
3F:→ lambking:是指 读入CDS片段吗? 07/31 19:09
4F:→ Godkin:这指的是找出当中被标上CDS标签的部份 07/31 19:44
5F:→ jjhgk:不知道你是不是想藉由找出cds的位置来取得non-cds的序列, 07/31 21:14
6F:→ jjhgk:是的话你可能要用其他method取得序列,并确定基因上下游关系 07/31 21:18
7F:→ lambking:请问 若是想从Genbank所取得的序列 直接找出intergenic 07/31 21:52
8F:→ lambking:region 该如何下手呢? 谢谢~ 07/31 21:53







like.gif 您可能会有兴趣的文章
icon.png[问题/行为] 猫晚上进房间会不会有憋尿问题
icon.pngRe: [闲聊] 选了错误的女孩成为魔法少女 XDDDDDDDDDD
icon.png[正妹] 瑞典 一张
icon.png[心得] EMS高领长版毛衣.墨小楼MC1002
icon.png[分享] 丹龙隔热纸GE55+33+22
icon.png[问题] 清洗洗衣机
icon.png[寻物] 窗台下的空间
icon.png[闲聊] 双极の女神1 木魔爵
icon.png[售车] 新竹 1997 march 1297cc 白色 四门
icon.png[讨论] 能从照片感受到摄影者心情吗
icon.png[狂贺] 贺贺贺贺 贺!岛村卯月!总选举NO.1
icon.png[难过] 羡慕白皮肤的女生
icon.png阅读文章
icon.png[黑特]
icon.png[问题] SBK S1安装於安全帽位置
icon.png[分享] 旧woo100绝版开箱!!
icon.pngRe: [无言] 关於小包卫生纸
icon.png[开箱] E5-2683V3 RX480Strix 快睿C1 简单测试
icon.png[心得] 苍の海贼龙 地狱 执行者16PT
icon.png[售车] 1999年Virage iO 1.8EXi
icon.png[心得] 挑战33 LV10 狮子座pt solo
icon.png[闲聊] 手把手教你不被桶之新手主购教学
icon.png[分享] Civic Type R 量产版官方照无预警流出
icon.png[售车] Golf 4 2.0 银色 自排
icon.png[出售] Graco提篮汽座(有底座)2000元诚可议
icon.png[问题] 请问补牙材质掉了还能再补吗?(台中半年内
icon.png[问题] 44th 单曲 生写竟然都给重复的啊啊!
icon.png[心得] 华南红卡/icash 核卡
icon.png[问题] 拔牙矫正这样正常吗
icon.png[赠送] 老莫高业 初业 102年版
icon.png[情报] 三大行动支付 本季掀战火
icon.png[宝宝] 博客来Amos水蜡笔5/1特价五折
icon.pngRe: [心得] 新鲜人一些面试分享
icon.png[心得] 苍の海贼龙 地狱 麒麟25PT
icon.pngRe: [闲聊] (君の名は。雷慎入) 君名二创漫画翻译
icon.pngRe: [闲聊] OGN中场影片:失踪人口局 (英文字幕)
icon.png[问题] 台湾大哥大4G讯号差
icon.png[出售] [全国]全新千寻侘草LED灯, 水草

请输入看板名称,例如:iOS站内搜寻

TOP