; @fasta_list = (); %fasta = (); $key = "key"; foreach $i(@input){ chomp $i; if($i =~ /^>..*/){ $key = $i; push @fasta_list,$key; } else {$fasta{$key} .= $i} } $total_length = length($fasta{$key}); ##Sub-sampling random number matrix generation $count = 0; %random_set = (); while($count < $NUM){ $site = 0; while($site < $LEN){ %tag = (); $num = int(rand($total_length)); next if(exists($tag{$num})); $tag{$num} = 1; $random_set{$count} .= $num; $random_set{$count} .= " "; $site ++; } $count ++; } ##Start building the subsampling dataset $count = 0; while($count < $NUM){ @sites = split / /,$random_set{$count}; foreach $f(@fasta_list){"> ; @fasta_list = (); %fasta = (); $key = "key"; foreach $i(@input){ chomp $i; if($i =~ /^>..*/){ $key = $i; push @fasta_list,$key; } else {$fasta{$key} .= $i} } $total_length = length($fasta{$key}); ##Sub-sampling random number matrix generation $count = 0; %random_set = (); while($count < $NUM){ $site = 0; while($site < $LEN){ %tag = (); $num = int(rand($total_length)); next if(exists($tag{$num})); $tag{$num} = 1; $random_set{$count} .= $num; $random_set{$count} .= " "; $site ++; } $count ++; } ##Start building the subsampling dataset $count = 0; while($count < $NUM){ @sites = split / /,$random_set{$count}; foreach $f(@fasta_list){"> ; @fasta_list = (); %fasta = (); $key = "key"; foreach $i(@input){ chomp $i; if($i =~ /^>..*/){ $key = $i; push @fasta_list,$key; } else {$fasta{$key} .= $i} } $total_length = length($fasta{$key}); ##Sub-sampling random number matrix generation $count = 0; %random_set = (); while($count < $NUM){ $site = 0; while($site < $LEN){ %tag = (); $num = int(rand($total_length)); next if(exists($tag{$num})); $tag{$num} = 1; $random_set{$count} .= $num; $random_set{$count} .= " "; $site ++; } $count ++; } ##Start building the subsampling dataset $count = 0; while($count < $NUM){ @sites = split / /,$random_set{$count}; foreach $f(@fasta_list){">
use warnings;
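## Generates loci sub-datasets by randomly sampling sites (sequence positions)
## from a multi-FASTA (MFA) file; the same positions are taken from every sequence.
## Subsample number N (counted from 0) is written to the file "N.fa" in the current directory.
## Usage: perl script MFA subsampling-length subsampling-number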
## Email: [email protected]
## Command-line arguments:
##   $ARGV[0]  path of the multi-FASTA (MFA) file to read
##   $ARGV[1]  $LEN, number of sites drawn for each subsample
##   $ARGV[2]  $NUM, number of subsamples to build
## $LEN, $NUM, @fasta_list, %fasta and $total_length are left as package
## globals because the later sections of this script read them.
die "Usage: perl $0 MFA subsampling-length subsampling-number\n"
    unless @ARGV >= 3;
$LEN = $ARGV[1];
$NUM = $ARGV[2];
foreach my $count_arg ($LEN, $NUM) {
    die "Expected a non-negative integer but got '$count_arg'\n"
        unless $count_arg =~ /\A[0-9]+\z/;
}

##Reading MFA
# Three-argument open: characters such as '>' or '|' in the file name can no
# longer be mistaken for an open mode, and a failed open stops the script
# instead of silently yielding an empty alignment. The bareword handle IN is
# kept (and left open) because the script closes IN on its last line.
open IN, '<', $ARGV[0]
    or die "Cannot open '$ARGV[0]' for reading: $!\n";
@input = <IN>;

# @fasta_list keeps the header lines in file order; %fasta maps each header
# line (including the leading '>') to its concatenated sequence text.
@fasta_list = ();
%fasta      = ();
# Any sequence text that appears before the first header is collected under
# the placeholder key "key".
$key = "key";
foreach my $line (@input) {
    chomp $line;
    if ($line =~ /^>..*/) {
        # Header line: '>' followed by at least one character.
        $key = $line;
        push @fasta_list, $key;
    }
    else {
        # Sequence line: append to the record currently being read.
        $fasta{$key} .= $line;
    }
}

# The sampling range is the length of the LAST record read. The script does
# not check that the other records have the same length -- presumably the
# input is an alignment with equal-length sequences (TODO confirm).
die "No FASTA records with sequence data found in '$ARGV[0]'\n"
    unless @fasta_list && defined $fasta{$key};
$total_length = length($fasta{$key});
##Sub-sampling random number matrix generation
# For each of the $NUM subsamples, draw $LEN distinct 0-based positions in
# [0, $total_length) and store them in %random_set, keyed by the subsample
# index, as one string in which every position is followed by a single space.
{
    # Distinct positions can only be drawn while $LEN does not exceed the
    # number of available positions; otherwise the rejection loop below
    # would never terminate.
    my $available = defined $total_length ? $total_length : 0;
    die "Cannot draw $LEN distinct sites from a sequence of length $available\n"
        if $LEN > $available;
}
$count      = 0;
%random_set = ();
while ($count < $NUM) {
    # Start from an empty string so a subsample with zero sites is still a
    # defined value for the code that later splits it.
    $random_set{$count} = "";

    # Positions already used by THIS subsample. It must be reset once per
    # subsample, not once per draw: clearing it before every draw makes the
    # duplicate check below a no-op and lets the same site be picked twice.
    my %tag = ();

    my $site = 0;
    while ($site < $LEN) {
        my $num = int(rand($total_length));
        next if exists $tag{$num};    # already drawn: try again
        $tag{$num} = 1;
        $random_set{$count} .= $num . " ";
        $site++;
    }
    $count++;
}
##Start building the subsampling dataset
# For every subsample index N in 0 .. $NUM-1, write one FASTA record per
# header in @fasta_list to the file "N.fa": the header line, then the
# characters of that sequence found at the positions stored in
# $random_set{N}, in the stored order.
$count = 0;
while ($count < $NUM) {
    my @sites    = split / /, $random_set{$count};
    my $out_path = "$count.fa";

    # Open once per subsample instead of once per sequence. Append mode is
    # kept from the original, so an already existing N.fa is extended rather
    # than truncated.
    open my $out, '>>', $out_path
        or die "Cannot open '$out_path' for appending: $!\n";

    foreach my $header (@fasta_list) {
        my $seq = join '', map { substr($fasta{$header}, $_, 1) } @sites;

        # "\n" (not "\\n"): inside double quotes "\\n" is a literal backslash
        # followed by the letter n, which would put the whole file on one
        # line and produce invalid FASTA.
        print {$out} "$header\n";
        print {$out} "$seq\n";
    }

    # Buffered write errors (e.g. a full disk) only surface at close.
    close $out
        or die "Error while writing '$out_path': $!\n";
    $count++;
}
close IN;