#!/usr/bin/env perl
use warnings;
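## Generates locus sub-datasets by randomly sampling sites from a multi-FASTA (MFA) file.
## Usage: perl script MFA subsampling-length subsampling-number
##   subsampling-length: number of sites drawn for each sub-dataset
##   subsampling-number: number of sub-datasets to write (0.fa, 1.fa, ...)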
## Email: [email protected]

## Command-line arguments:
##   $ARGV[1] -> $LEN  number of sites drawn for each subsample
##   $ARGV[2] -> $NUM  number of subsamples to generate
## Both are validated up front: a missing or non-numeric value would otherwise
## be treated as 0 by the numeric loop conditions further down, so the script
## would finish without producing anything useful.

## Return $value as a number if it is a non-negative integer string; die otherwise.
##   $label : argument name used in the error message
##   $value : raw command-line value (undef when the argument is missing)
sub _checked_count {
    my ($label, $value) = @_;
    die "Usage: perl $0 MFA subsampling-length subsampling-number\n"
        unless defined $value;
    die "$label must be a non-negative integer, got '$value'\n"
        unless $value =~ /\A[0-9]+\z/;
    return $value + 0;
}

$LEN = _checked_count('subsampling-length', $ARGV[1]);
$NUM = _checked_count('subsampling-number', $ARGV[2]);

##Reading MFA

## Parse the lines of a multi-FASTA file into records.
##   @lines : raw lines, line endings still attached
## Returns (\@headers, \%sequence_for):
##   @headers      header lines (leading ">" included) in file order
##   %sequence_for header line => its sequence lines joined into one string
sub _parse_fasta_lines {
    my @lines = @_;
    my (@headers, %sequence_for);
    my $current;
    for my $line (@lines) {
        chomp $line;
        # Tolerate CRLF files: a leftover "\r" would end up inside the
        # sequence string and be sampled as if it were a site.
        $line =~ s/\r\z//;
        if ($line =~ /^>./) {
            $current = $line;
            push @headers, $current;
            $sequence_for{$current} = '' unless exists $sequence_for{$current};
        }
        elsif (defined $current) {
            $sequence_for{$current} .= $line;
        }
        # Lines before the first header belong to no record and are ignored.
    }
    return (\@headers, \%sequence_for);
}

die "Usage: perl $0 MFA subsampling-length subsampling-number\n"
    unless defined $ARGV[0];

## The bareword handle IN is kept because the script closes IN at its very end.
open(IN, '<', $ARGV[0]) or die "Cannot open MFA file '$ARGV[0]': $!\n";
@input = <IN>;

my ($parsed_headers, $parsed_sequences) = _parse_fasta_lines(@input);
@fasta_list = @$parsed_headers;
%fasta = %$parsed_sequences;
die "No FASTA records (lines starting with '>') found in '$ARGV[0]'\n"
    unless @fasta_list;

## The number of sites available for sampling is the length of the last record.
$key = $fasta_list[-1];
$total_length = length($fasta{$key});

## Sites are later addressed by position in every record, so records of a
## different length cannot supply all sampled sites; report them.
foreach my $name (@fasta_list) {
    warn "Warning: '$name' has length " . length($fasta{$name})
        . ", expected $total_length\n"
        if length($fasta{$name}) != $total_length;
}

##Sub-sampling random number matrix generation

## Draw $want distinct 0-based site indices from the range 0 .. $total-1.
##   $total : number of sites available
##   $want  : number of distinct sites to draw
## Returns the drawn indices as a list, in the order they were drawn.
## Dies when more sites are requested than exist, because sampling without
## replacement could never finish in that case.
sub _draw_unique_sites {
    my ($total, $want) = @_;
    $total = 0 unless defined $total;
    $want  = 0 unless defined $want;
    die "Cannot draw $want distinct sites from a sequence of length $total\n"
        if $want > $total;

    # %seen must live for the whole subsample: clearing it on every draw
    # would disable the duplicate check and sample with replacement.
    my (%seen, @picked);
    while (@picked < $want) {
        my $site = int(rand($total));
        next if $seen{$site}++;
        push @picked, $site;
    }
    return @picked;
}

$count = 0;
%random_set = ();
while ($count < $NUM) {
    # Storage format: each site index followed by a single space.
    $random_set{$count} = join '', map { "$_ " } _draw_unique_sites($total_length, $LEN);
    $count++;
}

##Start building the subsampling dataset

## Build one FASTA record restricted to the sampled sites.
##   $header   : header line, leading ">" included
##   $sequence : full sequence string of that record (undef is treated as empty)
##   @sites    : 0-based positions to extract, in output order
## Returns "<header>\n<selected characters>\n". Positions beyond the end of
## the sequence contribute nothing.
sub _format_record {
    my ($header, $sequence, @sites) = @_;
    $sequence = '' unless defined $sequence;
    my $picked = '';
    for my $site (@sites) {
        next if $site >= length($sequence);
        $picked .= substr($sequence, $site, 1);
    }
    # Real newlines: "\\n" would write a backslash followed by "n" and put
    # the whole file on a single line.
    return "$header\n$picked\n";
}

$count = 0;
while ($count < $NUM) {
    my @sites = defined $random_set{$count} ? split(/ /, $random_set{$count}) : ();

    # One output file per subsample, named by its index. Append mode is kept
    # on purpose: an existing file of the same name is extended, not replaced.
    my $out_name = "$count.fa";
    open(my $out, '>>', $out_name)
        or die "Cannot open '$out_name' for appending: $!\n";
    foreach my $f (@fasta_list) {
        print {$out} _format_record($f, $fasta{$f}, @sites);
    }
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close '$out_name': $!\n";
    $count++;
}

close IN;