2010年9月22日 星期三

RepeatElementCoverageInContigs_v1.pl

#!/usr/bin/perl
# Description:
# Get the contig and repeat-name columns from a RepeatMasker .out file, map them to the template coverage profile in the .depth file, and generate the combined mapping file.
#
# Author: Andy
#
# Input file:
# [0] repeatmasker .out file
# [1] .depth file (from alang)
#
# Output format:
# contig RepeatName Coverage file ...
#
# Output file
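# <RepeatMasker .out path>.ana.RepeatElementCoverageInContigs_v1.modifyRepeatMaskerOutFile: the .out records with the header lines filtered out and a record number prepended
# <RepeatMasker .out path>.ana.RepeatElementCoverageInContigs_v1.IntegerOutFile: the integrated table (record number, contig, repeat name, matching .depth line)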
#
# Sample
# perl RepeatElementCoverageInContigs_v1.pl 454AllContigs_change.fna.out 454AllContigs_change.depth
#
# Time: 2010.09.21


use strict;
use warnings;

# Script body.
#
# Reads a RepeatMasker .out file ($ARGV[0]) and a .depth file ($ARGV[1]) and
# writes two files next to the .out file:
#   * ...modifyRepeatMaskerOutFile : every data record of the .out file,
#     prefixed with a running record number and two tabs.
#   * ...IntegerOutFile            : for every data record, one row per .depth
#     line that starts with the record's contig name, formatted as
#     "<record number>\t<contig>\t<repeat name>\t<depth line>".
# A "." is printed to STDOUT each time the record counter reaches a multiple
# of 1000.
#
# Dies if the argument count is wrong, if any file cannot be opened, or if an
# output file cannot be flushed on close.

die "Error:$0 RepeatMasker.out(file) CoverageFile(.depth) " unless (@ARGV == 2);

my ($maskerOutPath, $depthPath) = @ARGV;
my $analysisMaskerOutPath    = $maskerOutPath . ".ana.RepeatElementCoverageInContigs_v1.modifyRepeatMaskerOutFile";
my $analysisIntegrateOutPath = $maskerOutPath . ".ana.RepeatElementCoverageInContigs_v1.IntegerOutFile";

# Open all four files with three-argument open so that a path can never be
# interpreted as a mode, and fail loudly instead of producing empty output.
open(my $maskerOutFile, '<', $maskerOutPath)
    or die "Cannot open RepeatMasker .out file '$maskerOutPath': $!\n";
open(my $depthFile, '<', $depthPath)
    or die "Cannot open depth file '$depthPath': $!\n";
open(my $analysisMaskerOutFile, '>', $analysisMaskerOutPath)
    or die "Cannot write '$analysisMaskerOutPath': $!\n";
open(my $analysisIntegrateOutFile, '>', $analysisIntegrateOutPath)
    or die "Cannot write '$analysisIntegrateOutPath': $!\n";

# The depth file is held in memory because it is consulted for every contig.
my @statistics = <$depthFile>;

# A final line without a newline would otherwise glue two output rows together.
$statistics[-1] .= "\n" if (@statistics && $statistics[-1] !~ /\n\z/);

# Discard the first three lines of the .out file (the header block).
for (1 .. 3) {
    my $headerLine = <$maskerOutFile>;
    last unless defined $headerLine;
}

my $count = 1;

# Cache: contig name -> array ref of the depth lines selected for that contig.
# Many repeat records share a contig, so each contig scans the depth file once.
my %depthLinesFor;

while (my $line = <$maskerOutFile>) {
    # Strip leading whitespace; on a whitespace-only line this also removes
    # the newline and leaves an empty string.
    $line =~ s/^\s+//;
    next if ($line eq '');
    $line .= "\n" unless ($line =~ /\n\z/);

    my @token = split(/\s+/, $line);

    # Fields 4 and 9 are required below; without them the contig name would
    # be undefined and every depth line would be selected.
    if (@token < 10) {
        warn "Skipping malformed RepeatMasker record at line $. of '$maskerOutPath'\n";
        next;
    }

    print $analysisMaskerOutFile "$count\t\t$line";

    # token4 is the contig name and token9 is the repeat name
    my ($contigName, $repeatName) = ($token[4], $token[9]);

    # Select depth lines that begin with the contig name. index() compares
    # literally, so regex metacharacters in a sequence name (".", "|", "[")
    # cannot alter the match.
    # NOTE(review): this is a prefix test, as in the original pattern, so a
    # contig named "c1" also selects depth lines starting with "c10" --
    # confirm whether an exact first-column match is intended.
    unless (exists $depthLinesFor{$contigName}) {
        $depthLinesFor{$contigName} =
            [ grep { index($_, $contigName) == 0 } @statistics ];
    }

    foreach my $statisticLine (@{ $depthLinesFor{$contigName} }) {
        print $analysisIntegrateOutFile "$count\t$contigName\t$repeatName\t$statisticLine";
    }

    $count++;
    print "." if ($count % 1000 == 0);
}

# Buffered write errors (e.g. a full disk) only surface at close time, so the
# output handles are checked.
close $maskerOutFile;
close $depthFile;
close $analysisIntegrateOutFile
    or die "Error closing '$analysisIntegrateOutPath': $!\n";
close $analysisMaskerOutFile
    or die "Error closing '$analysisMaskerOutPath': $!\n";

沒有留言:

張貼留言