#!/usr/bin/env perl

# Note: warnings are enabled with "use warnings" rather than a "-w" switch on
# the #! line. On Linux the kernel passes everything after the interpreter
# path as ONE argument, so "env" would look for a program literally named
# "perl -w" and the script could not be executed directly.
use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use Term::ANSIColor;

# Command-line settings and their defaults. Each value is copied verbatim
# into the header of the generated track file.
my $flag_help       = 0;                 # -help / -? : show usage and exit
my $debug           = 0;                 # -debug     : accepted, but not consulted by this script
my $param_species   = "monosigaovata";   # -species   : written as "Species="
my $param_build     = "build2";          # -build     : written as "Revision="
my $param_trackname = "DummyTrack";      # -trackname : written as "TrackName="
my $param_comment   = "CommentHere";     # -comment   : written as "TrackComment="
my $param_option    = "";                # -optattr   : written as "TrackOptattr="

# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed a diagnostic to STDERR by then). Stop with the usage
# text and exit status 2 instead of silently producing a track file from a
# command line that was not fully understood.
GetOptions( 'help|?'      => \$flag_help,
            'species=s'   => \$param_species,
            'build=s'     => \$param_build,
            'optattr=s'   => \$param_option,
            'trackname=s' => \$param_trackname,
            'comment=s'   => \$param_comment,
            'debug'       => \$debug
          ) or pod2usage(2);
pod2usage(1) if $flag_help;

# Number of input lines that could not be parsed; reported once all input
# has been processed.
my $number_of_warnings = 0;

#show header
# The fixed preamble of the track file: genome version, track description,
# and the marker that opens the data section. Written as a single
# here-document so the layout of the output is visible at a glance.
print <<"END_OF_HEADER";
[GenomeVersion]
Species=$param_species
Revision=$param_build

[TrackHeader]
TrackName=$param_trackname
TrackComment=$param_comment
TrackOptattr=$param_option

[Data]
END_OF_HEADER
#main loop
# Converts every hit line of the input (files named on the command line, or
# STDIN) into one comma-separated record on STDOUT. Lines that match neither
# known format are counted and reported on STDERR, so that diagnostics and
# ANSI colour codes never end up inside the track data when STDOUT is
# redirected to a file.

# "scaffoldN len=M" lines introduce a scaffold. Nothing from them is needed
# for the output, so they are recognised only in order to be skipped.
my $scaffold_header_re = qr/^scaffold\d+ len=\d+$/;

# One hit line. Captures, in order: scaffold number, strand, frame, length in
# amino acids, start, end, HMMER score, e-value, number of domains,
# description, and a trailing parenthesised number that is not used.
my $hit_line_re = qr/^scaffold\s+(\d+)\s+([+\-])([123])\s+(\d+)aa\s+(\d+)-(\d+)\s+([\d\.e\+\-]+)\s+([\d\.e\+\-]+)\s+(\d+)\s+(\S+)\s+\((\d+)\)$/;

while (my $line = <>) {
	$line =~ s/\r?\n$//;    # accept both LF and CRLF line endings
	$line =~ s/^\s+//;      # hit lines may be indented
	next if $line eq '';
	next if $line =~ $scaffold_header_re;

	# In list context a failed match yields an empty list, which makes the
	# assignment false; the unused eleventh capture is simply dropped.
	if (my ($scaffoldnum, $strand, $frame, $number_of_amino_acids,
	        $start_1origin_inclusive, $end_1origin_inclusive,
	        $hmmer_score, $evalue, $numdomains, $description)
	        = $line =~ $hit_line_re) {
		my $range = "${start_1origin_inclusive}-${end_1origin_inclusive}";

		# Field names that contain a space or '#' are emitted inside
		# double quotes, as is the description.
		print join(',',
			"TargetScaffold=scaffold$scaffoldnum",
			"TargetRange=$range",
			qq{"TargetExons=$range"},
			"TargetStrand=$strand",    # the pattern guarantees '+' or '-'
			"Frame=$frame",
			"Name=$description",
			"Length=${number_of_amino_acids}aa",
			qq{"HMMER score=$hmmer_score"},
			qq{"e-value=$evalue"},
			qq{"# domains=$numdomains"},
			qq{"Description=$description"},
		), "\n";
	} else {
		# Count the failure so that the summary below is actually reached.
		$number_of_warnings++;
		print STDERR color("red") . "WARNING: Could not parse at line $." . color("reset") . "\n";
		print STDERR "$line\n";
	}
}

if ($number_of_warnings > 0) {
	print STDERR color("red") . "$number_of_warnings warnings in total." . color("reset") . "\n";
}

=pod

=head1 NAME

pfamsummary2easyfeaturetrack.pl - converts a Pfam search summary file to an EasyFeatureTrack file for UTGB.

=head1 SYNOPSIS

pfamsummary2easyfeaturetrack.pl [options...] < input > output

Options:
   -help            brief help message

=head1 OPTIONS

=over 8

=item B<-help>

Prints a brief help message and exits.

=item B<-species>

Sets the species name, which will appear in the header of the EasyFeatureTrack file.
The default value is "-species=monosigaovata".

=item B<-build>

Sets the name of the assembly build, which will appear in the header of the EasyFeatureTrack file.
The default value is "-build=build2".

=item B<-trackname>

Sets the name of the genome browser track, which will appear in the header of the EasyFeatureTrack file.
The default value is "-trackname=DummyTrack". The option may be abbreviated to "-track".

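=item B<-comment>

Sets the comment of the genome browser track, which will appear in the header of the EasyFeatureTrack file.
The default value is "-comment=CommentHere".

=item B<-optattr>

Sets the value written to the TrackOptattr line of the header.
The default value is an empty string.

=item B<-debug>

Accepted on the command line, but currently has no effect.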


=back

=head1 DESCRIPTION

B<pfamsummary2easyfeaturetrack.pl> reads a Pfam summary file, whose format is defined elsewhere,
and converts it into an EasyFeatureTrack file, which can be passed directly to the
UT genome browser. Please see the web page below for the EasyFeatureTrack specification.

http://www.utgenome.org/wiki/index.php?Manual%2FEasyFeatureTrack

=cut

