#!/usr/bin/perl -w
use File::Temp;
use Carp qw |croak cluck|;

# Popularity contest: call pp_popcon_cnt for the 'profbval' package.
# system() returns -1 when the program could not be started; that case is
# answered with an installation hint. Every other exit status is ignored.
my $popcon_status = system( 'pp_popcon_cnt', '-p', 'profbval' );
if( $popcon_status == -1 )
{
	warn("The Rost Lab recommends you install the pp-popularity-contest package that provides pp_popcon_cnt:\n\nsudo apt-get install pp-popularity-contest\n");
}

# Command line: six mandatory positional arguments followed by an optional
# debug flag.
my $mode = 1;	# initial value only; always replaced by the sixth argument below
unless( @ARGV >= 6 )
{
	# target_name is not part of the command line: a random one is generated
	# later in this script (note dated Nov 29 2009).
	die "\nUsage: $0 [fasta] [prof] [hssp] [output_file] [window] [mode] [debug]\n";
}

my( $seq_file, $rdbProf_file, $hssp_file, $output_file, $wind_size );
( $seq_file, $rdbProf_file, $hssp_file, $output_file, $wind_size, $mode ) = @ARGV[ 0 .. 5 ];

# Debugging is off (0) when the seventh argument is missing or false.
my $dbg = $ARGV[6] || 0;

# Refuse to start when the query sequence file is missing.
unless( -e $seq_file )
{
	die("ERROR: input fasta file '$seq_file' does not exist!\n");
}

# Root directory that holds the scr/ helper scripts; the PROFBVAL_ROOT
# environment variable takes precedence over the built-in location.
my $dir = $ENV{PROFBVAL_ROOT} || "__pkgdatadir__/";

# Scratch directory for this run. File::Temp is asked to clean it up unless
# debugging is enabled, so intermediate files survive a debug run.
my $resultsdir = File::Temp::tempdir( CLEANUP => !$dbg );

# Random target name (PROFbvalTMP0 .. PROFbvalTMP9999) passed to both helper
# scripts.
my $rand = int( rand( 10000 ) );
my $target_name = "PROFbvalTMP$rand";

# The two helper scripts that are run further down.
my $profBval_exe = "$dir/scr/PROFbval.pl";
my $createDataFile_exe = "$dir/scr/createDataFile.pl";

# Check the input sequence early, before any helper script is started.
# Lines starting with '>' (FASTA headers) are dropped; everything else is
# concatenated into one string.
my $inseq = '';
open( my $inseq_fh, '<', $seq_file ) or die( "failed to open < $seq_file: $!" );
$inseq = join( '', grep { $_ !~ /^>/o } <$inseq_fh> );
close( $inseq_fh );

# Normalise: upper case, all whitespace removed, one array element per
# character.
$inseq = uc($inseq);
$inseq =~ s/\s//go;
my @raw_all = split (//o, $inseq);

# Every character that is not an upper-case letter A-Z is reported together
# with its 1-based position; all problems are collected before giving up.
my @seqerrors =
	map  { "invalid amino acid code '$raw_all[$_]' at position ".( $_+1 ) }
	grep { $raw_all[$_] =~ /[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/o }
	0 .. $#raw_all;

die( "Invalid input sequence: ".join( ', ', @seqerrors ) ) if @seqerrors;

# Run the two helper scripts in order: createDataFile.pl first, then
# PROFbval.pl. Both receive the sequence file, the installation root, the
# random target name, the scratch directory and the debug flag; the second one
# additionally gets the output file(s), window size and output mode(s).
# The script stops at the first helper that does not finish with status 0.
_run_or_die( $createDataFile_exe, $seq_file, $rdbProf_file, $hssp_file, $dir, $target_name, $resultsdir, $dbg );
_run_or_die( $profBval_exe, $seq_file, $output_file, $wind_size, $mode, $target_name, $dir, $resultsdir, $dbg );

exit(0);

# _run_or_die( @cmd )
#
# Runs @cmd through the list form of system() (no shell involved) and dies
# unless the command exits with status 0. When debugging is enabled the
# command line is echoed to STDERR first.
#
# Three failure modes are told apart, because "$? >> 8" alone is misleading
# for the first two:
#   * the program could not be started ($? == -1): report $!
#   * the program was killed by a signal: report the signal number
#   * the program exited with a non-zero status: report that status
sub _run_or_die
{
	my @cmd = @_;

	if( $dbg ){ warn("@cmd"); }

	system( @cmd );
	my( $child_status, $os_error ) = ( $?, "$!" );	# capture before anything can clobber them
	return if $child_status == 0;

	if( $child_status == -1 )
	{
		die( "'@cmd' failed to execute: $os_error" );
	}
	if( $child_status & 127 )
	{
		die( sprintf( "'%s' died with signal %d%s", "@cmd", $child_status & 127, ( $child_status & 128 ) ? ' (core dumped)' : '' ) );
	}
	die( "'@cmd' failed: ".( $child_status >> 8 ) );
}

__END__

=pod

=head1 NAME

profbval - predicts flexible/rigid residues from sequence

=head1 SYNOPSIS

profbval <FASTA_FILE> <RDBPROF_FILE> <HSSP_FILE> <OUTPUT_FILE[,OUTPUT_FILE,...]> <WINDOW> <OUTPUT_MODE[,OUTPUT_MODE,...]> [DEBUG]

=head1 DESCRIPTION

PROFbval is a neural-network based method trained on backbone B-value data from X-ray structures. PROFbval was trained on a sequence-unique set of high-resolution protein structures from the PDB.

The mobility of a given residue on the protein surface is related to its functional role. A common measure of atom mobility in proteins is B-value data from x-ray crystallography structures. PROFbval is a method predicting backbone B-values from amino-acid sequence. PROFbval can be useful for both protein structure and function predictions. For instance, a biologist can locate potentially antigenic 
determinants by identifying the most flexible residues on the protein surface. Additionally, a crystallographer can locate residues that potentially have high experimental B-values. 

=head2 Conversion of PSI-BLAST alignment to HSSP format

The most up-to-date procedure can be found at L<https://www.rostlab.org/owiki/index.php/How_to_generate_an_HSSP_file_from_alignment#Generating_an_HSSP_profile>.

=over

=item 1. Convert BLAST output to a Single Alignment Format (SAF):

 __datadir__/librg-utils-perl/blast2saf.pl fasta=<query_fasta_file> maxAli=3000 eSaf=1 \
  saf=<saf_formatted_file> <blast_output>

=item 2. Convert SAF format to HSSP:

 __datadir__/librg-utils-perl/copf.pl <saf_formatted_file> formatIn=saf formatOut=hssp \
  fileOut=<hssp_formatted_file> exeConvertSeq=convert_seq

=item 3. Filter results to 80% redundancy:

 __datadir__/librg-utils-perl/hssp_filter.pl red=80 <hssp_formatted_file> fileOut=<filtered_hssp_formatted_file>

=back

=head2 Output format

This description applies to the default output format.

=over

=item number

residue number

=item residue

residue type

=item raw

raw value of the difference between the two output nodes

=item Bnorm

predicted normalized B-value. The values are normalized so that, in a given sequence, the average value is 0 and the standard deviation is 1. The higher the value, the more flexible a residue is predicted to be.

=item Non-strict mode (NS)

indicates a flexible residue. According to the NS mode most of the residues are flexible; hence, a residue on the surface that is predicted to be rigid is likely to have a functional role.

=item Strict mode (S)

indicates a flexible residue. According to the S mode about a third of the residues are flexible; therefore, a stretch of residues that is predicted to be flexible might be important for the protein function.

=back

=head1 REFERENCES

=over

=item Schlessinger A, Yachdav G, & Rost B. PROFbval: predict flexible and rigid residues in proteins. Bioinformatics. 2006 Apr 1;22(7):891-3. 

=item Schlessinger A, Rost B. Protein flexibility and rigidity predicted from sequence. Proteins. 2005 Oct 1;61(1):115-26. 

=back

=head1 OPTIONS

=over

=item FASTA_FILE

File containing protein amino-acid sequence in fasta format.

=item RDBPROF_FILE

Secondary structure and solvent accessibility prediction by PROF in rdb format. 

=item HSSP_FILE

PSI-BLAST alignment profile file converted to HSSP format.

=item OUTPUT_FILE

The name of the final PROFbval output file.  You may give multiple output files.  You then probably want to give a list of output modes as well, one for each output file.  Separate entries with ',' (comma is not allowed in the file name).

=item WINDOW

This is the desired smoothing window for the output. The method was trained on a window of 9.

=item OUTPUT_MODE

PROFbval can create output files in different formats for different purposes. The default output mode for the package is 6.  You may give multiple output modes.  You then probably want to give a list of output files as well, one for each output mode.  Separate entries with ','.

Modes: '-1', '0', '1', '2', '3', '4', '5', '6', 'snap', 'snapfun'

=over

=item 5

For metadisorder(1)

=item snap

=item snapfun

For snapfun(1)

=back

=item DEBUG

Default: 0. Enable with 1.

=back

=head1 EXAMPLE

 profbval __docdir__/examples/cad23.f __docdir__/examples/cad23-fil.rdbProf __docdir__/examples/cad23-fil.hssp cad23.profbval 9 6

=head1 ENVIRONMENT

=over

=item PROFBVAL_ROOT

The directory used to look up F<./scr/createDataFile.pl>, F<./scr/PROFbval.pl>  and F<./nn_files/jct.in>.  If unset F<__pkgdatadir__> is used.

=back

=head1 FILES

=over

=item F<*.profbval>

default output file extension

=item F<__docdir__/examples>

default precomputed input files directory

=back

=head1 NOTES

=over

=item 1. It is recommended to create the profiles using 3 iterations of PSI-BLAST against a big database

=item 2. It is also recommended to filter the hssp files using hssp_filter.pl from the Prof package using the following command: perl hssp_filter.pl hssp_file red=80 

=back

=head1 AUTHOR

=over

=item A. Schlessinger <avnersch@gmail.com>

=back

=head1 SEE ALSO

=over

=item Main website

L<http://www.predictprotein.org/>

=back

=cut

# vim:ai:
