#!/usr/sbin/perl
# Parse vnmr peak files into a morass input file, with auto volume division for Thy methyls
# D.E. Volk 6/98 UTMB Galveston volk@nmr.utmb.edu
#
#  Final working version 061998
#
# Get the name of the input file
#
#	print "What is the name of the varian input file?";
#	$vnmrfile = <STDIN>;
#	chop($vnmrfile);
#	 print "The vnmr peaks input file = $vnmrfile\n";
#
# Get the Assignments from the vnmrfile- read all lines
#
#open(MYIN, "$vnmrfile") || die 'Cannot open file "$vnmrfile".\n';
# The interactive prompt is disabled, so the peak file name is fixed here.
# (This assignment overrides the prompt; remove it if the prompt is re-enabled.)
$vnmrfile = "allpeaks";
open(my $peaks_fh, '<', $vnmrfile)
	or die "Cannot open file \"$vnmrfile\": $!\n";
#
# Every input line contributes exactly five items to @all: short lines are
# padded with undef and any fields past the fifth are dropped.  The fixed
# 30-item header skip and the 25-item peak stride used later in this script
# rely on that, so the padding must be kept.
#
while (my $line = <$peaks_fh>) {
	my @fields = split(' ', $line);
	push(@all, @fields[0 .. 4]);
	}
close($peaks_fh);
#
# Shift 30 items off the front of the array to remove the junk from the first 6 lines (5 items per line)
#
# Drop the leading 30 items of @all in a single step; if fewer than 30 items
# were read, the array simply ends up empty.
splice(@all, 0, 30);
#
# Determine the number of peaks read from the file (assigned or not!)
#
# A peak occupies 25 consecutive items, so the peak count is the item count
# divided by 25 (it may be fractional if the last record is incomplete).
$lmt = scalar(@all) / 25;
#	print "$lmt\n";
#
# @V is a copy of @all under a shorter name, used by the peak loop.
#
@V = (@all);
#
#
# Print out the header info
# 

	# Table banner: a rule line, the column titles, and the rule line again.
	my $rule = "===========================================================================\n";
	my $titles = "   I     J   VOLUME    ATOM(I)     ATOM(J)        RATE    DIST(A)  PPM(I)  PPM(J)\n";
	print $rule . $titles . $rule;
#
# Sort out the useful data, a new peak starts every 25th item
# 
#
# Helpers for the peak loop at the bottom of this section.
#

# morass_proton_name(NAME)
# Rename the geminal sugar protons to the names the morass input expects:
# H2' -> H2'1, H2'' -> H2'2, H5' -> H5'1, H5'' -> H5'2.  Any other name is
# returned unchanged.
sub morass_proton_name {
	my ($name) = @_;
	my %morass_name_for = (
		"H2'"  => "H2'1",
		"H2''" => "H2'2",
		"H5'"  => "H5'1",
		"H5''" => "H5'2",
	);
	return exists $morass_name_for{$name} ? $morass_name_for{$name} : $name;
}

# parse_assignment(ATOM)
# Split one half of a peak label into (residue letter, residue number,
# proton name), e.g. "A7H1'" --> ("A", "7", "H1'") and "A7H2''" --> ("A", "7", "H2'2").
# A missing or empty ATOM yields a proton name of just "H", which the caller
# treats as "peak not assigned".
sub parse_assignment {
	my ($atom) = @_;
	my ($residue, $proton) = split(/H/, $atom);	# "A7H1'" --> "A7" and "1'"
	my ($letter) = split(/\d/, $atom);		# "A7H1'" --> "A"
	my $number = substr($residue, 1, 2);		# "A7" --> "7"; keeps at most two characters
	return ($letter, $number, morass_proton_name("H" . $proton));
}

# expand_methyl(LETTER, PROTON)
# A thymidine "H5" assignment stands for the methyl group and is written out
# as the three protons H71, H72 and H73; everything else stays a single name.
sub expand_methyl {
	my ($letter, $proton) = @_;
	return ("H71", "H72", "H73") if $letter eq "T" && $proton eq "H5";
	return ($proton);
}

# morass_peak_lines(PEAK_ID, VOLUME, LABEL)
# Build the output line(s) for one peak.  LABEL looks like "A7H8_A7H1'".
# Returns an empty list when the first assignment is missing.  Otherwise one
# line is returned per proton pair (1, 3 or 9 lines), with VOLUME shared
# equally between them, first-side proton varying slowest.
#
# NOTE(review): the I and J columns are both filled from PEAK_ID, exactly as
# the earlier version of this script did -- confirm that is what morass wants.
sub morass_peak_lines {
	my ($peak_id, $volume, $label) = @_;
	my ($first, $second) = split(/_/, $label);	# "A7H8_A7H1'" --> "A7H8" and "A7H1'"
	my ($res1, $num1, $p1) = parse_assignment($first);
	my ($res2, $num2, $p2) = parse_assignment($second);

	return () if $p1 eq 'H';			# peak not assigned

	my @names1 = expand_methyl($res1, $p1);
	my @names2 = expand_methyl($res2, $p2);
	# Each side is expanded on its own residue AND its own proton name; the
	# earlier code checked $p1 twice and printed 9 extra lines for peaks like
	# T7H5_T8H6.
	my $share = $volume / (scalar(@names1) * scalar(@names2));

	my @lines;
	foreach my $name1 (@names1) {
		foreach my $name2 (@names2) {
			push(@lines, sprintf("%5d%5d    %6.4f  D%1s  %3d %4s  D%1s  %3d %4s\n",
				$peak_id, $peak_id, $share,
				$res1, $num1, $name1, $res2, $num2, $name2));
		}
	}
	return @lines;
}

#
# Walk the peaks.  Within each 25-item record, item 0 is the label, item 10
# is the number printed in the I/J columns and item 14 is the volume.
#
foreach my $peak (1 .. $lmt) {
	my $base = 25 * ($peak - 1);
	my @lines = morass_peak_lines($V[$base + 10], $V[$base + 14], $V[$base]);
	print @lines;
}

