#!/usr/bin/perl

use strict;

                                        ################################################################
                                        ################################################################
                                        ##                                ##############################
                                        ##                                              ################
                                        ##                                                            ##
                                        ##                                                            ##
                                        ## PROGRAMA EN PERL PER A L'ANALISI DE LA SEQUENCIA PROMOTORA ##
                                        ##                          XAVIER VIŅALS I XAVIER SERRA      ##
                                        ##                                                            ##
                                        ###########                                                   ##
                                        ###################################                           ##
                                        ################################################################
                                        ################################################################
    
  
                           #   CALCUL DE LA LLARGADA I LA FREQUENCIA DE CADA NUCLEOTID EN LA SEQUENCIA PROMTOR)  #
			   #######################################################################################

my $x;
my @vG;
my @vT;
my @vA;
my @vC;
my $l;
my $pctG;
my $pctA;
my $pctT;
my $pctC;

open (FH2, "<$ARGV[1]");

while (){#Aixi treiem la primera fila en cas que estigui en FASTA
  chomp ($_);
  if (m/\>/){
  }

  else {
    $x = $x.$_;
  }
}

$l = length($x); 
print "llargada sequencia: $l\n";

@vG = ($x =~ m/[Gg]/g); #Expresions regulars per comptar el nombre de Gg o altres nucleotids
@vA = ($x =~ m/[Aa]/g);
@vT = ($x =~ m/[Tt]/g);
@vC = ($x =~ m/[Cc]/g);

$pctG = (scalar(@vG) / $l);
$pctT = (scalar(@vT) / $l);
$pctC = (scalar(@vC) / $l);
$pctA = (scalar(@vA) / $l);

print "frequencia G: $pctG\n";
print "frequencia T: $pctT\n";
print "frequencia C: $pctC\n";
print "frequencia A: $pctA\n\n";

close (FH2);

                                             # INICI DE L'EXERCICI 1 I ACOPLAMENT AMB LA PART 2 #
			   #######################################################################################


my %matriu = ("A" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
               "C" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
               "G" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
               "T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]);


my $posicio = 0;
my $i = 0;
my $ipos = 0;

open (FITXER,"<$ARGV[0]");

while () {

  if (m/(FA)/) {
    my $FAline = $_;
    print $FAline;
    print "Nucl.\t"; #Impressio d'una linia amb les diferents posicions
    while ($ipos <= 9){
      print "Pos.",$ipos,"\t";
      $ipos = $ipos + 1;
    }
    $ipos = 0;
    print "\n";
}  

  if (m/\d+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/){  

   
      $matriu{"A"}[$i] = $1;
      $matriu{"C"}[$i] = $2;
      $matriu{"G"}[$i] = $3;
      $matriu{"T"}[$i] = $4;

      if ($1 == 0) {
	$matriu{"A"}[$i] = -999
      }
      else{
	$matriu{"A"}[$i] = sprintf("%.4f", (log ($matriu{"A"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctA) / log 10)));
      }

      if ($2 == 0) {
	$matriu{"C"}[$i] = -999
      }	
      else{
	$matriu{"C"}[$i] = sprintf("%.4f", (log ($matriu{"C"}[$i] / ($1 + $2 + $3 + $4))/(log (10)) - (log ($pctC) / log (10))));
      }

      if ($3 == 0) {
	$matriu{"G"}[$i] = -999
      }      
      else{
	$matriu{"G"}[$i] = sprintf("%.4f", (log ($matriu{"G"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctG) / log 10)));
      }

      if ($4 == 0) {
	$matriu{"T"}[$i] = -999
      }      
      else{
	$matriu{"T"}[$i] = sprintf("%.4f", (log ($matriu{"T"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctT) / log 10)));
      }
 
	  
    $i = $i + 1;
    $posicio = $posicio +1;

  }

  
                                                         # IMPRESSIO DE LA MATRIU #
			   #######################################################################################

  if ((m/(\/\/)\Z/) && ($i > 1)){

	my @k = keys (%matriu);
	my $j = 0;

	while ($j < scalar(@k)) {
	  my $l = 0;
	  
	  print $k[$j];

	  while ($l < $posicio) {
	   print "\t$matriu{$k[$j]}[$l]";
	    $l = $l + 1;
	  }
	  print "\n";
	  $j = $j + 1;
	}

	                                                # CALCUL DE L'SCORE DE CADA FACTOR #
			   #######################################################################################

	my @y;
	my $yprima;

	open (FH3, "<$ARGV[1]"); #tornem a posar la segona sequencia en un filehandle

	while (){
	  chomp ($_);
	  if (m/\>/){
	  }

	  else {
	    $yprima = $yprima.$_;
	  }
	}

	@y = split (//,$yprima);

	my $compt1 = 0;
	my $compt2 = 0;
	my $puntuacio = 0;
	my $score;
	my $compt3 = 0;
	my $posicioseq;

	while ($compt1 < (scalar(@y) - ($i) + 1)){

	  $compt3 = 0;
	  $compt2 = $compt1;

	  while ($compt3 < $i) {

	    if ($y[$compt2] eq 'A' || $y[$compt2] eq 'a'){
	      $puntuacio = $puntuacio + $matriu{"A"}[$compt3];
	    }

	    if ($y[$compt2] eq 'C' || $y[$compt2] eq 'c'){
	      $puntuacio = $puntuacio + $matriu{"C"}[$compt3];
	    }

	    if ($y[$compt2] eq 'G' || $y[$compt2] eq 'g'){
	      $puntuacio = $puntuacio + $matriu{"G"}[$compt3];
	    }

	    if ($y[$compt2] eq 'T' || $y[$compt2] eq 't'){
	      $puntuacio = $puntuacio + $matriu{"T"}[$compt3];
	    }

	    $compt3 = $compt3 + 1;
	    $compt2 = $compt2 + 1; 
      
	  }
	  if ($compt1 == 0){ #Aquesta primera condicio iterativa serveix per posar el valor de la primera puntuacio a la variable score
	    $score = $puntuacio;
	  }

	  $compt1 = $compt1 + 1;

	  if ($puntuacio > $score){ 
	    $score = $puntuacio; 
	    $posicioseq = $compt2 - $i + 1;
	    }
	    $puntuacio = 0;
	}

	print "score: $score\n";
	print "la/les posicions de la sequencia on el FT s'uneix amb el maxim score es/son: $posicioseq\n";
	
	my $comptprova; #aquesta part serveix per imprimir la sequencia a on s'uneix el factor de transcripcio
	my $printseq;
	$printseq = $posicioseq;
	print "el factor de transcripcio s'uneix a la seguent sequencia: ";
	while ($comptprova < $i){
	  print "$y[$printseq]";
	  $printseq = $printseq +1;
	  $comptprova = $comptprova + 1;
	}
	print "\n";
	close (FH3);


                                            # CALCUL DE L'SCORE PER UNA SEQUENCIA GENERADA A L'ATZAR #
				   #######################################################################################

	my @z = 0;
	my $zprima = 0;

	my $ex4compt1 = 0;
	my $ex4compt2 = 0;
	my $ex4puntuacio = 0;
	my $ex4score;
	my $ex4compt3 = 0;
	my $ex4posicioseq;
	my $ex4count = 0;
	my $ex4c = 0;

	open (FH4, "<$ARGV[1]"); #Tornem a posar la sequencia en un filehandle, encara que els hauriem pogut reutilitzar...
	while ($ex4count < 100){ 
	    $ex4compt1 = 0;

	    while (){
		chomp ($_);
		if (m/\>/){
		}

		else {
		    $zprima = $zprima.$_;
		}
	    }

	    @z = split (//,$zprima);
	    my $nr = scalar(@z);
	    my $ir = $nr - 1;
	    while ($ir >= 0) {
		my $jr = int(rand($ir+1));
		if ($ir != $jr) {
		    my $tmp = $z[$ir];

		    $z[$ir] = $z[$jr];
		    $z[$jr] = $tmp;
		}

		$ir = $ir - 1;
	    }

	    while ($ex4compt1 < (scalar(@z) - ($i) + 1)){
	      
		$ex4compt3 = 0;
		$ex4compt2 = $ex4compt1;

		while ($ex4compt3 < $i) {
	    
		    if ($z[$ex4compt2] eq 'A' || $z[$ex4compt2] eq 'a'){
			$ex4puntuacio = $ex4puntuacio + $matriu{"A"}[$ex4compt3];
	 
		    }
		    if ($z[$ex4compt2] eq 'C' || $z[$ex4compt2] eq 'c'){
			$ex4puntuacio = $ex4puntuacio + $matriu{"C"}[$ex4compt3];
		    }
		    if ($z[$ex4compt2] eq 'G' || $z[$ex4compt2] eq 'g'){
			$ex4puntuacio = $ex4puntuacio + $matriu{"G"}[$ex4compt3];
		    }
		    if ($z[$ex4compt2] eq 'T' || $z[$ex4compt2] eq 't'){
			$ex4puntuacio = $ex4puntuacio + $matriu{"T"}[$ex4compt3];
		    }
		    $ex4compt3 = $ex4compt3 + 1;
		    $ex4compt2 = $ex4compt2 + 1;  
		    
		}
		
		$ex4compt1 = $ex4compt1 + 1;
		if ($ex4puntuacio >= $score){      

		    $ex4c = $ex4c + 1;
		    $ex4compt1 = scalar(@z) - $i + 1;
		}
		
		$ex4puntuacio = 0;
		
	    }

	    $ex4count = $ex4count + 1;
	    
	}
	
	my $pvalue;
	$pvalue = $ex4c/100;
	print "pvalue: $pvalue\n\n";

	close (FH4);


	%matriu = ( "A" => [0, 0, 0, 0, 0, 0, 0, 0, 0], #Tornem a inicialitzar la matriu
		    "C" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
		    "G" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
		    "T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]);

	$i = 0; #i tornem a comen$ar!!!
	$posicio = 0;
      }

}


close (FITXER);