#!/usr/bin/perl
use strict;
################################################################
################################################################
## ##############################
## ################
## ##
## ##
## PROGRAMA EN PERL PER A L'ANALISI DE LA SEQUENCIA PROMOTORA ##
## XAVIER VIŅALS I XAVIER SERRA ##
## ##
########### ##
################################### ##
################################################################
################################################################
# CALCUL DE LA LLARGADA I LA FREQUENCIA DE CADA NUCLEOTID EN LA SEQUENCIA PROMTOR) #
#######################################################################################
my $x;
my @vG;
my @vT;
my @vA;
my @vC;
my $l;
my $pctG;
my $pctA;
my $pctT;
my $pctC;
open (FH2, "<$ARGV[1]");
while (){#Aixi treiem la primera fila en cas que estigui en FASTA
chomp ($_);
if (m/\>/){
}
else {
$x = $x.$_;
}
}
$l = length($x);
print "llargada sequencia: $l\n";
@vG = ($x =~ m/[Gg]/g); #Expresions regulars per comptar el nombre de Gg o altres nucleotids
@vA = ($x =~ m/[Aa]/g);
@vT = ($x =~ m/[Tt]/g);
@vC = ($x =~ m/[Cc]/g);
$pctG = (scalar(@vG) / $l);
$pctT = (scalar(@vT) / $l);
$pctC = (scalar(@vC) / $l);
$pctA = (scalar(@vA) / $l);
print "frequencia G: $pctG\n";
print "frequencia T: $pctT\n";
print "frequencia C: $pctC\n";
print "frequencia A: $pctA\n\n";
close (FH2);
# INICI DE L'EXERCICI 1 I ACOPLAMENT AMB LA PART 2 #
#######################################################################################
my %matriu = ("A" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
"C" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
"G" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
"T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]);
my $posicio = 0;
my $i = 0;
my $ipos = 0;
open (FITXER,"<$ARGV[0]");
while () {
if (m/(FA)/) {
my $FAline = $_;
print $FAline;
print "Nucl.\t"; #Impressio d'una linia amb les diferents posicions
while ($ipos <= 9){
print "Pos.",$ipos,"\t";
$ipos = $ipos + 1;
}
$ipos = 0;
print "\n";
}
if (m/\d+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/){
$matriu{"A"}[$i] = $1;
$matriu{"C"}[$i] = $2;
$matriu{"G"}[$i] = $3;
$matriu{"T"}[$i] = $4;
if ($1 == 0) {
$matriu{"A"}[$i] = -999
}
else{
$matriu{"A"}[$i] = sprintf("%.4f", (log ($matriu{"A"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctA) / log 10)));
}
if ($2 == 0) {
$matriu{"C"}[$i] = -999
}
else{
$matriu{"C"}[$i] = sprintf("%.4f", (log ($matriu{"C"}[$i] / ($1 + $2 + $3 + $4))/(log (10)) - (log ($pctC) / log (10))));
}
if ($3 == 0) {
$matriu{"G"}[$i] = -999
}
else{
$matriu{"G"}[$i] = sprintf("%.4f", (log ($matriu{"G"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctG) / log 10)));
}
if ($4 == 0) {
$matriu{"T"}[$i] = -999
}
else{
$matriu{"T"}[$i] = sprintf("%.4f", (log ($matriu{"T"}[$i] / ($1 + $2 + $3 + $4))/(log 10) - (log ($pctT) / log 10)));
}
$i = $i + 1;
$posicio = $posicio +1;
}
# IMPRESSIO DE LA MATRIU #
#######################################################################################
if ((m/(\/\/)\Z/) && ($i > 1)){
my @k = keys (%matriu);
my $j = 0;
while ($j < scalar(@k)) {
my $l = 0;
print $k[$j];
while ($l < $posicio) {
print "\t$matriu{$k[$j]}[$l]";
$l = $l + 1;
}
print "\n";
$j = $j + 1;
}
# CALCUL DE L'SCORE DE CADA FACTOR #
#######################################################################################
my @y;
my $yprima;
open (FH3, "<$ARGV[1]"); #tornem a posar la segona sequencia en un filehandle
while (){
chomp ($_);
if (m/\>/){
}
else {
$yprima = $yprima.$_;
}
}
@y = split (//,$yprima);
my $compt1 = 0;
my $compt2 = 0;
my $puntuacio = 0;
my $score;
my $compt3 = 0;
my $posicioseq;
while ($compt1 < (scalar(@y) - ($i) + 1)){
$compt3 = 0;
$compt2 = $compt1;
while ($compt3 < $i) {
if ($y[$compt2] eq 'A' || $y[$compt2] eq 'a'){
$puntuacio = $puntuacio + $matriu{"A"}[$compt3];
}
if ($y[$compt2] eq 'C' || $y[$compt2] eq 'c'){
$puntuacio = $puntuacio + $matriu{"C"}[$compt3];
}
if ($y[$compt2] eq 'G' || $y[$compt2] eq 'g'){
$puntuacio = $puntuacio + $matriu{"G"}[$compt3];
}
if ($y[$compt2] eq 'T' || $y[$compt2] eq 't'){
$puntuacio = $puntuacio + $matriu{"T"}[$compt3];
}
$compt3 = $compt3 + 1;
$compt2 = $compt2 + 1;
}
if ($compt1 == 0){ #Aquesta primera condicio iterativa serveix per posar el valor de la primera puntuacio a la variable score
$score = $puntuacio;
}
$compt1 = $compt1 + 1;
if ($puntuacio > $score){
$score = $puntuacio;
$posicioseq = $compt2 - $i + 1;
}
$puntuacio = 0;
}
print "score: $score\n";
print "la/les posicions de la sequencia on el FT s'uneix amb el maxim score es/son: $posicioseq\n";
my $comptprova; #aquesta part serveix per imprimir la sequencia a on s'uneix el factor de transcripcio
my $printseq;
$printseq = $posicioseq;
print "el factor de transcripcio s'uneix a la seguent sequencia: ";
while ($comptprova < $i){
print "$y[$printseq]";
$printseq = $printseq +1;
$comptprova = $comptprova + 1;
}
print "\n";
close (FH3);
# CALCUL DE L'SCORE PER UNA SEQUENCIA GENERADA A L'ATZAR #
#######################################################################################
my @z = 0;
my $zprima = 0;
my $ex4compt1 = 0;
my $ex4compt2 = 0;
my $ex4puntuacio = 0;
my $ex4score;
my $ex4compt3 = 0;
my $ex4posicioseq;
my $ex4count = 0;
my $ex4c = 0;
open (FH4, "<$ARGV[1]"); #Tornem a posar la sequencia en un filehandle, encara que els hauriem pogut reutilitzar...
while ($ex4count < 100){
$ex4compt1 = 0;
while (){
chomp ($_);
if (m/\>/){
}
else {
$zprima = $zprima.$_;
}
}
@z = split (//,$zprima);
my $nr = scalar(@z);
my $ir = $nr - 1;
while ($ir >= 0) {
my $jr = int(rand($ir+1));
if ($ir != $jr) {
my $tmp = $z[$ir];
$z[$ir] = $z[$jr];
$z[$jr] = $tmp;
}
$ir = $ir - 1;
}
while ($ex4compt1 < (scalar(@z) - ($i) + 1)){
$ex4compt3 = 0;
$ex4compt2 = $ex4compt1;
while ($ex4compt3 < $i) {
if ($z[$ex4compt2] eq 'A' || $z[$ex4compt2] eq 'a'){
$ex4puntuacio = $ex4puntuacio + $matriu{"A"}[$ex4compt3];
}
if ($z[$ex4compt2] eq 'C' || $z[$ex4compt2] eq 'c'){
$ex4puntuacio = $ex4puntuacio + $matriu{"C"}[$ex4compt3];
}
if ($z[$ex4compt2] eq 'G' || $z[$ex4compt2] eq 'g'){
$ex4puntuacio = $ex4puntuacio + $matriu{"G"}[$ex4compt3];
}
if ($z[$ex4compt2] eq 'T' || $z[$ex4compt2] eq 't'){
$ex4puntuacio = $ex4puntuacio + $matriu{"T"}[$ex4compt3];
}
$ex4compt3 = $ex4compt3 + 1;
$ex4compt2 = $ex4compt2 + 1;
}
$ex4compt1 = $ex4compt1 + 1;
if ($ex4puntuacio >= $score){
$ex4c = $ex4c + 1;
$ex4compt1 = scalar(@z) - $i + 1;
}
$ex4puntuacio = 0;
}
$ex4count = $ex4count + 1;
}
my $pvalue;
$pvalue = $ex4c/100;
print "pvalue: $pvalue\n\n";
close (FH4);
%matriu = ( "A" => [0, 0, 0, 0, 0, 0, 0, 0, 0], #Tornem a inicialitzar la matriu
"C" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
"G" => [0, 0, 0, 0, 0, 0, 0, 0, 0],
"T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]);
$i = 0; #i tornem a comen$ar!!!
$posicio = 0;
}
}
close (FITXER);