Primera part (Programa que llegeix el fitxer de matrius, matriu a matriu, enregistrant-les en la memoria com un hash de vectors)

#!/usr/bin/perl -w
# Part 1: read a matrix file one matrix at a time, storing each matrix in
# memory as a hash of arrays (one array of counts per base), and print it.
#
# Usage: <script> <matrix_file>
use strict;

# Build a blank matrix: one row of nine zeros for each base.
sub empty_matrix {
    return map { $_ => [ (0) x 9 ] } qw(A C G T);
}

@ARGV >= 1 or die "Usage: $0 <matrix_file>\n";

my %hash = empty_matrix();

open(my $fitxer, '<', $ARGV[0])
    or die "Cannot open matrix file '$ARGV[0]': $!\n";

my $FT;
my $p = 0;    # next position (column) to fill in the current matrix
my $c = 0;    # number of count rows read for the current matrix

while (<$fitxer>) {
    chomp($_);

    # Lines containing "FA" are echoed as the heading of the matrix.
    if ($_ =~ m/FA/) {
        $FT = $_;
        print "$FT\n";
    }

    # Count row: a number, a tab, then the four counts stored as A, C, G, T.
    if ($_ =~ m/[\d]+[\t](\d+)[\s]+(\d+)[\s]+(\d+)[\s]+(\d+)/) {
        $hash{"A"}[$p] = $1;
        $hash{"C"}[$p] = $2;
        $hash{"G"}[$p] = $3;
        $hash{"T"}[$p] = $4;
        $p = $p + 1;
        $c = $c + 1;
    }

    # A line ending in "//" closes the matrix; print it if it has rows.
    if (($_ =~ m/(\/\/)\Z/) && $c > 1) {
        for my $base ("A", "C", "G", "T") {
            print "$base: ";
            for my $j (0 .. $p - 1) {
                print "\t$hash{$base}[$j]";
            }
            print "\n";
        }

        # Reset the shared matrix. Assign to the outer %hash; declaring a
        # new "my %hash" here would only create a throw-away shadow.
        %hash = empty_matrix();
        $p    = 0;
        $c    = 0;
    }
}
close($fitxer);

Segona part (Programa que transforma cada matriu de cadascun dels FTs en una matriu de pesos)

#!/usr/bin/perl -w
# Part 2: turn every count matrix into a weight matrix, using the base
# frequencies of the promoter sequence as background.
#
# Usage: <script> <sequence_file> <matrix_file>
use strict;

# Build a blank matrix: one row of nine zeros for each base.
sub empty_matrix {
    return map { $_ => [ (0) x 9 ] } qw(A C G T);
}

@ARGV >= 2 or die "Usage: $0 <sequence_file> <matrix_file>\n";

open(my $seqpromotora, '<', $ARGV[0])
    or die "Cannot open sequence file '$ARGV[0]': $!\n";

# Join all lines of the sequence file into one string.
my $seqfinal = "";
while (<$seqpromotora>) {
    chomp($_);
    $seqfinal = $seqfinal . $_;
}
close($seqpromotora);

my $longitutvector = length($seqfinal);
$longitutvector > 0
    or die "Sequence file '$ARGV[0]' contains no sequence\n";

# Background proportion of each base (case-insensitive) over the whole
# concatenated text. tr/// with an empty replacement only counts.
my $propA = ($seqfinal =~ tr/Aa//) / $longitutvector;
my $propC = ($seqfinal =~ tr/Cc//) / $longitutvector;
my $propG = ($seqfinal =~ tr/Gg//) / $longitutvector;
my $propT = ($seqfinal =~ tr/Tt//) / $longitutvector;
my %prop  = ("A" => $propA, "C" => $propC, "G" => $propG, "T" => $propT);

my %hash = empty_matrix();

open(my $fitxer, '<', $ARGV[1])
    or die "Cannot open matrix file '$ARGV[1]': $!\n";

my $FT;
my $p = 0;    # next position (column) to fill in the current matrix
my $c = 0;    # number of count rows read for the current matrix

while (<$fitxer>) {
    chomp($_);

    # Lines containing "FA" are echoed as the heading of the matrix.
    if ($_ =~ m/FA/) {
        $FT = $_;
        print "$FT\n";
    }

    # Count row: a number, a tab, then the four counts stored as A, C, G, T.
    if ($_ =~ m/[\d]+[\t](\d+)[\s]+(\d+)[\s]+(\d+)[\s]+(\d+)/) {
        $hash{"A"}[$p] = $1;
        $hash{"C"}[$p] = $2;
        $hash{"G"}[$p] = $3;
        $hash{"T"}[$p] = $4;
        $p = $p + 1;
        $c = $c + 1;
    }

    # A line ending in "//" closes the matrix.
    if (($_ =~ m/(\/\/)\Z/) && $c > 1) {
        my $sm = $p;    # number of positions in this matrix

        # Replace each count by log(count/total) - log(background);
        # a zero count gets the sentinel weight -999 instead of log(0).
        # NOTE: log() dies if a base with a non-zero count never occurs in
        # the sequence (its background proportion is 0).
        for my $pos (0 .. $sm - 1) {
            my $total = $hash{"A"}[$pos] + $hash{"C"}[$pos]
                      + $hash{"G"}[$pos] + $hash{"T"}[$pos];
            for my $base ("A", "C", "G", "T") {
                if ($hash{$base}[$pos] == 0) {
                    $hash{$base}[$pos] = -999;
                }
                else {
                    $hash{$base}[$pos] =
                        log($hash{$base}[$pos] / $total) - log($prop{$base});
                }
            }
        }

        for my $base ("A", "C", "G", "T") {
            print "$base: ";
            for my $j (0 .. $sm - 1) {
                # Format through a temporary so only 3 decimals are printed.
                my $tmp = sprintf("%.3f", $hash{$base}[$j]);
                if ($tmp == -999.000) {
                    print "\t-999";
                }
                else {
                    print "\t$tmp";
                }
            }
            print "\n";
        }

        # Reset the shared matrix. Assign to the outer %hash; declaring a
        # new "my %hash" here would only create a throw-away shadow.
        %hash = empty_matrix();
        $p    = 0;
        $c    = 0;
    }
}
close($fitxer);

Tercera part (Programa que calcula la puntuacio maxima que s'obte al llarg de la sequencia, aixi com la posicio del lloc d'unio que la proporciona)

#!/usr/bin/perl -w
# Part 3: for every matrix, compute the maximum score obtained along the
# promoter sequence and the position of the window that yields it.
#
# Usage: <script> <sequence_file> <matrix_file>
use strict;

# Build a blank matrix: one row of nine zeros for each base.
sub empty_matrix {
    return map { $_ => [ (0) x 9 ] } qw(A C G T);
}

# Slide a motif of $motif_len positions along the sequence and score every
# window by summing the weight of each nucleotide at its motif position.
#   $seq       - array ref of single characters
#   $weights   - hash ref: base => array ref of weights per motif position
#   $motif_len - number of positions in the motif
# Returns (best score, 0-based start of the best window). The best score
# starts at -9999, so (-9999, 0) is returned when no window beats it.
# Characters other than A/C/G/T (any case) contribute nothing to a score.
sub max_window_score {
    my ($seq, $weights, $motif_len) = @_;

    my $best_score = -9999;
    my $best_pos   = 0;
    for my $z (0 .. scalar(@$seq) - $motif_len) {
        my $score = 0;
        for my $n (0 .. $motif_len - 1) {
            my $base = uc($seq->[$z + $n]);
            if (exists $weights->{$base}) {
                $score = $score + $weights->{$base}[$n];
            }
        }
        if ($score > $best_score) {
            $best_score = $score;
            $best_pos   = $z;
        }
    }
    return ($best_score, $best_pos);
}

@ARGV >= 2 or die "Usage: $0 <sequence_file> <matrix_file>\n";

open(my $seqpromotora, '<', $ARGV[0])
    or die "Cannot open sequence file '$ARGV[0]': $!\n";

# Join all lines of the sequence file into one string.
my $seqfinal = "";
while (<$seqpromotora>) {
    chomp($_);
    $seqfinal = $seqfinal . $_;
}
close($seqpromotora);

my @v              = split(//, $seqfinal);
my $longitutvector = scalar(@v);
$longitutvector > 0
    or die "Sequence file '$ARGV[0]' contains no sequence\n";

# Background proportion of each base (case-insensitive) over the whole
# concatenated text. tr/// with an empty replacement only counts.
my $propA = ($seqfinal =~ tr/Aa//) / $longitutvector;
my $propC = ($seqfinal =~ tr/Cc//) / $longitutvector;
my $propG = ($seqfinal =~ tr/Gg//) / $longitutvector;
my $propT = ($seqfinal =~ tr/Tt//) / $longitutvector;
my %prop  = ("A" => $propA, "C" => $propC, "G" => $propG, "T" => $propT);

my %hash = empty_matrix();

open(my $fitxer, '<', $ARGV[1])
    or die "Cannot open matrix file '$ARGV[1]': $!\n";

my $FT;
my $p = 0;    # next position (column) to fill in the current matrix
my $c = 0;    # number of count rows read for the current matrix
my $sm;       # number of positions in the matrix being processed

while (<$fitxer>) {
    chomp($_);

    # Lines containing "FA" are echoed as the heading of the matrix.
    if ($_ =~ m/FA/) {
        $FT = $_;
        print "$FT\n";
    }

    # Count row: a number, a tab, then the four counts stored as A, C, G, T.
    if ($_ =~ m/[\d]+[\t](\d+)[\s]+(\d+)[\s]+(\d+)[\s]+(\d+)/) {
        $hash{"A"}[$p] = $1;
        $hash{"C"}[$p] = $2;
        $hash{"G"}[$p] = $3;
        $hash{"T"}[$p] = $4;
        $p = $p + 1;
        $c = $c + 1;
    }

    # A line ending in "//" closes the matrix.
    if (($_ =~ m/(\/\/)\Z/) && $c > 1) {
        $sm = $p;

        # Replace each count by log(count/total) - log(background);
        # a zero count gets the sentinel weight -999 instead of log(0).
        # NOTE: log() dies if a base with a non-zero count never occurs in
        # the sequence (its background proportion is 0).
        for my $pos (0 .. $sm - 1) {
            my $total = $hash{"A"}[$pos] + $hash{"C"}[$pos]
                      + $hash{"G"}[$pos] + $hash{"T"}[$pos];
            for my $base ("A", "C", "G", "T") {
                if ($hash{$base}[$pos] == 0) {
                    $hash{$base}[$pos] = -999;
                }
                else {
                    $hash{$base}[$pos] =
                        log($hash{$base}[$pos] / $total) - log($prop{$base});
                }
            }
        }

        my ($scoremax, $posiciomax) = max_window_score(\@v, \%hash, $sm);
        print "$scoremax\n";
        print "$posiciomax\n";

        %hash = empty_matrix();
        $p    = 0;
        $c    = 0;
    }
}
close($fitxer);

Quarta part (Programa que calcula el p-value de cada FT respecte la sequencia promotora)

#!/usr/bin/perl -w
# Part 4: for every matrix, compute the maximum score along the promoter
# sequence and an empirical p-value for it, obtained by re-scoring random
# permutations of the sequence.
#
# Usage: <script> <sequence_file> <matrix_file>
use strict;

# Number of random permutations used to estimate each p-value.
my $num_permutacions = 100;

# Build a blank matrix: one row of nine zeros for each base.
sub empty_matrix {
    return map { $_ => [ (0) x 9 ] } qw(A C G T);
}

# Slide a motif of $motif_len positions along the sequence and score every
# window by summing the weight of each nucleotide at its motif position.
#   $seq       - array ref of single characters
#   $weights   - hash ref: base => array ref of weights per motif position
#   $motif_len - number of positions in the motif
# Returns (best score, 0-based start of the best window). The best score
# starts at -9999, so (-9999, 0) is returned when no window beats it.
# Characters other than A/C/G/T (any case) contribute nothing to a score.
sub max_window_score {
    my ($seq, $weights, $motif_len) = @_;

    my $best_score = -9999;
    my $best_pos   = 0;
    for my $z (0 .. scalar(@$seq) - $motif_len) {
        my $score = 0;
        for my $n (0 .. $motif_len - 1) {
            my $base = uc($seq->[$z + $n]);
            if (exists $weights->{$base}) {
                $score = $score + $weights->{$base}[$n];
            }
        }
        if ($score > $best_score) {
            $best_score = $score;
            $best_pos   = $z;
        }
    }
    return ($best_score, $best_pos);
}

@ARGV >= 2 or die "Usage: $0 <sequence_file> <matrix_file>\n";

open(my $seqpromotora, '<', $ARGV[0])
    or die "Cannot open sequence file '$ARGV[0]': $!\n";

# Join all lines of the sequence file into one string.
my $seqfinal = "";
while (<$seqpromotora>) {
    chomp($_);
    $seqfinal = $seqfinal . $_;
}
close($seqpromotora);

my @v              = split(//, $seqfinal);
my $longitutvector = scalar(@v);
$longitutvector > 0
    or die "Sequence file '$ARGV[0]' contains no sequence\n";

# Background proportion of each base (case-insensitive) over the whole
# concatenated text. tr/// with an empty replacement only counts.
my $propA = ($seqfinal =~ tr/Aa//) / $longitutvector;
my $propC = ($seqfinal =~ tr/Cc//) / $longitutvector;
my $propG = ($seqfinal =~ tr/Gg//) / $longitutvector;
my $propT = ($seqfinal =~ tr/Tt//) / $longitutvector;
my %prop  = ("A" => $propA, "C" => $propC, "G" => $propG, "T" => $propT);

my %hash = empty_matrix();

open(my $fitxer, '<', $ARGV[1])
    or die "Cannot open matrix file '$ARGV[1]': $!\n";

my $FT;
my $p = 0;    # next position (column) to fill in the current matrix
my $c = 0;    # number of count rows read for the current matrix
my $sm;       # number of positions in the matrix being processed

while (<$fitxer>) {
    chomp($_);

    # Lines containing "FA" are echoed as the heading of the matrix.
    if ($_ =~ m/FA/) {
        $FT = $_;
        print "$FT\n";
    }

    # Count row: a number, a tab, then the four counts stored as A, C, G, T.
    if ($_ =~ m/[\d]+[\t](\d+)[\s]+(\d+)[\s]+(\d+)[\s]+(\d+)/) {
        $hash{"A"}[$p] = $1;
        $hash{"C"}[$p] = $2;
        $hash{"G"}[$p] = $3;
        $hash{"T"}[$p] = $4;
        $p = $p + 1;
        $c = $c + 1;
    }

    # A line ending in "//" closes the matrix.
    if (($_ =~ m/(\/\/)\Z/) && $c > 1) {
        $sm = $p;

        # Replace each count by log(count/total) - log(background);
        # a zero count gets the sentinel weight -999 instead of log(0).
        # NOTE: log() dies if a base with a non-zero count never occurs in
        # the sequence (its background proportion is 0).
        for my $pos (0 .. $sm - 1) {
            my $total = $hash{"A"}[$pos] + $hash{"C"}[$pos]
                      + $hash{"G"}[$pos] + $hash{"T"}[$pos];
            for my $base ("A", "C", "G", "T") {
                if ($hash{$base}[$pos] == 0) {
                    $hash{$base}[$pos] = -999;
                }
                else {
                    $hash{$base}[$pos] =
                        log($hash{$base}[$pos] / $total) - log($prop{$base});
                }
            }
        }

        my ($scoremax, $posiciomax) = max_window_score(\@v, \%hash, $sm);
        print "L'score màxim és: $scoremax\n";
        print "La posició d'aquest score és: $posiciomax\n";

        # Count how many shuffled sequences reach at least the real score.
        my $cops = 0;
        for my $bucle (1 .. $num_permutacions) {
            # In-place shuffle of a copy, walking from the last index down
            # and swapping each element with a random one at or below it.
            my @permutat = @v;
            my $x        = $longitutvector - 1;
            while ($x >= 0) {
                my $y = int(rand($x + 1));
                if ($x != $y) {
                    @permutat[$x, $y] = @permutat[$y, $x];
                }
                $x = $x - 1;
            }

            my ($scorefals) = max_window_score(\@permutat, \%hash, $sm);
            if ($scorefals >= $scoremax) {
                $cops = $cops + 1;
            }
        }

        my $pvalue = $cops / $num_permutacions;
        print "El p-value és: $pvalue\n";

        %hash = empty_matrix();
        $p    = 0;
        $c    = 0;
    }
}
close($fitxer);