#!/usr/bin/perl -w # # statistics.pl # # statistical analysis of data coming from localitzador.pl # # bmorancho & jplanas - UPF - 2002/02/28 # use strict; my $PROG = "$0"; my (@exon,@intron,@pre,@post); my $starting = $ARGV[0]; my $ending = $ARGV[1]; &parse_argvs(); exit (0); sub parse_argvs() { if (scalar(@ARGV < 1)) { print "USAGE: $PROG \n"; exit (1); return; }; if (scalar(@ARGV == 1)) { &analyze ($ARGV[0]); return; }; if (scalar(@ARGV == 2)) { my $counter = $ARGV[0]; while ($counter <= $ending) { &analyze ($counter); $counter ++; } #while return; }; if (scalar(@ARGV > 2)) { print "USAGE: $PROG \n"; exit (1); return; }; } # parse_argvs sub analyze() { open(GLOBAL, "< global.out ") || do { print STDERR "$PROG: global.out cannot be loaded\n"; print STDERR "glolbal.out is needed for running this program\n"; print STDERR "fatal error loadint global.out\n"; exit(1); }; while () { my @global = split /\t/o, $_; if ($global[0] eq "exon") { @exon = split /\s+/o, $_ }; if ($global[0] eq "intron") { @intron = split /\s+/o, $_ }; if ($global[0] eq "pre") { @pre = split /\s+/o, $_ }; if ($global[0] eq "post") { @post = split /\s+/o, $_ }; }; my @high_dens; my @low_dens; my $chr = shift @_; my $open_file_name = "Chr".$chr."_raw.out"; my $stat_file_name = "Chr".$chr."_stat.html"; open(CHROMOSOME, "< $open_file_name ") || do { print STDERR "$PROG: $open_file_name cannot be loaded\n"; print STDERR "fatal error loading $open_file_name\n"; exit(1); }; open(STATISTICS, "> $stat_file_name"); print STATISTICS "\n\n\nChromosome $chr\n\n

\n"; print STATISTICS "Chromosome $chr Data
\n"; print STATISTICS "Chromosome $chr Summary
\n"; print STATISTICS "Global Summary
\n

\n"; print STATISTICS "\n"; print STATISTICS "\n"; print STATISTICS "\n\n"; my $exon_counter = 1; my $intron_counter = 1; my $pre_counter = 1; my $post_counter = 1; my $general_counter = 5; my $flag; my $twice; while () { my $SNPs_file_name; if ($_ eq "\n") { $flag = 1; }; if ($twice) { $flag = 1; }; if (!$flag) { my @line = split /\t/o, $_; if ((($.+ 3) % 4 == 0 ) && ($.!= 4)) { print STATISTICS "\n\n"; }; if ($line[1] eq "exon") { $SNPs_file_name = $chr."exon".$exon_counter."_snp.html"; print STATISTICS "\n\n"; } else { print STATISTICS "NONE\n\n" }; open(SNPS, "> $SNPs_file_name"); print SNPS "\n\n\n SNPs in gene $line[0] \n\n\n"; if ($line[6] !~m/^s+/) { my @f; @f = split / /o, $line[6]; my $f_counter = 0; while ($f_counter < scalar(@f)) { print SNPS "

$f[$f_counter]

"; $f_counter ++; }; # while print SNPS "

\n"; print SNPS "\n\n"; } else { print SNPS "
NONE
\n"; print SNPS "

\n"; print SNPS "\n\n"; }; $exon_counter ++; close(SNPS); next; }; # if exon if ($line[1]eq "intron") { $SNPs_file_name = $chr."intron".$intron_counter."_snp.html"; if ($line[2] eq "NO intron\\t0") { print STATISTICS "\n\n"; next; }; print STATISTICS "\n\n"; } else { print STATISTICS "NONE\n\n" }; open(SNPS, "> $SNPs_file_name"); print SNPS "\n\n\n SNPs in gene $line[0] \n\n\n"; if ($line[6] !~m/^s+/) { my @f; @f = split / /o, $line[6]; my $f_counter = 0; while ($f_counter < scalar(@f)) { print SNPS "

$f[$f_counter]

"; $f_counter ++; }; # while print SNPS "

\n"; print SNPS "\n\n"; } else { print SNPS "
NONE
\n"; print SNPS "

\n"; print SNPS "\n\n"; }; $intron_counter ++; close(SNPS); next; }; # if intron if ($line[1] eq "pre") { $SNPs_file_name = $chr."pre".$pre_counter."_snp.html"; if ($line[2] eq "NO pre\\t0") { print STATISTICS "\n\n"; next; }; print STATISTICS "\n\n"; } else { print STATISTICS "NONE\n\n" }; open(SNPS, "> $SNPs_file_name"); print SNPS "\n\n\n SNPs in gene $line[0] \n\n\n"; if ($line[6] !~m/^s+/) { my @f; @f = split / /o, $line[6]; my $f_counter = 0; while ($f_counter < scalar(@f)) { print SNPS "

$f[$f_counter]

"; $f_counter ++; }; # while print SNPS "

\n"; print SNPS "\n\n"; } else { print SNPS "
NONE
\n"; print SNPS "

\n"; print SNPS "\n\n"; }; $pre_counter ++; close(SNPS); next; }; # if pre if ($line[1] eq "post") { $SNPs_file_name = $chr."post".$post_counter."_snp.html"; if ($line[2] eq "NO post\\t0") { print STATISTICS "\n\n"; next; }; print STATISTICS "\n\n"; } else { print STATISTICS "NONE\n\n" }; open(SNPS, "> $SNPs_file_name"); print SNPS "\n\n\nSNPs in gene $line[0]\n\n\n"; if ($line[6] !~m/^s+/) { my @f; @f = split / /o, $line[6]; my $f_counter = 0; while ($f_counter < scalar(@f)) { print SNPS "

$f[$f_counter]

"; $f_counter ++; }; # while print SNPS "

\n"; print SNPS "\n\n"; } else { print SNPS "
NONE
\n"; print SNPS "

\n"; print SNPS "\n\n"; }; $post_counter ++; close(SNPS); next; }; # if post } # if !flag else { print STATISTICS "\n"; my @line = split /\t/o, $_; if (!$twice) { print STATISTICS "
GEN IDENTIFICATIONREGION IN GENELENGTH (bp)SNPs NUM SNPs DISTANCESNP/KbSNP/Kb(over global) SNPs FOUND
",$line[0],"
$line[0]$line[1]$line[2]$line[3] $line[4]$line[5]" ; if ($line[5] ne "NONE" && $exon[3] ne "NONE") { if ($line[5]/$exon[3] > 5) { push @high_dens, $line[0]; }; if ($line[5]/$exon[3] < 0.2) { push @low_dens, $line[0]; }; print STATISTICS ($line[5]/$exon[3])."SNPs ID
SNPs ID
$line[0]$line[1]SINGLE EXON GENE
$line[0]$line[1]$line[2]$line[3] $line[4]$line[5]"; if ($line[5] ne "NONE" && $intron[3] ne "NONE") { print STATISTICS ($line[5]/$intron[3])."SNPs ID
SNPs ID
$line[0]$line[1]SINGLE EXON GENE
$line[0]$line[1]$line[2]$line[3] $line[4]$line[5]"; if ($line[5] ne "NONE" && $pre[3] ne "NONE") { print STATISTICS ($line[5]/$pre[3])."SNPs ID
SNPs ID
$line[0]$line[1]SINGLE EXON GENE
$line[0]$line[1]$line[2]$line[3] $line[4]$line[5]"; if ($line[5] ne "NONE" && $post[3] ne "NONE") { print STATISTICS ($line[5]/$post[3])."SNPs ID
SNPs ID
\n

"; print STATISTICS "\n\n"; print STATISTICS "\n"; $twice = 1; } else { print STATISTICS "\n\n"; }; next; }; # if flag/else }; # while CHROMOSOME close(CHROMOSOME); print STATISTICS "
REGION IN CHRSNPs NUMLENGTH (bp)SNPs DISTANCESNP/Kb
$line[0]$line[1]$line[2]$line[3] $line[4]
\n"; print STATISTICS "\n"; print STATISTICS "

\n"; print STATISTICS "\n"; print STATISTICS "\n"; print STATISTICS "\n"; print STATISTICS "\n\n"; print STATISTICS "\n\n"; print STATISTICS "\n\n"; print STATISTICS "\n\n"; print STATISTICS "
REGION SNPs NUMLENGTH (bp)SNPs DISTANCESNP/Kb
$intron[0]$intron[1]$intron[2]$intron[3]$intron[4]
$exon[0]$exon[1]$exon[2]$exon[3]$exon[4]
$pre[0]$pre[1]$pre[2]$pre[3]$pre[4]
$post[0]$post[1]$post[2]$post[3]$post[4]
\n"; print STATISTICS "

\n"; print STATISTICS "\n\n"; close(STATISTICS); close(GLOBAL); # # SUMMARY FILE # my $sum_file_name = "Chr".$chr."_sum.html"; open (SUMMARY, "> $sum_file_name"); print SUMMARY "\n\n\nChromosome $chr Summary\n\n

\n"; print SUMMARY "SNPs High Density Genes
\n"; print SUMMARY "SNPs Low Density Genes
\n"; print SUMMARY "Hotpoints: SNPs in splice sites
\n

\n"; print SUMMARY ""; print SUMMARY "\n"; print SUMMARY "\n"; if (scalar(@high_dens) == 0) { print SUMMARY "\n\n"; }; my $high_counter = 0; while ($high_counter < scalar(@high_dens)) { if (scalar(@high_dens) == 0) { print SUMMARY "\n\n"; }; print SUMMARY "\n\n"; $high_counter ++; }; # while high print SUMMARY "
SNPs HIGH DENSITY GENES
NONE
NONE
$high_dens[$high_counter]
\n

\n"; print SUMMARY ""; print SUMMARY "\n"; print SUMMARY "\n"; if (scalar(@low_dens) == 0) { print SUMMARY "\n\n"; }; my $low_counter = 0; while ($low_counter < scalar(@low_dens)) { print SUMMARY "\n\n"; $low_counter ++; }; # while high print SUMMARY "
SNPs LOW DENSITY GENES
NONE
$low_dens[$low_counter]
\n

\n"; # # RECOVERING HOTPOINTS: # my $hot_file_name = "Chr".$chr."_hot.out"; my $hot_counter; open(HOTPOINTS, "< $hot_file_name ") || do { print STDERR "$PROG: $hot_file_name cannot be loaded\n"; print STDERR "fatal error loading $hot_file_name\n"; exit(1); }; print SUMMARY ""; print SUMMARY "\n"; print SUMMARY "\n"; while () { $hot_counter = $.; next if $. < 3; my @f = split /\t/o, $_; print SUMMARY "\n"; } if ($hot_counter == 2) { print SUMMARY "\n\n"; } print SUMMARY "
GENE IDENTIFICATIONEXON NUMLOCATION SNP ID
$f[0]$f[1]$f[2]$f[3]
NO HOT POINTS
\n"; print SUMMARY "

\n"; print SUMMARY "\n\n"; close(HOTPOINTS); close(SUMMARY); } # analyze # # END #