#!/bin/bash
#
#Program developed by César Enríquez-Rodríguez [2017] (cesnmi@gmail.com) for the Ammotragus lervia project
#
#GFF2.0 absolute generator
#
#This program looks for files with the suffix "exonerate" and then looks for the matching subseq file in the same folder to learn which offset must be added to obtain absolute coordinates.
#If your files have no extension, you can remove the "for ...", "do" and "done" lines and replace them with read statements such as:
#
#read -p "file:" file   #<= the input is saved in the 'file' variable
#
#Be careful!!! If the exonerate file you provide contains many predictions, all of them will be included. Remove the ones you want to discard.
#In the future you can pass the exonerate parameter "-n 1" to avoid this problem; it outputs only the best prediction.

# Convert the relative coordinates of the exonerate GFF dumps found one folder
# below the current directory into absolute ones.
#
# For every */NAME.exonerate report the offset is read from NAME.subseq in the
# same folder and the result is saved next to it as
# NAME.exonerate.gff.absolute. For manual use, call process_exonerate directly
# with the path of a single report.

#######################################
# Print the offset stored in the header of a subseq file.
# Arguments: $1 - path to the subseq file
# Outputs:   the digits that follow the last "subseq(" on the first line;
#            nothing when the first line carries no such tag
#######################################
subseq_origin() {
  # Only line 1 is inspected; quitting there avoids reading the sequence.
  sed -n -E -e '1s/.*subseq\(([0-9]+).*/\1/p' -e '1q' < "$1"
}

#######################################
# Copy the GFF dump(s) of one exonerate report, adding an offset to the
# coordinates. Only the lines between "START OF GFF DUMP" and
# "END OF GFF DUMP" markers are used.
# Arguments: $1 - exonerate report to read
#            $2 - offset (non-negative integer) to add
#            $3 - file to write (truncated first)
# Returns:   exit status of awk
#######################################
shift_gff_dump() {
  local report=$1 origin=$2 out=$3

  # Field values are always passed to printf as arguments, never as the format
  # string, so "%" or "\" inside an attribute cannot corrupt the output.
  awk -v origin="$origin" '
    # Print one dump line; idx is its 1-based position among all dump lines.
    function emit(rec, idx, is_last,    i) {
      $0 = rec
      # Lines 1-10 (presumably the GFF header) and any later "#" comment line,
      # e.g. the header of a further prediction, are copied untouched.
      if (idx <= 10 || rec ~ /^#/) {
        print rec
        return
      }
      $4 = $4 + origin
      $5 = $5 + origin
      if (is_last || $3 == "similarity") {
        # Fields 16, 21, 26, ... are shifted too (presumably the target
        # position of each "Align" triple). The very last dump line is always
        # handled this way, as in the first version of the program.
        # No separator follows the final field.
        for (i = 1; i <= NF; i++) {
          if (i == NF) {
            printf "%s\n", $i
          } else if (i < 10) {
            printf "%s\t", $i
          } else if (i >= 16 && (i - 1) % 5 == 0) {
            printf "%s ", $i + origin
          } else {
            printf "%s ", $i
          }
        }
      } else {
        # Ordinary record: a tab after each of the first nine fields, a space
        # after every later one (so the line keeps a trailing separator).
        for (i = 1; i <= NF; i++) {
          printf "%s%s", $i, (i < 10 ? "\t" : " ")
        }
        printf "\n"
      }
    }

    /START OF GFF DUMP/ { in_dump = 1; next }
    /END OF GFF DUMP/   { in_dump = 0 }
    in_dump             { dump[++count] = $0 }

    END {
      for (k = 1; k <= count; k++) {
        emit(dump[k], k, k == count)
      }
    }
  ' < "$report" > "$out"
}

#######################################
# Build the absolute GFF for a single exonerate report.
# Arguments: $1 - path of the report (expected to end in ".exonerate")
# Outputs:   "<report> <origin>" on stdout; warnings on stderr
# Returns:   0 on success, non-zero when the report was skipped or awk failed
#######################################
process_exonerate() {
  local report=$1
  local files=${report%.exonerate}                      # strip the .exonerate suffix
  local subseq="${files}.subseq"                        # offset source (.subseq by default)
  local exonerate_file="${files}.exonerate"             # input report (.exonerate by default)
  local final_path="${files}.exonerate.gff.absolute"    # saved in the same folder
  local origin

  if [[ ! -r "$subseq" ]]; then
    printf 'warning: %s: cannot read %s, skipped\n' "$report" "$subseq" >&2
    return 1
  fi

  origin=$(subseq_origin "$subseq")
  # Without a numeric offset the output would silently keep relative
  # coordinates, so refuse to write anything.
  if [[ ! "$origin" =~ ^[0-9]+$ ]]; then
    printf 'warning: %s: no "subseq(<start>" tag on the first line of %s, skipped\n' \
      "$report" "$subseq" >&2
    return 1
  fi

  printf '%s %s\n' "$report" "$origin"
  shift_gff_dump "$exonerate_file" "$origin" "$final_path"
}

# Look for every file ending in .exonerate inside any folder (*/).
for i in */*.exonerate; do
  [[ -e "$i" ]] || continue   # an unmatched glob is left as the literal pattern
  process_exonerate "$i"
done


########Can be removed --- kept only in memory of the first version:
#for i in  */*gff; do files=$(echo $i|sed 's/.exoner.*//g'); origin=($(sed -E -e'1!d' -e 's/.*subseq\(([0-9]+).*/\1/g' $files.subseq));echo $i $origin;ultima_linea=$(awk '/START OF GFF DUMP/{flag=1;next}/END OF GFF DUMP/{flag=0}flag' $files.exonerate|awk 'END {print NR}');more $files.exonerate| awk '/START OF GFF DUMP/{flag=1;next}/END OF GFF DUMP/{flag=0}flag' $files.exonerate|awk -v ultima=$ultima_linea -v origin=$origin '{if (FNR<=10){print $0}  else if (FNR==ultima) {$4=$4+origin;$5=$5+origin;for (i=1;i<=NF;i++) if (i==NF){printf $NF"\n"} else if (i<10) {printf $i"\t"} else if (i<16){printf $i" "} else if((i-1) % 5==0) {$i=$i+origin; printf $i" "} else {printf $i" "}} else {$4=$4+origin;$5=$5+origin;for (i=1;i<=NF;i++) if (i<10) {printf $i"\t"} else {printf $i" "}; printf"\n"}}'>$files.exonerate.gff.absolute; done
########
