BioPHP - Sequence alignment
Original code submitted by joseba. Code below is covered by the GNU GPL v2 license.
Description
Last change: 2011/04/27 07:01 | Edit description | Recent Changes | Original description
Alignment of two DNA, RNA or protein sequences. The Smith-Waterman alignment method is used. More info in the script.
Code
Last change: 2011/04/27 07:01 | Edit Code | Recent Changes | Download | Original code and<!--
Developed by: Jose Manuel González Hernández
Universidad de La Laguna, Tenerife, Spain
Smith-Waterman alignment is used. More info in the script.
-->
<html><head><title>Alignment of two DNA or RNA sequences</title></head>
<body bgcolor=FFFFFF>
<?php
// Page controller: shows the info page, shows the input form, or aligns the
// two posted sequences and prints the result.

// WHEN INFO IS REQUESTED ($_GET["action"]=="info"), PRINT INFO AND FINISH
// isset() avoids an "undefined index" notice when the parameter is absent.
if (isset($_GET["action"]) && $_GET["action"]=="info"){
    print_info(); // this function is located at the end of the script
    die(); // after the info, die
}
// IF NOTHING HAS BEEN POSTED, PRINT THE FORM AND FINISH
if (!$_POST){
    print_form(); // this function is located at the end of the script
    die(); // after the form, die
}
// GET SEQUENCES AND PREPARE THEM
// A missing or non-string field (e.g. "sequence[]=x") is treated as empty input.
$seqa=(isset($_POST["sequence"]) && is_string($_POST["sequence"])) ? strtoupper($_POST["sequence"]) : "";
$seqb=(isset($_POST["sequence2"]) && is_string($_POST["sequence2"])) ? strtoupper($_POST["sequence2"]) : "";
// Keep letters only. The previous pattern /\W|\d/ let the underscore through,
// because "_" counts as a word character.
$seqa=preg_replace("/[^A-Z]/", "", $seqa);
$seqb=preg_replace("/[^A-Z]/", "", $seqb);
$seqa=str_replace("U", "T", $seqa); // from RNA to DNA
$seqb=str_replace("U", "T", $seqb); // from RNA to DNA
$seqa=str_replace("X", "N", $seqa); // substitute X -> N
$seqb=str_replace("X", "N", $seqb); // substitute X -> N
// The aligners need at least one residue in each sequence.
if ($seqa=="" || $seqb==""){die ("Error:<br>Both sequences must contain at least one letter");}
// LIMIT SEQUENCE LENGTH TO LIMIT MEMORY USAGE
// THIS SCRIPT CREATES A BIG ARRAY THAT REQUIRES A HUGE AMOUNT OF MEMORY
// DO NOT USE SEQUENCES LONGER THAN 700 BASES EACH (1400 BASES FOR BOTH SEQUENCES)
// IN THIS DEMO, THE LIMIT HAS BEEN SET TO 300 BASES
$limit=300;
if ((strlen($seqa)+strlen($seqb))>$limit){die ("Error:<br>The maximum length of code accepted for both sequences is $limit nucleotides");}
// CHECK WHETHER THEY ARE DNA OR PROTEIN, AND ALIGN SEQUENCES
// Only the first sequence is inspected to decide the type.
$acgt=substr_count($seqa,"A")+substr_count($seqa,"C")+substr_count($seqa,"G")+substr_count($seqa,"T");
if ($acgt>(strlen($seqa)/2)){
    // if A+C+G+T is more than half of the sequence, it is a DNA
    $alignment=align_DNA($seqa,$seqb);
}else{
    // else is protein
    $alignment=align_proteins($seqa,$seqb);
}
// EXTRACT DATA FROM ALIGNMENT
$align_seqa=$alignment["seqa"];
$align_seqb=$alignment["seqb"];
// COMPARE ALIGNMENTS
$compare=compare_alignment($align_seqa,$align_seqb);
// PRINT RESULTS
// PHP_SELF and the two sequence labels come from the request, so they are
// HTML-escaped before being written into the page.
$self=htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES | ENT_SUBSTITUTE);
$id1=(isset($_POST["id1"]) && is_string($_POST["id1"])) ? htmlspecialchars($_POST["id1"], ENT_QUOTES | ENT_SUBSTITUTE) : "";
$id2=(isset($_POST["id2"]) && is_string($_POST["id2"])) ? htmlspecialchars($_POST["id2"], ENT_QUOTES | ENT_SUBSTITUTE) : "";
print "<div align=right><a href=\"".$self."\"><b>New alignment</b></a></div>\n";
print "<center><H2>Alignment of two DNA sequences</H2>\n";
print "<table><tr><td><HR><pre>\n";
// Print the alignment in rows of 100 columns; the number after each row is
// the alignment column reached at the end of that row.
$total=strlen($align_seqa);
for ($i=0;$i<$total;$i+=100){
    $ii=min($i+100,$total);
    print substr($align_seqa,$i,100)." $ii\n";
    print substr($compare,$i,100)."\n";
    print substr($align_seqb,$i,100)." $ii\n\n";
}
print "</pre><hr>\n";
print "</td></tr><tr><td align=center>";
print "<table><tr><td>";
print $id1."<br><input type=text value=\"$align_seqa\" size=100><p>";
print $id2."<br><input type=text value=\"$align_seqb\" size=100><p>";
print "</td></tr></table><hr>";
print "</td></tr></table></center>\n";
print "<p></body></html>";
// END PRINT RESULTS
// ########################################################################################
// ############################### Functions ####################################
// ########################################################################################
/**
 * Align two nucleotide sequences and return them padded with "-" gaps.
 *
 * Steps:
 *  1. Fill a score matrix ($matriz[row in $seqb][column in $seqa]). Equal
 *     bases add $match to the diagonal neighbour; otherwise the cell is the
 *     best of 0, diagonal+$mismatch, left+$gap and upper+$gap.
 *  2. From the first cell holding the highest score, walk back towards (0,0)
 *     and forward towards the end of both sequences, always stepping to the
 *     highest-valued neighbour, and mark the visited cells in $matrizz.
 *  3. Read the marked path from (0,0) to build the two gapped strings.
 *
 * Changes with respect to the previous version:
 *  - all work variables are initialised and the matrix is never read at an
 *    undefined offset (no notices/warnings);
 *  - the path-reading loop tested "$t=1" (an assignment, always true); it now
 *    compares, so it stops instead of spinning when no step can be taken;
 *  - the forward walk no longer depends on comparing a score with the string
 *    '//', whose result differs between PHP 7 and PHP 8; it now behaves as in
 *    align_proteins() on every version;
 *  - the tail of the alignment is built once. Previously both tail branches
 *    could run and the last column was then removed unconditionally, which
 *    dropped a real column whenever only one branch had run;
 *  - an empty sequence is aligned against gaps only.
 *
 * @param string $seqa first sequence (upper-case letters)
 * @param string $seqb second sequence (upper-case letters)
 * @return array "seqa" and "seqb": the gapped sequences, equal in length
 */
function align_DNA($seqa,$seqb){
    $match = 2;      // added to the diagonal cell when both bases are equal
    $mismatch = -1;  // added to the diagonal cell when they differ
    $gap = -4;       // added to the left/upper cell (gap in one sequence)

    $seqa=(string)$seqa;
    $seqb=(string)$seqb;
    $maxa=strlen($seqa);
    $maxb=strlen($seqb);
    $results=array();

    // Nothing to align against: pair every residue of the other sequence with a gap.
    if ($maxa==0 || $maxb==0){
        $results["seqa"]=$seqa.str_repeat("-",$maxb);
        $results["seqb"]=$seqb.str_repeat("-",$maxa);
        return $results;
    }

    $a=str_split($seqa);
    $b=str_split($seqb);
    $lenn=max($maxa,$maxb);

    // Matrix creation.
    // (Original note: the code was shortened to make it simpler and faster, but
    // it only saves about 20% of the time; PHP is not efficient with very large arrays.)
    $matriz=array();
    // Reads a matrix cell; cells outside the filled area yield null, exactly
    // what the undefined-offset reads of the previous version produced.
    $cell=function($row,$col) use (&$matriz){
        return isset($matriz[$row][$col]) ? $matriz[$row][$col] : null;
    };
    $mx=0;
    $mj=0;
    $mi=0;
    for ($i=0;$i<$maxa;$i++){
        for ($j=0;$j<$maxb;$j++){
            if($b[$j]===$a[$i]){
                // a match only looks at the diagonal neighbour
                $x=(int)$cell($j-1,$i-1)+$match;
            }else{
                $x=max(0,(int)$cell($j-1,$i-1)+$mismatch,(int)$cell($j,$i-1)+$gap,(int)$cell($j-1,$i)+$gap);
            }
            $matriz[$j][$i]=$x;
            // strict "<" keeps the first cell that reaches the maximum
            if ($mx<$x){$mx=$x; $mj=$j; $mi=$i;}
        }
    }
    // Matrix finished.

    // Walk back from the best cell to (0,0).
    // Neighbours outside the matrix are null; PHP compares null with an integer
    // as booleans, which is what decides the step along the matrix borders.
    $matrizz=array();
    $j=$mj;
    $i=$mi;
    $matrizz[$j][$i]=1;
    while ($i>0 || $j>0){
        $aa=$cell($j-1,$i-1);
        $ab=$cell($j,$i-1);
        $ac=$cell($j-1,$i);
        if($aa>=$ab && $aa>=$ac){
            $j=$j-1;
            $i=$i-1;
        }
        if($ab>$aa){$i=$i-1;}
        if($ac>$aa){$j=$j-1;}
        if($j<0){$j=0;}
        if($i<0){$i=0;}
        $matrizz[$j][$i]=1;
    }

    // Walk forward from the best cell until both sequences are exhausted.
    $j=$mj;
    $i=$mi;
    while ($i<$maxa-1 || $j<$maxb-1){
        $aa=$cell($j+1,$i+1);
        $ab=$cell($j,$i+1);
        $ac=$cell($j+1,$i);
        if($aa>=$ab && $aa>=$ac){
            $j=$j+1;
            $i=$i+1;
        }
        if($ab>$aa){$i=$i+1;}
        if($ac>$aa){$j=$j+1;}
        if($j>$lenn){$j=$lenn;}
        if($i>$lenn){$i=$lenn;}
        $matrizz[$j][$i]=1;
    }

    // Read the marked path from (0,0). The three tests are deliberately not
    // chained with "else": up to three steps may be taken per iteration.
    $sseqa="";
    $sseqb="";
    $j=0;
    $i=0;
    $t=1; // 1 while the last iteration managed to take a step
    while ($i<$maxa-2 && $j<$maxb-2 && $t==1){
        $t=0;
        if(isset($matrizz[$j+1][$i+1])){ // diagonal: residue against residue
            $t=1;
            $sseqa.=$a[$i];
            $sseqb.=$b[$j];
            $i=$i+1;
            $j=$j+1;
        }
        if(isset($matrizz[$j][$i+1])){ // gap in $seqb
            $t=1;
            $sseqa.=$a[$i];
            $sseqb.="-";
            $i=$i+1;
        }
        if(isset($matrizz[$j+1][$i])){ // gap in $seqa
            $t=1;
            $sseqa.="-";
            $sseqb.=$b[$j];
            $j=$j+1;
        }
    }
    // One more step after the loop, as in the original algorithm.
    if(isset($matrizz[$j+1][$i+1])){
        $sseqa.=$a[$i];
        $sseqb.=$b[$j];
        $i=$i+1;
        $j=$j+1;
        $t=1;
    }
    if($t==0 && isset($matrizz[$j][$i+1])){
        $sseqa.=$a[$i];
        $sseqb.="-";
        $i=$i+1;
    }
    if($t==0 && isset($matrizz[$j+1][$i])){
        $sseqa.="-";
        $sseqb.=$b[$j];
        $j=$j+1;
    }

    // Tail: pair whatever is left position by position and pad the shorter
    // side with gaps. When one sequence has exactly one residue left this is
    // what the two original tail branches produced, without the duplicated
    // last column that later had to be cut off.
    $resta=(string)substr($seqa,$i);
    $restb=(string)substr($seqb,$j);
    $ancho=max(strlen($resta),strlen($restb));
    $sseqa.=str_pad($resta,$ancho,"-");
    $sseqb.=str_pad($restb,$ancho,"-");

    $results["seqa"]=$sseqa;
    $results["seqb"]=$sseqb;
    return $results;
}
// ########################################################################################
/**
 * Align two protein sequences using the PAM250 table and return them padded
 * with "-" gaps.
 *
 * Steps:
 *  1. The first row and column of the score matrix
 *     ($matriz[row in $seqb][column in $seqa]) receive the plain PAM250 score
 *     of each residue against the first residue of the other sequence.
 *  2. Every other cell is diagonal+PAM250 for identical residues; otherwise
 *     the best of diagonal+PAM250, left+$gap, upper+$gap and 0.
 *  3. From the first cell holding the highest score, walk back towards (0,0)
 *     and forward towards the end of both sequences, stepping to the
 *     highest-valued neighbour, and mark the visited cells in $matrizz.
 *  4. Read the marked path from (0,0) to build the two gapped strings.
 *
 * Changes with respect to the previous version:
 *  - all work variables are initialised and neither the matrix nor the PAM250
 *    table is read at an undefined key (no notices/warnings);
 *  - the path-reading loop tested "$t=1" (an assignment, always true); it now
 *    compares, so it stops instead of spinning when no step can be taken;
 *  - the tail of the alignment is built once; previously both tail branches
 *    ran when the two sequences ended together and the last column was
 *    duplicated;
 *  - an empty sequence is aligned against gaps only.
 *
 * @param string $seqa first sequence (upper-case one-letter codes)
 * @param string $seqb second sequence (upper-case one-letter codes)
 * @return array "seqa" and "seqb": the gapped sequences, equal in length
 */
function align_proteins($seqa,$seqb){
    // PAM250 scores, keyed by the two residue letters (both orders are stored).
    $PAM250=array();
    $PAM250["CC"]=12;
    $PAM250["SC"]=0;$PAM250["SS"]=2;
    $PAM250["CS"]=0;
    $PAM250["TC"]=-2;$PAM250["TS"]=1;$PAM250["TT"]=3;
    $PAM250["CT"]=-2;$PAM250["ST"]=1;
    $PAM250["PC"]=-3;$PAM250["PS"]=1;$PAM250["PT"]=0;$PAM250["PP"]=6;
    $PAM250["CP"]=-3;$PAM250["SP"]=1;$PAM250["TP"]=0;
    $PAM250["AC"]=-2;$PAM250["AS"]=1;$PAM250["AT"]=1;$PAM250["AP"]=1;$PAM250["AA"]=2;
    $PAM250["CA"]=-2;$PAM250["SA"]=1;$PAM250["TA"]=1;$PAM250["PA"]=1;
    $PAM250["GC"]=-3;$PAM250["GS"]=1;$PAM250["GT"]=0;$PAM250["GP"]=-1;$PAM250["GA"]=1;$PAM250["GG"]=5;
    $PAM250["CG"]=-3;$PAM250["SG"]=1;$PAM250["TG"]=0;$PAM250["PG"]=-1;$PAM250["AG"]=1;
    $PAM250["NC"]=-4;$PAM250["NS"]=1;$PAM250["NT"]=0;$PAM250["NP"]=-1;$PAM250["NA"]=0;$PAM250["NG"]=0;$PAM250["NN"]=2;
    $PAM250["CN"]=-4;$PAM250["SN"]=1;$PAM250["TN"]=0;$PAM250["PN"]=-1;$PAM250["AN"]=0;$PAM250["GN"]=0;
    $PAM250["DC"]=-5;$PAM250["DS"]=0;$PAM250["DT"]=0;$PAM250["DP"]=-1;$PAM250["DA"]=0;$PAM250["DG"]=1;$PAM250["DN"]=2;$PAM250["DD"]=4;
    $PAM250["CD"]=-5;$PAM250["SD"]=0;$PAM250["TD"]=0;$PAM250["PD"]=-1;$PAM250["AD"]=0;$PAM250["GD"]=1;$PAM250["ND"]=2;
    $PAM250["EC"]=-5;$PAM250["ES"]=0;$PAM250["ET"]=0;$PAM250["EP"]=-1;$PAM250["EA"]=0;$PAM250["EG"]=0;$PAM250["EN"]=1;$PAM250["ED"]=3;$PAM250["EE"]=4;
    $PAM250["CE"]=-5;$PAM250["SE"]=0;$PAM250["TE"]=0;$PAM250["PE"]=-1;$PAM250["AE"]=0;$PAM250["GE"]=0;$PAM250["NE"]=1;$PAM250["DE"]=3;
    $PAM250["QC"]=-5;$PAM250["QS"]=-1;$PAM250["QT"]=-1;$PAM250["QP"]=0;$PAM250["QA"]=0;$PAM250["QG"]=-1;$PAM250["QN"]=1;$PAM250["QD"]=2;$PAM250["QE"]=2;$PAM250["QQ"]=4;
    $PAM250["CQ"]=-5;$PAM250["SQ"]=-1;$PAM250["TQ"]=-1;$PAM250["PQ"]=0;$PAM250["AQ"]=0;$PAM250["GQ"]=-1;$PAM250["NQ"]=1;$PAM250["DQ"]=2;$PAM250["EQ"]=2;
    $PAM250["HC"]=-3;$PAM250["HS"]=-1;$PAM250["HT"]=-1;$PAM250["HP"]=0;$PAM250["HA"]=-1;$PAM250["HG"]=-2;$PAM250["HN"]=2;$PAM250["HD"]=1;$PAM250["HE"]=1;$PAM250["HQ"]=3;$PAM250["HH"]=6;
    $PAM250["CH"]=-3;$PAM250["SH"]=-1;$PAM250["TH"]=-1;$PAM250["PH"]=0;$PAM250["AH"]=-1;$PAM250["GH"]=-2;$PAM250["NH"]=2;$PAM250["DH"]=1;$PAM250["EH"]=1;$PAM250["QH"]=3;
    $PAM250["RC"]=-4;$PAM250["RS"]=0;$PAM250["RT"]=-1;$PAM250["RP"]=0;$PAM250["RA"]=-2;$PAM250["RG"]=-3;$PAM250["RN"]=0;$PAM250["RD"]=-1;$PAM250["RE"]=-1;$PAM250["RQ"]=1;$PAM250["RH"]=2;$PAM250["RR"]=6;
    $PAM250["CR"]=-4;$PAM250["SR"]=0;$PAM250["TR"]=-1;$PAM250["PR"]=0;$PAM250["AR"]=-2;$PAM250["GR"]=-3;$PAM250["NR"]=0;$PAM250["DR"]=-1;$PAM250["ER"]=-1;$PAM250["QR"]=1;$PAM250["HR"]=2;
    $PAM250["KC"]=-5;$PAM250["KS"]=0;$PAM250["KT"]=0;$PAM250["KP"]=-1;$PAM250["KA"]=-1;$PAM250["KG"]=-2;$PAM250["KN"]=1;$PAM250["KD"]=0;$PAM250["KE"]=0;$PAM250["KQ"]=1;$PAM250["KH"]=0;$PAM250["KR"]=3;$PAM250["KK"]=5;
    $PAM250["CK"]=-5;$PAM250["SK"]=0;$PAM250["TK"]=0;$PAM250["PK"]=-1;$PAM250["AK"]=-1;$PAM250["GK"]=-2;$PAM250["NK"]=1;$PAM250["DK"]=0;$PAM250["EK"]=0;$PAM250["QK"]=1;$PAM250["HK"]=0;$PAM250["RK"]=3;
    $PAM250["MC"]=-5;$PAM250["MS"]=-2;$PAM250["MT"]=-1;$PAM250["MP"]=-2;$PAM250["MA"]=-1;$PAM250["MG"]=-3;$PAM250["MN"]=-2;$PAM250["MD"]=-3;$PAM250["ME"]=-2;$PAM250["MQ"]=-1;$PAM250["MH"]=-2;$PAM250["MR"]=0;$PAM250["MK"]=0;$PAM250["MM"]=6;
    $PAM250["CM"]=-5;$PAM250["SM"]=-2;$PAM250["TM"]=-1;$PAM250["PM"]=-2;$PAM250["AM"]=-1;$PAM250["GM"]=-3;$PAM250["NM"]=-2;$PAM250["DM"]=-3;$PAM250["EM"]=-2;$PAM250["QM"]=-1;$PAM250["HM"]=-2;$PAM250["RM"]=0;$PAM250["KM"]=0;
    $PAM250["IC"]=-2;$PAM250["IS"]=-1;$PAM250["IT"]=0;$PAM250["IP"]=-2;$PAM250["IA"]=-1;$PAM250["IG"]=-3;$PAM250["IN"]=-2;$PAM250["ID"]=-2;$PAM250["IE"]=-2;$PAM250["IQ"]=-2;$PAM250["IH"]=-2;$PAM250["IR"]=-2;$PAM250["IK"]=-2;$PAM250["IM"]=2;$PAM250["II"]=5;
    $PAM250["CI"]=-2;$PAM250["SI"]=-1;$PAM250["TI"]=0;$PAM250["PI"]=-2;$PAM250["AI"]=-1;$PAM250["GI"]=-3;$PAM250["NI"]=-2;$PAM250["DI"]=-2;$PAM250["EI"]=-2;$PAM250["QI"]=-2;$PAM250["HI"]=-2;$PAM250["RI"]=-2;$PAM250["KI"]=-2;$PAM250["MI"]=2;
    $PAM250["LC"]=-6;$PAM250["LS"]=-3;$PAM250["LT"]=-2;$PAM250["LP"]=-3;$PAM250["LA"]=-2;$PAM250["LG"]=-4;$PAM250["LN"]=-3;$PAM250["LD"]=-4;$PAM250["LE"]=-3;$PAM250["LQ"]=-2;$PAM250["LH"]=-2;$PAM250["LR"]=-3;$PAM250["LK"]=-3;$PAM250["LM"]=4;$PAM250["LI"]=2;$PAM250["LL"]=6;
    $PAM250["CL"]=-6;$PAM250["SL"]=-3;$PAM250["TL"]=-2;$PAM250["PL"]=-3;$PAM250["AL"]=-2;$PAM250["GL"]=-4;$PAM250["NL"]=-3;$PAM250["DL"]=-4;$PAM250["EL"]=-3;$PAM250["QL"]=-2;$PAM250["HL"]=-2;$PAM250["RL"]=-3;$PAM250["KL"]=-3;$PAM250["ML"]=4;$PAM250["IL"]=2;
    $PAM250["VC"]=-2;$PAM250["VS"]=-1;$PAM250["VT"]=0;$PAM250["VP"]=-1;$PAM250["VA"]=0;$PAM250["VG"]=-1;$PAM250["VN"]=-2;$PAM250["VD"]=-2;$PAM250["VE"]=-2;$PAM250["VQ"]=-2;$PAM250["VH"]=-2;$PAM250["VR"]=-2;$PAM250["VK"]=-2;$PAM250["VM"]=2;$PAM250["VI"]=4;$PAM250["VL"]=2;$PAM250["VV"]=4;
    $PAM250["CV"]=-2;$PAM250["SV"]=-1;$PAM250["TV"]=0;$PAM250["PV"]=-1;$PAM250["AV"]=0;$PAM250["GV"]=-1;$PAM250["NV"]=-2;$PAM250["DV"]=-2;$PAM250["EV"]=-2;$PAM250["QV"]=-2;$PAM250["HV"]=-2;$PAM250["RV"]=-2;$PAM250["KV"]=-2;$PAM250["MV"]=2;$PAM250["IV"]=4;$PAM250["LV"]=2;
    $PAM250["FC"]=-4;$PAM250["FS"]=-3;$PAM250["FT"]=-3;$PAM250["FP"]=-5;$PAM250["FA"]=-4;$PAM250["FG"]=-5;$PAM250["FN"]=-4;$PAM250["FD"]=-6;$PAM250["FE"]=-5;$PAM250["FQ"]=-5;$PAM250["FH"]=-2;$PAM250["FR"]=-4;$PAM250["FK"]=-5;$PAM250["FM"]=0;$PAM250["FI"]=1;$PAM250["FL"]=2;$PAM250["FV"]=-1;$PAM250["FF"]=9;
    $PAM250["CF"]=-4;$PAM250["SF"]=-3;$PAM250["TF"]=-3;$PAM250["PF"]=-5;$PAM250["AF"]=-4;$PAM250["GF"]=-5;$PAM250["NF"]=-4;$PAM250["DF"]=-6;$PAM250["EF"]=-5;$PAM250["QF"]=-5;$PAM250["HF"]=-2;$PAM250["RF"]=-4;$PAM250["KF"]=-5;$PAM250["MF"]=0;$PAM250["IF"]=1;$PAM250["LF"]=2;$PAM250["VF"]=-1;
    $PAM250["YC"]=0;$PAM250["YS"]=-3;$PAM250["YT"]=-3;$PAM250["YP"]=-5;$PAM250["YA"]=-3;$PAM250["YG"]=-5;$PAM250["YN"]=-2;$PAM250["YD"]=-4;$PAM250["YE"]=-4;$PAM250["YQ"]=-4;$PAM250["YH"]=0;$PAM250["YR"]=-4;$PAM250["YK"]=-4;$PAM250["YM"]=-2;$PAM250["YI"]=-1;$PAM250["YL"]=-1;$PAM250["YV"]=-2;$PAM250["YF"]=7;$PAM250["YY"]=10;
    $PAM250["CY"]=0;$PAM250["SY"]=-3;$PAM250["TY"]=-3;$PAM250["PY"]=-5;$PAM250["AY"]=-3;$PAM250["GY"]=-5;$PAM250["NY"]=-2;$PAM250["DY"]=-4;$PAM250["EY"]=-4;$PAM250["QY"]=-4;$PAM250["HY"]=0;$PAM250["RY"]=-4;$PAM250["KY"]=-4;$PAM250["MY"]=-2;$PAM250["IY"]=-1;$PAM250["LY"]=-1;$PAM250["VY"]=-2;$PAM250["FY"]=7;
    $PAM250["WC"]=-8;$PAM250["WS"]=-2;$PAM250["WT"]=-5;$PAM250["WP"]=-6;$PAM250["WA"]=-6;$PAM250["WG"]=-7;$PAM250["WN"]=-4;$PAM250["WD"]=-7;$PAM250["WE"]=-7;$PAM250["WQ"]=-5;$PAM250["WH"]=3;$PAM250["WR"]=2;$PAM250["WK"]=-3;$PAM250["WM"]=-4;$PAM250["WI"]=-5;$PAM250["WL"]=-2;$PAM250["WV"]=-6;$PAM250["WF"]=0;$PAM250["WY"]=0;$PAM250["WW"]=17;
    $PAM250["CW"]=-8;$PAM250["SW"]=-2;$PAM250["TW"]=-5;$PAM250["PW"]=-6;$PAM250["AW"]=-6;$PAM250["GW"]=-7;$PAM250["NW"]=-4;$PAM250["DW"]=-7;$PAM250["EW"]=-7;$PAM250["QW"]=-5;$PAM250["HW"]=3;$PAM250["RW"]=2;$PAM250["KW"]=-3;$PAM250["MW"]=-4;$PAM250["IW"]=-5;$PAM250["LW"]=-2;$PAM250["VW"]=-6;$PAM250["FW"]=0;$PAM250["YW"]=0;
    $gap = -50; // added to the left/upper cell (gap in one sequence)

    $seqa=(string)$seqa;
    $seqb=(string)$seqb;
    $maxa=strlen($seqa);
    $maxb=strlen($seqb);
    $results=array();

    // Nothing to align against: pair every residue of the other sequence with a gap.
    if ($maxa==0 || $maxb==0){
        $results["seqa"]=$seqa.str_repeat("-",$maxb);
        $results["seqb"]=$seqb.str_repeat("-",$maxa);
        return $results;
    }

    $a=str_split($seqa);
    $b=str_split($seqb);
    $lenn=max($maxa,$maxb);

    // PAM250 score of a residue pair; null when the pair is not in the table
    // (the value the previous undefined-key read produced).
    $score=function($p,$q) use ($PAM250){
        $par=$p.$q;
        return isset($PAM250[$par]) ? $PAM250[$par] : null;
    };
    $matriz=array();
    // Reads a matrix cell; cells that are outside the filled area or hold
    // null yield null.
    $cell=function($row,$col) use (&$matriz){
        return isset($matriz[$row][$col]) ? $matriz[$row][$col] : null;
    };

    // First row and first column: plain substitution scores.
    for ($i=0;$i<$maxa;$i++){
        $matriz[0][$i]=$score($a[$i],$b[0]);
    }
    for ($i=0;$i<$maxb;$i++){
        $matriz[$i][0]=$score($b[$i],$a[0]);
    }
    // Rest of the matrix.
    for ($i=1;$i<$maxa;$i++){
        for ($j=1;$j<$maxb;$j++){
            $x=(int)$cell($j-1,$i-1)+(int)$score($b[$j],$a[$i]);
            if($b[$j]!==$a[$i]){
                // only mismatches consider gaps and are clamped at 0
                $x=max($x,(int)$cell($j,$i-1)+$gap,(int)$cell($j-1,$i)+$gap,0);
            }
            $matriz[$j][$i]=$x;
        }
    }
    // Locate the first cell holding the highest score (strict "<").
    $mx=0;
    $mj=0;
    $mi=0;
    for ($i=0;$i<$maxa;$i++){
        for ($j=0;$j<$maxb;$j++){
            $v=$cell($j,$i);
            if($mx<$v){
                $mx=$v;
                $mj=$j;
                $mi=$i;
            }
        }
    }

    // Walk back from the best cell to (0,0).
    // Missing neighbours are null; PHP compares null with an integer as
    // booleans, which is what decides the step along the matrix borders.
    $matrizz=array();
    $j=$mj;
    $i=$mi;
    $matrizz[$j][$i]=1;
    while ($i>0 || $j>0){
        $aa=$cell($j-1,$i-1);
        $ab=$cell($j,$i-1);
        $ac=$cell($j-1,$i);
        if($aa>=$ab && $aa>=$ac){
            $j=$j-1;
            $i=$i-1;
        }
        if($ab>$aa){$i=$i-1;}
        if($ac>$aa){$j=$j-1;}
        if($j<0){$j=0;}
        if($i<0){$i=0;}
        $matrizz[$j][$i]=1;
    }

    // Walk forward from the best cell until both sequences are exhausted.
    $j=$mj;
    $i=$mi;
    while ($i<$maxa-1 || $j<$maxb-1){
        $aa=$cell($j+1,$i+1);
        $ab=$cell($j,$i+1);
        $ac=$cell($j+1,$i);
        if($aa>=$ab && $aa>=$ac){
            $j=$j+1;
            $i=$i+1;
        }
        if($ab>$aa){$i=$i+1;}
        if($ac>$aa){$j=$j+1;}
        if($j>$lenn){$j=$lenn;}
        if($i>$lenn){$i=$lenn;}
        $matrizz[$j][$i]=1;
    }

    // Read the marked path from (0,0). The three tests are deliberately not
    // chained with "else": up to three steps may be taken per iteration.
    $sseqa="";
    $sseqb="";
    $j=0;
    $i=0;
    $t=1; // 1 while the last iteration managed to take a step
    while ($i<$maxa-2 && $j<$maxb-2 && $t==1){
        $t=0;
        if(isset($matrizz[$j+1][$i+1])){ // diagonal: residue against residue
            $t=1;
            $sseqa.=$a[$i];
            $sseqb.=$b[$j];
            $i=$i+1;
            $j=$j+1;
        }
        if(isset($matrizz[$j][$i+1])){ // gap in $seqb
            $t=1;
            $sseqa.=$a[$i];
            $sseqb.="-";
            $i=$i+1;
        }
        if(isset($matrizz[$j+1][$i])){ // gap in $seqa
            $t=1;
            $sseqa.="-";
            $sseqb.=$b[$j];
            $j=$j+1;
        }
    }
    // One more step after the loop, as in the original algorithm.
    if(isset($matrizz[$j+1][$i+1])){
        $sseqa.=$a[$i];
        $sseqb.=$b[$j];
        $i=$i+1;
        $j=$j+1;
        $t=1;
    }
    if($t==0 && isset($matrizz[$j][$i+1])){
        $sseqa.=$a[$i];
        $sseqb.="-";
        $i=$i+1;
    }
    if($t==0 && isset($matrizz[$j+1][$i])){
        $sseqa.="-";
        $sseqb.=$b[$j];
        $j=$j+1;
    }

    // Tail: pair whatever is left position by position and pad the shorter
    // side with gaps. When one sequence has exactly one residue left this is
    // what each original tail branch produced on its own; building it once
    // avoids the duplicated last column when both sequences end together.
    $resta=(string)substr($seqa,$i);
    $restb=(string)substr($seqb,$j);
    $ancho=max(strlen($resta),strlen($restb));
    $sseqa.=str_pad($resta,$ancho,"-");
    $sseqb.=str_pad($restb,$ancho,"-");

    $results["seqa"]=$sseqa;
    $results["seqb"]=$sseqb;
    return $results;
}
// ########################################################################################
/**
 * Build the match line shown between two aligned sequences.
 *
 * The result has one character per position of $seqa: "|" where both
 * sequences hold the same character at that position, " " otherwise
 * (including positions beyond the end of a shorter $seqb).
 *
 * @param string $seqa first aligned sequence
 * @param string $seqb second aligned sequence
 * @return string match line; an empty string for empty input
 */
function compare_alignment($seqa,$seqb){
    $seqa=(string)$seqa;
    $seqb=(string)$seqb;
    $lena=strlen($seqa);
    $lenb=strlen($seqb);
    // Start from an empty string so that empty input returns "" instead of
    // an undefined variable.
    $compare="";
    for ($i=0;$i<$lena;$i++){
        $compare.=($i<$lenb && $seqa[$i]===$seqb[$i]) ? "|" : " ";
    }
    return $compare;
}
// ########################################################################################
// The form includes javascript code
/**
 * Print the input page: a form with two labelled sequence boxes that posts
 * back to this script, plus the JavaScript helpers used by its links
 * (tidy up both sequences, replace a sequence by its reverse complement).
 *
 * Uses full "<?php" tags so the page does not depend on short_open_tag,
 * HTML-escapes PHP_SELF, and makes the "Info" link request ?action=info
 * (it previously pointed back to the form itself).
 */
function print_form (){
?>
<html>
<head>
<title>Alignment of two DNA or RNA sequences</title>
<script language="JavaScript">
<!--
function tidyup() {
// sequence1
str=document.mydna.sequence.value.toUpperCase();
str=str.split(/\d|\W/).join("");
if (!str) {document.mydna.sequence.value=''};
var revstr=' '; var k=0; for (i =0; i<str.length; i++) { revstr+=str.charAt(i); k+=1; if (k==Math.floor(k/10)*10) {revstr+=' '}; if (k==Math.floor(k/60)*60) {revstr+=k+'\n '}; }; document.mydna.sequence.value=revstr;
// sequence2
str=document.mydna.sequence2.value.toUpperCase();
str=str.split(/\d|\W/).join("");
if (!str) {document.mydna.sequence2.value=''};
var revstr=' '; var k=0; for (i =0; i<str.length; i++) { revstr+=str.charAt(i); k+=1; if (k==Math.floor(k/10)*10) {revstr+=' '}; if (k==Math.floor(k/60)*60) {revstr+=k+'\n '}; };
document.mydna.sequence2.value=revstr;}
// Get complementary of sequence 1
function strcomp1() {
str=document.mydna.sequence.value.toUpperCase();
str=Removeuseless(str);
str=getrev(str);
str=getComplement(str);
document.mydna.sequence.value=str;
tidyup()
}
// Get complementary of sequence 2
function strcomp2() {
str=document.mydna.sequence2.value.toUpperCase();
str=Removeuseless(str);
str=getrev(str);
str=getComplement(str);
document.mydna.sequence2.value=str;
tidyup()
}
function Removeuseless(str) {
str = str.split(/\d|\W/).join("");
return str;
}
function getrev(str) {
var revstr=' ';
var k=0;
for (i = str.length-1; i>=0; i--) {
revstr+=str.charAt(i);
k+=1;
};
return revstr;
}
function getComplement(str) {
str = str.split("A").join("t");
str = str.split("T").join("a");
str = str.split("G").join("c");
str = str.split("C").join("g");
str=str.toUpperCase();
return str;
};
//-->
</script>
</head>
<body bgcolor="white" text="black">
<center>
<form name="mydna" method="post" action="<?php print htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES); ?>">
<table cellpadding="5">
<tbody>
<tr>
<td>
<b><font size="6">Alignment of two DNA, RNA or protein sequences</font></b> <br>
<div align="right"><a href="javascript: tidyup ()">Tidy Up Sequences</a></div>
<br>
<input type=text name=id1 value="Sequence 1" size=30>
<a href="javascript: strcomp1 ()" onmouseover="window.status='Complementary sequence'; return true" onmouseout="window.status=''; return true"><font size=-1>C</font></a>
<br>
<textarea name="sequence" cols="75" rows="3">GGAGTGAGGG GAGCAGTTGG CTGAAGATGG TCCCCGCCGA GGGACCGGTG GGCGACGGCG 60
AGCTGTGGCA GACCTGGCTT CCTAACCACG TCCGTGTTCT TGCGGCTCCG GGAGGGACTG 120 </textarea> <br>
<input type=text name=id2 value="Sequence 2" size=30>
<a href="javascript: strcomp2 ()" onmouseover="window.status='Complementary sequence'; return true" onmouseout="window.status=''; return true"><font size=-1>C</font></a>
<br>
<textarea name="sequence2" cols="75" rows="3">CGCATGCGGA GTGAGGGGAG CAGTTGGGAA CAGATGGTCC CCGCCGAGGG ACCGGTGGGC 60
GACGGCCAGC TGTGGCAGAC CTGGCTTCCT AACCACGGAA CGTTCTTTCC GCTCCGGGAG 120 </textarea>
<center>
<div align=Right><a href="<?php print htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES); ?>?action=info">Info</a></div><br><input type="submit" value="Align sequences">
</center>
</td></tr></tbody>
</table>
</form>
Freely downloable PHP script at <a href=http://www.biophp.org/minitools/seq_alignment>biophp.org</a>
</center>
</body>
</html>
<?php
} // end of form
// ########################################################################################
/**
 * Print the static information page (origin of the script, reference for
 * the alignment method, author and contact details) with a link back to
 * the tool.
 *
 * Uses full "<?php" tags so the page does not depend on short_open_tag,
 * HTML-escapes PHP_SELF, and writes the spacer paragraph as the entity
 * "&nbsp;" (it was mistyped as "%nbsp;" and shown literally).
 */
function print_info (){
?>
<html>
<head>
<title>Alignment of two DNA or protein sequences</title>
</head>
<body style="background-color: rgb(255, 255, 255);">
<center>
<table>
<tbody>
<tr>
<td style="vertical-align: top;">
<h1 align="center">Alignment of two DNA sequences</h1>
<div style="text-align: right;"><a href="<?php print htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES); ?>">Start using this tool</a><br>
</div>
<br>
This script has been adapted to PHP scripting languaje from the
original version written in Visual Basic for Applications and available on a
Excel page <a href="http://webpages.ull.es/users/jmhernan/">here </a>.
<br>
<br>
The alignment method is the Smith-Waterman type (Smith, T. F., & M. S.
Waterman. 1981. Identification of common molecular subsequences.
Journal of Molecular Biology 147:195-197. <a
href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=7265238">PubMed</a>).
To run the program, paste the
DNA or RNA sequences in the form and submit the data. <br>
<br>
Alignment is shown on the response page, and sequences with gabs are
at the bottom.
<p>&nbsp;
<p>Developed by: <br>
</p>
<p>Dr. <b>Jose Manuel González Hernández</b> <br>
Departamento de Microbiología y Biología Celular <br>
Facultad de Farmacia <br>
Universidad de La Laguna <br>
La Laguna, Tenerife <br>
Spain </p>
<p>For suggestions or problems, <a href="http://www.in-silico.com/contact.php">contact us</a> </p>
<div style="text-align: center;">
<hr style="width: 100%; height: 2px;"><br>
<a href="http://www.in-silico.com">www.in-silico.com</a><br>
</div>
</td>
</tr>
</tbody>
</table>
</center>
</body>
</html>
<?php
} // end of info
?>