Login | Register

Info | Home

BioPHP - Alignmento of proteins

Original code submitted by joseba
Code bellow is covered by GNU GPL v2 license.

Description

Last change: 2010/10/18 17:04 | Edit description | Recent Changes | Original description
Alignment of two protein sequences. 
Smith-Waterman alignment method is used.

Code

Last change: 2010/10/18 17:04 | Edit Code | Recent Changes | Download | Original code
function alignment_of_proteins($seqa,$seqb){
	$PAM250["CC"]=12;
	
	$PAM250["SC"]=0;$PAM250["SS"]=2;
	$PAM250["CS"]=0;

	$PAM250["TC"]=-2;$PAM250["TS"]=1;$PAM250["TT"]=3;
	$PAM250["CT"]=-2;$PAM250["ST"]=1;

	$PAM250["PC"]=-3;$PAM250["PS"]=1;$PAM250["PT"]=0;$PAM250["PP"]=6;
	$PAM250["CP"]=-3;$PAM250["SP"]=1;$PAM250["TP"]=0;

	$PAM250["AC"]=-2;$PAM250["AS"]=1;$PAM250["AT"]=1;$PAM250["AP"]=1;$PAM250["AA"]=2;
	$PAM250["CA"]=-2;$PAM250["SA"]=1;$PAM250["TA"]=1;$PAM250["PA"]=1;

	$PAM250["GC"]=-3;$PAM250["GS"]=1;$PAM250["GT"]=0;$PAM250["GP"]=-1;$PAM250["GA"]=1;$PAM250["GG"]=5;
	$PAM250["CG"]=-3;$PAM250["SG"]=1;$PAM250["TG"]=0;$PAM250["PG"]=-1;$PAM250["AG"]=1;

	$PAM250["NC"]=-4;$PAM250["NS"]=1;$PAM250["NT"]=0;$PAM250["NP"]=-1;$PAM250["NA"]=0;$PAM250["NG"]=0;$PAM250["NN"]=2;
	$PAM250["CN"]=-4;$PAM250["SN"]=1;$PAM250["TN"]=0;$PAM250["PN"]=-1;$PAM250["AN"]=0;$PAM250["GN"]=0;

	$PAM250["DC"]=-5;$PAM250["DS"]=0;$PAM250["DT"]=0;$PAM250["DP"]=-1;$PAM250["DA"]=0;$PAM250["DG"]=1;$PAM250["DN"]=2;$PAM250["DD"]=4;
	$PAM250["CD"]=-5;$PAM250["SD"]=0;$PAM250["TD"]=0;$PAM250["PD"]=-1;$PAM250["AD"]=0;$PAM250["GD"]=1;$PAM250["ND"]=2;

	$PAM250["EC"]=-5;$PAM250["ES"]=0;$PAM250["ET"]=0;$PAM250["EP"]=-1;$PAM250["EA"]=0;$PAM250["EG"]=0;$PAM250["EN"]=1;$PAM250["ED"]=3;$PAM250["EE"]=4;
	$PAM250["CE"]=-5;$PAM250["SE"]=0;$PAM250["TE"]=0;$PAM250["PE"]=-1;$PAM250["AE"]=0;$PAM250["GE"]=0;$PAM250["NE"]=1;$PAM250["DE"]=3;

	$PAM250["QC"]=-5;$PAM250["QS"]=-1;$PAM250["QT"]=-1;$PAM250["QP"]=0;$PAM250["QA"]=0;$PAM250["QG"]=-1;$PAM250["QN"]=1;$PAM250["QD"]=2;$PAM250["QE"]=2;$PAM250["QQ"]=4;
	$PAM250["CQ"]=-5;$PAM250["SQ"]=-1;$PAM250["TQ"]=-1;$PAM250["PQ"]=0;$PAM250["AQ"]=0;$PAM250["GQ"]=-1;$PAM250["NQ"]=1;$PAM250["DQ"]=2;$PAM250["EQ"]=2;

	$PAM250["HC"]=-3;$PAM250["HS"]=-1;$PAM250["HT"]=-1;$PAM250["HP"]=0;$PAM250["HA"]=-1;$PAM250["HG"]=-2;$PAM250["HN"]=2;$PAM250["HD"]=1;$PAM250["HE"]=1;$PAM250["HQ"]=3;$PAM250["HH"]=6;
	$PAM250["CH"]=-3;$PAM250["SH"]=-1;$PAM250["TH"]=-1;$PAM250["PH"]=0;$PAM250["AH"]=-1;$PAM250["GH"]=-2;$PAM250["NH"]=2;$PAM250["DH"]=1;$PAM250["EH"]=1;$PAM250["QH"]=3;

	$PAM250["RC"]=-4;$PAM250["RS"]=0;$PAM250["RT"]=-1;$PAM250["RP"]=0;$PAM250["RA"]=-2;$PAM250["RG"]=-3;$PAM250["RN"]=0;$PAM250["RD"]=-1;$PAM250["RE"]=-1;$PAM250["RQ"]=1;$PAM250["RH"]=2;$PAM250["RR"]=6;
	$PAM250["CR"]=-4;$PAM250["SR"]=0;$PAM250["TR"]=-1;$PAM250["PR"]=0;$PAM250["AR"]=-2;$PAM250["GR"]=-3;$PAM250["NR"]=0;$PAM250["DR"]=-1;$PAM250["ER"]=-1;$PAM250["QR"]=1;$PAM250["HR"]=2;

	$PAM250["KC"]=-5;$PAM250["KS"]=0;$PAM250["KT"]=0;$PAM250["KP"]=-1;$PAM250["KA"]=-1;$PAM250["KG"]=-2;$PAM250["KN"]=1;$PAM250["KD"]=0;$PAM250["KE"]=0;$PAM250["KQ"]=1;$PAM250["KH"]=0;$PAM250["KR"]=3;$PAM250["KK"]=5;
	$PAM250["CK"]=-5;$PAM250["SK"]=0;$PAM250["TK"]=0;$PAM250["PK"]=-1;$PAM250["AK"]=-1;$PAM250["GK"]=-2;$PAM250["NK"]=1;$PAM250["DK"]=0;$PAM250["EK"]=0;$PAM250["QK"]=1;$PAM250["HK"]=0;$PAM250["RK"]=3;

	$PAM250["MC"]=-5;$PAM250["MS"]=-2;$PAM250["MT"]=-1;$PAM250["MP"]=-2;$PAM250["MA"]=-1;$PAM250["MG"]=-3;$PAM250["MN"]=-2;$PAM250["MD"]=-3;$PAM250["ME"]=-2;$PAM250["MQ"]=-1;$PAM250["MH"]=-2;$PAM250["MR"]=0;$PAM250["MK"]=0;$PAM250["MM"]=6;
	$PAM250["CM"]=-5;$PAM250["SM"]=-2;$PAM250["TM"]=-1;$PAM250["PM"]=-2;$PAM250["AM"]=-1;$PAM250["GM"]=-3;$PAM250["NM"]=-2;$PAM250["DM"]=-3;$PAM250["EM"]=-2;$PAM250["QM"]=-1;$PAM250["HM"]=-2;$PAM250["RM"]=0;$PAM250["KM"]=0;

	$PAM250["IC"]=-2;$PAM250["IS"]=-1;$PAM250["IT"]=0;$PAM250["IP"]=-2;$PAM250["IA"]=-1;$PAM250["IG"]=-3;$PAM250["IN"]=-2;$PAM250["ID"]=-2;$PAM250["IE"]=-2;$PAM250["IQ"]=-2;$PAM250["IH"]=-2;$PAM250["IR"]=-2;$PAM250["IK"]=-2;$PAM250["IM"]=2;$PAM250["II"]=5;
	$PAM250["CI"]=-2;$PAM250["SI"]=-1;$PAM250["TI"]=0;$PAM250["PI"]=-2;$PAM250["AI"]=-1;$PAM250["GI"]=-3;$PAM250["NI"]=-2;$PAM250["DI"]=-2;$PAM250["EI"]=-2;$PAM250["QI"]=-2;$PAM250["HI"]=-2;$PAM250["RI"]=-2;$PAM250["KI"]=-2;$PAM250["MI"]=2;

	$PAM250["LC"]=-6;$PAM250["LS"]=-3;$PAM250["LT"]=-2;$PAM250["LP"]=-3;$PAM250["LA"]=-2;$PAM250["LG"]=-4;$PAM250["LN"]=-3;$PAM250["LD"]=-4;$PAM250["LE"]=-3;$PAM250["LQ"]=-2;$PAM250["LH"]=-2;$PAM250["LR"]=-3;$PAM250["LK"]=-3;$PAM250["LM"]=4;$PAM250["LI"]=2;$PAM250["LL"]=6;
	$PAM250["CL"]=-6;$PAM250["SL"]=-3;$PAM250["TL"]=-2;$PAM250["PL"]=-3;$PAM250["AL"]=-2;$PAM250["GL"]=-4;$PAM250["NL"]=-3;$PAM250["DL"]=-4;$PAM250["EL"]=-3;$PAM250["QL"]=-2;$PAM250["HL"]=-2;$PAM250["RL"]=-3;$PAM250["KL"]=-3;$PAM250["ML"]=4;$PAM250["IL"]=2;

	$PAM250["VC"]=-2;$PAM250["VS"]=-1;$PAM250["VT"]=0;$PAM250["VP"]=-1;$PAM250["VA"]=0;$PAM250["VG"]=-1;$PAM250["VN"]=-2;$PAM250["VD"]=-2;$PAM250["VE"]=-2;$PAM250["VQ"]=-2;$PAM250["VH"]=-2;$PAM250["VR"]=-2;$PAM250["VK"]=-2;$PAM250["VM"]=2;$PAM250["VI"]=4;$PAM250["VL"]=2;$PAM250["VV"]=4;
	$PAM250["CV"]=-2;$PAM250["SV"]=-1;$PAM250["TV"]=0;$PAM250["PV"]=-1;$PAM250["AV"]=0;$PAM250["GV"]=-1;$PAM250["NV"]=-2;$PAM250["DV"]=-2;$PAM250["EV"]=-2;$PAM250["QV"]=-2;$PAM250["HV"]=-2;$PAM250["RV"]=-2;$PAM250["KV"]=-2;$PAM250["MV"]=2;$PAM250["IV"]=4;$PAM250["LV"]=2;

	$PAM250["FC"]=-4;$PAM250["FS"]=-3;$PAM250["FT"]=-3;$PAM250["FP"]=-5;$PAM250["FA"]=-4;$PAM250["FG"]=-5;$PAM250["FN"]=-4;$PAM250["FD"]=-6;$PAM250["FE"]=-5;$PAM250["FQ"]=-5;$PAM250["FH"]=-2;$PAM250["FR"]=-4;$PAM250["FK"]=-5;$PAM250["FM"]=0;$PAM250["FI"]=1;$PAM250["FL"]=2;$PAM250["FV"]=-1;$PAM250["FF"]=9;
	$PAM250["CF"]=-4;$PAM250["SF"]=-3;$PAM250["TF"]=-3;$PAM250["PF"]=-5;$PAM250["AF"]=-4;$PAM250["GF"]=-5;$PAM250["NF"]=-4;$PAM250["DF"]=-6;$PAM250["EF"]=-5;$PAM250["QF"]=-5;$PAM250["HF"]=-2;$PAM250["RF"]=-4;$PAM250["KF"]=-5;$PAM250["MF"]=0;$PAM250["IF"]=1;$PAM250["LF"]=2;$PAM250["VF"]=-1;

	$PAM250["YC"]=0;$PAM250["YS"]=-3;$PAM250["YT"]=-3;$PAM250["YP"]=-5;$PAM250["YA"]=-3;$PAM250["YG"]=-5;$PAM250["YN"]=-2;$PAM250["YD"]=-4;$PAM250["YE"]=-4;$PAM250["YQ"]=-4;$PAM250["YH"]=0;$PAM250["YR"]=-4;$PAM250["YK"]=-4;$PAM250["YM"]=-2;$PAM250["YI"]=-1;$PAM250["YL"]=-1;$PAM250["YV"]=-2;$PAM250["YF"]=7;$PAM250["YY"]=10;
	$PAM250["CY"]=0;$PAM250["SY"]=-3;$PAM250["TY"]=-3;$PAM250["PY"]=-5;$PAM250["AY"]=-3;$PAM250["GY"]=-5;$PAM250["NY"]=-2;$PAM250["DY"]=-4;$PAM250["EY"]=-4;$PAM250["QY"]=-4;$PAM250["HY"]=0;$PAM250["RY"]=-4;$PAM250["KY"]=-4;$PAM250["MY"]=-2;$PAM250["IY"]=-1;$PAM250["LY"]=-1;$PAM250["VY"]=-2;$PAM250["FY"]=7;

	$PAM250["WC"]=-8;$PAM250["WS"]=-2;$PAM250["WT"]=-5;$PAM250["WP"]=-6;$PAM250["WA"]=-6;$PAM250["WG"]=-7;$PAM250["WN"]=-4;$PAM250["WD"]=-7;$PAM250["WE"]=-7;$PAM250["WQ"]=-5;$PAM250["WH"]=3;$PAM250["WR"]=2;$PAM250["WK"]=-3;$PAM250["WM"]=-4;$PAM250["WI"]=-5;$PAM250["WL"]=-2;$PAM250["WV"]=-6;$PAM250["WF"]=0;$PAM250["WY"]=0;$PAM250["WW"]=17;
	$PAM250["CW"]=-8;$PAM250["SW"]=-2;$PAM250["TW"]=-5;$PAM250["PW"]=-6;$PAM250["AW"]=-6;$PAM250["GW"]=-7;$PAM250["NW"]=-4;$PAM250["DW"]=-7;$PAM250["EW"]=-7;$PAM250["QW"]=-5;$PAM250["HW"]=3;$PAM250["RW"]=2;$PAM250["KW"]=-3;$PAM250["MW"]=-4;$PAM250["IW"]=-5;$PAM250["LW"]=-2;$PAM250["VW"]=-6;$PAM250["FW"]=0;$PAM250["YW"]=0;

	$gap = -50;
	$arraya = preg_split('//', $seqa, -1, PREG_SPLIT_NO_EMPTY);
	$arrayb = preg_split('//', $seqb, -1, PREG_SPLIT_NO_EMPTY);
	$maxa=sizeof($arraya);
	$maxb=sizeof($arrayb);
	$a=$arraya;
	$lenn=$maxa;
	if($maxb>$lenn){
		$lenn=$maxb;
	}
	$b=$arrayb;
	for ($i=0;$i<$maxa;$i++){
		$matriz[0][$i]=$PAM250["$a[$i]$b[0]"];
	}
	for ($i=0;$i<$maxb;$i++){
		$matriz[$i][0]=$PAM250["$b[$i]$a[0]"];
	}
	for ($i=1;$i<$maxa;$i++){
        for ($j=1;$j<$maxb;$j++){        
        if($b[$j]==$a[$i]){
                $x=$matriz[$j-1][$i-1]+$PAM250["$b[$j]$a[$i]"];//$x=$matriz[$j-1][$i-1]+$match;
        }else{
                $x=$matriz[$j-1][$i-1]+$PAM250["$b[$j]$a[$i]"];//$x=$matriz[$j-1][$i-1]+$mismatch;
                $y=$matriz[$j][$i-1]+$gap;
                if($y>$x){$x=$y;}
                $y=$matriz[$j-1][$i]+$gap;
                if($y>$x){$x=$y;}
                if($x<0){$x=0;}
        }
        $matriz[$j][$i]=$x;
        $x=0;
        }//end for $j
	}
	$mx=0;
	for ($i=0;$i<$maxa;$i++){
        for ($j=0;$j<$maxb;$j++){
        if($mx<$matriz[$j][$i]){
        $mx=$matriz[$j][$i];
        $mj=$j;
        $mi=$i;
        }
        }
	}
	$j=$mj;
	$i=$mi;
	$matrizz[$j][$i]=1;//matrixx(n, m) = 1
	while ($i>0 or $j>0):
		$aa=$matriz[$j-1][$i-1];//a = matrix(n - 1, m - 1)
		$ab=$matriz[$j][$i-1];//b = matrix(n, m - 1)
		$ac=$matriz[$j-1][$i];//c = matrix(n - 1, m)
		if($aa<>'//' or $aa==0){//If a <> "" Then
            	if($aa>=$ab and $aa>=$ac){//If a >= b And a >= c Then
            	$j=$j-1;//    n = n - 1: m = m - 1
            	$i=$i-1;
            }
		if($ab>$aa){//If b > a Then m = m - 1
            	$i=$i-1;
		}
		if($ac>$aa){//If c > a Then n = n - 1
            	$j=$j-1;
                }
                }else{//If a = "" Then
            if($ab<>'//' or $ab==0){//    If b <> "" Then m = m - 1
            $i=$i-1;
            }
            if($ac<>'//' or $ac==0){//    If c <> "" Then n = n - 1
                        $j=$j-1;
            }
                }//End If
		if($j<0){//If n = 0 Then n = 1
		$j=0;
		}
		if($i<0){//If m = 0 Then m = 1
		$i=0;
		}
		$matrizz[$j][$i]=1;//matrixx(n, m) = 1
	endwhile;
	$j=$mj;//n = mn
	$i=$mi;//m = mm

	while ($i<strlen($seqa)-1 or $j<strlen($seqb)-1):
          	$aa=$matriz[$j+1][$i+1];////a = matrix(n + 1, m + 1)
    		$ab=$matriz[$j][$i+1];//b = matrix(n, m + 1)
    		$ac=$matriz[$j+1][$i];//c = matrix(n + 1, m)
    		if($aa<>'//' or $aa==0){//If a <> "" Then
            	if($aa>=$ab and $aa>=$ac){//If a >= b And a >= c Then
            	$j=$j+1;//    n = n - 1: m = m - 1
            	$i=$i+1;
            	}
    		if($ab>$aa){//If b > a Then m = m - 1
            	$i=$i+1;
    		}
    		if($ac>$aa){//If c > a Then n = n - 1
            	$j=$j+1;
                	}
                	}else{//If a = "" Then
            	if($ab<>'//' or $ab==0){//    If b <> "" Then m = m - 1
            	$i=$i+1;
            	}
            	if($ac<>'//' or $ac==0){//    If c <> "" Then n = n - 1
                        $j=$j+1;
            	}
                	}
    		if($j>$lenn){//If n > lenn Then n = lenn
            	$j=$lenn;        
               	}
                	if($i>$lenn){//If m > lenn Then m = lenn
            	$i=$lenn;        
                	}
    		$matrizz[$j][$i]=1;//matrixx(n, m) = 1
	endwhile;
	$j=0;
	$i=0;        
	$t=1;
	while ($i<strlen($seqa)-2 and $j<strlen($seqb)-2 and $t=1):
                $t=0;                
                if($matrizz[$j+1][$i+1]==1){
                        $t=1;
                        $sseqa.=$a[$i];
                  $sseqb.=$b[$j];                        
                  $i=$i+1;
                  $j=$j+1;
                }//else{
          if($matrizz[$j][$i+1]==1){
                  $t=1;
                        $sseqa.=$a[$i];
                  $sseqb.="-";
                  $i=$i+1;
          }//else{
          if($matrizz[$j+1][$i]==1){
                  $t=1;
                        $sseqa.="-";
                  $sseqb.=$b[$j];
                  $j=$j+1;
          }//}}
	endwhile;
	if($matrizz[$j+1][$i+1]==1){
        $sseqa.=$a[$i];
        $sseqb.=$b[$j];        
        $i=$i+1;
        $j=$j+1;
        $t=1;
	}
	if($t==0){
        if($matrizz[$j][$i+1]==1){
        	$sseqa.=$a[$i];
        	$sseqb.="-";
        	$i=$i+1;
		}
	}
	if($t==0){
 	  if($matrizz[$j+1][$i]==1){
        	$sseqa.="-";
        	$sseqb.=$b[$j];
        	$j=$j+1;
		}
	}
	if($j+1==$maxb){
        for ($ii=$i;$ii<$maxa;$ii++){
                $sseqa.=$a[$ii];
        }
        $sseqb.=$b[$j];
        for ($ii=$i;$ii<$maxa-1;$ii++){
                $sseqb.="-";
        }
	}
	if($i+1==$maxa){
        for ($ii=$j;$ii<$maxb;$ii++){
                $sseqb.=$b[$ii];
        }
        $sseqa.=$a[$i];
        for ($ii=$j;$ii<$maxb-1;$ii++){
                $sseqa.="-";
        }
	}
        $results["seqa"]=$sseqa;
        $results["seqb"]=$sseqb;
        // print_r ($results);
        return $results;
}