";
}
print "";
}
// Description for Function find_microsatellite_repeats
// Searches a sequence for microsatellite (tandem) repeats: stretches in which a short unit is repeated
// back to back, as in 'ACGTACGTACGTACGT', where 'ACGT' is repeated 4 times.
//
// The sequence is scanned from left to right. At each position the unit lengths are tried from
// $min_length up to $max_length, and the first unit length that satisfies both thresholds is reported.
// Scanning then resumes at the first base after the reported repeat, so reported repeats never overlap.
//
// Parameters
// $sequence is the sequence to be searched
// $min_length and $max_length are the range of unit (oligo) lengths to be searched; e.g. lengths 2 to 6.
//     A $min_length below 1 is treated as 1.
// $min_repeats is the minimal number of times a unit must be repeated to be reported
// $min_length_of_MR is the minimum total length of the tandem repeat; to avoid reporting AAAA, set it to >4
// $mismatches_allowed is the fraction of the unit length that may mismatch in each repeated copy; the
//     number of mismatches is floor(unit length * $mismatches_allowed), so that sequence
//     AACCGGTT-AAGCGGTT-AACCGGAT-AACCGGTT may be considered as a microsatellite repeat.
//     Only 1, 2 or 3 mismatches are supported; any other number falls back to exact matching.
//
// Return
// An array with one entry per repeat found (an empty array when nothing is found), structured as:
// $results=Array(
//     0=>Array(
//         start_position => 10,      (0-based offset within $sequence)
//         length => 4,               (length of the repeated unit)
//         repeats => 4,              (number of consecutive copies, the first one included)
//         sequence => ACGTACGTACGTACGT,
//     ),
//     1=>Array(
//         start_position => 50,
//         length => 3,
//         repeats => 3,
//         sequence => ATCATCATC,
//     ),
// );
//
// Requirements:
// Functions IncludeN_1, IncludeN_2 and IncludeN_3 (only called when mismatches are allowed)
function find_microsatellite_repeats($sequence,$min_length,$max_length,$min_repeats,$min_length_of_MR,$mismatches_allowed){
    // Always return an array, even when no repeat is found.
    $results=array();
    $len_seq=strlen($sequence);
    // A unit shorter than one base would yield an empty pattern that matches forever.
    $shortest_unit=max(1,(int)$min_length);

    // Visit every position where at least the shortest unit still fits.
    for ($i=0;$i+$shortest_unit<=$len_seq;$i++){
        for ($j=$shortest_unit;$j<=$max_length;$j++){
            // Longer units cannot fit at this position either.
            if (($i+$j)>$len_seq){break;}

            $sub_seq=substr($sequence,$i,$j);
            $mismatches=(int)floor($j*$mismatches_allowed);
            switch ($mismatches){
                case 1: $sub_seq_pattern=includeN_1($sub_seq,0); break;
                case 2: $sub_seq_pattern=includeN_2($sub_seq,0); break;
                case 3: $sub_seq_pattern=includeN_3($sub_seq,0); break;
                // Exact matching: escape the unit so it is never read as regex syntax.
                default: $sub_seq_pattern=preg_quote($sub_seq,"/");
            }

            // Count consecutive copies; the unit at $i itself is the first one.
            $matches=1;
            while (preg_match_all("/($sub_seq_pattern)/",substr($sequence,$i+$j*$matches,$j),$out)==1){
                $matches++;
            }

            if ($matches>=$min_repeats and ($j*$matches)>=$min_length_of_MR){
                $results[]=array(
                    "start_position"=>$i,
                    "length"=>$j,
                    "repeats"=>$matches,
                    "sequence"=>substr($sequence,$i,$j*$matches),
                );
                // Park $i on the last base of the repeat and leave the unit-length loop;
                // the outer $i++ then resumes exactly on the first base after the repeat.
                $i+=$j*$matches-1;
                break;
            }
        }
    }
    return ($results);
}
// Description for Function IncludeN_1
// When a DNA sequence ("$primer") is provided to this function, as for example "acgt", this function will return
// a pattern like ".cgt|a.gt|ac.t|acg.". This pattern may be useful to find within a DNA sequence
// subsequences matching $primer, but allowing one mismatch. The parameter $minus
// is a numeric value which indicates the number of bases at the 3' end that must always match the DNA sequence.
// For example, when $minus is 1, the pattern for "acgt" will be ".cgt|a.gt|ac.t".
// Check also IncludeN_2 and IncludeN_3.
//
// Parameters
// $primer is a DNA sequence (oligonucleotide, primer)
// $minus indicates the number of bases at the 3' end which will always match the DNA sequence.
//
// Return
// Returns a pattern (as described in "Description").
function includeN_1($primer,$minus) {
$code=".".substr($primer,1);
$wpos=1;
while ($wposDefinitions Microsatellite Repeat:
A variety of simple di- (DINUCLEOTIDE REPEATS), tri- (TRINUCLEOTIDE REPEATS), tetra-, and pentanucleotide tandem repeats (usually less than 100 bases long).
Tandem repeats:
Copies of DNA sequences which lie adjacent to each other
NOTE: for sort repeated sequences (p.e. AA or AAA), no mismatches are available.
--------------------
2008/03/17 17:25 / hypics
--------------------
Find tandem repeats
Microsatellite repeats finder
";
print "
Posición
Cicle
Repeats
Sequence
\n";
foreach ($results as $key => $val){
print "
";
print "
".$results[$key]["start_position"]."
";
print "
".$results[$key]["length"]."
";
print "
".$results[$key]["repeats"]."
";
print "
".$results[$key]["sequence"]."
\n";
print "
";
}
print "";
}
// Description for Function find_microsatellite_repeats
// Searches a sequence for microsatellite (tandem) repeats: stretches in which a short unit is repeated
// back to back, as in 'ACGTACGTACGTACGT', where 'ACGT' is repeated 4 times.
//
// The sequence is scanned from left to right. At each position the unit lengths are tried from
// $min_length up to $max_length, and the first unit length that satisfies both thresholds is reported.
// Scanning then resumes at the first base after the reported repeat, so reported repeats never overlap.
//
// Parameters
// $sequence is the sequence to be searched
// $min_length and $max_length are the range of unit (oligo) lengths to be searched; e.g. lengths 2 to 6.
//     A $min_length below 1 is treated as 1.
// $min_repeats is the minimal number of times a unit must be repeated to be reported
// $min_length_of_MR is the minimum total length of the tandem repeat; to avoid reporting AAAA, set it to >4
// $mismatches_allowed is the fraction of the unit length that may mismatch in each repeated copy; the
//     number of mismatches is floor(unit length * $mismatches_allowed), so that sequence
//     AACCGGTT-AAGCGGTT-AACCGGAT-AACCGGTT may be considered as a microsatellite repeat.
//     Only 1, 2 or 3 mismatches are supported; any other number falls back to exact matching.
//
// Return
// An array with one entry per repeat found (an empty array when nothing is found), structured as:
// $results=Array(
//     0=>Array(
//         start_position => 10,      (0-based offset within $sequence)
//         length => 4,               (length of the repeated unit)
//         repeats => 4,              (number of consecutive copies, the first one included)
//         sequence => ACGTACGTACGTACGT,
//     ),
//     1=>Array(
//         start_position => 50,
//         length => 3,
//         repeats => 3,
//         sequence => ATCATCATC,
//     ),
// );
//
// Requirements:
// Functions IncludeN_1, IncludeN_2 and IncludeN_3 (only called when mismatches are allowed)
function find_microsatellite_repeats($sequence,$min_length,$max_length,$min_repeats,$min_length_of_MR,$mismatches_allowed){
    // Always return an array, even when no repeat is found.
    $results=array();
    $len_seq=strlen($sequence);
    // A unit shorter than one base would yield an empty pattern that matches forever.
    $shortest_unit=max(1,(int)$min_length);

    // Visit every position where at least the shortest unit still fits.
    for ($i=0;$i+$shortest_unit<=$len_seq;$i++){
        for ($j=$shortest_unit;$j<=$max_length;$j++){
            // Longer units cannot fit at this position either.
            if (($i+$j)>$len_seq){break;}

            $sub_seq=substr($sequence,$i,$j);
            $mismatches=(int)floor($j*$mismatches_allowed);
            switch ($mismatches){
                case 1: $sub_seq_pattern=includeN_1($sub_seq,0); break;
                case 2: $sub_seq_pattern=includeN_2($sub_seq,0); break;
                case 3: $sub_seq_pattern=includeN_3($sub_seq,0); break;
                // Exact matching: escape the unit so it is never read as regex syntax.
                default: $sub_seq_pattern=preg_quote($sub_seq,"/");
            }

            // Count consecutive copies; the unit at $i itself is the first one.
            $matches=1;
            while (preg_match_all("/($sub_seq_pattern)/",substr($sequence,$i+$j*$matches,$j),$out)==1){
                $matches++;
            }

            if ($matches>=$min_repeats and ($j*$matches)>=$min_length_of_MR){
                $results[]=array(
                    "start_position"=>$i,
                    "length"=>$j,
                    "repeats"=>$matches,
                    "sequence"=>substr($sequence,$i,$j*$matches),
                );
                // Park $i on the last base of the repeat and leave the unit-length loop;
                // the outer $i++ then resumes exactly on the first base after the repeat.
                $i+=$j*$matches-1;
                break;
            }
        }
    }
    return ($results);
}
// Description for Function IncludeN_1
// When a DNA sequence ("$primer") is provided to this function, as for example "acgt", this function will return
// a pattern like ".cgt|a.gt|ac.t|acg.". This pattern may be useful to find within a DNA sequence
// subsequences matching $primer, but allowing one mismatch. The parameter $minus
// is a numeric value which indicates the number of bases at the 3' end that must always match the DNA sequence.
// For example, when $minus is 1, the pattern for "acgt" will be ".cgt|a.gt|ac.t".
// Check also IncludeN_2 and IncludeN_3.
//
// Parameters
// $primer is a DNA sequence (oligonucleotide, primer)
// $minus indicates the number of bases at the 3' end which will always match the DNA sequence.
//
// Return
// Returns a pattern (as described in "Description").
function includeN_1($primer,$minus) {
$code=".".substr($primer,1);
$wpos=1;
while ($wposDefinitions Microsatellite Repeat:
A variety of simple di- (DINUCLEOTIDE REPEATS), tri- (TRINUCLEOTIDE REPEATS), tetra-, and pentanucleotide tandem repeats (usually less than 100 bases long).
Tandem repeats:
Copies of DNA sequences which lie adjacent to each other
NOTE: for sort repeated sequences (p.e. AA or AAA), no mismatches are available.
--------------------
2008/03/17 17:49 / hypics
--------------------
Find tandem repeats
Microsatellite repeats finder
";
print "
Posición
Cicle
Repeats
Sequence
\n";
foreach ($results as $key => $val){
print "
";
print "
".$results[$key]["start_position"]."
";
print "
".$results[$key]["length"]."
";
print "
".$results[$key]["repeats"]."
";
print "
".$results[$key]["sequence"]."
\n";
print "
";
}
print "";
}
// Description for Function find_microsatellite_repeats
// Searches a sequence for microsatellite (tandem) repeats: stretches in which a short unit is repeated
// back to back, as in 'ACGTACGTACGTACGT', where 'ACGT' is repeated 4 times.
//
// The sequence is scanned from left to right. At each position the unit lengths are tried from
// $min_length up to $max_length, and the first unit length that satisfies both thresholds is reported.
// Scanning then resumes at the first base after the reported repeat, so reported repeats never overlap.
//
// Parameters
// $sequence is the sequence to be searched
// $min_length and $max_length are the range of unit (oligo) lengths to be searched; e.g. lengths 2 to 6.
//     A $min_length below 1 is treated as 1.
// $min_repeats is the minimal number of times a unit must be repeated to be reported
// $min_length_of_MR is the minimum total length of the tandem repeat; to avoid reporting AAAA, set it to >4
// $mismatches_allowed is the fraction of the unit length that may mismatch in each repeated copy; the
//     number of mismatches is floor(unit length * $mismatches_allowed), so that sequence
//     AACCGGTT-AAGCGGTT-AACCGGAT-AACCGGTT may be considered as a microsatellite repeat.
//     Only 1, 2 or 3 mismatches are supported; any other number falls back to exact matching.
//
// Return
// An array with one entry per repeat found (an empty array when nothing is found), structured as:
// $results=Array(
//     0=>Array(
//         start_position => 10,      (0-based offset within $sequence)
//         length => 4,               (length of the repeated unit)
//         repeats => 4,              (number of consecutive copies, the first one included)
//         sequence => ACGTACGTACGTACGT,
//     ),
//     1=>Array(
//         start_position => 50,
//         length => 3,
//         repeats => 3,
//         sequence => ATCATCATC,
//     ),
// );
//
// Requirements:
// Functions IncludeN_1, IncludeN_2 and IncludeN_3 (only called when mismatches are allowed)
function find_microsatellite_repeats($sequence,$min_length,$max_length,$min_repeats,$min_length_of_MR,$mismatches_allowed){
    // Always return an array, even when no repeat is found.
    $results=array();
    $len_seq=strlen($sequence);
    // A unit shorter than one base would yield an empty pattern that matches forever.
    $shortest_unit=max(1,(int)$min_length);

    // Visit every position where at least the shortest unit still fits.
    for ($i=0;$i+$shortest_unit<=$len_seq;$i++){
        for ($j=$shortest_unit;$j<=$max_length;$j++){
            // Longer units cannot fit at this position either.
            if (($i+$j)>$len_seq){break;}

            $sub_seq=substr($sequence,$i,$j);
            $mismatches=(int)floor($j*$mismatches_allowed);
            switch ($mismatches){
                case 1: $sub_seq_pattern=includeN_1($sub_seq,0); break;
                case 2: $sub_seq_pattern=includeN_2($sub_seq,0); break;
                case 3: $sub_seq_pattern=includeN_3($sub_seq,0); break;
                // Exact matching: escape the unit so it is never read as regex syntax.
                default: $sub_seq_pattern=preg_quote($sub_seq,"/");
            }

            // Count consecutive copies; the unit at $i itself is the first one.
            $matches=1;
            while (preg_match_all("/($sub_seq_pattern)/",substr($sequence,$i+$j*$matches,$j),$out)==1){
                $matches++;
            }

            if ($matches>=$min_repeats and ($j*$matches)>=$min_length_of_MR){
                $results[]=array(
                    "start_position"=>$i,
                    "length"=>$j,
                    "repeats"=>$matches,
                    "sequence"=>substr($sequence,$i,$j*$matches),
                );
                // Park $i on the last base of the repeat and leave the unit-length loop;
                // the outer $i++ then resumes exactly on the first base after the repeat.
                $i+=$j*$matches-1;
                break;
            }
        }
    }
    return ($results);
}
// Description for Function IncludeN_1
// When a DNA sequence ("$primer") is provided to this function, as for example "acgt", this function will return
// a pattern like ".cgt|a.gt|ac.t|acg.". This pattern may be useful to find within a DNA sequence
// subsequences matching $primer, but allowing one mismatch. The parameter $minus
// is a numeric value which indicates the number of bases at the 3' end that must always match the DNA sequence.
// For example, when $minus is 1, the pattern for "acgt" will be ".cgt|a.gt|ac.t".
// Check also IncludeN_2 and IncludeN_3.
//
// Parameters
// $primer is a DNA sequence (oligonucleotide, primer)
// $minus indicates the number of bases at the 3' end which will always match the DNA sequence.
//
// Return
// Returns a pattern (as described in "Description").
function includeN_1($primer,$minus) {
$code=".".substr($primer,1);
$wpos=1;
while ($wposDefinitions Microsatellite Repeat:
A variety of simple di- (DINUCLEOTIDE REPEATS), tri- (TRINUCLEOTIDE REPEATS), tetra-, and pentanucleotide tandem repeats (usually less than 100 bases long).
Tandem repeats:
Copies of DNA sequences which lie adjacent to each other
NOTE: for sort repeated sequences (p.e. AA or AAA), no mismatches are available.