Beginner Bioinformatics in Python — Part 7

{
'A':[0.33,0,1,0],
'T':[0.67,0,0,0],
'C':[0,0.33,0,1],
'G':[0,0.67,0,0]
}
The Original Count Matrix — 
{
'A':[1,0,3,0],
'T':[2,0,0,0],
'C':[0,1,0,3],
'G':[0,2,0,0]
}
Transformed Count Matrix - 
{
'A':[2,1,4,1],
'T':[3,1,1,1],
'C':[1,2,1,4],
'G':[1,3,1,1]
}
The new Profile Matrix -
{
'A':[2/7,1/7,4/7,1/7],
'T':[3/7,1/7,1/7,1/7],
'C':[1/7,2/7,1/7,4/7],
'G':[1/7,3/7,1/7,1/7]
}
def CountWithPseudocounts(Motifs):
    """Return the count matrix of ``Motifs`` with a pseudocount added to every entry.

    The mapping produced by ``Count(Motifs)`` is taken as the starting point;
    each of its values is passed through ``add_pseudocount_toarray`` and stored
    under the same key in a new dictionary.
    """
    raw_counts = Count(Motifs)
    smoothed = {}
    for symbol, position_counts in raw_counts.items():
        smoothed[symbol] = add_pseudocount_toarray(position_counts)
    return smoothed


def add_pseudocount_toarray(motif_count_array):
    """Return a new list with every element of ``motif_count_array`` increased by 1.

    The input sequence is left unmodified; an empty input yields an empty list.
    """
    return [count + 1 for count in motif_count_array]
def ProfileWithPseudocounts(Motifs):
    """Return the profile matrix of ``Motifs`` computed from pseudocounted counts.

    Every entry of ``CountWithPseudocounts(Motifs)`` is divided by
    ``len(Motifs) + 4``.
    """
    counts = CountWithPseudocounts(Motifs)
    # The "+ 4" accounts for one pseudocount per row of the count matrix;
    # this assumes the matrix has exactly four rows (A, C, G, T).
    column_total = len(Motifs) + 4
    return {
        symbol: [count / column_total for count in row]
        for symbol, row in counts.items()
    }
Sample Input:
3 5
GGCGTTCAGGCA
AAGAATCAGTCA
CAAGGAGTTCGC
CACGTCAATCAC
CAATAATATTCG
Sample Output:
TTC
ATC
TTC
ATC
TTC
def GreedyMotifSearchWithPseudocounts(Dna, k, t):
    """Return the lowest-scoring motif collection found by greedy search.

    One candidate collection is built for every start position of a
    ``k``-length window in ``Dna[0]``; the candidate with the smallest
    ``Score`` is returned (the earliest one on ties).

    ``t`` is accepted for interface compatibility but is not used here.
    """
    window_starts = range(len(Dna[0]) - k + 1)
    candidates = []
    for start in window_starts:
        candidates.append(
            best_motifs_for_given_iteration_with_pseudocounts(Dna, k, start)
        )
    # min() keeps the first minimal element, matching a first-index lookup.
    return min(candidates, key=Score)

def best_motifs_for_given_iteration_with_pseudocounts(dna, substring_length, index):
    """Build a motif collection seeded with one window of the first string.

    The seed is the slice of ``dna[0]`` starting at ``index`` with length
    ``substring_length``. The remaining rows (from row 1 onward) are filled in
    by ``recursive_compute_best_motifs_with_pseudocounts``, starting from the
    pseudocounted profile of the seed alone.
    """
    window_end = index + substring_length
    seed = dna[0][index:window_end]
    seed_profile = ProfileWithPseudocounts([seed])
    return recursive_compute_best_motifs_with_pseudocounts(
        dna, substring_length, [seed], seed_profile, 1
    )

def recursive_compute_best_motifs_with_pseudocounts(dna, substring_length, previous_motifs, profile_matrix, row_index):
    """Extend ``previous_motifs`` with one motif per remaining row of ``dna``.

    For the row at ``row_index`` the motif is chosen by
    ``ProfileMostProbableKmer`` against ``profile_matrix``; the profile is then
    rebuilt with ``ProfileWithPseudocounts`` from all motifs chosen so far and
    the function recurses on the next row.

    Returns ``previous_motifs`` unchanged once ``row_index`` equals
    ``len(dna)``. The input list is never mutated; a new list is built at
    every step.
    """
    if row_index == len(dna):
        return previous_motifs
    motif_for_row_index = ProfileMostProbableKmer(dna[row_index], substring_length, profile_matrix)
    current_motifs = previous_motifs + [motif_for_row_index]
    # Recurse into THIS function: the original called the differently named
    # `recursive_compute_best_motifs`, which is not this pseudocount variant.
    return recursive_compute_best_motifs_with_pseudocounts(
        dna,
        substring_length,
        current_motifs,
        ProfileWithPseudocounts(current_motifs),
        row_index + 1,
    )
def greedy_search_with_custom_profile_function(Dna, k, t, profile_function):
    """Run the greedy motif search with a caller-supplied profile builder.

    For every start position of a ``k``-length window in ``Dna[0]``, a
    candidate motif collection is obtained from
    ``best_motifs_for_given_iteration`` (which receives ``profile_function``).
    The candidate with the smallest ``Score`` is returned, the earliest one
    on ties.

    ``t`` is accepted for interface compatibility but is not used here.
    """
    candidates = []
    for start in range(len(Dna[0]) - k + 1):
        candidates.append(
            best_motifs_for_given_iteration(Dna, k, start, profile_function)
        )
    # min() keeps the first minimal element, matching a first-index lookup.
    return min(candidates, key=Score)
def GreedyMotifSearchWithPseudocounts(Dna, k, t):
    """Greedy motif search that builds its profiles with ``ProfileWithPseudocounts``.

    Delegates to ``greedy_search_with_custom_profile_function``, forwarding
    ``Dna``, ``k`` and ``t`` unchanged.
    """
    best_motifs = greedy_search_with_custom_profile_function(
        Dna, k, t, ProfileWithPseudocounts
    )
    return best_motifs

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store