from typing import Union, List
from Bio import Align
Smith-Waterman Local Alignment using Python
bioinformatics
alignment
smith-waterman
biopython
Smith-Waterman is a local alignment method for sequence alignment. Below is example implementation using python.
Note
This post was written using:
- biopython
: 1.78
def get_score(A:str, B:str, mismatch_penalty:int, match_score:int) -> int:
# match
if A == B:
return match_score
# mismatch
return mismatch_penalty
def init_matrix(A: str, B: str) -> list:
= len(A) + 1
lenA = len(B) + 1
lenB = []
matrix for i in range(lenB):
0] * lenA)
matrix.append([
return matrix
def noNeg(x:int) -> int:
return max(0, x)
def SmithWaterman(A, B, gap_penalty:int=-2, mismatch_penalty:int=-1, match_score:int=4) -> Union[list, int, list]:
""" initialize matrix and fill
Returns:
list: 2D array of filled value according to Smith-Waterman algorithm
int: Max value in the final filled `matrix`
list: List of position [row, col] of `max_score` in `matrix`
"""
= init_matrix(A, B)
matrix
# in sw, lower bound to 0
for m in range(len(matrix)):
for n in range(len(matrix[0])):
= noNeg(matrix[m][n])
matrix[m][n]
= [[-1, -1]]
diag = [[-1, 0]]
top = [[0, -1]]
left
= 0
max_score = []
max_score_position
for row in range(1, len(B)+1):
for col in range(1, len(A)+1):
= A[col-1]
a_char = B[row-1]
b_char
for dr,dc in left:
= matrix[row + dr][col + dc] + gap_penalty
l
for dr,dc in top:
= matrix[row + dr][col + dc] + gap_penalty
t
for dr,dc in diag:
= matrix[row + dr][col + dc] + get_score(a_char, b_char, mismatch_penalty, match_score)
d
# l,t,d lower bouded to 0 (SW property)
= max(noNeg(l), noNeg(t), noNeg(d))
cur_score if cur_score > max_score:
= cur_score
max_score = [row, col]
max_score_position
= cur_score
matrix[row][col]
return matrix, max_score, max_score_position
def traceback(matrix: list, A: str, B: str,
list,
max_score_position: int,
gap_penalty: int,
mismatch_penalty: int) -> List[str]:
match_score: = []
aligned_A = []
aligned_B
= max_score_position[0]
row = max_score_position[1]
col
while row > 0 and col > 0:
= matrix[row - 1][col - 1]
d = matrix[row - 1][col]
t = matrix[row][col - 1]
l
# Stop when we reach a score of 0 (SW property)
if matrix[row][col] == 0:
break
# Diagonal move (match/mismatch)
if matrix[row][col] == d + get_score(A[col-1], B[row-1], mismatch_penalty, match_score):
-1])
aligned_A.append(A[col-1])
aligned_B.append(B[row-= 1
row -= 1
col # Left move (gap in B)
elif matrix[row][col] == l + gap_penalty:
-1])
aligned_A.append(A[col'-')
aligned_B.append(-= 1
col # Top move (gap in A)
elif matrix[row][col] == t + gap_penalty:
'-')
aligned_A.append(-1])
aligned_B.append(B[row-= 1
row
return ''.join(reversed(aligned_A)), ''.join(reversed(aligned_B))
## Params
= -2
GAP_PENALTY = -1
MISMATCH_PENALTY = 2 MATCH_SCORE
= "AACG", "AATCG" # A = top, B = left
A, B = SmithWaterman(A, B,
matrix, max_score, max_score_position = GAP_PENALTY,
gap_penalty = MISMATCH_PENALTY,
mismatch_penalty = MATCH_SCORE)
match_score for m in matrix:
print(m)
[0, 0, 0, 0, 0]
[0, 2, 2, 0, 0]
[0, 2, 4, 2, 0]
[0, 0, 2, 3, 1]
[0, 0, 0, 4, 2]
[0, 0, 0, 2, 6]
max_score
6
max_score_position
[5, 4]
Above is the filled in matrix. The highlighted cell is the max_score
.
Traceback
= traceback(matrix, A, B,
aligned_A, aligned_B
max_score_position,= GAP_PENALTY,
gap_penalty = MISMATCH_PENALTY,
mismatch_penalty = MATCH_SCORE)
match_score
print("Alignment:")
print(aligned_A)
print(aligned_B)
Alignment:
AA-CG
AATCG
We get same output with University of Freiburg Smith-Waterman Tool (Ref 1).
Get alignment score
= Align.PairwiseAligner()
aligner = 'local'
aligner.mode
= aligner.align(aligned_A.replace('-',''),
alignments '-',''))
aligned_B.replace(
for alignment in sorted(alignments):
print("Score = %.1f:" % alignment.score)
print(alignment)
Score = 4.0:
AA-CG
||-||
AATCG