Sequence alignment by dynamic programming.
## by YGC ## August 7, 2008 ## guangchuangyu AT gmail.com X <->'TTCATA' Y <->'TGCTCGTA' seq.x <- unlist(strsplit(x,="">'')) seq.y <- unlist(strsplit(y,="">'')) seq.x <->0,seq.x) seq.y <->0,seq.y) match <->5 mismatch <->2 indel <->6 ## initial the score matrix score <->NA, length(seq.x), length(seq.y)) score[,1] <->1:length(seq.x)-1, function(x) x * indel) score[1,] <->1:length(seq.y)-1, function(x) x * indel)
## The dynamic programming, global alignment recursion for (i in 2:length(seq.x)) {
for (j in 2:length(seq.y)){
# seq.x[i] , seq.y[j] are aligned if ( seq.x[i] == seq.y[j]) { score[i,j] <->1, j-1] + match } else { score[i,j] <->1, j-1] + mismatch } # seq.x[i] aligned to - sc <->1,j] + indel
if (sc > score[i,j]) score[i,j] = sc
# seq.y[j] aligned to - sc <->1] + indel
if (sc > score[i,j]) score[i,j] = sc } }
## Traceback i <- length(seq.x)="" j=""><- length(seq.y)="" ax=""><- character()="" ay=""><- character()="">
while (i > 1 && j >1){
## case 1: best was seq.x[i] aligned to seq.y[j] sc <->1,j-1]
if (seq.x[i] == seq.y[j]) { sc <- sc="" +="" match="" ="" ="" }="">else { sc <- sc="" +="" mismatch="" ="" ="" }="">
if (sc == score[i,j]) { ax <- c(seq.x[i],="" ax)="" ="" ="" ="" ="" ay=""><- c(seq.y[j],="" ay)="" ="" ="" ="" ="" i=""><- i="">1 j <->1 next }
## case 2: best was seq.x[i] aligned to - if ((score[i-1,j] + indel) == score[i,j]) { ax <- c(seq.x[i],="" ax)="" ="" ="" ="" ="" ay=""><->'-', ay) i <->1 next }
## case 3: best was seq.y[j] aligned to - if ((score[i,j-1] + indel) == score[i,j]) { ax <->'-', ax) ay <- c(seq.y[j],="" ay)="" ="" ="" ="" ="" j=""><->1 next } } cat ('Sequence X: ', X,'\n') cat ('Sequence Y: ', Y,'\n') cat ('Scoring system: ', match, ' for match; ', mismatch, ' for mismatch; ', indel, ' for gap', '\n\n') cat ('Dynamic programming matrix:\n') print (score) cat ('\nAlignment:\n') cat (paste(ax, collapse=''), '\n') cat (paste(ay, collapse=''),'\n\n') cat ('Optimum alignment score: ', score[length(score)],'\n')
联系客服