Aritalab:Lecture/Programming/Java/Alignment

From Metabolomics.JP
< Aritalab:Lecture | Programming | Java(Difference between revisions)
Jump to: navigation, search
(Created page with "配列アライメントアルゴリズムの全体を以下に示します。このページにあるコードを全てつなげて Alignment.java としてコンパイル、実...")
 
Line 1: Line 1:
配列アライメントアルゴリズムの全体を以下に示します。このページにあるコードを全てつなげて Alignment.java としてコンパイル、実行してください。
+
配列アライメントアルゴリズムの全体を以下に示します。このページにあるコード部分を全てつなげて Alignment.java としてコンパイル、実行してください。このファイルだけで動きます。
 
__FORCETOC__
 
__FORCETOC__
 +
 
===クラスおよびペナルティの定義===
 
===クラスおよびペナルティの定義===
 
<pre>
 
<pre>
 
public class Alignment {
 
public class Alignment {
private int LOCAL = 0, GLOBAL = 1;
+
  private int LOCAL = 0, GLOBAL = 1;
private int AlignType = -1;
+
  private int AlignType = -1;
private int MatchScore = 1;
+
  private int MatchScore = 1;
private int GapPenalty = -2;
+
  private int GapPenalty = -2;
private int MismatchPenalty = -1;
+
  private int MismatchPenalty = -1;
private int[][] scoreTable;
+
  private int[][] scoreTable;
private String seqX, seqY;
+
  private String seqX, seqY;
  
public Alignment(String x, String y) {
+
  public Alignment(String x, String y) {
seqX = x;
+
    seqX = x;
seqY = y;
+
    seqY = y;
scoreTable = new int[x.length() + 1][y.length() + 1];
+
    scoreTable = new int[x.length() + 1][y.length() + 1];
}
+
  }
  
private void initializeTable() {
+
  private void initializeTable() {
scoreTable[0][0] = 0;
+
    scoreTable[0][0] = 0;
for (int i = 1; i < scoreTable.length; i++)
+
    for (int i = 1; i < scoreTable.length; i++)
scoreTable[i][0] = scoreTable[i - 1][0] + AlignType * GapPenalty;
+
      scoreTable[i][0] = scoreTable[i - 1][0] + AlignType * GapPenalty;
for (int i = 1; i < scoreTable[0].length; i++)
+
    for (int i = 1; i < scoreTable[0].length; i++)
scoreTable[0][i] = scoreTable[0][i - 1] + AlignType * GapPenalty;
+
      scoreTable[0][i] = scoreTable[0][i - 1] + AlignType * GapPenalty;
}
+
  }
  
private int getMax(int x, int y, int z, int w) {
+
  private int getMax(int x, int y, int z, int w) {
int t = x > y ? x : y;
+
    int t = x > y ? x : y;
int s = z > w ? z : w;
+
    int s = z > w ? z : w;
return (t > s ? t : s);
+
    return (t > s ? t : s);
}
+
  }
  
private int getMax(int x, int y, int z) {
+
  private int getMax(int x, int y, int z) {
int t = x > y ? x : y;
+
    int t = x > y ? x : y;
return (t > z ? t : z);
+
    return (t > z ? t : z);
}
+
  }
 
</pre>
 
</pre>
 
===トレースバック関数===
 
===トレースバック関数===
重要なのは do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。
+
重要なのは traceBack関数の do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。
 
<pre>
 
<pre>
private String[] traceBack() {
+
  private String[] traceBack() {
int x = 0, y = 0;
+
    int x = 0, y = 0;
if (AlignType == LOCAL) {
+
    if (AlignType == LOCAL) {
// Search for the maximum score brute-force (very inefficient).
+
      // Search for the maximum score brute-force (very inefficient).
// The max score can be recorded when computing scoreTable.
+
      // The max score can be recorded when computing scoreTable.
int max = 0;
+
      int max = 0;
for (int i = 0; i < scoreTable.length; i++) {
+
      for (int i = 0; i < scoreTable.length; i++) {
for (int j = 0; j < scoreTable[0].length; j++)
+
        for (int j = 0; j < scoreTable[0].length; j++)
if (scoreTable[i][j] >= max) {
+
          if (scoreTable[i][j] >= max) {
x = i;
+
            x = i;
y = j;
+
            y = j;
max = scoreTable[i][j];
+
            max = scoreTable[i][j];
}
+
          }
}
+
      }
} else if (AlignType == GLOBAL) {
+
    } else if (AlignType == GLOBAL) {
x = scoreTable.length - 1;
+
      x = scoreTable.length - 1;
y = scoreTable[0].length - 1;
+
      y = scoreTable[0].length - 1;
}
+
    }
StringBuffer line1 = new StringBuffer();
+
    StringBuffer line1 = new StringBuffer();
StringBuffer line2 = new StringBuffer();
+
    StringBuffer line2 = new StringBuffer();
line1.append(':');
+
    line1.append(':');
line1.append(x);
+
    line1.append(x);
line2.append(':');
+
    line2.append(':');
line2.append(y);
+
    line2.append(y);
do {
+
    do {
if (scoreTable[x - 1][y] + GapPenalty == scoreTable[x][y]) {
+
      if (scoreTable[x - 1][y] + GapPenalty == scoreTable[x][y]) {
line1.insert(0, seqX.charAt(x-- - 1));
+
        line1.insert(0, seqX.charAt(x-- - 1));
line2.insert(0, '-');
+
        line2.insert(0, '-');
} else if (scoreTable[x][y - 1] + GapPenalty == scoreTable[x][y]) {
+
      } else if (scoreTable[x][y - 1] + GapPenalty == scoreTable[x][y]) {
line1.insert(0, '-');
+
        line1.insert(0, '-');
line2.insert(0, seqY.charAt(y-- - 1));
+
        line2.insert(0, seqY.charAt(y-- - 1));
} else {
+
      } else {
line1.insert(0, seqX.charAt(x-- - 1));
+
        line1.insert(0, seqX.charAt(x-- - 1));
line2.insert(0, seqY.charAt(y-- - 1));
+
        line2.insert(0, seqY.charAt(y-- - 1));
}
+
      }
} while (x != 0 && y != 0
+
    } while (x != 0 && y != 0
&& (AlignType == GLOBAL ? true : scoreTable[x][y] != 0));
+
        && (AlignType == GLOBAL ? true : scoreTable[x][y] != 0));
line1.insert(0, ':');
+
    line1.insert(0, ':');
line1.insert(0, String.format("%1$3d", x + 1));
+
    line1.insert(0, String.format("%1$3d", x + 1));
line2.insert(0, ':');
+
    line2.insert(0, ':');
line2.insert(0, String.format("%1$3d", y + 1));
+
    line2.insert(0, String.format("%1$3d", y + 1));
return new String[] { line1.toString(), line2.toString() };
+
    return new String[] { line1.toString(), line2.toString() };
}
+
  }
  
public void printTable() {
+
  public void printTrace() {
if (scoreTable == null)
+
    String[] trace = traceBack();
return;
+
    String x = trace[0];
StringBuffer sb = new StringBuffer();
+
    String y = trace[1];
for (int i = 0; i < scoreTable.length; i++) {
+
    StringBuffer sb = new StringBuffer();
for (int j = 0; j < scoreTable[0].length; j++)
+
    sb.append("    ");
sb.append(String.format("%1$4d", scoreTable[i][j]));
+
    for (int i = 4; x.charAt(i) != ':'; i++)
sb.append("\n");
+
      sb.append(x.charAt(i) == y.charAt(i) ? "|" : " ");
}
+
    sb.append("   ");
System.out.println(sb.toString());
+
    String z = sb.toString();
}
+
  
public void printTrace() {
+
    int width = 50;
String[] trace = traceBack();
+
    int i = -1;
String x = trace[0];
+
    while (++i * width < x.length()) {
String y = trace[1];
+
      System.out.println(x.substring(i * width,
StringBuffer sb = new StringBuffer();
+
          Math.min(x.length(), (i + 1) * width)));
sb.append("    ");
+
      System.out.println(z.substring(i * width,
for (int i = 4; x.charAt(i) != ':'; i++)
+
          Math.min(z.length(), (i + 1) * width)));
sb.append(x.charAt(i) == y.charAt(i) ? "|" : " ");
+
      System.out.println(y.substring(i * width,
sb.append("    ");
+
          Math.min(y.length(), (i + 1) * width)));
String z = sb.toString();
+
    }
 
+
  }
int width = 50;
+
int i = -1;
+
while (++i * width < x.length()) {
+
System.out.println(x.substring(i * width,
+
Math.min(x.length(), (i + 1) * width)));
+
System.out.println(z.substring(i * width,
+
Math.min(z.length(), (i + 1) * width)));
+
System.out.println(y.substring(i * width,
+
Math.min(y.length(), (i + 1) * width)));
+
}
+
}
+
 
</pre>
 
</pre>
  
 
===アルゴリズム===
 
===アルゴリズム===
大域アライメントと局所アライメントの違いは、漸化式の候補に 0 があるかないかの違いだけである点に注意しましょう。
+
getMax関数で最大値を取る漸化式を for ループで二重に回しているだけです。大域アライメントと局所アライメントの違いは、漸化式の候補に 0 を加えるだけです。
 
<pre>
 
<pre>
public void NeedlemanWunsch() {
+
  public void NeedlemanWunsch() {
AlignType = GLOBAL;
+
    AlignType = GLOBAL;
initializeTable();
+
    initializeTable();
for (int i = 1; i < scoreTable.length; i++)
+
    for (int i = 1; i < scoreTable.length; i++)
for (int j = 1; j < scoreTable[0].length; j++)
+
      for (int j = 1; j < scoreTable[0].length; j++)
scoreTable[i][j] = getMax(
+
        scoreTable[i][j] = getMax(
scoreTable[i - 1][j] + GapPenalty,
+
            scoreTable[i - 1][j] + GapPenalty,
scoreTable[i][j - 1] + GapPenalty,
+
            scoreTable[i][j - 1] + GapPenalty,
scoreTable[i - 1][j - 1]
+
            scoreTable[i - 1][j - 1]
+ (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
+
                + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
: MismatchPenalty));
+
                    : MismatchPenalty));
}
+
  }
  
public void SmithWaterman() {
+
  public void SmithWaterman() {
AlignType = LOCAL;
+
    AlignType = LOCAL;
initializeTable();
+
    initializeTable();
for (int i = 1; i < scoreTable.length; i++)
+
    for (int i = 1; i < scoreTable.length; i++)
for (int j = 1; j < scoreTable[0].length; j++)
+
      for (int j = 1; j < scoreTable[0].length; j++)
scoreTable[i][j] = getMax(0,
+
        scoreTable[i][j] = getMax(
scoreTable[i - 1][j] + GapPenalty,
+
            0,
scoreTable[i][j - 1] + GapPenalty,
+
            scoreTable[i - 1][j] + GapPenalty,
scoreTable[i - 1][j - 1]
+
            scoreTable[i][j - 1] + GapPenalty,
+ (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
+
            scoreTable[i - 1][j - 1]
: MismatchPenalty));
+
                + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
}
+
                    : MismatchPenalty));
 +
  }
 
</pre>
 
</pre>
 
===メイン関数===
 
===メイン関数===
 
<pre>
 
<pre>
public static void main(String[] args) {
+
  public void printTable() {
Alignment A = new Alignment("aattgaaggattgctcggataatcgcc","gctaggatcacggccatggcaagcgcg");
+
    if (scoreTable == null)
//Alignment A = new Alignment("aattgaagg","gctagg");
+
      return;
//A.NeedlemanWunsch();
+
    StringBuffer sb = new StringBuffer();
A.SmithWaterman();
+
    for (int i = 0; i < scoreTable.length; i++) {
A.printTable();
+
      for (int j = 0; j < scoreTable[0].length; j++)
A.printTrace();
+
        sb.append(String.format("%1$4d", scoreTable[i][j]));
}
+
      sb.append("\n");
 +
    }
 +
    System.out.println(sb.toString());
 +
  }
 +
 
 +
  public static void main(String[] args) {
 +
    Alignment A = new Alignment("aattgaagg", "gctagg");
 +
    A.NeedlemanWunsch();
 +
    //A.SmithWaterman();
 +
    A.printTable();
 +
    A.printTrace();
 +
  }
 
}
 
}
 +
</pre>
 +
 +
===実行結果===
 +
<pre>
 +
> java Alignment
 +
 +
  0  -2  -4  -6  -8 -10 -12
 +
  -2  -1  -3  -5  -5  -7  -9
 +
  -4  -3  -2  -4  -4  -6  -8
 +
  -6  -5  -4  -1  -3  -5  -7
 +
  -8  -7  -6  -3  -2  -4  -6
 +
-10  -7  -8  -5  -4  -1  -3
 +
-12  -9  -8  -7  -4  -3  -2
 +
-14 -11 -10  -9  -6  -5  -4
 +
-16 -13 -12 -11  -8  -5  -4
 +
-18 -15 -14 -13 -10  -7  -4
 +
 +
  1:aattgaagg:9
 +
      |  | ||   
 +
  1:gct--a-gg:6
 
</pre>
 
</pre>

Revision as of 09:46, 15 December 2011

配列アライメントアルゴリズムの全体を以下に示します。このページにあるコード部分を全てつなげて Alignment.java としてコンパイル、実行してください。このファイルだけで動きます。


Contents

クラスおよびペナルティの定義

public class Alignment {
  private int LOCAL = 0, GLOBAL = 1;
  private int AlignType = -1;
  private int MatchScore = 1;
  private int GapPenalty = -2;
  private int MismatchPenalty = -1;
  private int[][] scoreTable;
  private String seqX, seqY;

  public Alignment(String x, String y) {
    seqX = x;
    seqY = y;
    scoreTable = new int[x.length() + 1][y.length() + 1];
  }

  private void initializeTable() {
    scoreTable[0][0] = 0;
    for (int i = 1; i < scoreTable.length; i++)
      scoreTable[i][0] = scoreTable[i - 1][0] + AlignType * GapPenalty;
    for (int i = 1; i < scoreTable[0].length; i++)
      scoreTable[0][i] = scoreTable[0][i - 1] + AlignType * GapPenalty;
  }

  private int getMax(int x, int y, int z, int w) {
    int t = x > y ? x : y;
    int s = z > w ? z : w;
    return (t > s ? t : s);
  }

  private int getMax(int x, int y, int z) {
    int t = x > y ? x : y;
    return (t > z ? t : z);
  }

トレースバック関数

重要なのは traceBack関数の do { ... } while 文の中身だけで、後はトレースを印刷するための準備です。

  private String[] traceBack() {
    int x = 0, y = 0;
    if (AlignType == LOCAL) {
      // Search for the maximum score brute-force (very inefficient).
      // The max score can be recorded when computing scoreTable.
      int max = 0;
      for (int i = 0; i < scoreTable.length; i)
          if (scoreTable[i][j] >= max) {
            x = i;
            y = j;
            max = scoreTable[i][j];
          }
      }
    } else if (AlignType == GLOBAL) {
      x = scoreTable.length - 1;
      y = scoreTable[0].length - 1;
    }
    StringBuffer line1 = new StringBuffer();
    StringBuffer line2 = new StringBuffer();
    line1.append(':');
    line1.append(x);
    line2.append(':');
    line2.append(y);
    do {
      if (scoreTable[x - 1][y] + GapPenalty == scoreTable[x][y]) {
        line1.insert(0, seqX.charAt(x-- - 1));
        line2.insert(0, '-');
      } else if (scoreTable[x][y - 1] + GapPenalty == scoreTable[x][y]) {
        line1.insert(0, '-');
        line2.insert(0, seqY.charAt(y-- - 1));
      } else {
        line1.insert(0, seqX.charAt(x-- - 1));
        line2.insert(0, seqY.charAt(y-- - 1));
      }
    } while (x != 0 && y != 0
        && (AlignType == GLOBAL ? true : scoreTable[x][y] != 0));
    line1.insert(0, ':');
    line1.insert(0, String.format("%1$3d", x + 1));
    line2.insert(0, ':');
    line2.insert(0, String.format("%1$3d", y + 1));
    return new String[] { line1.toString(), line2.toString() };
  }

  public void printTrace() {
    String[] trace = traceBack();
    String x = trace[0];
    String y = trace[1];
    StringBuffer sb = new StringBuffer();
    sb.append("    ");
    for (int i = 4; x.charAt(i) != ':'; ii * width < x.length()) {
      System.out.println(x.substring(i * width,
          Math.min(x.length(), (i + 1) * width)));
      System.out.println(z.substring(i * width,
          Math.min(z.length(), (i + 1) * width)));
      System.out.println(y.substring(i * width,
          Math.min(y.length(), (i + 1) * width)));
    }
  }

アルゴリズム

getMax関数で最大値を取る漸化式を for ループで二重に回しているだけです。大域アライメントと局所アライメントの違いは、漸化式の候補に 0 を加えるだけです。

  public void NeedlemanWunsch() {
    AlignType = GLOBAL;
    initializeTable();
    for (int i = 1; i < scoreTable.length; i)
        scoreTable[i][j] = getMax(
            scoreTable[i - 1][j] + GapPenalty,
            scoreTable[i][j - 1] + GapPenalty,
            scoreTable[i - 1][j - 1]
                + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
                    : MismatchPenalty));
  }

  public void SmithWaterman() {
    AlignType = LOCAL;
    initializeTable();
    for (int i = 1; i < scoreTable.length; i)
        scoreTable[i][j] = getMax(
            0,
            scoreTable[i - 1][j] + GapPenalty,
            scoreTable[i][j - 1] + GapPenalty,
            scoreTable[i - 1][j - 1]
                + (seqX.charAt(i - 1) == seqY.charAt(j - 1) ? MatchScore
                    : MismatchPenalty));
  }

メイン関数

  public void printTable() {
    if (scoreTable == null)
      return;
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < scoreTable.length; i)
        sb.append(String.format("%1$4d", scoreTable[i][j]));
      sb.append("\n");
    }
    System.out.println(sb.toString());
  }

  public static void main(String[] args) {
    Alignment A = new Alignment("aattgaagg", "gctagg");
    A.NeedlemanWunsch();
    //A.SmithWaterman();
    A.printTable();
    A.printTrace();
  }
}

実行結果

> java Alignment

   0  -2  -4  -6  -8 -10 -12
  -2  -1  -3  -5  -5  -7  -9
  -4  -3  -2  -4  -4  -6  -8
  -6  -5  -4  -1  -3  -5  -7
  -8  -7  -6  -3  -2  -4  -6
 -10  -7  -8  -5  -4  -1  -3
 -12  -9  -8  -7  -4  -3  -2
 -14 -11 -10  -9  -6  -5  -4
 -16 -13 -12 -11  -8  -5  -4
 -18 -15 -14 -13 -10  -7  -4

  1:aattgaagg:9
      |  | ||    
  1:gct--a-gg:6
Personal tools
Namespaces

Variants
Actions
Navigation
metabolites
Toolbox