This repository has been archived by the owner on Sep 10, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcmp2java.pl
executable file
·125 lines (97 loc) · 2.78 KB
/
cmp2java.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl -w
# cmp2java.pl - converts a .cmp file into a Java source for an
# extension of the ScoreMatrix class.
#
# Usage: cmp2java.pl [-m] [-n classname] [-t template] [file ...]
#   -m            print only the matrix rows (no Java header, no template)
#   -n classname  name of the generated class (default: NewMatrix)
#   -t template   file copied to the output after the matrix rows
#                 (default: ScoreMatrix.tmpl)
# The matrix is read from the named files, or from standard input when
# no file is given; the result is written to standard output.
#
# A .cmp-formatted matrix is only the upper triangle, but for speed
# gain we fill both halves of the matrix (thus no need to test if i >
# j); and we want to address elements directly by char value (minus
# 'A', C-style), so we need all rows and columns between A and Z
# inclusive, even if they correspond to no residue (like J for
# proteins).
use strict;
use warnings;
use Getopt::Std;

# Rows and columns of the generated matrix: every letter from A to Z,
# whether or not the input defines it.
my @ALPHABET = ('A' .. 'Z');

main();

# Entry point: parse options, read the matrix, print the result.
sub main {
    my %opt;
    getopts("mn:t:", \%opt);
    my $classname   = $opt{n} || "NewMatrix";
    my $template    = $opt{t} || "ScoreMatrix.tmpl";
    my $matrix_only = $opt{m};

    # Open the template before producing any output: if it cannot be
    # read we fall back to matrix-only output, as the warning says,
    # instead of emitting a Java header that is never completed.
    my $tmpl_fh;
    unless ($matrix_only) {
        unless (open($tmpl_fh, '<', $template)) {
            warn("Could not open $template for reading ($!), printing matrix only.\n");
            $matrix_only = 1;
        }
    }

    my @letters  = read_header();
    my $score_of = read_scores(@letters);

    print_java_header($classname) unless $matrix_only;
    print_matrix($score_of);

    unless ($matrix_only) {
        # Copy the template line by line after the matrix rows.
        while (defined(my $tmpl_line = <$tmpl_fh>)) {
            print $tmpl_line;
        }
        close($tmpl_fh);
    }
    return;
}

# Skip input until the coordinate line and return its letters as a list.
# Dies if the input ends before such a line is found (the loop used to
# spin forever in that case).
# NOTE(review): the pattern only requires one capital followed by a
# space somewhere in the line, so a prose line containing e.g. "X "
# would be taken as the coordinate line; kept as-is for compatibility.
sub read_header {
    while (defined(my $line = <>)) {
        next unless $line =~ /([A-Z] +)+/;
        $line =~ s/[\.\s]//g;    # drop dots and all whitespace
        return split(//, $line);
    }
    die "cmp2java.pl: no line of space-separated capital letters found in input\n";
}

# Read one row of scores per header letter, in header order, and return
# a reference to a hash of hashes $score_of{row}{column} with both
# triangles filled. Dies if the input ends before all rows are read.
sub read_scores {
    my @letters = @_;
    my %score_of;

    foreach my $row (@letters) {
        # skip empty lines, stopping at end of input
        my $line;
        while (defined($line = <>)) {
            last if $line =~ /\S/;
        }
        die "cmp2java.pl: unexpected end of input before the row for '$row'\n"
            unless defined $line;

        # generate the array of scores
        $line =~ s/[A-Z]+//g;    # there normally should be only 1 letter...
        $line =~ s/^\s+//;       # strip leading whitespace
        my @scores = split(/\s+/, $line);

        # A row cannot hold more scores than there are columns; extra
        # values have no column letter and never reached the output.
        if (@scores > @letters) {
            warn("cmp2java.pl: row '$row' has more scores than header letters, ignoring the extra ones\n");
            $#scores = $#letters;
        }

        # An upper-triangle row is short by $offset leading columns;
        # those cells were already read as part of earlier rows, so
        # take them from the transposed position.
        my $offset = @letters - @scores;
        foreach my $i (0 .. $#letters) {
            my $col = $letters[$i];
            $score_of{$row}{$col} = $i < $offset
                ? $score_of{$col}{$row}
                : $scores[ $i - $offset ];
        }
    }
    return \%score_of;
}

# Print the opening lines of the Java class, up to the start of the
# array initializer, unless the user wants only the matrix (-m).
sub print_java_header {
    my ($classname) = @_;
    print <<"END";
// $classname.java
public class $classname extends ScoreMatrix
{
$classname ()
{
int tmp [][] =
{
END
    return;
}

# Print one initializer row per alphabet letter; cells the input does
# not define (like J, U for prots, or many others for NAs) are 0.
sub print_matrix {
    my ($score_of) = @_;
    my $format = "\t\t{" . join(',', ('%3d') x @ALPHABET) . "},\n";

    foreach my $out (@ALPHABET) {
        my @vals = map {
            defined $score_of->{$out}{$_} ? $score_of->{$out}{$_} : 0
        } @ALPHABET;
        printf($format, @vals);
    }
    return;
}