Skip to content

Commit 876b99d

Browse files
committed
Lexer improvements for highlighter
1 parent 0eaeaaa commit 876b99d

File tree

6 files changed

+393
-11
lines changed

6 files changed

+393
-11
lines changed

sonar-objective-c-plugin/src/main/java/org/sonar/objectivec/api/ObjectiveCTokenType.java

+12-2
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,19 @@
1919
*/
2020
package org.sonar.objectivec.api;
2121

22+
import com.google.common.collect.ImmutableList;
2223
import com.sonar.sslr.api.AstNode;
2324
import com.sonar.sslr.api.TokenType;
2425

25-
public enum ObjectiveCTokenType implements TokenType {
26+
import java.util.List;
2627

27-
NUMERIC_LITERAL;
28+
public enum ObjectiveCTokenType implements TokenType {
29+
CHARACTER_LITERAL,
30+
DOUBLE_LITERAL,
31+
FLOAT_LITERAL,
32+
INTEGER_LITERAL,
33+
LONG_LITERAL,
34+
STRING_LITERAL;
2835

2936
@Override
3037
public String getName() {
@@ -41,4 +48,7 @@ public boolean hasToBeSkippedFromAst(AstNode node) {
4148
return false;
4249
}
4350

51+
public static List numberLiterals() {
52+
return ImmutableList.of(DOUBLE_LITERAL, FLOAT_LITERAL, INTEGER_LITERAL, LONG_LITERAL);
53+
}
4454
}

sonar-objective-c-plugin/src/main/java/org/sonar/objectivec/highlighter/SyntaxHighlighterVisitor.java

+10
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.sonar.api.batch.fs.InputFile;
3434
import org.sonar.api.source.Highlightable;
3535
import org.sonar.objectivec.api.ObjectiveCKeyword;
36+
import org.sonar.objectivec.api.ObjectiveCTokenType;
3637
import org.sonar.squidbridge.SquidAstVisitor;
3738

3839
import java.io.IOException;
@@ -116,6 +117,15 @@ public void visitToken(Token token) {
116117
if (token.getType() instanceof ObjectiveCKeyword) {
117118
highlightToken(token, "k");
118119
}
120+
121+
if (ObjectiveCTokenType.numberLiterals().contains(token.getType())) {
122+
highlightToken(token, "c");
123+
}
124+
125+
if (ObjectiveCTokenType.STRING_LITERAL.equals(token.getType())
126+
|| ObjectiveCTokenType.CHARACTER_LITERAL.equals(token.getType())) {
127+
highlightToken(token, "s");
128+
}
119129
}
120130

121131
private void highlightToken(Token token, String typeOfText) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* SonarQube Objective-C (Community) Plugin
3+
* Copyright (C) 2012-2016 OCTO Technology, Backelite, and contributors
4+
* mailto:sonarqube@googlegroups.com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.objectivec.lexer;
21+
22+
import com.sonar.sslr.impl.Lexer;
23+
import org.sonar.sslr.channel.Channel;
24+
import org.sonar.sslr.channel.CodeReader;
25+
26+
/**
27+
* @author Sonar C++ Plugin (Community) authors
28+
*/
29+
public class BackslashChannel extends Channel<Lexer> {
30+
@Override
31+
public boolean consume(CodeReader code, Lexer output) {
32+
char ch = (char) code.peek();
33+
34+
if ((ch == '\\') && isNewLine(code.charAt(1))) {
35+
// just throw away the backslash
36+
code.pop();
37+
return true;
38+
}
39+
40+
return false;
41+
}
42+
43+
private static boolean isNewLine(char ch) {
44+
return (ch == '\n') || (ch == '\r');
45+
}
46+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* SonarQube Objective-C (Community) Plugin
3+
* Copyright (C) 2012-2016 OCTO Technology, Backelite, and contributors
4+
* mailto:sonarqube@googlegroups.com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.objectivec.lexer;
21+
22+
import com.sonar.sslr.api.Token;
23+
import com.sonar.sslr.impl.Lexer;
24+
import org.sonar.objectivec.api.ObjectiveCTokenType;
25+
import org.sonar.sslr.channel.Channel;
26+
import org.sonar.sslr.channel.CodeReader;
27+
28+
/**
29+
* @author Sonar C++ Plugin (Community) authors
30+
*/
31+
public class CharacterLiteralsChannel extends Channel<Lexer> {
32+
private static final char EOF = (char) -1;
33+
34+
private final StringBuilder sb = new StringBuilder();
35+
36+
private int index;
37+
private char ch;
38+
39+
@Override
40+
public boolean consume(CodeReader code, Lexer output) {
41+
int line = code.getLinePosition();
42+
int column = code.getColumnPosition();
43+
index = 0;
44+
readPrefix(code);
45+
if ((ch != '\'')) {
46+
return false;
47+
}
48+
if (!read(code)) {
49+
return false;
50+
}
51+
readUdSuffix(code);
52+
for (int i = 0; i < index; i++) {
53+
sb.append((char) code.pop());
54+
}
55+
output.addToken(Token.builder()
56+
.setLine(line)
57+
.setColumn(column)
58+
.setURI(output.getURI())
59+
.setValueAndOriginalValue(sb.toString())
60+
.setType(ObjectiveCTokenType.CHARACTER_LITERAL)
61+
.build());
62+
sb.setLength(0);
63+
return true;
64+
}
65+
66+
private boolean read(CodeReader code) {
67+
index++;
68+
while (code.charAt(index) != ch) {
69+
if (code.charAt(index) == EOF) {
70+
return false;
71+
}
72+
if (code.charAt(index) == '\\') {
73+
// escape
74+
index++;
75+
}
76+
index++;
77+
}
78+
index++;
79+
return true;
80+
}
81+
82+
private void readPrefix(CodeReader code) {
83+
ch = code.charAt(index);
84+
if ((ch == 'u') || (ch == 'U') || ch == 'L') {
85+
index++;
86+
ch = code.charAt(index);
87+
}
88+
}
89+
90+
private void readUdSuffix(CodeReader code) {
91+
for (int start_index = index, len = 0; ; index++) {
92+
char c = code.charAt(index);
93+
if (c == EOF) {
94+
return;
95+
}
96+
if ((c >= 'a' && c <= 'z')
97+
|| (c >= 'A' && c <= 'Z')
98+
|| (c == '_')) {
99+
len++;
100+
} else {
101+
if (c >= '0' && c <= '9') {
102+
if (len > 0) {
103+
len++;
104+
} else {
105+
index = start_index;
106+
return;
107+
}
108+
} else {
109+
return;
110+
}
111+
}
112+
}
113+
}
114+
}

sonar-objective-c-plugin/src/main/java/org/sonar/objectivec/lexer/ObjectiveCLexer.java

+46-9
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
*/
2020
package org.sonar.objectivec.lexer;
2121

22+
import com.sonar.sslr.api.GenericTokenType;
2223
import com.sonar.sslr.impl.Lexer;
2324
import com.sonar.sslr.impl.channel.BlackHoleChannel;
2425
import com.sonar.sslr.impl.channel.IdentifierAndKeywordChannel;
@@ -27,11 +28,33 @@
2728
import org.sonar.objectivec.api.ObjectiveCKeyword;
2829
import org.sonar.objectivec.api.ObjectiveCPunctuator;
2930

30-
import static com.sonar.sslr.api.GenericTokenType.LITERAL;
3131
import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.commentRegexp;
3232
import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp;
33+
import static org.sonar.objectivec.api.ObjectiveCTokenType.DOUBLE_LITERAL;
34+
import static org.sonar.objectivec.api.ObjectiveCTokenType.FLOAT_LITERAL;
35+
import static org.sonar.objectivec.api.ObjectiveCTokenType.INTEGER_LITERAL;
36+
import static org.sonar.objectivec.api.ObjectiveCTokenType.LONG_LITERAL;
3337

3438
public class ObjectiveCLexer {
39+
private static final String EXP_REGEXP = "(?:[Ee][+-]?+[0-9_]++)";
40+
private static final String BINARY_EXP_REGEXP = "(?:[Pp][+-]?+[0-9_]++)";
41+
private static final String FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP = "(?:" +
42+
// Decimal
43+
"[0-9][0-9_]*+\\.([0-9_]++)?+" + EXP_REGEXP + "?+" +
44+
"|" + "\\.[0-9][0-9_]*+" + EXP_REGEXP + "?+" +
45+
"|" + "[0-9][0-9_]*+" + EXP_REGEXP +
46+
// Hexadecimal
47+
"|" + "0[xX][0-9_a-fA-F]++\\.[0-9_a-fA-F]*+" + BINARY_EXP_REGEXP +
48+
"|" + "0[xX][0-9_a-fA-F]++" + BINARY_EXP_REGEXP +
49+
")";
50+
private static final String INTEGER_LITERAL_REGEXP = "(?:" +
51+
// Hexadecimal
52+
"0[xX][0-9_a-fA-F]++" +
53+
// Binary (Java 7)
54+
"|" + "0[bB][01_]++" +
55+
// Decimal and Octal
56+
"|" + "[0-9][0-9_]*+" +
57+
")";
3558

3659
private ObjectiveCLexer() {
3760
// prevents outside instantiation
@@ -44,23 +67,37 @@ public static Lexer create() {
4467
public static Lexer create(ObjectiveCConfiguration conf) {
4568
return Lexer.builder()
4669
.withCharset(conf.getCharset())
70+
.withFailIfNoChannelToConsumeOneCharacter(true)
4771

48-
.withFailIfNoChannelToConsumeOneCharacter(false)
72+
/* Remove whitespace */
73+
.withChannel(new BlackHoleChannel("\\s++"))
4974

5075
/* Comments */
5176
.withChannel(commentRegexp("//[^\\n\\r]*+"))
52-
.withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/"))
77+
.withChannel(commentRegexp("/\\*", "[\\s\\S]*?", "\\*/"))
78+
79+
/* Backslash at the end of the line: just throw away */
80+
.withChannel(new BackslashChannel())
81+
82+
/* Character literals */
83+
.withChannel(new CharacterLiteralsChannel())
84+
85+
/* String literals */
86+
.withChannel(new StringLiteralsChannel())
87+
88+
/* Number literals */
89+
.withChannel(regexp(FLOAT_LITERAL, FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[fF]|[0-9][0-9_]*+[fF]"))
90+
.withChannel(regexp(DOUBLE_LITERAL, FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[dD]?+|[0-9][0-9_]*+[dD]"))
91+
.withChannel(regexp(LONG_LITERAL, INTEGER_LITERAL_REGEXP + "[lL]"))
92+
.withChannel(regexp(INTEGER_LITERAL, INTEGER_LITERAL_REGEXP))
5393

5494
/* Identifiers, keywords, and punctuators */
55-
.withChannel(new IdentifierAndKeywordChannel("(#|@)?[a-zA-Z]([a-zA-Z0-9_]*[a-zA-Z0-9])?+((\\s+)?\\*)?", true, ObjectiveCKeyword.values()))
95+
.withChannel(new IdentifierAndKeywordChannel("[#@]?[a-zA-Z]([a-zA-Z0-9_]*[a-zA-Z0-9])?+((\\s+)?\\*)?", true, ObjectiveCKeyword.values()))
5696
.withChannel(new PunctuatorChannel(ObjectiveCPunctuator.values()))
5797

58-
/* All other tokens */
59-
.withChannel(regexp(LITERAL, "[^\r\n\\s/]+"))
60-
61-
.withChannel(new BlackHoleChannel("[\\s]"))
98+
/* All other tokens -- must be last channel */
99+
.withChannel(regexp(GenericTokenType.IDENTIFIER, "[^\r\n\\s/]+"))
62100

63101
.build();
64102
}
65-
66103
}

0 commit comments

Comments
 (0)