Skip to content

Commit

Permalink
(fix) lib: positive look-around
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-pelykh committed Jun 23, 2024
1 parent 7fd9a0b commit db29eda
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 12 deletions.
23 changes: 11 additions & 12 deletions lib/src/main/java/org/pcre4j/Pcre4jUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -560,19 +561,17 @@ public static int[] convertOvectorToStringIndices(String subject, byte[] subject
if (ovector.length % 2 != 0) {
throw new IllegalArgumentException("ovector must have an even number of elements");
}
if (ovector[0] > ovector[1]) {
throw new IllegalArgumentException("ovector start must be less than or equal to ovector end");
}

// Match region size in bytes is determined by the first offset pair in the ovector
final var matchSizeInBytes = ovector[1] - ovector[0];
final var matchSince = (int) Arrays.stream(ovector).min().orElseThrow();
final var matchUntil = (int) Arrays.stream(ovector).max().orElseThrow();
final var matchSizeInBytes = matchUntil - matchSince;

// Calculate the mapping of byte offsets to string indices for the relevant subject region of the match
var stringIndex = 0;
final var byteOffsetToStringIndex = new int[(int) matchSizeInBytes + 1];
for (var byteIndex = 0; byteIndex < ovector[1]; ) {
if (byteIndex >= ovector[0]) {
byteOffsetToStringIndex[(int) (byteIndex - ovector[0])] = stringIndex;
for (var byteIndex = 0; byteIndex < matchUntil; ) {
if (byteIndex >= matchSince) {
byteOffsetToStringIndex[byteIndex - matchSince] = stringIndex;
}

final var subjectChar = subject.charAt(stringIndex);
Expand All @@ -589,8 +588,8 @@ public static int[] convertOvectorToStringIndices(String subject, byte[] subject
}

for (var subjectCharByteIndex = 0; subjectCharByteIndex < subjectCharByteLength; subjectCharByteIndex++) {
if (byteIndex >= ovector[0]) {
byteOffsetToStringIndex[(int) (byteIndex - ovector[0])] = stringIndex;
if (byteIndex >= matchSince) {
byteOffsetToStringIndex[byteIndex - matchSince] = stringIndex;
}
byteIndex += 1;
}
Expand All @@ -602,15 +601,15 @@ public static int[] convertOvectorToStringIndices(String subject, byte[] subject
// Convert byte offsets to string indices
final var stringIndices = new int[ovector.length];
for (var valueIndex = 0; valueIndex < ovector.length; valueIndex++) {
final var byteIndex = ovector[valueIndex];
final var byteIndex = (int) ovector[valueIndex];

// Handle case when group was not matched
if (byteIndex == -1) {
stringIndices[valueIndex] = -1;
continue;
}

stringIndices[valueIndex] = byteOffsetToStringIndex[(int) (byteIndex - ovector[0])];
stringIndices[valueIndex] = byteOffsetToStringIndex[byteIndex - matchSince];
}

return stringIndices;
Expand Down
29 changes: 29 additions & 0 deletions regex/src/test/java/org/pcre4j/regex/MatcherTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -705,4 +705,33 @@ void unmatchedGroups(IPcre2 api) {
assertEquals(javaMatcher.end("question"), pcre4jMatcher.end("question"));
}

/**
 * Checks that the PCRE4J matcher agrees with {@code java.util.regex} for a pattern whose digit run is
 * surrounded by an optional positive look-behind and an optional positive look-ahead, each holding a
 * named capturing group, when searching the input {@code "(42)"}.
 *
 * @param api the PCRE2 API implementation used to compile the pattern under test
 */
@ParameterizedTest
@MethodSource("parameters")
void positiveLookaround(IPcre2 api) {
    final var pattern = "(?<=(?<lWrapper>\\W))?(\\d+)(?=(?<rWrapper>\\W))?";
    final var subject = "(42)";
    final var expected = java.util.regex.Pattern.compile(pattern).matcher(subject);
    final var actual = Pattern.compile(api, pattern).matcher(subject);

    // Groups compared by number and by name; the reference result always comes from java.util.regex
    final var groupNumbers = new int[] {0, 1, 2};
    final var groupNames = new String[] {"lWrapper", "rWrapper"};

    assertEquals(expected.find(), actual.find());

    // Matched text
    assertEquals(expected.group(), actual.group());
    for (final var groupNumber : groupNumbers) {
        assertEquals(expected.group(groupNumber), actual.group(groupNumber));
    }
    assertEquals(expected.groupCount(), actual.groupCount());

    // Start offsets
    assertEquals(expected.start(), actual.start());
    for (final var groupNumber : groupNumbers) {
        assertEquals(expected.start(groupNumber), actual.start(groupNumber));
    }
    for (final var groupName : groupNames) {
        assertEquals(expected.start(groupName), actual.start(groupName));
    }

    // End offsets
    assertEquals(expected.end(), actual.end());
    for (final var groupNumber : groupNumbers) {
        assertEquals(expected.end(groupNumber), actual.end(groupNumber));
    }
    for (final var groupName : groupNames) {
        assertEquals(expected.end(groupName), actual.end(groupName));
    }
}

}

0 comments on commit db29eda

Please sign in to comment.