Skip to content

Commit 51e152a

Browse files
Gets the correct filesystem when writing an rfile (apache#5296)
* Gets the correct filesystem when writing an rfile. Modified the rfile client code to get the filesystem based on the path being written or read. This covers the case of having multiple filesystems defined in the Hadoop config. --------- Co-authored-by: Daniel Roberts ddanielr <ddanielr@gmail.com>
1 parent f8fb872 commit 51e152a

File tree

4 files changed

+53
-4
lines changed

4 files changed

+53
-4
lines changed

core/src/main/java/org/apache/accumulo/core/client/rfile/FSConfArgs.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,23 @@
2222

2323
import org.apache.hadoop.conf.Configuration;
2424
import org.apache.hadoop.fs.FileSystem;
25+
import org.apache.hadoop.fs.Path;
2526

2627
class FSConfArgs {
2728

2829
FileSystem fs;
2930
Configuration conf;
3031

32+
FileSystem getFileSystem(Path path) throws IOException {
33+
if (fs == null) {
34+
return path.getFileSystem(getConf());
35+
}
36+
return fs;
37+
}
38+
3139
FileSystem getFileSystem() throws IOException {
3240
if (fs == null) {
33-
fs = FileSystem.get(getConf());
41+
return FileSystem.get(getConf());
3442
}
3543
return fs;
3644
}

core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScannerBuilder.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ RFileSource[] getSources() throws IOException {
5656
if (sources == null) {
5757
sources = new RFileSource[paths.length];
5858
for (int i = 0; i < paths.length; i++) {
59-
sources[i] = new RFileSource(getFileSystem().open(paths[i]),
60-
getFileSystem().getFileStatus(paths[i]).getLen());
59+
sources[i] = new RFileSource(getFileSystem(paths[i]).open(paths[i]),
60+
getFileSystem(paths[i]).getFileStatus(paths[i]).getLen());
6161
}
6262
} else {
6363
for (int i = 0; i < sources.length; i++) {

core/src/main/java/org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public RFileWriter build() throws IOException {
119119
visCacheSize);
120120
} else {
121121
return new RFileWriter(fileops.newWriterBuilder()
122-
.forFile(out.path.toString(), out.getFileSystem(), out.getConf(), cs)
122+
.forFile(out.path.toString(), out.getFileSystem(out.path), out.getConf(), cs)
123123
.withTableConfiguration(acuconf).withStartDisabled().build(), visCacheSize);
124124
}
125125
}

core/src/test/java/org/apache/accumulo/core/client/rfile/RFileClientTest.java

+41
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import java.io.File;
2929
import java.io.IOException;
30+
import java.net.ConnectException;
3031
import java.security.SecureRandom;
3132
import java.util.AbstractMap;
3233
import java.util.ArrayList;
@@ -67,6 +68,8 @@
6768
import org.apache.hadoop.conf.Configuration;
6869
import org.apache.hadoop.fs.FileSystem;
6970
import org.apache.hadoop.fs.LocalFileSystem;
71+
import org.apache.hadoop.fs.Path;
72+
import org.apache.hadoop.hdfs.DistributedFileSystem;
7073
import org.apache.hadoop.io.Text;
7174
import org.junit.jupiter.api.Test;
7275

@@ -840,4 +843,42 @@ public void testMultipleFilesAndCache() throws Exception {
840843
assertEquals(testData, toMap(scanner));
841844
scanner.close();
842845
}
846+
847+
@Test
848+
public void testFileSystemFromUri() throws Exception {
849+
String localFsClass = "LocalFileSystem";
850+
851+
String remoteFsHost = "127.0.0.5:8080";
852+
String fileUri = "hdfs://" + remoteFsHost + "/bulk-xyx/file1.rf";
853+
// There was a bug in the code where the default hadoop file system was always used. This test
854+
// checks that the hadoop filesystem used is based on the URI and not the default filesystem. In
855+
// this env the default file system is the local hadoop file system.
856+
var exception =
857+
assertThrows(ConnectException.class, () -> RFile.newWriter().to(fileUri).build());
858+
assertTrue(exception.getMessage().contains("to " + remoteFsHost
859+
+ " failed on connection exception: java.net.ConnectException: Connection refused"));
860+
// Ensure the DistributedFileSystem was used.
861+
assertTrue(Arrays.stream(exception.getStackTrace())
862+
.anyMatch(ste -> ste.getClassName().contains(DistributedFileSystem.class.getName())));
863+
assertTrue(Arrays.stream(exception.getStackTrace())
864+
.noneMatch(ste -> ste.getClassName().contains(localFsClass)));
865+
866+
var exception2 = assertThrows(RuntimeException.class, () -> {
867+
var scanner = RFile.newScanner().from(fileUri).build();
868+
scanner.iterator();
869+
});
870+
assertTrue(exception2.getMessage().contains("to " + remoteFsHost
871+
+ " failed on connection exception: java.net.ConnectException: Connection refused"));
872+
assertTrue(Arrays.stream(exception2.getCause().getStackTrace())
873+
.anyMatch(ste -> ste.getClassName().contains(DistributedFileSystem.class.getName())));
874+
assertTrue(Arrays.stream(exception2.getCause().getStackTrace())
875+
.noneMatch(ste -> ste.getClassName().contains(localFsClass)));
876+
877+
// verify the assumptions this test is making about the local filesystem being the default.
878+
var exception3 = assertThrows(IllegalArgumentException.class,
879+
() -> FileSystem.get(new Configuration()).open(new Path(fileUri)));
880+
assertTrue(exception3.getMessage().contains("Wrong FS: " + fileUri + ", expected: file:///"));
881+
assertTrue(Arrays.stream(exception3.getStackTrace())
882+
.anyMatch(ste -> ste.getClassName().contains(localFsClass)));
883+
}
843884
}

0 commit comments

Comments
 (0)