Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-do S3.Build node in Snapi to take "bucket" and "entry" as mandatory parameters, not a URL #477

Merged
merged 12 commits into from
Aug 26, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,63 @@ class S3PackageTest extends SnapiTestContext {

import com.rawlabs.snapi.compiler.tests.TestCredentials._

// reading a public s3 bucket without registering or passing credentials
s3Bucket(UnitTestPrivateBucket, UnitTestPrivateBucketCred)
// Reading a public bucket without credentials
test(s"""let
| data = Csv.InferAndRead(
| S3.Build("s3://rawlabs-public-test-data/students.csv")
| S3.Build("$UnitTestPublicBucket", "/students.csv")
| )
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))

// reading a public s3 bucket without registering or passing credentials
// Reading the same file without putting leading slash
test(s"""let
| data = Csv.InferAndRead(S3.Build("s3://$UnitTestPublicBucket/students.csv"))
| data = Csv.InferAndRead(
| S3.Build("$UnitTestPublicBucket", "students.csv")
| )
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))

// Testing the automatic casting from url to S3Location
test(s"""let
| data = Csv.InferAndRead("s3://$UnitTestPublicBucket/students.csv")
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))

// Reading a private bucket with credentials in the code
test(s"""let
| data = Csv.InferAndRead(
| S3.Build(
| "$UnitTestPrivateBucket",
| "/students.csv",
| region = "${UnitTestPrivateBucketCred.getRegion}",
| accessKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getAccessKey}",
| secretKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getSecretKey}"
| )
| )
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))

// Reading a private bucket using a registered credential
test(s"""let
| data = Csv.InferAndRead(
| S3.Build(
| "$UnitTestPrivateBucket",
| "/students.csv"
| )
| )
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))

// Using the automatic casting from url to S3Location using credentials
test(s"""let
| data = Csv.InferAndRead("s3://$UnitTestPrivateBucket/students.csv")
|in
| Collection.Count(data)
|""".stripMargin)(it => it should evaluateTo("7"))
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ class LocationPackageTest extends SnapiTestContext {
test(s"""let
| data = Csv.InferAndRead(
| S3.Build(
| "s3://$UnitTestPrivateBucket/students.csv",
| "$UnitTestPrivateBucket",
| "students.csv",
| region = "${UnitTestPrivateBucketCred.getRegion}",
| accessKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getAccessKey}",
| secretKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getSecretKey}"
Expand All @@ -95,7 +96,7 @@ class LocationPackageTest extends SnapiTestContext {
|""".stripMargin)(it => it should evaluateTo("7"))

// using a private bucket registered in the credentials server
test(s"""String.Read(S3.Build("s3://$UnitTestPrivateBucket2/file1.csv"))
test(s"""String.Read(S3.Build("$UnitTestPrivateBucket2", "/file1.csv"))
|""".stripMargin)(it => it should evaluateTo(""" "foobar" """))

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ class S3PackageTest extends SnapiTestContext {
test(s"""let
| data = Csv.InferAndRead(
| S3.Build(
| "s3://rawlabs-private-test-data/students.csv",
| "$UnitTestPrivateBucket",
| "/students.csv",
| region = "${UnitTestPrivateBucketCred.getRegion}",
| accessKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getAccessKey}",
| secretKey = "${UnitTestPrivateBucketCred.getAccessSecretKey.getSecretKey}"
Expand All @@ -36,14 +37,15 @@ class S3PackageTest extends SnapiTestContext {
|""".stripMargin)(it => it should evaluateTo("7"))

// using a private bucket registered in the credentials server
test(s"""String.Read(S3.Build("s3://$UnitTestPrivateBucket2/file1.csv"))
test(s"""String.Read(S3.Build("$UnitTestPrivateBucket2", "/file1.csv"))
|""".stripMargin)(it => it should evaluateTo(""" "foobar" """))

// listing a s3 bucket from us-east-1 (non default region)
test(s"""let
| data = Location.Ls(
| S3.Build(
| "s3://$unitTestPrivateBucketUsEast1/csvs/01",
| "$unitTestPrivateBucketUsEast1",
| "/csvs/01",
| region = "${unitTestPrivateBucketUsEast1Cred.getRegion}",
| accessKey = "${unitTestPrivateBucketUsEast1Cred.getAccessSecretKey.getAccessKey}",
| secretKey = "${unitTestPrivateBucketUsEast1Cred.getAccessSecretKey.getSecretKey}"
Expand All @@ -60,7 +62,8 @@ class S3PackageTest extends SnapiTestContext {
test(s"""let
| data = Location.Ls(
| S3.Build(
| "s3://$unitTestPrivateBucketUsEast1/csvs/01",
| "$unitTestPrivateBucketUsEast1",
| "/csvs/01",
| accessKey = "${unitTestPrivateBucketUsEast1Cred.getAccessSecretKey.getAccessKey}",
| secretKey = "${unitTestPrivateBucketUsEast1Cred.getAccessSecretKey.getSecretKey}"
| )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ class RD5412Test extends SnapiTestContext {
| awsAccountB = {region: "eu-west-1", accessKey: Environment.Secret(
| "AWS_ACCESS_KEY_ACCOUNT_B"), secret: Environment.Secret(
| "AWS_SECRET_ACCOUNT_B")},
| read_logs(path: string,aws_config: record(
| read_logs(bucket: string, path: string,aws_config: record(
| region: string,
| accessKey: string,
| secret: string)) = let
| bucket = S3.Build(
| bucket,
| path,
| region = aws_config.region,
| accessKey = aws_config.accessKey,
Expand All @@ -39,8 +40,8 @@ class RD5412Test extends SnapiTestContext {
| List.Explode(content, (c) -> c.entries)
|in
| List.Union(
| read_logs("s3://bucketA/*.json", awsAccountA),
| read_logs("s3://bucketB/*.json", awsAccountB))""".stripMargin)(
| read_logs("bucketA", "/*.json", awsAccountA),
| read_logs("bucketB", "/*.json", awsAccountB))""".stripMargin)(
_ should runErrorAs("unknown secret")
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,23 @@ class S3BuildEntry extends EntryExtension {
override def docs: EntryDoc = EntryDoc(
"Builds a S3 location from an url.",
params = List(
ParamDoc("url", TypeDoc(List("string")), "The url to the s3 location."),
ParamDoc("bucket", TypeDoc(List("string")), "The name of the bucket."),
ParamDoc("path", TypeDoc(List("string")), "The path to the file in the bucket."),
ParamDoc("region", TypeDoc(List("string")), "The region of the bucket, e.g. 'eu-west-1'.", isOptional = true),
ParamDoc("accessKey", TypeDoc(List("string")), "The AWS access key.", isOptional = true),
ParamDoc("secretKey", TypeDoc(List("string")), "The AWS secret key.", isOptional = true)
),
info = Some(
"If the S3 bucket is not registered in the credentials storage, then the region, accessKey and secretKey must be provided as arguments."
),
examples = List(ExampleDoc("""S3.Build("S3://my-bucket/folder/sub-folder/file")""")),
examples = List(ExampleDoc("""S3.Build("my-bucket", "/folder/sub-folder/file")""")),
ret = Some(ReturnDoc("The S3 location.", retType = Some(TypeDoc(List("location")))))
)

override def nrMandatoryParams: Int = 1
override def nrMandatoryParams: Int = 2

override def getMandatoryParam(prevMandatoryArgs: Seq[Arg], idx: Int): Either[String, Param] = {
assert(idx == 0)
assert(idx == 0 || idx == 1)
Right(ExpParam(SnapiStringType()))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import com.rawlabs.protocol.compiler.S3Config;
import com.rawlabs.snapi.truffle.SnapiContext;
import com.rawlabs.snapi.truffle.ast.ExpressionNode;
import com.rawlabs.snapi.truffle.runtime.exceptions.TruffleRuntimeException;
import com.rawlabs.snapi.truffle.runtime.primitives.*;
import com.rawlabs.utils.sources.filesystem.s3.S3Path;
import scala.None$;
Expand All @@ -28,42 +27,29 @@
@NodeInfo(shortName = "Location.FromS3")
public class LocationFromS3Node extends ExpressionNode {

@Child private ExpressionNode url;
@Child private ExpressionNode bucket;
@Child private ExpressionNode path;
@Child private ExpressionNode accessKey;
@Child private ExpressionNode secretKey;
@Child private ExpressionNode region;

public LocationFromS3Node(
ExpressionNode url,
ExpressionNode bucket,
ExpressionNode path,
ExpressionNode accessKey,
ExpressionNode secretKey,
ExpressionNode region) {
this.url = url;
this.bucket = bucket;
this.path = path;
this.accessKey = accessKey;
this.secretKey = secretKey;
this.region = region;
}

@Override
public Object executeGeneric(VirtualFrame frame) {
String url = (String) this.url.executeGeneric(frame);

// Parse S3 URL to obtain S3 bucket and path
if (!url.startsWith("s3://")) {
throw new TruffleRuntimeException("invalid S3 URL format: " + url);
}

// Remove the "s3://" prefix
String urlWithoutPrefix = url.substring(5);

// Split the remaining part into bucket and path
int slashIndex = urlWithoutPrefix.indexOf('/');
if (slashIndex == -1) {
throw new IllegalArgumentException("invalid S3 URL format: " + url);
}

String bucket = urlWithoutPrefix.substring(0, slashIndex);
String path = urlWithoutPrefix.substring(slashIndex + 1);
String bucket = (String) this.bucket.executeGeneric(frame);
String path = (String) this.path.executeGeneric(frame);

// The docs say:
// "If the S3 bucket is not registered in the credentials storage, then the region, accessKey
Expand Down Expand Up @@ -104,6 +90,12 @@ public Object executeGeneric(VirtualFrame frame) {
bucket, maybeRegion, maybeAccessKey, maybeSecretKey, path, context.getSettings());
}

return new LocationObject(location, url);
StringBuilder url = new StringBuilder();
url.append("s3://");
url.append(bucket);
if (!path.startsWith("/")) url.append("/");
url.append(path);

return new LocationObject(location, url.toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
public class TruffleS3BuildEntry extends S3BuildEntry implements TruffleEntryExtension, WithArgs {

public ExpressionNode toTruffle(Type type, List<TruffleArg> args, SnapiLanguage rawLanguage) {
ExpressionNode url = args.get(0).exprNode();

ExpressionNode bucket = args.get(0).exprNode();
ExpressionNode path = args.get(1).exprNode();
ExpressionNode accessKey = arg(args, "accessKey").orElse(null);
ExpressionNode secretKey = arg(args, "secretKey").orElse(null);
ExpressionNode region = arg(args, "region").orElse(null);

return new LocationFromS3Node(url, accessKey, secretKey, region);
return new LocationFromS3Node(bucket, path, accessKey, secretKey, region);
}
}