From 6eda89a4f0ad6b93d32acec3ddc49f436d450493 Mon Sep 17 00:00:00 2001 From: Miguel Branco Date: Wed, 11 Sep 2024 11:05:23 +0200 Subject: [PATCH] Removing utils sources. --- build.sbt | 27 +- project/Dependencies.scala | 2 + utils-sources/src/main/java/module-info.java | 76 ---- .../src/main/resources/reference.conf | 33 -- .../rawlabs/utils/sources/api/Encodings.scala | 67 --- .../rawlabs/utils/sources/api/Location.scala | 19 - .../utils/sources/api/LocationException.scala | 17 - .../bytestream/api/ByteStreamException.scala | 17 - .../bytestream/api/ByteStreamLocation.scala | 83 ---- .../api/DelegatingSeekableInputStream.scala | 150 ------- .../api/GenericSkippableInputStream.scala | 86 ---- .../bytestream/api/InputStreamClient.scala | 23 - .../bytestream/api/SeekableInputStream.scala | 41 -- .../bytestream/github/GitHubLocation.scala | 76 ---- .../http/HttpByteStreamClient.scala | 201 --------- .../http/HttpByteStreamException.scala | 17 - .../http/HttpByteStreamLocation.scala | 80 ---- .../inmemory/InMemoryByteStreamLocation.scala | 46 -- .../filesystem/api/BaseFileSystem.scala | 32 -- .../sources/filesystem/api/FileSystem.scala | 50 --- .../filesystem/api/FileSystemExceptions.scala | 45 -- .../filesystem/api/FileSystemLocation.scala | 38 -- .../filesystem/api/FileSystemMetadata.scala | 24 - .../filesystem/dropbox/BaseDropboxPath.scala | 50 --- .../dropbox/DropboxAccessTokenPath.scala | 45 -- .../dropbox/DropboxFileSystem.scala | 169 ------- .../dropbox/DropboxUsernamePasswordPath.scala | 52 --- .../filesystem/local/LocalFileSystem.scala | 162 ------- .../sources/filesystem/local/LocalPath.scala | 54 --- .../sources/filesystem/mock/MockPath.scala | 71 --- .../sources/filesystem/s3/S3FileSystem.scala | 422 ------------------ .../sources/filesystem/s3/S3InputStream.scala | 56 --- .../utils/sources/filesystem/s3/S3Path.scala | 75 ---- .../utils/sources/jdbc/api/JdbcClient.scala | 242 ---------- .../jdbc/api/JdbcLocationExceptions.scala | 28 -- .../sources/jdbc/api/JdbcSchemaLocation.scala | 33 -- .../sources/jdbc/api/JdbcServerLocation.scala | 32 -- .../sources/jdbc/api/JdbcTableLocation.scala | 31 -- .../sources/jdbc/mysql/MySqlClient.scala | 85 ---- .../jdbc/mysql/MySqlSchemaLocation.scala | 50 --- .../jdbc/mysql/MySqlServerLocation.scala | 33 -- .../jdbc/mysql/MySqlTableLocation.scala | 39 -- .../sources/jdbc/oracle/OracleClient.scala | 175 -------- .../jdbc/oracle/OracleSchemaLocation.scala | 57 --- .../jdbc/oracle/OracleServerLocation.scala | 43 -- .../jdbc/oracle/OracleTableLocation.scala | 50 --- .../sources/jdbc/pgsql/PostgresqlClient.scala | 78 ---- .../jdbc/pgsql/PostgresqlSchemaLocation.scala | 57 --- .../jdbc/pgsql/PostgresqlServerLocation.scala | 44 -- .../jdbc/pgsql/PostgresqlTableLocation.scala | 50 --- .../jdbc/snowflake/SnowflakeClient.scala | 113 ----- .../snowflake/SnowflakeSchemaLocation.scala | 68 --- .../snowflake/SnowflakeServerLocation.scala | 61 --- .../snowflake/SnowflakeTableLocation.scala | 80 ---- .../sources/jdbc/sqlite/SqliteClient.scala | 86 ---- .../jdbc/sqlite/SqliteSchemaLocation.scala | 42 -- .../jdbc/sqlite/SqliteServerLocation.scala | 27 -- .../jdbc/sqlite/SqliteTableLocation.scala | 29 -- .../jdbc/sqlserver/SqlServerClient.scala | 82 ---- .../sqlserver/SqlServerSchemaLocation.scala | 57 --- .../sqlserver/SqlServerServerLocation.scala | 44 -- .../sqlserver/SqlServerTableLocation.scala | 50 --- .../jdbc/teradata/TeradataClient.scala | 117 ----- .../teradata/TeradataSchemaLocation.scala | 65 --- .../teradata/TeradataServerLocation.scala | 46 -- .../jdbc/teradata/TeradataTableLocation.scala | 53 --- .../api/TestGenericSkippableInputStream.scala | 52 --- .../http/HttpLocationsTestContext.scala | 23 - .../bytestream/http/TestHttpServer.scala | 56 --- .../filesystem/api/TestFileSystems.scala | 303 ------------- .../dropbox/TestDropboxFileSystem.scala | 43 -- .../dropbox/TestRootDropboxFileSystem.scala | 34 -- .../local/TestLocalFileSystem.scala | 33 -- .../filesystem/s3/TestLargeDirectory.scala | 50 --- .../filesystem/s3/TestRootS3FileSystem.scala | 69 --- .../filesystem/s3/TestS3FileSystem.scala | 115 ----- 76 files changed, 3 insertions(+), 5328 deletions(-) delete mode 100644 utils-sources/src/main/java/module-info.java delete mode 100644 utils-sources/src/main/resources/reference.conf delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Encodings.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Location.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/api/LocationException.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamException.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/DelegatingSeekableInputStream.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/GenericSkippableInputStream.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/InputStreamClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/SeekableInputStream.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/github/GitHubLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamException.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/inmemory/InMemoryByteStreamLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/BaseFileSystem.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystem.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemExceptions.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemMetadata.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/BaseDropboxPath.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxAccessTokenPath.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxFileSystem.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxUsernamePasswordPath.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalFileSystem.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalPath.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/mock/MockPath.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3FileSystem.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3InputStream.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3Path.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcLocationExceptions.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerTableLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataClient.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataSchemaLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataServerLocation.scala delete mode 100644 utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataTableLocation.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/api/TestGenericSkippableInputStream.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/HttpLocationsTestContext.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/TestHttpServer.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/api/TestFileSystems.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestDropboxFileSystem.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestRootDropboxFileSystem.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/local/TestLocalFileSystem.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestLargeDirectory.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestRootS3FileSystem.scala delete mode 100644 utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestS3FileSystem.scala diff --git a/build.sbt b/build.sbt index d46e7a27f..685f9489b 100644 --- a/build.sbt +++ b/build.sbt @@ -39,7 +39,6 @@ lazy val root = (project in file(".")) .aggregate( protocolRaw, protocolCompiler, - utilsSources, compiler, snapiParser, snapiFrontend, @@ -58,30 +57,6 @@ lazy val root = (project in file(".")) publishLocal / skip := true ) -lazy val utilsSources = (project in file("utils-sources")) - .doPatchDependencies() - .settings( - commonSettings, - nonStrictScalaCompileSettings, - testSettings, - libraryDependencies ++= Seq( - utilsCore % "compile->compile;test->test", - apacheHttpClient, - jwtApi, - jwtImpl, - jwtCore, - springCore, - dropboxSDK, - aws, - postgresqlDeps, - mysqlDeps, - mssqlDeps, - snowflakeDeps, - oracleDeps, - teradataDeps - ) - ) - lazy val protocolRaw = (project in file("protocol-raw")) .doPatchDependencies() .enablePlugins(ProtobufPlugin) @@ -158,7 +133,6 @@ lazy val snapiFrontend = (project in file("snapi-frontend")) .doPatchDependencies() .dependsOn( compiler % "compile->compile;test->test", - utilsSources % "compile->compile;test->test", snapiParser % "compile->compile;test->test" ) .settings( @@ -167,6 +141,7 @@ lazy val snapiFrontend = (project in file("snapi-frontend")) testSettings, libraryDependencies ++= Seq( utilsCore % "compile->compile;test->test", + utilsSources % "compile->compile;test->test", commonsLang, commonsText, icuDeps, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 6ba9724e0..6b7679ac1 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -16,6 +16,8 @@ object Dependencies { val utilsCore = "com.raw-labs" %% "utils-core" % "0.50.0" + val utilsSources = "com.raw-labs" %% "utils-sources" % "0.50.0" + val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % "3.9.5" val logbackClassic = "ch.qos.logback" % "logback-classic" % "1.4.12" diff --git a/utils-sources/src/main/java/module-info.java b/utils-sources/src/main/java/module-info.java deleted file mode 100644 index c0abec6a2..000000000 --- a/utils-sources/src/main/java/module-info.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -module raw.utils.sources { - requires scala.library; - requires org.slf4j; - requires java.xml; - requires java.sql; - requires org.apache.commons.io; - requires org.apache.commons.lang3; - requires org.apache.commons.text; - requires org.apache.httpcomponents.core5.httpcore5; - requires org.apache.httpcomponents.client5.httpclient5; - requires java.net.http; - requires spring.core; - requires typesafe.config; - requires scala.logging; - requires ch.qos.logback.classic; - requires com.google.common; - requires jul.to.slf4j; - requires software.amazon.awssdk.annotations; - requires software.amazon.awssdk.http.apache; - requires software.amazon.awssdk.arns; - requires software.amazon.awssdk.auth; - requires software.amazon.awssdk.awscore; - requires software.amazon.awssdk.protocols.query; - requires software.amazon.awssdk.protocols.xml; - requires software.amazon.awssdk.crtcore; - requires software.amazon.awssdk.endpoints; - requires software.amazon.awssdk.http; - requires software.amazon.awssdk.protocols.jsoncore; - requires software.amazon.awssdk.metrics; - requires software.amazon.awssdk.http.nio.netty; - requires software.amazon.awssdk.profiles; - requires software.amazon.awssdk.protocols.core; - requires software.amazon.awssdk.regions; - requires software.amazon.awssdk.services.s3; - requires software.amazon.awssdk.core; - requires software.amazon.awssdk.thirdparty.jackson.core; - requires software.amazon.awssdk.utils; - requires org.postgresql.jdbc; - requires com.microsoft.sqlserver.jdbc; - requires mysql.connector.j; - requires ojdbc10; - requires snowflake.jdbc; - requires dropbox.core.sdk; - requires raw.utils.core; - - exports com.rawlabs.utils.sources.api; - exports com.rawlabs.utils.sources.bytestream.api; - exports com.rawlabs.utils.sources.bytestream.github; - exports com.rawlabs.utils.sources.bytestream.http; - exports com.rawlabs.utils.sources.bytestream.inmemory; - exports com.rawlabs.utils.sources.filesystem.api; - exports com.rawlabs.utils.sources.filesystem.dropbox; - exports com.rawlabs.utils.sources.filesystem.local; - exports com.rawlabs.utils.sources.filesystem.mock; - exports com.rawlabs.utils.sources.filesystem.s3; - exports com.rawlabs.utils.sources.jdbc.api; - exports com.rawlabs.utils.sources.jdbc.mysql; - exports com.rawlabs.utils.sources.jdbc.pgsql; - exports com.rawlabs.utils.sources.jdbc.snowflake; - exports com.rawlabs.utils.sources.jdbc.sqlite; - exports com.rawlabs.utils.sources.jdbc.sqlserver; - exports com.rawlabs.utils.sources.jdbc.oracle; - exports com.rawlabs.utils.sources.jdbc.teradata; -} diff --git a/utils-sources/src/main/resources/reference.conf b/utils-sources/src/main/resources/reference.conf deleted file mode 100644 index d766d1aee..000000000 --- a/utils-sources/src/main/resources/reference.conf +++ /dev/null @@ -1,33 +0,0 @@ -raw.utils.sources { - rdbms { - connect-timeout = 30 s - read-timeout = 300 s - network-timeout = 300 s - login-timeout = 30 s - } - bytestream { - http { - connect-timeout = 20 s - read-timeout = 120 s - } - } - dropbox { - clientId = "" - } - s3 { - connect-timeout = 60 s - read-timeout = 120 s - max-connections = 50 - - # Hadoop s3a filesystem will make a nested loop of retries with the 2 next settings. - # so if max-retries = 7 and max-attempts = 3, it will make 7*3 = 21 retries - # see fs.s3a.attempts.maximum and fs.s3a.retry.limit - max-retries = 10 - max-attempts = 0 - # Initial delay between s3a retries, see fs.s3a.retry.interval - retry-interval = 100 ms - tmp-dir = ${java.io.tmpdir}/s3 - - default-region = eu-west-1 - } -} \ No newline at end of file diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Encodings.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Encodings.scala deleted file mode 100644 index 3098bd042..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Encodings.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.api - -import java.nio.charset.Charset - -/** - * Encodings - */ -sealed trait Encoding { - def rawEncoding: String - def charset: Charset = Charset.forName(rawEncoding) -} - -final case class UTF_8() extends Encoding { - def rawEncoding = "utf-8" -} - -final case class UTF_16() extends Encoding { - def rawEncoding = "utf-16" -} - -final case class UTF_16BE() extends Encoding { - def rawEncoding = "utf-16be" -} - -final case class UTF_16LE() extends Encoding { - def rawEncoding = "utf-16le" -} - -final case class ISO_8859_1() extends Encoding { - def rawEncoding = "iso-8859-1" -} - -final case class ISO_8859_2() extends Encoding { - def rawEncoding = "iso-8859-2" -} - -final case class ISO_8859_9() extends Encoding { - def rawEncoding = "iso-8859-9" -} - -final case class WINDOWS_1252() extends Encoding { - def rawEncoding = "windows-1252" -} - -object Encoding { - - private val VALID_ENCODINGS = - Seq(UTF_8(), UTF_16(), UTF_16BE(), UTF_16LE(), ISO_8859_1(), ISO_8859_2(), ISO_8859_9(), WINDOWS_1252()) - - def fromEncodingString(enc: String): Either[String, Encoding] = { - VALID_ENCODINGS.foreach(encoding => if (encoding.rawEncoding.equalsIgnoreCase(enc)) return Right(encoding)) - Left(s"invalid encoding: '$enc'") - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Location.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Location.scala deleted file mode 100644 index 3dd843ca4..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/Location.scala +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.api - -trait Location { - - def testAccess(): Unit - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/LocationException.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/LocationException.scala deleted file mode 100644 index 66445725f..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/api/LocationException.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.api - -import com.rawlabs.utils.core.RawException - -class LocationException(message: String, cause: Throwable = null) extends RawException(message, cause) diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamException.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamException.scala deleted file mode 100644 index 070347cd2..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamException.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import com.rawlabs.utils.sources.api.LocationException - -class ByteStreamException(message: String, cause: Throwable = null) extends LocationException(message, cause) diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamLocation.scala deleted file mode 100644 index ea783554b..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/ByteStreamLocation.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.{IOException, InputStream, InputStreamReader, Reader} -import java.nio.charset.{Charset, StandardCharsets} -import java.nio.file.Path -import org.apache.commons.io.ByteOrderMark -import org.apache.commons.io.input.BOMInputStream -import com.rawlabs.utils.sources.api._ - -import scala.util.control.NonFatal - -trait ByteStreamLocation extends Location { - - // This call uses the retry mechanism. - final def getInputStream: InputStream = { - doGetInputStream() - } - - // This call uses the retry mechanism. - final def getSeekableInputStream: SeekableInputStream = { - doGetSeekableInputStream() - } - - protected def doGetInputStream(): InputStream - - protected def doGetSeekableInputStream(): SeekableInputStream - - final def getReader(encoding: Encoding): Reader = { - val charset = encoding.charset - val is = getInputStream - try { - val strippedIs = stripProblematicBOMs(is, charset) - new InputStreamReader(strippedIs, charset) - } catch { - case ex: InterruptedException => - doClose(is) - throw ex - case NonFatal(t) => - doClose(is) - throw t - } - } - - private def doClose(is: InputStream): Unit = { - // If something went wrong, still close the input stream - try { - is.close() // Willing to let this exception go through instead - } catch { - case _: IOException => - // Silently suppressing this one. - // (Sadly can't log because of logger issues as this code is also used in runtime...) - } - } - - private def stripProblematicBOMs(is: InputStream, charset: Charset): InputStream = { - charset match { - case StandardCharsets.UTF_8 => new BOMInputStream(is, false, ByteOrderMark.UTF_8) - case StandardCharsets.UTF_16LE => new BOMInputStream(is, false, ByteOrderMark.UTF_16LE) - case StandardCharsets.UTF_16BE => new BOMInputStream(is, false, ByteOrderMark.UTF_16BE) - case _ => is - } - } - - /** - * Caches the content of the location in a local file and returns the path to it. - * - * @return the path to the local file - */ - def getLocalPath(): Path - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/DelegatingSeekableInputStream.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/DelegatingSeekableInputStream.scala deleted file mode 100644 index 9d59139c5..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/DelegatingSeekableInputStream.scala +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.EOFException -import java.io.IOException -import java.io.InputStream -import java.nio.ByteBuffer - -/** - * Implements read methods required by [[SeekableInputStream]] for generic input streams. - * - * Implementations must implement [[com.rawlabs.utils.sources.bytestream.DelegatingSeekableInputStream.getPos()]] - * and [[com.rawlabs.utils.sources.bytestream.DelegatingSeekableInputStream.seek(long)]] and may optionally - * implement other read methods to improve performance. - */ -private object DelegatingSeekableInputStream { - // (ns) Copied from parquet-mr library - @throws[IOException] - private def readFully(f: InputStream, bytes: Array[Byte], start: Int, len: Int): Unit = { - var offset = start - var remaining = len - while (remaining > 0) { - val bytesRead = f.read(bytes, offset, remaining) - if (bytesRead < 0) throw new EOFException("Reached the end of stream with " + remaining + " bytes left to read") - remaining -= bytesRead - offset += bytesRead - } - } - - @throws[IOException] - private def readHeapBuffer(f: InputStream, buf: ByteBuffer): Int = { - val bytesRead = f.read(buf.array, buf.arrayOffset + buf.position(), buf.remaining) - if (bytesRead < 0) { // if this resulted in EOF, don't update position - bytesRead - } else { - buf.position(buf.position() + bytesRead) - bytesRead - } - } - - @throws[IOException] - private def readFullyHeapBuffer(f: InputStream, buf: ByteBuffer): Unit = { - readFully(f, buf.array, buf.arrayOffset + buf.position(), buf.remaining) - buf.position(buf.limit) - } - - @throws[IOException] - private def readDirectBuffer(f: InputStream, buf: ByteBuffer, temp: Array[Byte]): Int = { // copy all the bytes that return immediately, stopping at the first - // read that doesn't return a full buffer. - var nextReadLength = Math.min(buf.remaining, temp.length) - var totalBytesRead = 0 - var bytesRead = temp.length - while (bytesRead == temp.length) { - bytesRead = f.read(temp, 0, nextReadLength) - if (bytesRead == temp.length) { - buf.put(temp) - totalBytesRead += bytesRead - nextReadLength = Math.min(buf.remaining, temp.length) - } - } - if (bytesRead < 0) { // return -1 if nothing was read - if (totalBytesRead == 0) -1 else totalBytesRead - } else { // copy the last partial buffer - buf.put(temp, 0, bytesRead) - totalBytesRead += bytesRead - totalBytesRead - } - } - - @throws[IOException] - private def readFullyDirectBuffer(f: InputStream, buf: ByteBuffer, temp: Array[Byte]): Unit = { - var nextReadLength = Math.min(buf.remaining, temp.length) - var bytesRead = 0 - while (nextReadLength > 0 && bytesRead >= 0) { - bytesRead = f.read(temp, 0, nextReadLength) - if (bytesRead >= 0) { - buf.put(temp, 0, bytesRead) - nextReadLength = Math.min(buf.remaining, temp.length) - } - } - if (bytesRead < 0 && buf.remaining > 0) { - throw new EOFException("Reached the end of stream with " + buf.remaining + " bytes left to read") - } - } -} - -abstract class DelegatingSeekableInputStream(val stream: InputStream) extends SeekableInputStream { - final private val COPY_BUFFER_SIZE = 8192 - final private val temp = new Array[Byte](COPY_BUFFER_SIZE) - - def getStream: InputStream = stream - - @throws[IOException] - override def close(): Unit = { - stream.close() - } - - @throws[IOException] - override def getPos: Long - - @throws[IOException] - override def seek(newPos: Long): Unit - - @throws[IOException] - override def read(): Int = stream.read - - @throws[IOException] - override def readFully(bytes: Array[Byte]): Unit = { - DelegatingSeekableInputStream.readFully(stream, bytes, 0, bytes.length) - } - - @throws[IOException] - override def readFully(bytes: Array[Byte], start: Int, len: Int): Unit = { - DelegatingSeekableInputStream.readFully(stream, bytes, start, len) - } - - @throws[IOException] - override def read(b: Array[Byte], off: Int, len: Int): Int = { - stream.read(b, off, len) - } - - @throws[IOException] - override def read(buf: ByteBuffer): Int = { - if (buf.hasArray) { - DelegatingSeekableInputStream.readHeapBuffer(stream, buf) - } else { - DelegatingSeekableInputStream.readDirectBuffer(stream, buf, temp) - } - } - - @throws[IOException] - override def readFully(buf: ByteBuffer): Unit = { - if (buf.hasArray) { - DelegatingSeekableInputStream.readFullyHeapBuffer(stream, buf) - } else { - DelegatingSeekableInputStream.readFullyDirectBuffer(stream, buf, temp) - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/GenericSkippableInputStream.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/GenericSkippableInputStream.scala deleted file mode 100644 index e646c9e30..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/GenericSkippableInputStream.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.{IOException, InputStream} - -// This is very inefficient for seeking, as it reads the full stream until reaching the destination. -// But a better implementation needs to use specific information about the source, so this is used -// just as a last resort when we don't have anything better than tha generic input stream. -class GenericSkippableInputStream(inputStreamProvider: () => InputStream) extends InputStream { - private var pos: Long = 0 - private var is: InputStream = inputStreamProvider() - private var closed = false - - def getPos: Long = pos - - def seek(newPos: Long): Unit = synchronized { - if (newPos < pos) { - // Poor man's lseek (but maybe not so bad in practice?). - // Close file and reopen it. - if (closed) throw new IOException("Stream is closed") - is.close() - is = inputStreamProvider() - pos = 0 - seek(newPos) - } else { - assert(newPos >= pos) - var continue = true - while (continue && pos < newPos) { - val n = skip(newPos - pos) // pos is updated in skip - continue = n > 0 - } - } - } - - override def read(): Int = synchronized { - val c = is.read() - if (c >= 0) { - pos += 1 - } - c - } - - override def read(b: Array[Byte], off: Int, len: Int): Int = synchronized { - val n = is.read(b, off, len) - if (n > 0) { - pos += n - } - n - } - - override def available(): Int = synchronized { - is.available() - } - - override def markSupported(): Boolean = false - - override def mark(i: Int): Unit = { - throw new IOException("Mark / reset is not supported") - } - - override def reset(): Unit = { - throw new IOException("Mark / reset is not supported") - } - - override def skip(n: Long): Long = synchronized { - val nread = is.skip(n) - pos += nread - nread - } - - override def close(): Unit = synchronized { - closed = true - is.close() - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/InputStreamClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/InputStreamClient.scala deleted file mode 100644 index 987bdcd7d..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/InputStreamClient.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.InputStream - -trait InputStreamClient { - - def getInputStream(path: String): InputStream - - def getSeekableInputStream(path: String): SeekableInputStream - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/SeekableInputStream.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/SeekableInputStream.scala deleted file mode 100644 index c728b2d42..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/api/SeekableInputStream.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.{IOException, InputStream} -import java.nio.ByteBuffer - -// Same as org.apache.parquet.IO.SeekableInputStream, see the docs on that class -trait SeekableInputStream extends InputStream { - @throws[IOException] - def getPos: Long - - @throws[IOException] - def seek(newPos: Long): Unit - - @throws[IOException] - def readFully(bytes: Array[Byte]): Unit - - @throws[IOException] - def readFully(bytes: Array[Byte], start: Int, len: Int): Unit - - @throws[IOException] - def read(buf: ByteBuffer): Int - - @throws[IOException] - def readFully(buf: ByteBuffer): Unit - - @throws[IOException] - override def close(): Unit = super.close() - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/github/GitHubLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/github/GitHubLocation.scala deleted file mode 100644 index 736ae436c..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/github/GitHubLocation.scala +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.github - -import java.io.InputStream -import java.nio.file.Path -import com.typesafe.scalalogging.StrictLogging -import com.rawlabs.utils.sources.bytestream.http.{HttpByteStreamException, HttpByteStreamLocation} -import com.rawlabs.utils.sources.bytestream.api.{ByteStreamException, ByteStreamLocation, SeekableInputStream} -import com.rawlabs.utils.core.RawSettings - -// Supports only public repositories. -class GitHubLocation(val username: String, val repo: String, val file: String, val maybeBranch: Option[String])( - implicit settings: RawSettings -) extends ByteStreamLocation - with StrictLogging { - - // If branch is not defined, try to find the default one. - // Tried listing branches like this: - // curl -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/torcato/test-repo/branches - // But got the following after some tests: - // {"message":"API rate limit exceeded for 84.226.22.197. (But here's the good news: Authenticated requests get a higher rate limit. Check out the documentation for more details.)","documentation_url":"https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting"} - private val branch = { - maybeBranch.getOrElse { - - def testBranch(branch: String) = { - val githubUrl = s"https://github.com/$username/$repo/tree/$branch" - try { - val httpLocation = new HttpByteStreamLocation(githubUrl) - httpLocation.testAccess() - true - } catch { - case _: HttpByteStreamException => false - } - } - - Seq("main", "master") // Default branch names. - .find(testBranch) - .getOrElse( - throw new ByteStreamException( - s"could not find default branch after trying 'main' and 'master'; is the GitHub repository public?" - ) - ) - } - } - - private val githubUrl = s"https://github.com/$username/$repo/tree/$branch/$file" - - private val httpClient = new HttpByteStreamLocation(githubUrl) - - override protected def doGetInputStream(): InputStream = { - httpClient.getInputStream - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - httpClient.getSeekableInputStream - } - - override def getLocalPath(): Path = { - httpClient.getLocalPath() - } - - override def testAccess(): Unit = { - httpClient.testAccess() - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamClient.scala deleted file mode 100644 index 05d69b43a..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamClient.scala +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.http - -import com.typesafe.scalalogging.StrictLogging -import org.apache.hc.core5.net.URIBuilder -import com.rawlabs.utils.sources.api.LocationException -import com.rawlabs.utils.sources.bytestream.api._ -import com.rawlabs.utils.core.RawSettings - -import java.io.{IOException, InputStream} -import java.net._ -import java.net.http.HttpClient.Version -import java.net.http.HttpRequest.{BodyPublisher, BodyPublishers} -import java.net.http.HttpResponse.BodyHandlers -import java.net.http.{HttpClient, HttpRequest, HttpResponse} -import java.nio.charset.StandardCharsets -import java.time.Duration -import java.util.concurrent.TimeUnit -import java.util.stream.Collectors -import scala.collection.mutable - -object HttpByteStreamClient { - - private val HTTP_CONNECT_TIMEOUT = "raw.utils.sources.bytestream.http.connect-timeout" - private val HTTP_READ_TIMEOUT = "raw.utils.sources.bytestream.http.read-timeout" - - private val ERROR_RESPONSE_MAX_OUTPUT_SIZE = 2048 - - private val httpClientLock = new Object - private var httpClient: HttpClient = _ - - def buildHttpClient(settings: RawSettings): HttpClient = { - httpClientLock.synchronized { - if (httpClient == null) { - val connectTimeout = settings.getDuration(HTTP_CONNECT_TIMEOUT, TimeUnit.MILLISECONDS) - this.httpClient = java.net.http.HttpClient.newBuilder - .version(Version.HTTP_1_1) - .connectTimeout(Duration.ofMillis(connectTimeout)) - .followRedirects(HttpClient.Redirect.NORMAL) - .build - } - } - httpClient - } - -} - -class HttpByteStreamClient( - method: String = "GET", - args: Array[(String, String)] = Array.empty, - headers: Array[(String, String)] = Array.empty, - maybeBody: Option[Array[Byte]] = None, - expectedStatus: Array[Int] = Array( - HttpURLConnection.HTTP_OK, - HttpURLConnection.HTTP_ACCEPTED, - HttpURLConnection.HTTP_CREATED, - HttpURLConnection.HTTP_PARTIAL - ) -)(implicit settings: RawSettings) - extends InputStreamClient - with StrictLogging { - - import HttpByteStreamClient._ - - private val httpClient = HttpByteStreamClient.buildHttpClient(settings) - private val httpReadTimeoutMillis = settings.getDuration(HTTP_READ_TIMEOUT, TimeUnit.MILLISECONDS) - - private val requestBuilderTemplate = HttpRequest - .newBuilder() - .timeout(Duration.ofMillis(httpReadTimeoutMillis)) - - protected def readOutputBounded(is: InputStream): String = { - val data = is.readNBytes(ERROR_RESPONSE_MAX_OUTPUT_SIZE) - val result = new String(data, StandardCharsets.UTF_8) - if (data.length == ERROR_RESPONSE_MAX_OUTPUT_SIZE && is.read() != -1) { - result + "..." - } else { - result - } - } - - // This method expects the response status code to be 200. - override def getInputStream(url: String): InputStream = { - val response = openHTTPConnection(url) - val responseCode = response.statusCode() - - if (expectedStatus.contains(responseCode)) { - response.body() - } else { - val is = response.body() - val bodyContents = - try { - readOutputBounded(is) - } finally { - is.close() - } - if (responseCode == HttpURLConnection.HTTP_UNAUTHORIZED) { - throw new HttpByteStreamException(s"authorization error accessing $url: ($responseCode)\n$bodyContents") - } else { - throw new HttpByteStreamException( - s"could not read (HTTP ${method.toUpperCase}) from $url: ($responseCode)\n$bodyContents" - ) - } - } - } - - // This method does not check the response status code. - def getInputStreamWithStatus(url: String): HttpResult = { - val response = openHTTPConnection(url) - val responseCode = response.statusCode() - - // In some cases, like a 204, there is no content, so creating an empty input-stream - val is = - if (response.body() != null) { - response.body() - } else { - InputStream.nullInputStream() - } - val headersBuffer = new mutable.ArrayBuffer[(String, String)]() - response - .headers() - .map() - .forEach((key, values) => headersBuffer += (key -> values.stream().collect(Collectors.joining(",")))) - val headersSeq = headersBuffer.toArray - HttpResult(responseCode, is, headersSeq) - } - - def getSeekableInputStream(url: String): SeekableInputStream = { - val skippableInputStream = new GenericSkippableInputStream(() => getInputStream(url)) - new DelegatingSeekableInputStream(skippableInputStream) { - override def getPos: Long = skippableInputStream.getPos - - override def seek(newPos: Long): Unit = skippableInputStream.seek(newPos) - } - } - - private def createBodyPublisher(): BodyPublisher = { - maybeBody - .map(s => - method.toLowerCase match { - case "post" | "put" | "patch" => return BodyPublishers.ofByteArray(s) - case _ => throw new LocationException("only 'POST', 'PUT' and 'PATCH' HTTP requests can have a body") - } - ) - .getOrElse(BodyPublishers.noBody()) - } - - private def openHTTPConnection(url: String): HttpResponse[InputStream] = { - val requestBuilder = requestBuilderTemplate.copy() - try { - val uriBuilder = new URIBuilder(url) - args.foreach(x => uriBuilder.addParameter(x._1, x._2)) - requestBuilder.uri(uriBuilder.build()) - } catch { - case ex: IllegalArgumentException => throw new LocationException(s"invalid HTTP URL: ${ex.getMessage}", ex) - case ex: MalformedURLException => throw new LocationException(s"invalid HTTP URL: ${ex.getMessage}", ex) - case ex: URISyntaxException => throw new LocationException(s"invalid HTTP URL: ${ex.getMessage}", ex) - } - method.toLowerCase match { - case "get" => requestBuilder.GET() - case "post" => requestBuilder.POST(createBodyPublisher()) - case "put" => requestBuilder.PUT(createBodyPublisher()) - case "delete" => requestBuilder.DELETE() - case "head" => requestBuilder.method("HEAD", BodyPublishers.noBody()) - case "patch" => requestBuilder.method("PATCH", createBodyPublisher()) - case "options" => requestBuilder.method("OPTIONS", BodyPublishers.noBody()) - case _ => throw new LocationException(s"invalid HTTP method: $method") - } - - headers.foreach { - case (k, v) => - try { requestBuilder.setHeader(k, v) } - catch { - case ex: IllegalArgumentException => - // .setHeader docs says `IllegalArgumentException` can be thrown if the header is restricted (e.g. "Host") - // RD-6871 - throw new HttpByteStreamException(ex.getMessage, ex) - } - } - - val request = requestBuilder.build() - logger.debug(s"Sending request: $request") - try { - httpClient.send(request, BodyHandlers.ofInputStream()) - } catch { - case ex: java.net.ConnectException => throw new HttpByteStreamException(s"host not found for $url", ex) - case ex: IOException => throw new HttpByteStreamException(s"unexpected error accessing $url", ex) - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamException.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamException.scala deleted file mode 100644 index 20d87dfc3..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamException.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright 2024 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.http - -import com.rawlabs.utils.sources.bytestream.api.ByteStreamException - -class HttpByteStreamException(message: String, cause: Throwable = null) extends ByteStreamException(message, cause) diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamLocation.scala deleted file mode 100644 index fa0140198..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/http/HttpByteStreamLocation.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.http - -import com.rawlabs.utils.sources.bytestream.api.{ByteStreamException, ByteStreamLocation, SeekableInputStream} -import com.rawlabs.utils.core.RawSettings - -import java.io.InputStream -import java.net.{HttpURLConnection, MalformedURLException, URI, URISyntaxException} -import java.nio.file.Path - -final case class HttpResult(status: Int, is: InputStream, headers: Seq[(String, String)]) - -class HttpByteStreamLocation( - val url: String, - val method: String = "GET", - val args: Array[(String, String)] = Array.empty, - val headers: Array[(String, String)] = Array.empty, - val maybeBody: Option[Array[Byte]] = None, - val expectedStatus: Array[Int] = Array( - HttpURLConnection.HTTP_OK, - HttpURLConnection.HTTP_ACCEPTED, - HttpURLConnection.HTTP_CREATED, - HttpURLConnection.HTTP_PARTIAL - ) -)(implicit settings: RawSettings) - extends ByteStreamLocation { - - private val httpClient = - try { - new HttpByteStreamClient(method, args, headers, maybeBody, expectedStatus)( - settings - ) - } catch { - case ex: MalformedURLException => throw new HttpByteStreamException(s"invalid HTTP URL: ${ex.getMessage}", ex) - case ex: URISyntaxException => throw new HttpByteStreamException(s"invalid HTTP URL: ${ex.getMessage}", ex) - } - - private val safeUrl: String = { - new URI(url).normalize().toString - } - - override def testAccess(): Unit = { - // We are reading a single byte to ensure the connection is valid. - // By reading a single byte, we are actually doing a connection and authentication. - val is = httpClient.getInputStream(safeUrl) - try { - is.read() - } finally { - is.close() - } - } - - override protected def doGetInputStream(): InputStream = { - httpClient.getInputStream(safeUrl) - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - httpClient.getSeekableInputStream(safeUrl) - } - - override def getLocalPath(): Path = { - throw new ByteStreamException("currently not supported for HTTP get") - } - - def getHttpResult(): HttpResult = { - httpClient.getInputStreamWithStatus(safeUrl) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/inmemory/InMemoryByteStreamLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/inmemory/InMemoryByteStreamLocation.scala deleted file mode 100644 index bcb0e7a84..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/bytestream/inmemory/InMemoryByteStreamLocation.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.inmemory - -import com.rawlabs.utils.sources.api.LocationException -import com.rawlabs.utils.sources.bytestream.api.{ - ByteStreamLocation, - DelegatingSeekableInputStream, - GenericSkippableInputStream, - SeekableInputStream -} - -import java.io.{ByteArrayInputStream, InputStream} -import java.nio.file.Path - -class InMemoryByteStreamLocation(val data: Array[Byte]) extends ByteStreamLocation { - - def this(data: String) = this(data.getBytes("UTF-8")) - - override protected def doGetInputStream(): InputStream = { - new ByteArrayInputStream(data) - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - val genSings = new GenericSkippableInputStream(() => doGetInputStream()) - new DelegatingSeekableInputStream(genSings) { - override def getPos: Long = genSings.getPos - override def seek(newPos: Long): Unit = genSings.seek(newPos) - } - } - - override def getLocalPath(): Path = throw new LocationException("not supported for in-memory location") - - override def testAccess(): Unit = {} - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/BaseFileSystem.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/BaseFileSystem.scala deleted file mode 100644 index 46af6e57c..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/BaseFileSystem.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import com.typesafe.scalalogging.StrictLogging - -/** - * BaseFileSystem provides default implementations for some FileSystem methods. - * If a new file system is implemented in terms of BaseFileSystem, then the implementor needs to implement fewer methods. - * However, the performance will likely be worse: e.g. `listContentsWithMetadata` calls `listContents` and then for each - * calls `metadata`. A native implementation that does both simultaneously, if available, would be faster. - */ -trait BaseFileSystem extends FileSystem with StrictLogging { - - override def listContentsWithMetadata(path: String): Iterator[(String, FileSystemMetadata)] = - listContents(path).map(f => (f, metadata(f))) - - override def hasGlob(path: String): Boolean = path.contains("*") || path.contains("?") - - override def isDirectory(path: String): Boolean = metadata(path).isInstanceOf[DirectoryMetadata] - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystem.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystem.scala deleted file mode 100644 index 1bc951339..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystem.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import java.io.InputStream - -import com.rawlabs.utils.sources.bytestream.api.InputStreamClient - -/** - * Path conventions: - * Only absolute paths allowed: e.g. "/dir/file". - * Relative paths NOT allowed: e.g. "foo.csv" is invalid. - * Wildcards allowed are *, ** and ?. - * File Systems should implement paths as flexibly as possible, e.g.: - * "/tmp", "/tmp/" and "/tmp/\*" should all be handled in equivalent ways. - */ -trait FileSystem extends InputStreamClient { - - // Used for testing (to generate the right test code). - private[sources] def fileSeparator: String - - /** - * Test access to the file system. - * If any error occurs, an exception is thrown. - */ - def testAccess(path: String): Unit - - def metadata(path: String): FileSystemMetadata - - def getInputStream(file: String): InputStream - - def listContents(path: String): Iterator[String] - - def listContentsWithMetadata(path: String): Iterator[(String, FileSystemMetadata)] - - def isDirectory(path: String): Boolean - - def hasGlob(path: String): Boolean - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemExceptions.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemExceptions.scala deleted file mode 100644 index 417fde682..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemExceptions.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import com.rawlabs.utils.sources.bytestream.api.ByteStreamException - -class FileSystemException(message: String, cause: Throwable = null) - extends ByteStreamException(s"file system error: $message", cause) - -class PathInvalidException(path: String, cause: Throwable) extends FileSystemException(s"path invalid: $path", cause) - -class PathUnauthorizedException(path: String, cause: Throwable) - extends FileSystemException(s"path not authorized: $path", cause) - -class FileSystemUnavailableException(path: String, cause: Throwable) - extends FileSystemException(s"file system unavailable: $path", cause) - -class FileSystemTimeoutException(path: Option[String], cause: Throwable) - extends FileSystemException(s"file system timeout${path.map(url => s": $url").getOrElse("")}", cause) - -class PathAlreadyExistsException(path: String, cause: Throwable) - extends FileSystemException(s"path already exists: $path", cause) - -class PathNotFoundException(path: String, cause: Throwable) extends FileSystemException(s"path not found: $path", cause) - -class HostNotFoundException(host: String, cause: Throwable) extends FileSystemException(s"host not found: $host", cause) - -class NotAFileException(path: String, cause: Throwable = null) - extends FileSystemException(s"path is not a file: $path", cause) - -class UnexpectedFileSystemErrorException(message: String, cause: Throwable) - extends FileSystemException(if (message.isEmpty) "unexpected error" else s"unexpected error: $message", cause) { - def this(cause: Throwable) = this("", cause) - def this(message: String) = this(message, null) -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemLocation.scala deleted file mode 100644 index 724b9f05f..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemLocation.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import com.rawlabs.utils.sources.bytestream.api.ByteStreamLocation - -trait FileSystemLocation extends ByteStreamLocation { - - def metadata(): FileSystemMetadata - - // This call uses the retry mechanism. - final def ls(): Iterator[FileSystemLocation] = { - doLs() - } - - protected def doLs(): Iterator[FileSystemLocation] - - // (msb): Instead of this call, we could use listFiles and then for each entry do getMetadata. - // To be efficient, this would require the builders to retrieve and store the metadata in memory as - // part of the listFiles call. - // This call uses the retry mechanism. - final def lsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - doLsWithMetadata() - } - - protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemMetadata.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemMetadata.scala deleted file mode 100644 index bc8717f1d..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/api/FileSystemMetadata.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import java.time.Instant - -sealed trait FileSystemMetadata - -final case class DirectoryMetadata(modifiedInstant: Option[Instant]) extends FileSystemMetadata - -final case class FileBlock(hosts: Array[String], offset: Long, length: Long) - -final case class FileMetadata(modifiedInstant: Option[Instant], size: Option[Long], blocks: Array[FileBlock]) - extends FileSystemMetadata diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/BaseDropboxPath.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/BaseDropboxPath.scala deleted file mode 100644 index d9e64d1c9..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/BaseDropboxPath.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core.v2.DbxClientV2 -import com.rawlabs.utils.sources.bytestream.api.{ByteStreamException, SeekableInputStream} -import com.rawlabs.utils.sources.filesystem.api._ - -import java.io.InputStream -import java.nio.file.Path - -object BaseDropboxPath { - val DROPBOX_CLIENT_ID = "raw.utils.sources.dropbox.clientId" -} - -abstract class BaseDropboxPath(dbxClientV2: DbxClientV2, path: String) extends FileSystemLocation { - - protected val cli = new DropboxFileSystem(dbxClientV2) - - override def testAccess(): Unit = { - cli.testAccess(path) - } - - override protected def doGetInputStream(): InputStream = { - cli.getInputStream(path) - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - cli.getSeekableInputStream(path) - } - - override def getLocalPath(): Path = { - throw new ByteStreamException("currently not supported for Dropbox") - } - - override def metadata(): FileSystemMetadata = { - cli.metadata(path) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxAccessTokenPath.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxAccessTokenPath.scala deleted file mode 100644 index da37a00be..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxAccessTokenPath.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2024 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core.DbxRequestConfig -import com.dropbox.core.oauth.DbxCredential -import com.dropbox.core.v2.DbxClientV2 -import com.rawlabs.utils.sources.filesystem.api.{FileSystemLocation, FileSystemMetadata} -import com.rawlabs.utils.core.RawSettings - -class DropboxAccessTokenPath(val accessToken: String, val path: String, dbxClientV2: DbxClientV2) - extends BaseDropboxPath(dbxClientV2, path) { - - def this(accessToken: String, path: String)(implicit settings: RawSettings) = this( - accessToken, - path, - new DbxClientV2( - DbxRequestConfig.newBuilder(settings.getString(BaseDropboxPath.DROPBOX_CLIENT_ID)).build(), - new DbxCredential(accessToken) - ) - ) - - override protected def doLs(): Iterator[FileSystemLocation] = { - cli - .listContents(path) - .map(npath => new DropboxAccessTokenPath(accessToken, npath, dbxClientV2)) - } - - override protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - cli.listContentsWithMetadata(path).map { - case (npath, meta) => (new DropboxAccessTokenPath(accessToken, npath, dbxClientV2), meta) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxFileSystem.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxFileSystem.scala deleted file mode 100644 index ffd85ed17..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxFileSystem.scala +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core._ -import com.dropbox.core.v2.DbxClientV2 -import com.dropbox.core.v2.files.{DownloadErrorException, FolderMetadata, Metadata, FileMetadata => DropboxFileMetadata} -import org.springframework.util.AntPathMatcher -import com.rawlabs.utils.sources.bytestream.api.{ - DelegatingSeekableInputStream, - GenericSkippableInputStream, - SeekableInputStream -} -import com.rawlabs.utils.sources.filesystem.api._ - -import java.io.InputStream -import scala.collection.JavaConverters._ -import scala.collection.mutable - -// TODO (msb): Catch unauthorized and throw specific exception? -class DropboxFileSystem(private[rawlabs] val client: DbxClientV2) extends BaseFileSystem { - - val fileSeparator: String = "/" - - private val fileSeparatorRegex: String = "/" - - private def sanitizePath(path: String): String = { - val p = path.replaceAll(s"$fileSeparatorRegex+", fileSeparator).stripSuffix(fileSeparator) - if (p.isEmpty) fileSeparator else p - } - - private def getMetadata(path: String): Metadata = { - try { - client.files().getMetadata(path) - } catch { - case ex: DbxException => throwSourceException(ex, path) - case ex: IllegalArgumentException => throw new PathNotFoundException(path, ex) - } - } - - override def metadata(path: String): FileSystemMetadata = { - val sanitizedPath = sanitizePath(path) - if (sanitizedPath == "/") return DirectoryMetadata(None) - getMetadata(sanitizedPath) match { - case m: DropboxFileMetadata => FileMetadata(Some(m.getClientModified.toInstant), Some(m.getSize), Array.empty) - case _: FolderMetadata => DirectoryMetadata(None) - case _ => throw new PathNotFoundException(path, null) - } - } - - override def getInputStream(file: String): InputStream = { - try { - client.files().download(sanitizePath(file)).getInputStream - } catch { - // TODO: This misses NotAFileException I believe? - case ex: DbxException => throwSourceException(ex, file) - case ex: IllegalArgumentException => throw new PathInvalidException(file, ex) - - } - } - - override def getSeekableInputStream(path: String): SeekableInputStream = { - val skipableInputStream = new GenericSkippableInputStream(() => getInputStream(path)) - new DelegatingSeekableInputStream(skipableInputStream) { - override def getPos: Long = skipableInputStream.getPos - - override def seek(newPos: Long): Unit = skipableInputStream.seek(newPos) - } - } - - override def listContents(path: String): Iterator[String] = { - listContentsWithMetadata(path).map(_._1) - } - - override def listContentsWithMetadata(path: String): Iterator[(String, FileSystemMetadata)] = { - val sanitizedPath = sanitizePath(path) - if (!hasGlob(sanitizedPath)) { - // If no glob, then it is either a file or a directory. - getContents(sanitizedPath, recursive = false) - } else { - // If it has glob, obtain basepath before glob, list recursively and apply our own walker filter. - val pathBeforeGlob = sanitizedPath - .dropWhile(_ == '/') - .split("/") - .takeWhile(p => !hasGlob(p)) - .mkString("/", "/", "") - val matcher = new AntPathMatcher - matcher.setPathSeparator(fileSeparator) - getContents(pathBeforeGlob, recursive = true) - .filter(p => matcher.`match`(sanitizedPath, p._1)) - } - } - - private def getContents(path: String, recursive: Boolean): Iterator[(String, FileSystemMetadata)] = { - if (path == "/") { - listFolder("", recursive) - } else { - assert(!hasGlob(path)) - getMetadata(path) match { - case fm: DropboxFileMetadata => Iterator( - (fm.getPathDisplay, FileMetadata(Some(fm.getClientModified.toInstant), Some(fm.getSize), Array.empty)) - ) - case fm: FolderMetadata => listFolder(fm.getPathDisplay, recursive) - } - } - } - - private def listFolder(path: String, recursive: Boolean): Iterator[(String, FileSystemMetadata)] = { - assert(!hasGlob(path)) - val files = mutable.ListBuffer[(String, FileSystemMetadata)]() - var continue = true - var lfr = - try { - if (recursive) client.files().listFolderBuilder(path).withRecursive(true).start() - else client.files().listFolder(path) - } catch { - case ex: DbxException => throwSourceException(ex, path) - } - while (continue) { - lfr.getEntries.asScala.foreach { - case m: DropboxFileMetadata => files.append( - (m.getPathDisplay, FileMetadata(Some(m.getClientModified.toInstant), Some(m.getSize), Array.empty)) - ) - case m: FolderMetadata => files.append((m.getPathDisplay, DirectoryMetadata(None))) - case _ => - } - if (lfr.getHasMore) { - lfr = - try { - client.files().listFolderContinue(lfr.getCursor) - } catch { - case ex: DbxException => throwSourceException(ex, path) - } - } else { - continue = false - } - } - files.to - } - - private def throwSourceException(ex: DbxException, path: String) = { - ex match { - case _ @(_: ServerException | _: RetryException | _: NetworkIOException) => - throw new FileSystemUnavailableException(path, ex) - case _: InvalidAccessTokenException => throw new PathUnauthorizedException(path, ex) - case e: DownloadErrorException => - // TODO: This can't be handled generally here! Only where we want files... - if (e.errorValue.getPathValue.isNotFile) throw new NotAFileException(path, e) - else throw new PathNotFoundException(path, e) - case _ => // Everything else, assume file does not exist - throw new PathNotFoundException(path, ex) - } - } - - override def testAccess(path: String): Unit = { - listContents(path) // Throws exception if not found. - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxUsernamePasswordPath.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxUsernamePasswordPath.scala deleted file mode 100644 index c881b5c7b..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/dropbox/DropboxUsernamePasswordPath.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2024 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core.DbxRequestConfig -import com.dropbox.core.oauth.DbxCredential -import com.dropbox.core.v2.DbxClientV2 -import com.rawlabs.utils.sources.filesystem.api.{FileSystemLocation, FileSystemMetadata} -import com.rawlabs.utils.core.RawSettings - -class DropboxUsernamePasswordPath( - val username: String, - val password: String, - val path: String, - dbxClientV2: DbxClientV2 -) extends BaseDropboxPath(dbxClientV2, path) { - - def this(username: String, password: String, path: String)( - implicit settings: RawSettings - ) = this( - username, - password, - path, - new DbxClientV2( - DbxRequestConfig.newBuilder(settings.getString(BaseDropboxPath.DROPBOX_CLIENT_ID)).build(), - new DbxCredential(null, null, null, username, password) - ) - ) - - override protected def doLs(): Iterator[FileSystemLocation] = { - cli - .listContents(path) - .map(npath => new DropboxUsernamePasswordPath(username, password, npath, dbxClientV2)) - } - - override protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - cli.listContentsWithMetadata(path).map { - case (npath, meta) => (new DropboxUsernamePasswordPath(username, password, npath, dbxClientV2), meta) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalFileSystem.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalFileSystem.scala deleted file mode 100644 index 28900c0ca..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalFileSystem.scala +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.local - -import org.springframework.util.AntPathMatcher -import com.rawlabs.utils.sources.bytestream.api.{DelegatingSeekableInputStream, SeekableInputStream} -import com.rawlabs.utils.sources.filesystem.api._ -import com.rawlabs.utils.core._ - -import java.io._ -import java.nio.file._ -import java.nio.file.attribute.BasicFileAttributes -import scala.collection.mutable.ArrayBuffer - -object LocalFileSystem extends BaseFileSystem { - - private[sources] val fileSeparator: String = File.separator - - private val fileSeparatorRegex: String = RawUtils.descape(fileSeparator) - - private def sanitizePath(path: String): String = { - // Currently there's no need to sanitize path because the Java APIs handle it for us. - if (RawUtils.isWindows) path.replaceFirst("^/(.:/)", "$1") - else path - } - - private def handleException(path: String, e: Exception) = e match { - case _: FileNotFoundException | _: NoSuchFileException => throw new PathNotFoundException(path, e) - case _: FileAlreadyExistsException => throw new PathAlreadyExistsException(path, e) - case _: SecurityException | _: ReadOnlyFileSystemException | _: AccessDeniedException => - throw new PathUnauthorizedException(path, e) - case _: InvalidPathException => throw new PathInvalidException(path, e) - case _: IOException => throw new UnexpectedFileSystemErrorException(e) - case _ => throw e - } - - override def testAccess(path: String): Unit = { - getPath(path) - } - - private def getPath(path: String): Path = { - try { - val p = Paths.get(path) - if (!Files.exists(p) || (!Files.isDirectory(p) && !Files.isRegularFile(p))) - throw new PathNotFoundException(path, null) - else if (!Files.isReadable(p) || !p.isAbsolute) throw new PathUnauthorizedException(path, null) - else p - } catch { - case ex: Exception => handleException(path, ex) - } - } - - override def metadata(path: String): FileSystemMetadata = { - val sanitizedPath = sanitizePath(path) - val p = getPath(sanitizedPath) - try { - if (Files.isDirectory(p)) { - val modified = Files.getLastModifiedTime(p) - DirectoryMetadata(Some(modified.toInstant)) - } else { - val modified = Files.getLastModifiedTime(p) - val size = Files.size(p) - FileMetadata(Some(modified.toInstant), Some(size), Array.empty) - } - } catch { - case ex: Exception => handleException(path, ex) - } - } - - override def listContents(path: String): Iterator[String] = { - try { - val sanitizedPath = sanitizePath(path) - if (!hasGlob(sanitizedPath)) { - val p = getPath(sanitizedPath) - if (Files.isDirectory(p)) { - // List path contents - val stream = Files.list(p) - try { - stream - .map[String](_.toAbsolutePath.toString) - .toArray(new Array[String](_)) - .iterator - } finally { - stream.close() - } - } else { - Iterator(sanitizedPath) - } - } else { - val pathBeforeGlob = sanitizedPath - .split(fileSeparatorRegex) - .takeWhile(p => !hasGlob(p)) - .mkString(fileSeparator) - val matcher = new AntPathMatcher - matcher.setPathSeparator(fileSeparator) - val matches = new ArrayBuffer[String]() - Files.walkFileTree( - getPath(pathBeforeGlob), - new SimpleFileVisitor[Path] { - override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = { - FileVisitResult.SKIP_SUBTREE - } - override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = { - val absPath = dir.toAbsolutePath.toString - if (matcher.`match`(sanitizedPath, absPath)) { - matches += absPath - FileVisitResult.CONTINUE - } else if (matcher.matchStart(sanitizedPath, absPath)) { - FileVisitResult.CONTINUE - } else { - FileVisitResult.SKIP_SUBTREE - } - } - - override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { - val absPath = file.toAbsolutePath.toString - if (matcher.`match`(sanitizedPath, absPath)) { - matches += absPath - } - FileVisitResult.CONTINUE - } - } - ) - matches.iterator - } - } catch { - case ex: Exception => handleException(path, ex) - } - } - - override def getInputStream(file: String): FileInputStream = { - val sanitizedPath = sanitizePath(file) - val path = getPath(sanitizedPath) - try { - if (Files.isDirectory(path)) throw new NotAFileException(sanitizedPath, null) - new FileInputStream(path.toFile) - } catch { - case ex: Exception => handleException(sanitizedPath, ex) - } - } - - override def getSeekableInputStream(pathName: String): SeekableInputStream = { - val is = getInputStream(pathName) - new DelegatingSeekableInputStream(is) { - - def getPos: Long = is.getChannel.position() - - def seek(newPos: Long): Unit = is.getChannel.position(newPos) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalPath.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalPath.scala deleted file mode 100644 index 512cea527..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/local/LocalPath.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.local - -import java.io.InputStream -import java.nio.file.{Path, Paths} -import com.rawlabs.utils.sources.bytestream.api.SeekableInputStream -import com.rawlabs.utils.sources.filesystem.api._ - -class LocalPath(val pathName: String) extends FileSystemLocation { - - def this(path: Path) = this(path.toAbsolutePath.toString) - - protected def path: Path = Paths.get(pathName) - - override def testAccess(): Unit = { - LocalFileSystem.testAccess(pathName) - } - - override protected def doGetInputStream(): InputStream = { - LocalFileSystem.getInputStream(pathName) - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - LocalFileSystem.getSeekableInputStream(pathName) - } - - override def getLocalPath(): Path = { - Paths.get(pathName) - } - - override def metadata(): FileSystemMetadata = { - LocalFileSystem.metadata(pathName) - } - - override protected def doLs(): Iterator[FileSystemLocation] = { - LocalFileSystem.listContents(pathName).map(npath => new LocalPath(npath)) - } - - override protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - LocalFileSystem.listContentsWithMetadata(pathName).map { case (npath, meta) => (new LocalPath(npath), meta) } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/mock/MockPath.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/mock/MockPath.scala deleted file mode 100644 index 37129edba..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/mock/MockPath.scala +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.mock - -import com.typesafe.scalalogging.StrictLogging -import com.rawlabs.utils.sources.bytestream.api.SeekableInputStream -import com.rawlabs.utils.sources.filesystem.api._ - -import java.io.InputStream -import java.lang.StackWalker.StackFrame -import java.nio.file.Path - -class MockPath(val delayMillis: Long, val delegate: FileSystemLocation) extends FileSystemLocation with StrictLogging { - - private def doDelay(): Unit = { - val sw = StackWalker.getInstance() - val caller: StackFrame = sw.walk(s => { - s.skip(1).findFirst().get() - }) - - logger.info(s"Pausing for $delayMillis millis. Called by: $caller") - Thread.sleep(delayMillis) - logger.info(s"Continuing") - } - - override def testAccess(): Unit = { - doDelay() - delegate.testAccess() - } - - override protected def doGetInputStream(): InputStream = { - doDelay() - delegate.getInputStream - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - doDelay() - delegate.getSeekableInputStream - } - - override def getLocalPath(): Path = { - doDelay() - delegate.getLocalPath() - } - - override def metadata(): FileSystemMetadata = { - doDelay() - delegate.metadata() - } - - override protected def doLs(): Iterator[FileSystemLocation] = { - doDelay() - delegate.ls() - } - - override protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - doDelay() - delegate.lsWithMetadata() - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3FileSystem.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3FileSystem.scala deleted file mode 100644 index 55ee10372..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3FileSystem.scala +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import software.amazon.awssdk.regions.Region -import software.amazon.awssdk.services.s3.S3Client -import software.amazon.awssdk.services.s3.S3Configuration - -import java.net.ConnectException -import com.google.common.collect.AbstractIterator -import org.springframework.util.AntPathMatcher -import com.rawlabs.utils.sources.bytestream.api.{ - DelegatingSeekableInputStream, - GenericSkippableInputStream, - SeekableInputStream -} -import com.rawlabs.utils.sources.filesystem.api._ -import com.rawlabs.utils.core._ -import software.amazon.awssdk.auth.credentials.{ - AnonymousCredentialsProvider, - AwsBasicCredentials, - StaticCredentialsProvider -} -import software.amazon.awssdk.core.exception.{SdkClientException, SdkException, SdkServiceException} -import software.amazon.awssdk.http.SdkHttpClient -import software.amazon.awssdk.http.apache.ApacheHttpClient -import software.amazon.awssdk.services.s3.model.{ - GetBucketLocationRequest, - GetObjectRequest, - ListObjectsV2Request, - ListObjectsV2Response, - S3Exception, - S3Object -} - -import java.io.InputStream -import java.util -import java.util.concurrent.TimeUnit -import scala.collection.JavaConverters._ - -object S3FileSystem { - private val CONNECT_TIMEOUT = "raw.utils.sources.s3.connect-timeout" - private val READ_TIMEOUT = "raw.utils.sources.s3.read-timeout" - private val MAX_CONNECTIONS = "raw.utils.sources.s3.max-connections" - private val DEFAULT_REGION = "raw.utils.sources.s3.default-region" -} - -class S3FileSystem( - val bucket: String, - val maybeRegion: Option[String], - val maybeAccessKey: Option[String], - val maybeSecretKey: Option[String] -)(implicit settings: RawSettings) - extends BaseFileSystem { - - import S3FileSystem._ - - private lazy val defaultRegion = settings.getString(DEFAULT_REGION) - - private[sources] val fileSeparator: String = "/" - private val fileSeparatorRegex: String = RawUtils.descape(fileSeparator) - - private val s3ConnectTimeout = settings.getDuration(CONNECT_TIMEOUT, TimeUnit.MILLISECONDS) - private val s3ReadTimeout = settings.getDuration(READ_TIMEOUT, TimeUnit.MILLISECONDS) - private val s3MaxConnections = settings.getInt(MAX_CONNECTIONS) - - private def guessBucketRegion(): Region = { - // Try to guess the bucket. - // That said, we can only get the bucket if we have credentials. - // If we don't have credentials, we also don't have permissions to find the bucket region. - if (maybeAccessKey.isDefined && maybeSecretKey.isDefined) { - val builder = S3Client.builder() - builder.region(Region.of(defaultRegion)) - builder.credentialsProvider( - StaticCredentialsProvider.create(AwsBasicCredentials.create(maybeAccessKey.get, maybeSecretKey.get)) - ) - val client = builder.build() - try { - val location = client.getBucketLocation(GetBucketLocationRequest.builder().bucket(bucket).build()) - val bucketRegion = location.locationConstraint() - // 'US_EAST_1' is returned as "null" by AWS SDK - if (bucketRegion == null || bucketRegion.toString == "null" || bucketRegion.toString.isEmpty) Region.US_EAST_1 - else Region.of(bucketRegion.toString) - } finally { - client.close() - } - } else { - Region.of(defaultRegion) - } - } - - private lazy val client = { - val builder = S3Client.builder() - - val s3Config = S3Configuration - .builder() - .useArnRegionEnabled(true) // Equivalent to enableForceGlobalBucketAccess in SDK v1 - .build() - - builder.serviceConfiguration(s3Config) - - // Setting the region - val region = maybeRegion match { - case Some(regionValue) => Region.of(regionValue) - case None => guessBucketRegion() - } - builder.region(region) - - // Set credentials - if (maybeAccessKey.isDefined && maybeSecretKey.isDefined) { - builder.credentialsProvider( - StaticCredentialsProvider.create(AwsBasicCredentials.create(maybeAccessKey.get, maybeSecretKey.get)) - ) - } else { - builder.credentialsProvider(AnonymousCredentialsProvider.create()) - } - - val httpClient: SdkHttpClient = ApacheHttpClient - .builder() - .socketTimeout(java.time.Duration.ofMillis(s3ConnectTimeout.toInt)) - .connectionTimeout(java.time.Duration.ofMillis(s3ReadTimeout.toInt)) - .maxConnections(s3MaxConnections) - .build() - - builder.httpClient(httpClient) - -// // Set retry policy -// builder.retryPolicy(RetryPolicy.defaultRetryPolicy()) - - builder.build() - } - - /** - * This is questionable but has to remain. Here's why: - * - * By sanitizing paths, we are limiting the keys that users can access. - * S3 doesn't actually have the concept of separators or wildcards, so when we sanitize it, we make a potentially - * valid key inaccessible to us. Not likely a problem, since only "strange keys" - e.g. with multiple consecutive - * slashes - would be inaccessible, but it's something to keep in mind. - * - * However, accepting every key would make some parts of this file system weird. For instance, when listing contents, - * we have to manually add the "intermediary directories" to the path result, so that it mimics a normal file system. - * This wouldn't be possible without some form of path "sanitizer". - */ - private def sanitizePath(path: String): String = { - // (CTM) Added the stripPrefix from the original file committed long time ago. - path.replaceAll(s"$fileSeparatorRegex+", fileSeparator).stripSuffix(fileSeparator).stripPrefix(fileSeparator) - } - - private def getS3ObjectResponse(file: String) = { - withCatchAmazonExceptions(file, getS3Object(file).response()) - } - - private def getS3Object(file: String) = { - val getObjectRequest = GetObjectRequest - .builder() - .bucket(bucket) - .key(file) - .build() - - withCatchAmazonExceptions(file, client.getObject(getObjectRequest)) - } - - private def withCatchAmazonExceptions[R](path: String, f: => R): R = { - try { - f - } catch { - case ex: S3Exception if ex.statusCode() == 403 => throw new PathUnauthorizedException(path, ex) - case ex: S3Exception if ex.awsErrorDetails().errorCode() == "NoSuchKey" => - throw new PathNotFoundException(path, ex) - case ex: SdkException => ex.getCause match { - case cause: ConnectException => throw new FileSystemTimeoutException(Some(path), cause) - case _ => - logger.warn("Unhandled S3 exception", ex) - throw new FileSystemUnavailableException(path, ex) - } - } - } - - override def metadata(path: String): FileSystemMetadata = { - val sanitizedPath = sanitizePath(path) - - if (hasGlob(path)) throw new PathInvalidException(path, null) - - if (sanitizedPath == "") { - return DirectoryMetadata(None) - } - val s3Object = { - try { - getS3ObjectResponse(sanitizedPath) - } catch { - case _: PathNotFoundException => - listContents(path).hasNext // If there are contents, then try as "directory", which may throw exception - return DirectoryMetadata(None) - } - } - FileMetadata( - Option(s3Object.lastModified()), - Option(s3Object.contentLength()), - Array.empty - ) - } - - override def getInputStream(file: String): InputStream = { - try { - new S3InputStream(getS3Object(sanitizePath(file))) - } catch { - case ex: PathNotFoundException => - // If there are contents, then try as "directory", which may throw exception - listContents(file) - throw new NotAFileException(file, ex) - } - } - - override def getSeekableInputStream(file: String): SeekableInputStream = { - - // The getInputStream will use getObject with uses withCatchAmazonExceptions - // which already catches and rethrows our exceptions - val is = new GenericSkippableInputStream(() => getInputStream(file)) - new DelegatingSeekableInputStream(is) { - override def getPos: Long = is.getPos - - override def seek(newPos: Long): Unit = is.seek(newPos) - } - - } - - override def listContents(path: String): Iterator[String] = { - listContentsWithMetadata(path).map(_._1) - } - - override def listContentsWithMetadata(path: String): Iterator[(String, FileSystemMetadata)] = { - val it = new ListingIterator(path).asScala - // We check that it hasNext so that we fire an error if the path doesn't exist or is not accessible. - it.hasNext - it - } - - private class ListingIterator(val path: String) extends AbstractIterator[(String, FileSystemMetadata)] { - - private val sanitizedPath = sanitizePath(path) - private val _hasGlob = hasGlob(sanitizedPath) - private val pathBeforeGlob = sanitizedPath - .split(fileSeparatorRegex) - .takeWhile(p => !hasGlob(p)) - .mkString(fileSeparator) - logger.trace(s"Path: $path, sanitizedPath: $sanitizedPath (glob: ${_hasGlob}), pathBeforeGlob: $pathBeforeGlob") - - private val matcher = new AntPathMatcher - matcher.setPathSeparator(fileSeparator) - - private var objectListing: ListObjectsV2Response = { - val s3Request = ListObjectsV2Request - .builder() - .bucket(bucket) - .prefix(pathBeforeGlob) - .build() - - withCatchAmazonExceptions(path, client.listObjectsV2(s3Request)) - } - - private var summariesIterator = objectListing.contents().iterator() - - // Keep track of the number of objects/files found. This does not include directory entries. - // It is necessary to handle the special cases where the results contain a single file. - private var filesCount = 0 - - // Processing a single object summary may yield several entries in the final result: one file - // plus zero or more directories. This queue keeps all the entries found. - private val readyEntries = new util.ArrayDeque[(String, FileSystemMetadata)]() - // Keeps track of all the directories found so far to ensure each directory is returned only once. - private val directoriesFound = new java.util.HashSet[String]() - - private def addDirectory(dir: String): Unit = { - // (CTM) Adding only sanitized directories, was having repeated folders like /path, //path, path/ - val sanitizedDir = sanitizePath(dir) - if (directoriesFound.add(sanitizedDir)) { - readyEntries.add((sanitizedDir, new DirectoryMetadata(None))) - } - } - - /** - * Analyses an object summary returned by S3 and adds 1 or more elements to the readyEntries. - * It will always add a (key, fileMetadata) entry for the object represented by the object summary. - * It will add 0 or more entries to represent the directories that are on the path of the key - * of this object. - */ - private def processObject(file: S3Object): Unit = { - val p = file.key() - val meta = FileMetadata(Some(file.lastModified()), Some(file.size()), Array.empty) - - // S3 will not list "directories" when listing objects. - // Therefore, look at all matching objects and add all intermediate "directories". - val intermediates = p.split(fileSeparatorRegex) - var sep = "" - var cur = "" - for (i <- 0 until intermediates.length - 1) { - cur += sep + intermediates(i) - addDirectory(cur) - sep = fileSeparator - } - filesCount += 1 - readyEntries.add((p, meta)) - } - - private def nextBatch(): Unit = { - if (!summariesIterator.hasNext) { - // Check if there are more objects to list - if (objectListing.isTruncated) { - val nextRequest = ListObjectsV2Request - .builder() - .bucket(bucket) - .prefix(pathBeforeGlob) - .continuationToken(objectListing.nextContinuationToken()) - .build() - - objectListing = withCatchAmazonExceptions(path, client.listObjectsV2(nextRequest)) - summariesIterator = objectListing.contents().iterator() - } - } - - while (summariesIterator.hasNext) { - val summary = summariesIterator.next() - processObject(summary) // Ensure processObject is compatible with SDK v2 summary object - } - } - - override def computeNext(): (String, FileSystemMetadata) = { - while (true) { - // Ensure there are entries available - if (readyEntries.isEmpty) { - nextBatch() - } - if (readyEntries.isEmpty) { - endOfData() - // If nothing found, throw error if not root of the bucket - if (filesCount == 0 && sanitizedPath != "") { - throw new PathNotFoundException(path, null) - } else { - // Doesn't matter what we return, we already called endOfData() in the base class, - // so it will ignore the return of this method - return null - } - } else { - val entry = readyEntries.remove() - val key = entry._1 - val md = entry._2 - var passesFilters = - if (_hasGlob) { - matcher.`match`(sanitizedPath, key) - } else { - val strippedKey = key.stripSuffix(fileSeparator) - // If we obtained a single file entry and this entry matches the path entry, return it. - if (filesCount == 1 && key == sanitizedPath && md.isInstanceOf[FileMetadata]) { - readyEntries.clear() - return entry - } - // Match as directory, i.e. equivalent to '/*' - if (sanitizedPath == "") { - // Root case is peculiar. Need to match two patterns. - matcher.`match`("*", strippedKey) || matcher.`match`(fileSeparator + "*", strippedKey) - } else { - val pattern = - if (sanitizedPath.endsWith(fileSeparator)) { - sanitizedPath + "*" - } else { - sanitizedPath + fileSeparator + "*" - } - matcher.`match`(pattern, strippedKey) - } - } - - // There is at least one match, so add root directory to set - addDirectory(pathBeforeGlob) - - if (passesFilters) { - // Filter AWS console convention where files with 0 size and ending in / actually represent "directories" - passesFilters = md match { - case FileMetadata(_, Some(0), _) if key.endsWith(fileSeparator) => - addDirectory(key) - false - case _ => true - } - } - if (passesFilters) { - return entry - } - } - } - ??? - } - } - - def testBucketAccess(): Unit = { - try { - client.listObjectsV2(ListObjectsV2Request.builder().bucket(bucket).build()) - } catch { - case ex: SdkServiceException if ex.statusCode() == 403 => throw new PathUnauthorizedException(bucket, ex) - - case ex: SdkServiceException => throw new PathNotFoundException(bucket, ex) - - case ex: SdkClientException => throw new FileSystemUnavailableException(s"s3://$bucket", ex) - } - } - - override def testAccess(path: String): Unit = { - if (listContents(path).isEmpty) { - throw new PathNotFoundException(path, null) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3InputStream.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3InputStream.scala deleted file mode 100644 index 0cdfc0a70..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3InputStream.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import software.amazon.awssdk.core.ResponseInputStream -import software.amazon.awssdk.services.s3.model.GetObjectResponse - -import java.io.IOException -import java.io.InputStream -import java.io.InterruptedIOException - -class S3InputStream(s3ObjectInputStream: ResponseInputStream[GetObjectResponse]) extends InputStream { - - @throws[IOException] - override def read(): Int = withInterruptedCheck(s3ObjectInputStream.read()) - - @throws[IOException] - override def read(b: Array[Byte]): Int = withInterruptedCheck(s3ObjectInputStream.read(b)) - - @throws[IOException] - override def read(b: Array[Byte], off: Int, len: Int): Int = - withInterruptedCheck(s3ObjectInputStream.read(b, off, len)) - - @throws[IOException] - override def readAllBytes(): Array[Byte] = withInterruptedCheck(s3ObjectInputStream.readAllBytes()) - - @throws[IOException] - override def readNBytes(b: Array[Byte], off: Int, len: Int): Int = - withInterruptedCheck(s3ObjectInputStream.readNBytes(b, off, len)) - - @throws[IOException] - override def readNBytes(len: Int): Array[Byte] = withInterruptedCheck(s3ObjectInputStream.readNBytes(len)) - - private def withInterruptedCheck[T](f: => T): T = { - try { - f - } catch { - case ex: InterruptedException => - // InterruptedException is thrown when a thread is interrupted during a blocking IO operation. - // We wrap it in an InterruptedIOException. - Thread.currentThread().interrupt() // Set the interrupt flag again - throw new InterruptedIOException(ex.getMessage) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3Path.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3Path.scala deleted file mode 100644 index 08e312b0e..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/filesystem/s3/S3Path.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import com.rawlabs.utils.sources.bytestream.api.{ByteStreamException, SeekableInputStream} -import com.rawlabs.utils.sources.filesystem.api.{FileSystemLocation, FileSystemMetadata} -import com.rawlabs.utils.core.RawSettings - -import java.io.InputStream -import java.nio.file.Path - -class S3Path private (cli: S3FileSystem, val path: String)(implicit settings: RawSettings) extends FileSystemLocation { - - val bucket: String = cli.bucket - - val region: Option[String] = cli.maybeRegion - - val maybeAccessKey: Option[String] = cli.maybeAccessKey - - val maybeSecretKey: Option[String] = cli.maybeSecretKey - - def this( - bucket: String, - maybeRegion: Option[String], - maybeAccessKey: Option[String], - maybeSecretKey: Option[String], - path: String - )(implicit settings: RawSettings) = { - this( - new S3FileSystem(bucket, maybeRegion, maybeAccessKey, maybeSecretKey), - path - ) - } - - override def testAccess(): Unit = { - cli.testAccess(path) - } - - override protected def doGetInputStream(): InputStream = { - cli.getInputStream(path) - } - - override protected def doGetSeekableInputStream(): SeekableInputStream = { - cli.getSeekableInputStream(path) - } - - override def getLocalPath(): Path = { - throw new ByteStreamException("currently not supported for S3") - } - - override def metadata(): FileSystemMetadata = { - cli.metadata(path) - } - - override protected def doLs(): Iterator[FileSystemLocation] = { - cli - .listContents(path) - .map(npath => new S3Path(cli, npath)) - } - - override protected def doLsWithMetadata(): Iterator[(FileSystemLocation, FileSystemMetadata)] = { - cli.listContentsWithMetadata(path).map { case (npath, meta) => (new S3Path(cli, npath), meta) } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcClient.scala deleted file mode 100644 index d21ffa081..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcClient.scala +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.api - -import java.io.Closeable -import java.sql.{Connection, DriverManager, ResultSet} -import java.util.concurrent.{Executors, TimeUnit} -import com.typesafe.scalalogging.StrictLogging -import com.rawlabs.utils.core.RawSettings - -import scala.collection.mutable - -// TODO (msb): Add relevant attributes. -final case class ServerMetadata() - -// TODO (msb): Add relevant attributes. -final case class SchemaMetadata() - -// TODO (msb): Add histogram info of columns? map? max/min? -final case class TableMetadata(columns: List[TableColumn], cardinality: Option[Long]) - -final case class TableColumn(name: String, t: TableColumnType) - -sealed trait TableColumnType - -final case class JdbcColumnType(jdbcType: Int, jdbcNullability: Int) extends TableColumnType - -/** - * JDBC does not have an interval type in the specification, so we add ours here. - * This means the connector must know how to extract the type and cast it to our interval type expectation. - */ -final case class NativeIntervalType(nullable: Boolean) extends TableColumnType - -case object UnsupportedColumnType extends TableColumnType - -object JdbcClient { - private val CONNECT_TIMEOUT = "raw.utils.sources.rdbms.connect-timeout" - private val READ_TIMEOUT = "raw.utils.sources.rdbms.read-timeout" - private val NETWORK_TIMEOUT = "raw.utils.sources.rdbms.network-timeout" - private val LOGIN_TIMEOUT = "raw.utils.sources.rdbms.login-timeout" -} - -abstract class JdbcClient()(implicit settings: RawSettings) extends StrictLogging { - - import JdbcClient._ - - DriverManager.setLoginTimeout(getLoginTimeout(TimeUnit.SECONDS).toInt) - - def hostname: String - - def vendor: String - - // Database is optional because some databases do not have the concept of database (Teradata and Sqlite). - def maybeDatabase: Option[String] - - // Wrap vendor-specific calls and ensure only RelationalDatabaseException is thrown. - def wrapSQLException[T](f: => T): T - - def connectionString: String - - // For connection pool: - // private lazy val datasource = { - // val connectionFactory = new DriverManagerConnectionFactory(connectionString, username.orNull, password.orNull) - // val poolableConnectionFactory = new PoolableConnectionFactory(connectionFactory, null) - // val connectionPool = new GenericObjectPool(poolableConnectionFactory) - // poolableConnectionFactory.setPool(connectionPool) - // new PoolingDataSource(connectionPool) - // } - - def maybeUsername: Option[String] - - def maybePassword: Option[String] - - def getConnection: Connection = { - // For connection pool: - // wrapSQLException(datasource.getConnection()) - wrapSQLException { - val conn = DriverManager.getConnection(connectionString, maybeUsername.orNull, maybePassword.orNull) - conn.setNetworkTimeout(Executors.newSingleThreadExecutor(), getNetworkTimeout(TimeUnit.MILLISECONDS).toInt) - conn - } - } - - def testAccess(): Unit = { - val conn = getConnection - try { - // Nothing more to do. - } finally { - wrapSQLException(conn.close()) - } - } - - def testAccess(schema: String): Unit = { - listTables(schema).close() - } - - def testAccess(maybeSchema: Option[String], table: String): Unit = { - tableMetadata(maybeSchema, table) - } - - def listSchemas: Iterator[String] with Closeable = { - new JdbcRelationalDatabaseSchemas - } - - def listTables(schema: String): Iterator[String] with Closeable = { - new JdbcRelationalDatabaseTables(schema) - } - - def serverMetadata: ServerMetadata = { - ServerMetadata() - } - - def schemaMetadata(schema: String): SchemaMetadata = { - SchemaMetadata() - } - - def tableMetadata(maybeSchema: Option[String], table: String): TableMetadata = { - val conn = getConnection - try { - val res = getTableMetadata(conn, maybeDatabase, maybeSchema, table) - try { - getTableTypeFromTableMetadata(res) - } finally { - wrapSQLException(res.close()) - } - } finally { - wrapSQLException(conn.close()) - } - } - - protected def getTableMetadata( - conn: Connection, - maybeDatabase: Option[String], - maybeSchema: Option[String], - table: String - ): ResultSet = { - wrapSQLException { - val metaData = conn.getMetaData - metaData.getColumns( - maybeDatabase.orNull, - maybeSchema.orNull, - table, - null // Read all columns - ) - } - } - - // Infer schema from table. - // Skip silently fields we do not understand (except if can't understand any field, in which case, fire an error.) - protected def getTableTypeFromTableMetadata(res: ResultSet): TableMetadata = { - val columns = mutable.ListBuffer[TableColumn]() - while (wrapSQLException(res.next)) { - val columnName = wrapSQLException(res.getString("COLUMN_NAME")) - val columnType = wrapSQLException(res.getInt("DATA_TYPE")) - val nullability = wrapSQLException(res.getInt("NULLABLE")) - columns += TableColumn(columnName, JdbcColumnType(columnType, nullability)) - } - TableMetadata(columns.to, None) - } - - final protected def getConnectTimeout(timeUnit: TimeUnit): Long = settings.getDuration(CONNECT_TIMEOUT, timeUnit) - - final protected def getReadTimeout(timeUnit: TimeUnit): Long = settings.getDuration(READ_TIMEOUT, timeUnit) - - final protected def getNetworkTimeout(timeUnit: TimeUnit): Long = settings.getDuration(NETWORK_TIMEOUT, timeUnit) - - final protected def getLoginTimeout(timeUnit: TimeUnit): Long = settings.getDuration(LOGIN_TIMEOUT, timeUnit) - - class JdbcRelationalDatabaseSchemas extends Iterator[String] with Closeable { - - private val conn = getConnection - - private val metadata = wrapSQLException(conn.getMetaData) - - private val rs = wrapSQLException { - metadata.getSchemas( - null, // TODO (msb): Should restrict to catalog/database in server connection string - "%" - ) - } - - override def hasNext: Boolean = { - val hasNext = wrapSQLException(rs.next()) - if (!hasNext) { - wrapSQLException(conn.close()) - } - hasNext - } - - override def next(): String = { - wrapSQLException(rs.getString("TABLE_SCHEM")) - } - - override def close(): Unit = { - wrapSQLException(conn.close()) - } - } - - class JdbcRelationalDatabaseTables(schema: String) extends Iterator[String] with Closeable { - - private val conn = getConnection - - private val metadata = wrapSQLException(conn.getMetaData) - - private val rs = wrapSQLException { - metadata.getTables( - null, // TODO (msb): Should restrict to catalog/database in server connection string - schema, - "%", - null - ) - } - - override def hasNext: Boolean = { - val hasNext = wrapSQLException(rs.next()) - if (!hasNext) { - wrapSQLException(conn.close()) - } - hasNext - } - - override def next(): String = { - wrapSQLException(rs.getString("TABLE_NAME")) - } - - override def close(): Unit = { - wrapSQLException(conn.close()) - } - - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcLocationExceptions.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcLocationExceptions.scala deleted file mode 100644 index 6bc7249d8..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcLocationExceptions.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.api - -import com.rawlabs.utils.sources.api.LocationException - -class JdbcLocationException(message: String, cause: Throwable = null) extends LocationException(message, cause) - -class AuthenticationFailedException(cause: Throwable) extends JdbcLocationException("authentication failed", cause) - -class RDBMSUnknownHostException(hostname: String, cause: Throwable) - extends JdbcLocationException(s"unknown host: $hostname", cause) - -class RDBMSConnectTimeoutException(hostname: String, cause: Throwable) - extends JdbcLocationException(s"connect timed out: $hostname", cause) - -class RDBMSConnectErrorException(hostname: String, cause: Throwable) - extends JdbcLocationException(s"error connecting to database: $hostname", cause) diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcSchemaLocation.scala deleted file mode 100644 index 9fc506388..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcSchemaLocation.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.api - -import com.rawlabs.utils.sources.api.Location - -import java.io.Closeable - -abstract class JdbcSchemaLocation( - val jdbcClient: JdbcClient, - maybeSchema: Option[String] -) extends Location { - - def listTables(): Iterator[JdbcTableLocation] with Closeable - - final override def testAccess(): Unit = { - maybeSchema match { - case Some(s) => jdbcClient.testAccess(s) - case None => jdbcClient.testAccess() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcServerLocation.scala deleted file mode 100644 index 2e7f61862..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcServerLocation.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.api - -import java.sql.Connection -import com.rawlabs.utils.sources.api.Location - -import java.io.Closeable - -abstract class JdbcServerLocation(val jdbcClient: JdbcClient) extends Location { - - def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable - - final def getJdbcConnection(): Connection = { - jdbcClient.getConnection - } - - final override def testAccess(): Unit = { - jdbcClient.testAccess() - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcTableLocation.scala deleted file mode 100644 index 12a32a2bd..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/api/JdbcTableLocation.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.api - -import com.rawlabs.utils.sources.api.Location - -abstract class JdbcTableLocation( - val jdbcClient: JdbcClient, - maybeSchema: Option[String], - table: String -) extends Location { - - final override def testAccess(): Unit = { - jdbcClient.testAccess(maybeSchema, table) - } - - final def getType(): TableMetadata = { - jdbcClient.tableMetadata(maybeSchema, table) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlClient.scala deleted file mode 100644 index 1e50eb6a2..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlClient.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.mysql - -import com.mysql.cj.exceptions.CJCommunicationsException - -import java.util.concurrent.TimeUnit -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -import java.net.{SocketTimeoutException, UnknownHostException} -import java.sql.SQLException -import scala.util.control.NonFatal - -class MySqlClient(val hostname: String, val port: Int, dbName: String, username: String, password: String)( - implicit settings: RawSettings -) extends JdbcClient { - - Class.forName("com.mysql.cj.jdbc.Driver") - - private val connectTimeout = getConnectTimeout(TimeUnit.MILLISECONDS) - private val readTimeout = getReadTimeout(TimeUnit.MILLISECONDS) - - override val vendor: String = "mysql" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = { - s"jdbc:$vendor://$hostname:$port/$dbName?connectTimeout=$connectTimeout&socketTimeout=$readTimeout" - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - case ex: SQLException => ex.getCause match { - case _: UnknownHostException => throw new RDBMSUnknownHostException(hostname, ex) - case _: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, ex) - case ex: InterruptedException => throw ex - case _ => - // Some more codes here (DB2 Universal Messages manual), various databases have varying degrees of compliance - //https://www.ibm.com/support/knowledgecenter/en/SS6NHC/com.ibm.swg.im.dashdb.messages.doc/doc/rdb2stt.html - if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("08")) { - ex.getCause match { - case ce: CJCommunicationsException => ce.getCause match { - case cause: UnknownHostException => throw new RDBMSUnknownHostException(hostname, cause) - case cause: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, cause) - case _ => throw new RDBMSConnectErrorException(hostname, ex) - } - case _ => throw new RDBMSConnectErrorException(hostname, ex) - } - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlSchemaLocation.scala deleted file mode 100644 index 479150321..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlSchemaLocation.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.mysql - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class MySqlSchemaLocation private (cli: MySqlClient) extends JdbcSchemaLocation(cli, None) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this(host: String, port: Int, dbName: String, username: String, password: String)( - implicit settings: RawSettings - ) = { - this(new MySqlClient(host, port, dbName, username, password)) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables("") // Schema is ignored for MySQL. - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new MySqlTableLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlServerLocation.scala deleted file mode 100644 index b8cd1d10b..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlServerLocation.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.mysql - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class MySqlServerLocation( - val host: String, - val port: Int, - val dbName: String, - val username: String, - val password: String -)( - implicit settings: RawSettings -) extends JdbcServerLocation(new MySqlClient(host, port, dbName, username, password)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - throw new JdbcLocationException("no schemas in mysql") - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlTableLocation.scala deleted file mode 100644 index 0ce100a46..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/mysql/MySqlTableLocation.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.mysql - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -class MySqlTableLocation(cli: MySqlClient, val table: String) extends JdbcTableLocation(cli, None, table) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this(host: String, port: Int, dbName: String, username: String, password: String, tableName: String)( - implicit settings: RawSettings - ) = { - this( - new MySqlClient(host, port, dbName, username, password), - tableName - ) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleClient.scala deleted file mode 100644 index 3c3814eb6..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleClient.scala +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.oracle - -import oracle.net.ns.NetException -import com.rawlabs.utils.core.RawSettings -import com.rawlabs.utils.sources.jdbc.api._ - -import java.io.Closeable -import java.net.{ConnectException, SocketTimeoutException, UnknownHostException} -import java.sql.{Connection, DriverManager, SQLException} -import java.sql.Types._ -import scala.collection.mutable -import scala.util.matching.Regex -import java.util.Properties -import java.util.concurrent.{Executors, TimeUnit} -import scala.util.control.NonFatal - -object OracleClient { - private val TIMESTAMP_REGEX: Regex = """timestamp\(\d+\)""".r - private val INTERVAL1_REGEX: Regex = """interval year\(\d+\) to month""".r - private val INTERVAL2_REGEX: Regex = """interval day\(\d+\) to second\(\d+\)""".r -} - -class OracleClient(val hostname: String, val port: Int, dbName: String, username: String, password: String)( - implicit settings: RawSettings -) extends JdbcClient { - - import OracleClient._ - - Class.forName("oracle.jdbc.OracleDriver") - - override val vendor: String = "oracle" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = { - s"jdbc:$vendor:thin:@$hostname:$port:$dbName" - } - - override def getConnection: Connection = { - // For connection pool: - // wrapSQLException(datasource.getConnection()) - wrapSQLException { - val props = new Properties() - maybeUsername.foreach(user => props.setProperty("user", user)) - maybePassword.foreach(passwd => props.setProperty("password", passwd)) - - // This property is defined in interface oracle.jdbc.OracleConnection.CONNECTION_PROPERTY_THIN_NET_CONNECT_TIMEOUT - // see https://docs.oracle.com/cd/E18283_01/appdev.112/e13995/oracle/jdbc/OracleConnection.html#CONNECTION_PROPERTY_THIN_READ_TIMEOUT - props.setProperty("oracle.net.CONNECT_TIMEOUT", getConnectTimeout(TimeUnit.MILLISECONDS).toString) - // oracle.jdbc.OracleConnection.CONNECTION_PROPERTY_THIN_READ_TIMEOUT - props.setProperty("oracle.jdbc.ReadTimeout", getReadTimeout(TimeUnit.MILLISECONDS).toString) - val conn = DriverManager.getConnection(connectionString, props) - conn.setNetworkTimeout(Executors.newSingleThreadExecutor(), getNetworkTimeout(TimeUnit.MILLISECONDS).toInt) - conn - } - } - - override def listTables(schema: String): Iterator[String] with Closeable = { - // Compensate for ORACLE behaviour that requires the schema to be in upper case if it is not quoted. - val sch = if (schema.startsWith("\"")) schema else schema.toUpperCase() - super.listTables(sch) - } - - override def tableMetadata(maybeSchema: Option[String], table: String): TableMetadata = { - val schema = maybeSchema.get - val conn = getConnection - try { - val stmt = wrapSQLException(conn.createStatement()) - try { - val rs = wrapSQLException( - stmt.executeQuery(s"""SELECT column_name, data_type, nullable, data_length, data_precision, data_scale - | FROM ALL_TAB_COLUMNS - | WHERE UPPER(table_name) = '${table.toUpperCase}' AND UPPER(owner) = '${schema.toUpperCase}' - | ORDER BY column_id""".stripMargin) - ) - val columns = mutable.ListBuffer[TableColumn]() - var nFields = 0 - while (wrapSQLException(rs.next())) { - nFields += 1 - val nullable = wrapSQLException(rs.getString("nullable")) == "Y" - val columnName = wrapSQLException(rs.getString("column_name")) - val typeName = wrapSQLException(rs.getString("data_type")) - val columnType = typeName.toLowerCase match { - case "char" | "nchar" | "varchar2" | "nvarchar2" => JdbcColumnType(VARCHAR, if (nullable) 1 else 0) - case "float" => - // float is a sub-type of number but precision defined in bits instead of digits - // so going for decimal, even though might be fair to go for double - JdbcColumnType(DECIMAL, if (nullable) 1 else 0) - case "number" => - val precision = wrapSQLException(rs.getInt("data_precision")) - val precisionNull = wrapSQLException(rs.wasNull()) - val scale = wrapSQLException(rs.getInt("data_scale")) - val t = - if (scale != 0) DECIMAL - else if (precisionNull) INTEGER - else if (precision < 5) SMALLINT - else if (precision < 10) INTEGER - else if (precision < 20) BIGINT - else DECIMAL - JdbcColumnType(t, if (nullable) 1 else 0) - case "date" => JdbcColumnType(DATE, if (nullable) 1 else 0) - case "long" => JdbcColumnType(INTEGER, if (nullable) 1 else 0) - case "binary_float" => JdbcColumnType(REAL, if (nullable) 1 else 0) - case "binary_double" => JdbcColumnType(DOUBLE, if (nullable) 1 else 0) - case TIMESTAMP_REGEX() => JdbcColumnType(TIMESTAMP, if (nullable) 1 else 0) - case INTERVAL1_REGEX() | INTERVAL2_REGEX() => NativeIntervalType(nullable) - case "raw" => JdbcColumnType(BLOB, if (nullable) 1 else 0) - case "blob" => JdbcColumnType(BLOB, if (nullable) 1 else 0) - case _ => UnsupportedColumnType - } - columns += TableColumn(columnName, columnType) - } - TableMetadata(columns.to, None) - } finally { - stmt.close() - } - } finally { - conn.close() - } - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - case ex: SQLException => ex.getCause match { - case inner: NetException => inner.getCause match { - case _: UnknownHostException => throw new RDBMSUnknownHostException(hostname, ex) - case _: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, ex) - case _: ConnectException => throw new RDBMSConnectErrorException(hostname, ex) - } - case ex: InterruptedException => throw ex - case _ => - // TODO (ctm): Find documentation of Oracle error codes and check if it is best to map ORA- here. - if (ex.getErrorCode == 1017) { - // ORA-01017: invalid username/password; logon denied - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("08")) { - throw new RDBMSConnectErrorException(hostname, ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleSchemaLocation.scala deleted file mode 100644 index 38be482dc..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleSchemaLocation.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.oracle - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class OracleSchemaLocation( - cli: OracleClient, - val schema: String -) extends JdbcSchemaLocation(cli, Some(schema)) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this(host: String, port: Int, dbName: String, username: String, password: String, schema: String)( - implicit settings: RawSettings - ) = { - this( - new OracleClient(host, port, dbName, username, password), - schema - ) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables(schema) - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new OracleTableLocation(cli, schema, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleServerLocation.scala deleted file mode 100644 index b34b1256c..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleServerLocation.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.oracle - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class OracleServerLocation( - val host: String, - val port: Int, - val dbName: String, - val username: String, - val password: String -)( - implicit settings: RawSettings -) extends JdbcServerLocation(new OracleClient(host, port, dbName, username, password)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - new Iterator[JdbcSchemaLocation] with Closeable { - private val cli = jdbcClient.asInstanceOf[OracleClient] - private val it = cli.listSchemas - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcSchemaLocation = { - new OracleSchemaLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleTableLocation.scala deleted file mode 100644 index 991fe9dc9..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/oracle/OracleTableLocation.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.oracle - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -class OracleTableLocation( - cli: OracleClient, - val schema: String, - val table: String -) extends JdbcTableLocation(cli, Some(schema.toUpperCase), table.toUpperCase) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this( - host: String, - port: Int, - dbName: String, - username: String, - password: String, - schema: String, - tableName: String - )(implicit settings: RawSettings) = { - this( - new OracleClient(host, port, dbName, username, password), - schema, - tableName - ) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlClient.scala deleted file mode 100644 index 035fa58c3..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlClient.scala +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.pgsql - -import org.postgresql.util.PSQLException - -import java.util.concurrent.TimeUnit -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -import java.net.{SocketTimeoutException, UnknownHostException} -import scala.util.control.NonFatal - -class PostgresqlClient(val hostname: String, val port: Int, dbName: String, username: String, password: String)( - implicit settings: RawSettings -) extends JdbcClient { - - Class.forName("org.postgresql.Driver") - - // Postgres expects the connection timeout in seconds - private val connectTimeout = getConnectTimeout(TimeUnit.SECONDS) - private val readTimeout = getReadTimeout(TimeUnit.SECONDS) - - override val vendor: String = "postgresql" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = { - s"jdbc:$vendor://$hostname:$port/$dbName?connectTimeout=$connectTimeout&socketTimeout=$readTimeout" - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - case ex: PSQLException => ex.getCause match { - case _: UnknownHostException => throw new RDBMSUnknownHostException(hostname, ex) - case _: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, ex) - case ex: InterruptedException => throw ex - case _ => - // Some more codes here (DB2 Universal Messages manual), various databases have varying degrees of compliance - //https://www.ibm.com/support/knowledgecenter/en/SS6NHC/com.ibm.swg.im.dashdb.messages.doc/doc/rdb2stt.html - if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("08")) { - throw new RDBMSConnectErrorException(hostname, ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlSchemaLocation.scala deleted file mode 100644 index a56bb63fe..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlSchemaLocation.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.pgsql - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class PostgresqlSchemaLocation( - cli: PostgresqlClient, - val schema: String -) extends JdbcSchemaLocation(cli, Some(schema)) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this(host: String, port: Int, dbName: String, username: String, password: String, schema: String)( - implicit settings: RawSettings - ) = { - this( - new PostgresqlClient(host, port, dbName, username, password), - schema - ) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables(schema) - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new PostgresqlTableLocation(cli, schema, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlServerLocation.scala deleted file mode 100644 index 4678c9a45..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlServerLocation.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.pgsql - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class PostgresqlServerLocation( - val host: String, - val port: Int, - val dbName: String, - val username: String, - val password: String -)( - implicit settings: RawSettings -) extends JdbcServerLocation(new PostgresqlClient(host, port, dbName, username, password)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - new Iterator[JdbcSchemaLocation] with Closeable { - private val cli = jdbcClient.asInstanceOf[PostgresqlClient] - private val it = cli.listSchemas - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcSchemaLocation = { - new PostgresqlSchemaLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlTableLocation.scala deleted file mode 100644 index d0199190d..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/pgsql/PostgresqlTableLocation.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.pgsql - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -class PostgresqlTableLocation( - cli: PostgresqlClient, - val schema: String, - val table: String -) extends JdbcTableLocation(cli, Some(schema), table) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this( - host: String, - port: Int, - dbName: String, - username: String, - password: String, - schema: String, - tableName: String - )(implicit settings: RawSettings) = { - this( - new PostgresqlClient(host, port, dbName, username, password), - schema, - tableName - ) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeClient.scala deleted file mode 100644 index 1aad32b68..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeClient.scala +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.snowflake - -import net.snowflake.client.jdbc.SnowflakeSQLException -import net.snowflake.client.jdbc.internal.snowflake.common.core.SqlState -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -import java.sql.{Connection, DriverManager} -import java.util.concurrent.{Executors, TimeUnit} -import java.util.{Properties, TimeZone} -import scala.util.control.NonFatal - -class SnowflakeClient( - dbName: String, - username: String, - password: String, - val accountIdentifier: String, - val parameters: Map[String, String] -)( - implicit settings: RawSettings -) extends JdbcClient { - - Class.forName("net.snowflake.client.jdbc.SnowflakeDriver") - - override val hostname: String = s"$accountIdentifier.snowflakecomputing.com" - - override val vendor: String = "snowflake" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = s"jdbc:snowflake://$hostname/" - - override def getConnection: Connection = { - wrapSQLException { - val params = parameters ++ Seq("db" -> dbName) - val props = new Properties() - maybeUsername.foreach(user => props.setProperty("user", user)) - maybePassword.foreach(passwd => props.setProperty("password", passwd)) - props.setProperty("JDBC_QUERY_RESULT_FORMAT", "JSON") - - for ((key, value) <- params) props.setProperty(key, value) - - // (CTM) I am having issues with sql.Time with timezones. I am seeing a shift if the timezone was not set to UTC. - TimeZone.setDefault(TimeZone.getTimeZone("UTC")) - logger.info(s"current timezone ${TimeZone.getDefault}") - val conn = DriverManager.getConnection(connectionString, props) - conn.setNetworkTimeout(Executors.newSingleThreadExecutor(), getNetworkTimeout(TimeUnit.MILLISECONDS).toInt) - conn - } - } - - override def tableMetadata(maybeSchema: Option[String], table: String): TableMetadata = { - val conn = getConnection - try { - val res = getTableMetadata(conn, None, maybeSchema, table) - try { - getTableTypeFromTableMetadata(res) - } finally { - wrapSQLException(res.close()) - } - } finally { - wrapSQLException(conn.close()) - } - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - case ex: JdbcLocationException => throw ex - case ex: SnowflakeSQLException => - val e = ex - e.getSQLState match { - case SqlState.INVALID_PASSWORD | SqlState.INVALID_AUTHORIZATION_SPECIFICATION => - throw new AuthenticationFailedException(ex) - case SqlState.IO_ERROR => throw new JdbcLocationException( - s"IO error connecting to $accountIdentifier: ${ex.getMessage}", - ex - ) - case SqlState.SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION => throw new JdbcLocationException( - s"unable to establish connection to $accountIdentifier: ${ex.getMessage}", - ex - ) - case SqlState.CONNECTION_EXCEPTION | SqlState.CONNECTION_FAILURE => - throw new RDBMSConnectErrorException(hostname, ex) - case SqlState.SYSTEM_ERROR => throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - case SqlState.FEATURE_NOT_SUPPORTED => - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - case _ => - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeSchemaLocation.scala deleted file mode 100644 index 987cadef0..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeSchemaLocation.scala +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.snowflake - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class SnowflakeSchemaLocation( - cli: SnowflakeClient, - val schema: String -) extends JdbcSchemaLocation(cli, Some(schema)) { - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - val accountIdentifier: String = cli.accountIdentifier - - val parameters: Map[String, String] = cli.parameters - - def this( - dbName: String, - username: String, - password: String, - accountIdentifier: String, - parameters: Map[String, String], - schema: String - )(implicit settings: RawSettings) = { - this( - new SnowflakeClient( - dbName, - username, - password, - accountIdentifier, - parameters - ), - schema - ) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables(schema) - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new SnowflakeTableLocation(cli, schema, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeServerLocation.scala deleted file mode 100644 index 78b9a8d7f..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeServerLocation.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.snowflake - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings -import scala.collection.JavaConverters._ - -class SnowflakeServerLocation( - val dbName: String, - val username: String, - val password: String, - val accountIdentifier: String, - val parameters: Map[String, String] -)(implicit settings: RawSettings) - extends JdbcServerLocation( - new SnowflakeClient( - dbName, - username, - password, - accountIdentifier, - parameters - ) - ) { - - // Constructor for Java. - def this( - dbName: String, - username: String, - password: String, - accountIdentifier: String, - parameters: java.util.Map[String, String], - settings: RawSettings - ) = this(dbName, username, password, accountIdentifier, parameters.asScala.toMap)(settings) - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - new Iterator[JdbcSchemaLocation] with Closeable { - private val cli = jdbcClient.asInstanceOf[SnowflakeClient] - private val it = cli.listSchemas - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcSchemaLocation = { - new SnowflakeSchemaLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeTableLocation.scala deleted file mode 100644 index 860c961ba..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/snowflake/SnowflakeTableLocation.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.snowflake - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -import scala.collection.JavaConverters._ - -class SnowflakeTableLocation( - cli: SnowflakeClient, - val schema: String, - val table: String -) extends JdbcTableLocation(cli, Some(schema.toUpperCase), table.toUpperCase) { - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - val accountIdentifier: String = cli.accountIdentifier - - val parameters: Map[String, String] = cli.parameters - - def this( - dbName: String, - username: String, - password: String, - accountIdentifier: String, - parameters: Map[String, String], - schema: String, - tableName: String - )(implicit settings: RawSettings) = { - this( - new SnowflakeClient( - dbName, - username, - password, - accountIdentifier, - parameters - ), - schema, - tableName - ) - } - - def this( - dbName: String, - username: String, - password: String, - accountIdentifier: String, - parameters: java.util.Map[String, String], - schema: String, - tableName: String - )(implicit settings: RawSettings) = { - this( - new SnowflakeClient( - dbName, - username, - password, - accountIdentifier, - parameters.asScala.toMap - ), - schema, - tableName - ) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteClient.scala deleted file mode 100644 index c3ff6e3d0..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteClient.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlite - -import com.rawlabs.utils.sources.api.LocationException -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -import java.nio.file.{InvalidPathException, Path, Paths} -import java.sql.SQLException -import scala.util.control.NonFatal - -class SqliteClient(val path: String)(implicit settings: RawSettings) extends JdbcClient { - - private val localPath = - try { - Paths.get(path) - } catch { - case _: InvalidPathException => throw new LocationException("invalid path") - } - - // The JDBC driver requires a local path. - // Opted to validate here instead of having constructor take a Path and force all the callers to valid the path, - // so that we have more coherent error handling. - // This way, if there is any error with the "connection string" (the path in this case), we throw early an exception. - val sqlitePath: Path = - try { - localPath.toAbsolutePath - } catch { - case ex: InvalidPathException => throw new JdbcLocationException(s"invalid local path: $localPath", ex) - } - - Class.forName("org.sqlite.JDBC") - - override val vendor: String = "sqlite" - - override val maybeUsername: Option[String] = None - - override val maybePassword: Option[String] = None - - override val maybeDatabase: Option[String] = None - - override val connectionString: String = s"jdbc:$vendor:$sqlitePath" - - override val hostname: String = localPath.toAbsolutePath.toString - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - // TODO (ctm): check Sqlite exceptions - case ex: SQLException => ex.getCause match { - case ex: InterruptedException => throw ex - case _ => - // Some more codes here (DB2 Universal Messages manual), various databases have varying degrees of compliance - //https://www.ibm.com/support/knowledgecenter/en/SS6NHC/com.ibm.swg.im.dashdb.messages.doc/doc/rdb2stt.html - if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteSchemaLocation.scala deleted file mode 100644 index 9111f5d60..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteSchemaLocation.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlite - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class SqliteSchemaLocation( - cli: SqliteClient -) extends JdbcSchemaLocation(cli, None) { - - val path: String = cli.path - - def this(path: String)(implicit settings: RawSettings) = { - this(new SqliteClient(path)) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables("") - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new SqliteTableLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteServerLocation.scala deleted file mode 100644 index 4b3ddab13..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteServerLocation.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlite - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class SqliteServerLocation(val path: String)( - implicit settings: RawSettings -) extends JdbcServerLocation(new SqliteClient(path)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - throw new JdbcLocationException("no schemas in sqlite") - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteTableLocation.scala deleted file mode 100644 index a65937a4e..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlite/SqliteTableLocation.scala +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlite - -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class SqliteTableLocation( - cli: SqliteClient, - val table: String -) extends JdbcTableLocation(cli, None, table) { - - val path: String = cli.path - - def this(path: String, table: String)(implicit settings: RawSettings) = { - this(new SqliteClient(path), table) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerClient.scala deleted file mode 100644 index b07161055..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerClient.scala +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlserver - -import com.microsoft.sqlserver.jdbc.SQLServerException - -import java.util.concurrent.TimeUnit -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -import java.net.{SocketTimeoutException, UnknownHostException} -import scala.util.control.NonFatal - -class SqlServerClient(val hostname: String, val port: Int, dbName: String, username: String, password: String)( - implicit settings: RawSettings -) extends JdbcClient { - - Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver") - - // Note that different properties require the time to be specified in different timescales. - // The settings used below are described in: - // https://docs.microsoft.com/en-us/sql/connect/jdbc/setting-the-connection-properties?view=sql-server-2017 - private val connectTimeout = getConnectTimeout(TimeUnit.SECONDS) - private val readTimeout = getReadTimeout(TimeUnit.MILLISECONDS) - - override val vendor: String = "sqlserver" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = { - // Explicit sendTimeAsDatetime=false to support time JDBC parameters (https://github.com/microsoft/mssql-jdbc/issues/559) - s"jdbc:$vendor://$hostname:$port;databaseName=$dbName;loginTimeout=$connectTimeout;socketTimeout=$readTimeout;sendTimeAsDatetime=false" - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - // TODO: check SqlServer puts exceptions, codes etc. - case ex: SQLServerException => ex.getCause match { - case _: UnknownHostException => throw new RDBMSUnknownHostException(hostname, ex) - case _: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, ex) - case ex: InterruptedException => throw ex - case _ => - // Some more codes here (DB2 Universal Messages manual), various databases have varying degrees of compliance - //https://www.ibm.com/support/knowledgecenter/en/SS6NHC/com.ibm.swg.im.dashdb.messages.doc/doc/rdb2stt.html - if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("08")) { - throw new RDBMSConnectErrorException(hostname, ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerSchemaLocation.scala deleted file mode 100644 index 9bfff4924..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerSchemaLocation.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlserver - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class SqlServerSchemaLocation( - cli: SqlServerClient, - val schema: String -) extends JdbcSchemaLocation(cli, Some(schema)) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this(host: String, port: Int, dbName: String, username: String, password: String, schema: String)( - implicit settings: RawSettings - ) = { - this( - new SqlServerClient(host, port, dbName, username, password), - schema - ) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables(schema) - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new SqlServerTableLocation(cli, schema, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerServerLocation.scala deleted file mode 100644 index 180ae1e4d..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerServerLocation.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlserver - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api._ -import com.rawlabs.utils.core.RawSettings - -class SqlServerServerLocation( - val host: String, - val port: Int, - val dbName: String, - val username: String, - val password: String -)( - implicit settings: RawSettings -) extends JdbcServerLocation(new SqlServerClient(host, port, dbName, username, password)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - new Iterator[JdbcSchemaLocation] with Closeable { - private val cli = jdbcClient.asInstanceOf[SqlServerClient] - private val it = cli.listSchemas - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcSchemaLocation = { - new SqlServerSchemaLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerTableLocation.scala deleted file mode 100644 index 36f650766..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/sqlserver/SqlServerTableLocation.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.sqlserver - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -class SqlServerTableLocation( - cli: SqlServerClient, - val schema: String, - val table: String -) extends JdbcTableLocation(cli, Some(schema), table) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - def this( - host: String, - port: Int, - dbName: String, - username: String, - password: String, - schema: String, - tableName: String - )(implicit settings: RawSettings) = { - this( - new SqlServerClient(host, port, dbName, username, password), - schema, - tableName - ) - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataClient.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataClient.scala deleted file mode 100644 index 12ece0718..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataClient.scala +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.teradata - -import com.rawlabs.utils.core.RawSettings -import com.rawlabs.utils.sources.jdbc.api._ - -import java.net.{NoRouteToHostException, SocketTimeoutException, UnknownHostException} -import java.sql.{Connection, DriverManager, ResultSetMetaData, SQLException} -import scala.collection.mutable -import scala.util.control.NonFatal - -class TeradataClient( - val hostname: String, - val port: Int, - dbName: String, - username: String, - password: String, - val parameters: Map[String, String] -)( - implicit settings: RawSettings -) extends JdbcClient { - - Class.forName("com.teradata.jdbc.TeraDriver") - - override val vendor: String = "teradata" - - override val maybeDatabase: Option[String] = Some(dbName) - - override val maybeUsername: Option[String] = Some(username) - - override val maybePassword: Option[String] = Some(password) - - override val connectionString: String = { - val params = parameters + ("DBS_PORT" -> port.toString) - s"jdbc:$vendor://$hostname/${params.map(p => s"${p._1}=${p._2}").mkString(",")}" - } - - override def getConnection: Connection = { - wrapSQLException { - // Teradata jdbc connections does not have the setNetworkTimeout - DriverManager.getConnection(connectionString, maybeUsername.orNull, maybePassword.orNull) - } - } - - override def tableMetadata(maybeSchema: Option[String], table: String): TableMetadata = { - val schema = maybeSchema.get - val conn = getConnection - try { - val query = s"""select top 1 * from "$schema"."$table" ;""" - val stmt = wrapSQLException(conn.prepareStatement(query)) - val meta = wrapSQLException(stmt.getMetaData) - wrapSQLException(stmt.cancel()) - getTableTypeFromResultSetMetadata(meta) - } finally { - conn.close() - } - } - - private def getTableTypeFromResultSetMetadata(res: ResultSetMetaData): TableMetadata = { - val columns = mutable.ListBuffer[TableColumn]() - (1 to wrapSQLException(res.getColumnCount)).foreach { n => - val columnName = wrapSQLException(res.getColumnName(n)) - val columnType = wrapSQLException(res.getColumnType(n)) - val nullability = wrapSQLException(res.isNullable(n)) - columns += TableColumn(columnName, JdbcColumnType(columnType, nullability)) - } - TableMetadata(columns.to, None) - } - - override def wrapSQLException[T](f: => T): T = { - try { - f - } catch { - // TODO (ctm): check Teradata exceptions - case ex: SQLException => ex.getCause match { - case _: UnknownHostException | _: NoRouteToHostException => - // (ctm) In the Python CLI tests, the NoRouteToHostException also happens in the test with bad port - // RuntimeErrorsSourceTeradataTestCase.test_register_bad_port_timeout the host is correct but the port is wrong. - throw new RDBMSUnknownHostException(hostname, ex) - case _: SocketTimeoutException => throw new RDBMSConnectTimeoutException(hostname, ex) - case ex: InterruptedException => throw ex - case _ => - // Some more codes here (DB2 Universal Messages manual), various databases have varying degrees of compliance - //https://www.ibm.com/support/knowledgecenter/en/SS6NHC/com.ibm.swg.im.dashdb.messages.doc/doc/rdb2stt.html - if (ex.getSQLState != null && ex.getSQLState.startsWith("28")) { - throw new AuthenticationFailedException(ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("08")) { - throw new RDBMSConnectErrorException(hostname, ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("58")) { - throw new JdbcLocationException(s"database system error: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("0A")) { - throw new JdbcLocationException(s"database feature not supported: ${ex.getMessage}", ex) - } else if (ex.getSQLState != null && ex.getSQLState.startsWith("2E")) { - throw new JdbcLocationException(s"database invalid connection name: ${ex.getMessage}", ex) - } else { - logger.warn(s"Unexpected SQL error (code: ${ex.getErrorCode}; state: ${ex.getSQLState}).", ex) - throw new JdbcLocationException(ex.getMessage, ex) - } - } - case ex: JdbcLocationException => throw ex - case ex: InterruptedException => throw ex - case NonFatal(t) => throw new JdbcLocationException(s"unexpected database error", t) - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataSchemaLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataSchemaLocation.scala deleted file mode 100644 index 7c077e8bc..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataSchemaLocation.scala +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.teradata - -import java.io.Closeable -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcTableLocation} -import com.rawlabs.utils.core.RawSettings - -class TeradataSchemaLocation( - cli: TeradataClient, - val schema: String -) extends JdbcSchemaLocation(cli, Some(schema)) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - val parameters: Map[String, String] = cli.parameters - - def this( - host: String, - port: Int, - dbName: String, - username: String, - password: String, - schema: String, - parameters: Map[String, String] - )(implicit settings: RawSettings) = { - this( - new TeradataClient(host, port, dbName, username, password, parameters), - schema - ) - } - - override def listTables(): Iterator[JdbcTableLocation] with Closeable = { - new Iterator[JdbcTableLocation] with Closeable { - private val it = cli.listTables(schema) - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcTableLocation = { - new TeradataTableLocation(cli, schema, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataServerLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataServerLocation.scala deleted file mode 100644 index 3c382b7f0..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataServerLocation.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.teradata - -import com.rawlabs.utils.sources.jdbc.api.{JdbcSchemaLocation, JdbcServerLocation} -import com.rawlabs.utils.core.RawSettings - -import java.io.Closeable - -class TeradataServerLocation( - val host: String, - val port: Int, - val dbName: String, - val username: String, - val password: String, - val parameters: Map[String, String] -)( - implicit settings: RawSettings -) extends JdbcServerLocation(new TeradataClient(host, port, dbName, username, password, parameters)) { - - override def listSchemas(): Iterator[JdbcSchemaLocation] with Closeable = { - new Iterator[JdbcSchemaLocation] with Closeable { - private val cli = jdbcClient.asInstanceOf[TeradataClient] - private val it = cli.listSchemas - - override def hasNext: Boolean = it.hasNext - - override def next(): JdbcSchemaLocation = { - new TeradataSchemaLocation(cli, it.next()) - } - - override def close(): Unit = it.close() - } - } - -} diff --git a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataTableLocation.scala b/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataTableLocation.scala deleted file mode 100644 index 3786f9975..000000000 --- a/utils-sources/src/main/scala/com/rawlabs/utils/sources/jdbc/teradata/TeradataTableLocation.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.jdbc.teradata - -import com.rawlabs.utils.sources.jdbc.api.JdbcTableLocation -import com.rawlabs.utils.core.RawSettings - -class TeradataTableLocation( - cli: TeradataClient, - val schema: String, - val table: String -) extends JdbcTableLocation(cli, Some(schema), table) { - - val host: String = cli.hostname - - val port: Int = cli.port - - val dbName: String = cli.maybeDatabase.get - - val username: String = cli.maybeUsername.get - - val password: String = cli.maybePassword.get - - val parameters: Map[String, String] = cli.parameters - - def this( - host: String, - port: Int, - dbName: String, - username: String, - password: String, - schema: String, - tableName: String, - parameters: Map[String, String] - )(implicit settings: RawSettings) = { - this( - new TeradataClient(host, port, dbName, username, password, parameters), - schema, - tableName - ) - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/api/TestGenericSkippableInputStream.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/api/TestGenericSkippableInputStream.scala deleted file mode 100644 index c2ba042bd..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/api/TestGenericSkippableInputStream.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.api - -import java.io.{ByteArrayInputStream, IOException} -import org.scalatest.funsuite.AnyFunSuite - -class TestGenericSkippableInputStream extends AnyFunSuite { - - def inputStreamProvider() = { - val array = (0 until 100).map(_.toByte).toArray - new ByteArrayInputStream(array) - } - - test("read and seek") { - val is = new GenericSkippableInputStream(inputStreamProvider) - val b = Array.fill(5)(0.toByte) - - is.read(b) - for (n <- 0 until 5) { - assert(b(n) == n.toByte) - } - - is.seek(0) - assert(is.read() == 0) - } - - test("close and seek") { - val is = new GenericSkippableInputStream(inputStreamProvider) - val b = Array.fill(5)(0.toByte) - - is.read(b) - for (n <- 0 until 5) { - assert(b(n) == n.toByte) - } - - is.close() - intercept[IOException] { - is.seek(0) - } - } -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/HttpLocationsTestContext.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/HttpLocationsTestContext.scala deleted file mode 100644 index b8dd6e1e5..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/HttpLocationsTestContext.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.http - -trait HttpLocationsTestContext { - - lazy val authorsHjsonHttp = s"http://test-data.raw-labs.com/public/authors.hjson" - lazy val authorsSmallHjsonHttp = s"http://test-data.raw-labs.com/public/authorsSmall.hjson" - lazy val publicationsHjsonHttp = s"http://test-data.raw-labs.com/public/publications.hjson" - - lazy val authorsJsonHttp = s"http://test-data.raw-labs.com/public/authors.json" - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/TestHttpServer.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/TestHttpServer.scala deleted file mode 100644 index b1f35fd6d..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/bytestream/http/TestHttpServer.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.bytestream.http - -import com.sun.net.httpserver.{Authenticator, BasicAuthenticator, HttpExchange, HttpHandler, HttpServer} -import com.typesafe.scalalogging.StrictLogging - -import java.io.IOException -import java.net.InetSocketAddress - -class DefaultHandler(bodyToReturn: String) extends HttpHandler with StrictLogging { - @throws[IOException] - def handle(exchange: HttpExchange): Unit = { - logger.debug(s"Received request: $exchange") - exchange.sendResponseHeaders(200, bodyToReturn.length) - val os = exchange.getResponseBody - os.write(bodyToReturn.getBytes) - os.close() - } -} - -class SimpleAuthenticator(expectedUsername: String, expectedPassword: String) - extends BasicAuthenticator("raw-tests") - with StrictLogging { - override def checkCredentials(user: String, pwd: String): Boolean = { - logger.debug(s"Checking credentials: $user, $pwd") - user == expectedUsername && pwd == expectedPassword - } -} - -class TestHttpServer(authenticator: Authenticator, handler: HttpHandler) extends StrictLogging { - - private val server = HttpServer.create(new InetSocketAddress("localhost", 0), 5) - - private val ctx = server.createContext("/get", handler) - ctx.setAuthenticator(authenticator) - server.setExecutor(null) - logger.info(s"Starting server at address: ${server.getAddress}") - server.start() - - def address = server.getAddress - - def stop(): Unit = { - server.stop(0) - } -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/api/TestFileSystems.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/api/TestFileSystems.scala deleted file mode 100644 index d1812dd82..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/api/TestFileSystems.scala +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.api - -import com.typesafe.scalalogging.StrictLogging -import org.scalatest.BeforeAndAfterAll -import com.rawlabs.utils.core.{RawTestSuite, SettingsTestContext} - -trait TestFileSystems extends SettingsTestContext with BeforeAndAfterAll with StrictLogging { - this: RawTestSuite => - - def basePath: String - - def buildPath(fs: FileSystem, relativePath: String): String = basePath + fs.fileSeparator + relativePath - - def newFileSystem: FileSystem - - def writeTestFile(fs: FileSystem, parts: String*): Unit - - private var fileSeparator: String = _ - - override def beforeAll(): Unit = { - super.beforeAll() - val fs = newFileSystem - fileSeparator = fs.fileSeparator - writeTestFile(fs, "file1.csv") - writeTestFile(fs, "file2.csv") - writeTestFile(fs, "file1.json") - writeTestFile(fs, "jsons", "file1.json") - writeTestFile(fs, "jsons", "file2.json") - writeTestFile(fs, "csvs", "01", "data1.csv") - writeTestFile(fs, "csvs", "01", "data2.csv") - writeTestFile(fs, "csvs", "02", "data1.csv") - } - - // Some implementations have hidden files that must be filtered for comparisons to work properly. - // This method allows thsee implementations to override it and have the comparison still perform as expected. - protected def filterResults(path: String): Boolean = true - - private def runListTest(path: String, expectedResults: Int) = { - val fs = newFileSystem - val fullPath = buildPath(fs, path) - val results = fs.listContents(fullPath).filter(filterResults).toList - logger.debug("Result: " + results) - assert(results.size == expectedResults) - } - - test("list /file1.csv")(_ => runListTest("file1.csv", 1)) - - test("list /*")(_ => runListTest("*", 5)) - - test("list /*csv")(_ => runListTest("*csv", 2)) - - test("list /*.csv")(_ => runListTest("*.csv", 2)) - - test("list /*.json")(_ => runListTest("*.json", 1)) - - test("list /jsons")(_ => runListTest("jsons", 2)) - - test("list /jsons/")(_ => runListTest("jsons" + fileSeparator, 2)) - - test("list /jsons/*")(_ => runListTest("jsons" + fileSeparator + "*", 2)) - - test("list /csvs")(_ => runListTest("csvs", 2)) - - test("list /csvs/")(_ => runListTest("csvs" + fileSeparator, 2)) - - test("list /csvs/*")(_ => runListTest("csvs" + fileSeparator + "*", 2)) - - test("list /**")(_ => runListTest("**", 13)) - - test("list /**/*json")(_ => runListTest("**" + fileSeparator + "*json", 3)) - - test("list /**/*.json")(_ => runListTest("**" + fileSeparator + "*.json", 3)) - - test("list /csvs/**")(_ => runListTest("csvs" + fileSeparator + "**", 6)) - - test("list /csvs/**/*.csv") { _ => - runListTest("csvs" + fileSeparator + "**" + fileSeparator + "*.csv", 3) - // TODO: Add ? version - } - - test("list /") { _ => - val fs = newFileSystem - val p = buildPath(fs, "*") - logger.debug("Result: " + fs.listContents(basePath).toList) - assert(fs.listContents(basePath).toSet === fs.listContents(p).toSet) - } - - test("list with metadata /") { _ => - val fs = newFileSystem - logger.debug("Result: " + fs.listContentsWithMetadata(basePath).toList) - assert(fs.listContentsWithMetadata(basePath).map(_._1).count(filterResults) === 5) - assert(fs.listContentsWithMetadata(basePath).count { - case (f, metadata) => filterResults(f) && metadata.isInstanceOf[DirectoryMetadata] - } === 2) - } - - test("list with metadata /*") { _ => - val fs = newFileSystem - val p = buildPath(fs, "*") - logger.debug("Result: " + fs.listContentsWithMetadata(p).toList) - assert(fs.listContentsWithMetadata(p).map(_._1).toSet === fs.listContentsWithMetadata(basePath).map(_._1).toSet) - } - - test("testAccess /") { _ => - val fs = newFileSystem - fs.testAccess(basePath) - } - - test("testAccess /file1.csv") { _ => - val fs = newFileSystem - val p = buildPath(fs, "file1.csv") - fs.testAccess(p) - } - - // S3 had a bug where files were "found" even though only "prefix" matched - test("testAccess /file1.cs") { _ => - val fs = newFileSystem - val p = buildPath(fs, "file1.cs") - intercept[FileSystemException] { - fs.testAccess(p) - } - } - - // This is a variant of the S3 bug described above that would accidentally match two files - test("testAccess /file") { _ => - val fs = newFileSystem - val p = buildPath(fs, "file") - intercept[FileSystemException] { - fs.testAccess(p) - } - } - - test("testAccess /jsons") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons") - fs.testAccess(p) - } - - test("testAccess /jsons/") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator) - fs.testAccess(p) - } - - test("testAccess /jsons/file1.json") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator + "file1.json") - fs.testAccess(p) - } - - test("testAccess /doesnt-exist") { _ => - val fs = newFileSystem - val p = buildPath(fs, "doesnt-exist") - intercept[FileSystemException] { - fs.testAccess(p) - } - } - - test("isDirectory /") { _ => - val fs = newFileSystem - assert(fs.isDirectory(basePath)) - } - - test("isDirectory /csvs") { _ => - val fs = newFileSystem - val p = buildPath(fs, "csvs") - assert(fs.isDirectory(p)) - } - - test("isDirectory /csvs/") { _ => - val fs = newFileSystem - val p = buildPath(fs, "csvs" + fs.fileSeparator) - assert(fs.isDirectory(p)) - } - - test("isDirectory /csvs/01") { _ => - val fs = newFileSystem - val p = buildPath(fs, "csvs" + fs.fileSeparator + "01") - assert(fs.isDirectory(p)) - } - - test("isDirectory /csvs/01/") { _ => - val fs = newFileSystem - val p = buildPath(fs, "csvs" + fs.fileSeparator + "01" + fs.fileSeparator) - assert(fs.isDirectory(p)) - } - - test("isDirectory /jsons/file1.json") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator + "file1.json") - assert(!fs.isDirectory(p)) - } - - test("isDirectory /bad") { _ => - val fs = newFileSystem - val p = buildPath(fs, "bad") - intercept[PathNotFoundException] { - fs.isDirectory(p) - } - } - - test("getInputStream on directory") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons") - intercept[NotAFileException] { - fs.getInputStream(p) - } - } - - test("getInputStream on file") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator + "file1.json") - val is = fs.getInputStream(p) - try { - // most of files created are empty, this mostly checking that no exception is being thrown - assert(is.available() >= 0) - } finally { - is.close() - } - } - - test("getSeekableInputStream on file") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator + "file1.json") - val is = fs.getSeekableInputStream(p) - try { - // most of files created are empty, this mostly checking that no exception is being thrown - assert(is.available() >= 0) - } finally { - is.close() - } - } - - test("test access does_not_exist") { _ => - val fs = newFileSystem - val p = buildPath(fs, "does_not_exist.csv") - intercept[PathNotFoundException] { - try { - fs.testAccess(p) - } catch { - case e: NotImplementedError => - logger.warn(s"file system ${this.getClass} with testAccess not implemented") - throw new PathNotFoundException(p, e) - } - } - } - - test("test access folder") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons") - fs.testAccess(p) - } - - test("test access file") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fileSeparator + "file1.json") - fs.testAccess(p) - } - - test("get metadata file") { _ => - val fs = newFileSystem - val p = buildPath(fs, "file1.csv") - val m = fs.metadata(p) - assert(m.isInstanceOf[FileMetadata]) - } - - test("get metadata folder") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons") - val m = fs.metadata(p) - - assert(m.isInstanceOf[DirectoryMetadata]) - } - - test("getInputStream on non existent") { _ => - val fs = newFileSystem - val p = buildPath(fs, "does_not_exist") - intercept[PathNotFoundException] { - fs.getInputStream(p) - } - } - - test("exception get metadata on wildcard") { _ => - val fs = newFileSystem - val p = buildPath(fs, "jsons" + fs.fileSeparator + "*") - intercept[FileSystemException] { - fs.metadata(p) - } - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestDropboxFileSystem.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestDropboxFileSystem.scala deleted file mode 100644 index 40f2ec4de..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestDropboxFileSystem.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core.DbxRequestConfig -import com.dropbox.core.oauth.DbxCredential -import com.dropbox.core.v2.DbxClientV2 -import com.rawlabs.utils.core.RawTestSuite -import com.rawlabs.utils.sources.filesystem.api.{FileSystem, TestFileSystems} - -import java.io.ByteArrayInputStream -import scala.util.Try - -class TestDropboxFileSystem extends RawTestSuite with TestFileSystems { - - override val basePath = "/dropbox-test" - - val dropboxClient = new DropboxFileSystem( - new DbxClientV2( - DbxRequestConfig.newBuilder(settings.getString(BaseDropboxPath.DROPBOX_CLIENT_ID)).build(), - new DbxCredential(sys.env("RAW_DROPBOX_TEST_LONG_LIVED_ACCESS_TOKEN")) - ) - ) - - override def newFileSystem: FileSystem = dropboxClient - - override def writeTestFile(fs: FileSystem, parts: String*): Unit = { - Try(dropboxClient.client.files().createFolderV2(basePath)) - val in = new ByteArrayInputStream(Array[Byte]()) - Try(dropboxClient.client.files().uploadBuilder(buildPath(fs, parts.mkString(fs.fileSeparator))).uploadAndFinish(in)) - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestRootDropboxFileSystem.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestRootDropboxFileSystem.scala deleted file mode 100644 index e7b1ebf09..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/dropbox/TestRootDropboxFileSystem.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.dropbox - -import com.dropbox.core.DbxRequestConfig -import com.dropbox.core.oauth.DbxCredential -import com.dropbox.core.v2.DbxClientV2 -import com.typesafe.scalalogging.StrictLogging -import com.rawlabs.utils.core.{RawTestSuite, SettingsTestContext} - -class TestRootDropboxFileSystem extends RawTestSuite with SettingsTestContext with StrictLogging { - - test("list /") { _ => - val fs = new DropboxFileSystem( - new DbxClientV2( - DbxRequestConfig.newBuilder(settings.getString(BaseDropboxPath.DROPBOX_CLIENT_ID)).build(), - new DbxCredential(sys.env("RAW_DROPBOX_TEST_LONG_LIVED_ACCESS_TOKEN")) - ) - ) - logger.debug("Result: " + fs.listContents("/").toList) - assert(fs.listContents("/").nonEmpty) - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/local/TestLocalFileSystem.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/local/TestLocalFileSystem.scala deleted file mode 100644 index 75c4df9ab..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/local/TestLocalFileSystem.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.local - -import java.io.File -import com.rawlabs.utils.core.RawTestSuite -import com.rawlabs.utils.sources.filesystem.api.{FileSystem, TestFileSystems} - -import java.nio.file.Files - -class TestLocalFileSystem extends RawTestSuite with TestFileSystems { - - override val basePath: String = Files.createTempDirectory("test-local").toFile.getAbsolutePath - - override def newFileSystem: FileSystem = LocalFileSystem - - override def writeTestFile(fs: FileSystem, parts: String*): Unit = { - val f = new File(buildPath(fs, parts.mkString(fs.fileSeparator))) - f.getParentFile.mkdirs() - f.createNewFile() - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestLargeDirectory.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestLargeDirectory.scala deleted file mode 100644 index bab64b452..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestLargeDirectory.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import com.typesafe.scalalogging.StrictLogging -import org.scalatest.BeforeAndAfterAll -import com.rawlabs.utils.core.{RawTestSuite, SettingsTestContext} - -import scala.collection.mutable - -class TestLargeDirectory extends RawTestSuite with BeforeAndAfterAll with SettingsTestContext with StrictLogging { - - val prefix = "large-folder" - - def genDataset(basePath: String): mutable.ArrayBuffer[String] = { - val keys = new mutable.ArrayBuffer[String]() - for (i <- 1 until 5050) { - val key = basePath + f"/$i%04d.txt" - keys.append(key) - } - keys - } - - test("list large directory") { _ => - val expected = genDataset(prefix) - val s3FileSystem = new S3FileSystem( - "rawlabs-private-test-data", - Some("eu-west-1"), - Some(sys.env("RAW_AWS_ACCESS_KEY_ID")), - Some(sys.env("RAW_AWS_SECRET_ACCESS_KEY")) - ) - val iterator = s3FileSystem.listContentsWithMetadata(prefix) - val actual = new mutable.HashSet[String]() - for ((file, md) <- iterator) { - actual.add(file) - } - assert(expected.toSet === actual) - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestRootS3FileSystem.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestRootS3FileSystem.scala deleted file mode 100644 index d5a04d855..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestRootS3FileSystem.scala +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import com.typesafe.scalalogging.StrictLogging -import com.rawlabs.utils.core.{RawTestSuite, SettingsTestContext} - -class TestRootS3FileSystem extends RawTestSuite with SettingsTestContext with StrictLogging { - - test("list ''") { _ => - val fs = new S3FileSystem( - "rawlabs-private-test-data", - Some("eu-west-1"), - Some(sys.env("RAW_AWS_ACCESS_KEY_ID")), - Some(sys.env("RAW_AWS_SECRET_ACCESS_KEY")) - ) - - val list = fs.listContents("").toList - logger.debug("Result: " + list) - } - - test("list bucket region us-east-1") { _ => - val fs = new S3FileSystem( - "rawlabs-unit-tests-us-east-1", - Some("us-east-1"), - Some(sys.env("RAW_AWS_ACCESS_KEY_ID")), - Some(sys.env("RAW_AWS_SECRET_ACCESS_KEY")) - ) - val list = fs.listContents("").toList - logger.debug("Result: " + list) - } - - test("list bucket us-east-1 without specifying a region") { _ => - val fs = new S3FileSystem( - "rawlabs-unit-tests-us-east-1", - None, - Some(sys.env("RAW_AWS_ACCESS_KEY_ID")), - Some(sys.env("RAW_AWS_SECRET_ACCESS_KEY")) - ) - val list = fs.listContents("").toList - logger.debug("Result: " + list) - } - -} - -class TestRootOfEmptyBucketS3FileSystem extends RawTestSuite with SettingsTestContext with StrictLogging { - - test("list ''") { _ => - val fs = new S3FileSystem( - "rawlabs-unit-test-empty-bucket", - Some("eu-west-1"), - Some(sys.env("RAW_AWS_ACCESS_KEY_ID")), - Some(sys.env("RAW_AWS_SECRET_ACCESS_KEY")) - ) - val list = fs.listContents("").toList - assert(list.isEmpty) - } - -} diff --git a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestS3FileSystem.scala b/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestS3FileSystem.scala deleted file mode 100644 index a0ff8e28a..000000000 --- a/utils-sources/src/test/scala/com/rawlabs/utils/sources/filesystem/s3/TestS3FileSystem.scala +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2023 RAW Labs S.A. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0, included in the file - * licenses/APL.txt. - */ - -package com.rawlabs.utils.sources.filesystem.s3 - -import com.rawlabs.utils.core.RawTestSuite -import com.rawlabs.utils.sources.filesystem.api.{FileSystem, TestFileSystems} -import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} -import software.amazon.awssdk.core.exception.SdkClientException -import software.amazon.awssdk.core.sync.RequestBody -import software.amazon.awssdk.regions.Region -import software.amazon.awssdk.services.s3.S3Client -import software.amazon.awssdk.services.s3.model.PutObjectRequest - -import java.nio.charset.StandardCharsets - -trait TestS3FileSystem extends TestFileSystems { - this: RawTestSuite => - - val bucketName: String - - val bucketRegion: String - - val bucketAccessKey: String - - val bucketSecretKey: String - - lazy val awsClient = { - val credentials = AwsBasicCredentials.create( - bucketAccessKey, - bucketSecretKey - ) - - S3Client - .builder() - .credentialsProvider(StaticCredentialsProvider.create(credentials)) - .region(Region.of(bucketRegion)) - .build() - } - - override lazy val newFileSystem: FileSystem = - new S3FileSystem(bucketName, Some(bucketRegion), Some(bucketAccessKey), Some(bucketSecretKey)) - - override def writeTestFile(fs: FileSystem, parts: String*): Unit = { - val s3Path = buildPath(fs, parts.mkString(fs.fileSeparator)) - var retries = 3 - var waitTime = 10; - var ok = false - while (!ok) { - try { - val putRequest = PutObjectRequest - .builder() - .bucket(bucketName) - .key(s3Path) - .build() - - val requestBody = RequestBody.fromString("foobar", StandardCharsets.UTF_8) - - awsClient.putObject(putRequest, requestBody) - ok = true - } catch { - case e: SdkClientException => - retries -= 1 - if (retries <= 0) { - logger.warn(s"Failed to write $s3Path") - throw e - } else { - logger.warn(s"Error trying to write $s3Path, retries $retries, wait time $waitTime", e) - Thread.sleep(waitTime) - waitTime *= 2 - } - } - } - } -} - -class TestForwardSlashS3FileSystem extends RawTestSuite with TestS3FileSystem { - override val bucketName = "rawlabs-private-test-data" - override val bucketRegion = "eu-west-1" - override val bucketAccessKey = sys.env("RAW_AWS_ACCESS_KEY_ID") - override val bucketSecretKey = sys.env("RAW_AWS_SECRET_ACCESS_KEY") - override val basePath = "/s3-test" - override def filterResults(p: String): Boolean = !p.startsWith(s"${basePath.stripPrefix("/")}/tmp-") -} - -class TestNoForwardSlashS3FileSystem extends RawTestSuite with TestS3FileSystem { - override val bucketName = "rawlabs-private-test-data" - override val bucketRegion = "eu-west-1" - override val bucketAccessKey = sys.env("RAW_AWS_ACCESS_KEY_ID") - override val bucketSecretKey = sys.env("RAW_AWS_SECRET_ACCESS_KEY") - override val basePath = "s3-test" - override def filterResults(p: String): Boolean = !p.startsWith(s"$basePath/tmp-") -} - -class TestRootOfBucketS3FileSystem extends RawTestSuite with TestS3FileSystem { - override val bucketName = "rawlabs-unit-tests" - override val bucketRegion = "eu-west-1" - override val bucketAccessKey = sys.env("RAW_AWS_ACCESS_KEY_ID") - override val bucketSecretKey = sys.env("RAW_AWS_SECRET_ACCESS_KEY") - - override val basePath = "" - - override def filterResults(p: String): Boolean = !p.startsWith("tmp-") - - override def buildPath(fs: FileSystem, relativePath: String): String = relativePath -}