From 7b22b5486d72d2e93b887454f7c55001772aaffd Mon Sep 17 00:00:00 2001
From: David Coe
Date: Mon, 20 Nov 2023 14:37:12 -0500
Subject: [PATCH 01/13] rebasing

---
 csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index 1c5347ef4a..4f0dce5c46 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -87,6 +87,7 @@ private Field TranslateField(TableFieldSchema field)
             return new Field(field.Name, TranslateType(field), field.Mode == "NULLABLE");
         }
 
+<<<<<<< HEAD
         public override object GetValue(IArrowArray arrowArray, Field field, int index)
         {
             switch(arrowArray)
@@ -100,6 +101,21 @@ public override object GetValue(IArrowArray arrowArray, Field field, int index)
             }
         }
 
+=======
+        public override object GetValue(IArrowArray arrowArray, Field field, int index)
+        {
+            switch(arrowArray)
+            {
+                case StructArray structArray:
+                    return SerializeToJson(structArray, index);
+                case ListArray listArray:
+                    return listArray.GetSlicedValues(index);
+                default:
+                    return base.GetValue(arrowArray, field, index);
+            }
+        }
+
+>>>>>>> be35986c (add back support for List, Struct types)
         private IArrowType TranslateType(TableFieldSchema field)
         {
             // per https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#getType--
From e62683f583a9a51a5f65f1dc6b5bacd4c868990f Mon Sep 17 00:00:00 2001
From: David Coe
Date: Fri, 10 Nov 2023 11:19:32 -0500
Subject: [PATCH 02/13] fix line endings

---
 csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index 4f0dce5c46..7d14bf8ccd 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -88,6 +88,9 @@ private Field TranslateField(TableFieldSchema field)
         }
 
 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 641a6fe4 (fix line endings)
         public override object GetValue(IArrowArray arrowArray, Field field, int index)
         {
             switch(arrowArray)
@@ -101,6 +104,7 @@ public override object GetValue(IArrowArray arrowArray, Field field, int index)
         }
 
+<<<<<<< HEAD
 =======
         public override object GetValue(IArrowArray arrowArray, Field field, int index)
         {
@@ -116,6 +120,8 @@ public override object GetValue(IArrowArray arrowArray, Field field, int index)
         }
 
 >>>>>>> be35986c (add back support for List, Struct types)
+=======
+>>>>>>> 641a6fe4 (fix line endings)
         private IArrowType TranslateType(TableFieldSchema field)
         {
             // per https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#getType--
From f6523b88677f2a41cf8d70511fbf70f6f9ae4182 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Thu, 14 Dec 2023 14:15:03 -0500
Subject: [PATCH 03/13] improve List slice support

---
 csharp/src/Client/SchemaConverter.cs            | 68 ++++++++++++++++++-
 .../Apache.Arrow.Adbc.Drivers.BigQuery.csproj   |  1 +
 .../src/Drivers/BigQuery/BigQueryStatement.cs   | 56 ++++++---------
 .../Apache.Arrow.Adbc.Tests/ClientTests.cs      | 20 +-----
 csharp/test/Drivers/BigQuery/BigQueryData.cs    | 10 +--
 5 files changed, 96 insertions(+), 59 deletions(-)

diff --git a/csharp/src/Client/SchemaConverter.cs b/csharp/src/Client/SchemaConverter.cs
index 0c13bb479a..43eb327394 100644
--- a/csharp/src/Client/SchemaConverter.cs
+++ b/csharp/src/Client/SchemaConverter.cs
@@ -16,6 +16,7 @@
  */
 
 using System;
+using System.Collections.Generic;
 using System.Data;
 using System.Data.Common;
 using System.Data.SqlTypes;
@@ -92,6 +93,19 @@ public static DataTable ConvertArrowSchema(Schema schema, AdbcStatement adbcStat
         ///
         ///
         public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
+        {
+            switch (f.DataType.TypeId)
+            {
+                case ArrowTypeId.List:
+                    ListType list = f.DataType as ListType;
+                    IArrowType valueType = list.ValueDataType;
+                    return GetArrowArrayType(valueType);
+                default:
+                    return GetArrowType(f, decimalBehavior);
+            }
+        }
+
+        public static Type GetArrowType(Field f, DecimalBehavior decimalBehavior)
         {
             switch (f.DataType.TypeId)
             {
@@ -102,7 +116,7 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
                     return typeof(bool);
 
                 case ArrowTypeId.Decimal128:
-                    if(decimalBehavior == DecimalBehavior.UseSqlDecimal)
+                    if (decimalBehavior == DecimalBehavior.UseSqlDecimal)
                         return typeof(SqlDecimal);
                     else
                         return typeof(decimal);
@@ -162,5 +176,57 @@ public static Type ConvertArrowType(Field f, DecimalBehavior decimalBehavior)
                     return f.DataType.GetType();
             }
         }
+
+        public static Type GetArrowArrayType(IArrowType dataType)
+        {
+            switch (dataType.TypeId)
+            {
+                case ArrowTypeId.Binary:
+                    return typeof(BinaryArray);
+                case ArrowTypeId.Boolean:
+                    return typeof(BooleanArray);
+                case ArrowTypeId.Decimal128:
+                    return typeof(Decimal128Array);
+                case ArrowTypeId.Decimal256:
+                    return typeof(Decimal256Array);
+                case ArrowTypeId.Time32:
+                    return typeof(Time32Array);
+                case ArrowTypeId.Time64:
+                    return typeof(Time64Array);
+                case ArrowTypeId.Date32:
+                    return typeof(Date32Array);
+                case ArrowTypeId.Date64:
+                    return typeof(Date64Array);
+                case ArrowTypeId.Double:
+                    return typeof(DoubleArray);
+
+#if NET5_0_OR_GREATER
+                case ArrowTypeId.HalfFloat:
+                    return typeof(HalfFloatArray);
+#endif
+                case ArrowTypeId.Float:
+                    return typeof(FloatArray);
+                case ArrowTypeId.Int8:
+                    return typeof(Int8Array);
+                case ArrowTypeId.Int16:
+                    return typeof(Int16Array);
+                case ArrowTypeId.Int32:
+                    return typeof(Int32Array);
+                case ArrowTypeId.Int64:
+                    return typeof(Int64Array);
+                case ArrowTypeId.String:
+                    return typeof(StringArray);
+                case ArrowTypeId.Struct:
+                    return typeof(StructArray);
+                case ArrowTypeId.Timestamp:
+                    return typeof(TimestampArray);
+                case ArrowTypeId.Null:
+                    return typeof(NullArray);
+                case ArrowTypeId.List:
+                    return typeof(ListArray);
+            }
+
+            throw new InvalidCastException($"Cannot determine the array type for {dataType.Name}");
+        }
     }
 }
diff --git a/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj b/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj
index a7451c7bd1..69b271a5df 100644
--- a/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj
+++ b/csharp/src/Drivers/BigQuery/Apache.Arrow.Adbc.Drivers.BigQuery.csproj
@@ -2,6 +2,7 @@
     netstandard2.0;net6.0
     readme.md
+    enable
diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index 7d14bf8ccd..50964a1571 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -87,10 +87,6 @@ private Field TranslateField(TableFieldSchema field)
             return new Field(field.Name, TranslateType(field), field.Mode == "NULLABLE");
         }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 641a6fe4 (fix line endings)
         public override object GetValue(IArrowArray arrowArray, Field field, int index)
         {
             switch(arrowArray)
@@ -104,24 +100,6 @@ public override object GetValue(IArrowArray arrowArray, Field field, int index)
             }
         }
 
-<<<<<<< HEAD
-=======
-        public override object GetValue(IArrowArray arrowArray, Field field, int index)
-        {
-            switch(arrowArray)
-            {
-                case StructArray structArray:
-                    return SerializeToJson(structArray, index);
-                case ListArray listArray:
-                    return listArray.GetSlicedValues(index);
-                default:
-                    return base.GetValue(arrowArray, field, index);
-            }
-        }
-
->>>>>>> be35986c (add back support for List, Struct types)
-=======
->>>>>>> 641a6fe4 (fix line endings)
         private IArrowType TranslateType(TableFieldSchema field)
         {
             // per https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#getType--
@@ -129,30 +107,30 @@ private IArrowType TranslateType(TableFieldSchema field)
             switch (field.Type)
             {
                 case "INTEGER" or "INT64":
-                    return Int64Type.Default;
+                    return GetType(field, Int64Type.Default);
                 case "FLOAT" or "FLOAT64":
-                    return DoubleType.Default;
+                    return GetType(field, DoubleType.Default);
                 case "BOOL" or "BOOLEAN":
-                    return BooleanType.Default;
+                    return GetType(field, BooleanType.Default);
                 case "STRING":
-                    return StringType.Default;
+                    return GetType(field, StringType.Default);
                 case "BYTES":
-                    return BinaryType.Default;
+                    return GetType(field, BinaryType.Default);
                 case "DATETIME":
-                    return TimestampType.Default;
+                    return GetType(field, TimestampType.Default);
                 case "TIMESTAMP":
-                    return TimestampType.Default;
+                    return GetType(field, TimestampType.Default);
                 case "TIME":
-                    return Time64Type.Default;
+                    return GetType(field, Time64Type.Default);
                 case "DATE":
-                    return Date64Type.Default;
+                    return GetType(field, Date64Type.Default);
                 case "RECORD" or "STRUCT":
                     // its a json string
-                    return StringType.Default;
+                    return GetType(field, StringType.Default);
 
                 // treat these values as strings
                 case "GEOGRAPHY" or "JSON":
-                    return StringType.Default;
+                    return GetType(field, StringType.Default);
 
                 // get schema cannot get precision and scale for NUMERIC or BIGNUMERIC types
                 // instead, the max values are returned from BigQuery
                 // see:
                 // and discussion in https://github.com/apache/arrow-adbc/pull/1192#discussion_r1365987279
                 case "NUMERIC" or "DECIMAL":
-                    return new Decimal128Type(38, 9);
+                    return GetType(field, new Decimal128Type(38, 9));
                 case "BIGNUMERIC" or "BIGDECIMAL":
-                    return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? StringType.Default : new Decimal256Type(76, 38);
+                    return bool.Parse(this.Options[BigQueryParameters.LargeDecimalsAsString]) ? GetType(field, StringType.Default) : GetType(field, new Decimal256Type(76, 38));
 
                 default:
                     throw new InvalidOperationException($"{field.Type} cannot be translated");
             }
         }
 
+        private IArrowType GetType(TableFieldSchema field, IArrowType type)
+        {
+            if(field.Mode == "REPEATED")
+                return new ListType(type);
+
+            return type;
+        }
+
         static IArrowReader ReadChunk(BigQueryReadClient readClient, string streamName)
         {
             // Ideally we wouldn't need to indirect through a stream, but the necessary APIs in Arrow
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs b/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
index b3784fc05d..cb55a94d9f 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
@@ -207,25 +207,7 @@ static void AssertTypeAndValue(
             if (netType != null)
             {
-                if (netType.BaseType.Name.Contains("PrimitiveArray") && value != null)
-                {
-                    int length = Convert.ToInt32(value.GetType().GetProperty("Length").GetValue(value));
-
-                    if (length > 0)
-                    {
-                        object internalValue = value.GetType().GetMethod("GetValue").Invoke(value, new object[] { 0 });
-
-                        Assert.True(internalValue.GetType() == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
-                    }
-                    else
-                    {
-                        Console.WriteLine($"Could not validate the values inside of {netType.Name} because it is empty for query [{query}]");
-                    }
-                }
-                else
-                {
-                    Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
-                }
+                Assert.True(netType == ctv.ExpectedNetType, $"{name} is {netType.Name} and not {ctv.ExpectedNetType.Name} in the reader for query [{query}]");
             }
 
             if (value != null)
diff --git a/csharp/test/Drivers/BigQuery/BigQueryData.cs b/csharp/test/Drivers/BigQuery/BigQueryData.cs
index 2a39fcaba8..f23e4ce2ec 100644
--- a/csharp/test/Drivers/BigQuery/BigQueryData.cs
+++ b/csharp/test/Drivers/BigQuery/BigQueryData.cs
@@ -33,8 +33,11 @@ internal class BigQueryData
         ///
         public static SampleDataBuilder GetSampleData()
         {
-            Int64Array.Builder numbersBuilder = new Int64Array.Builder();
+            ListArray.Builder labuilder = new ListArray.Builder(Int64Type.Default);
+            Int64Array.Builder numbersBuilder = labuilder.ValueBuilder as Int64Array.Builder;
+            labuilder.Append();
             numbersBuilder.AppendRange(new List<long>() { 1, 2, 3 });
+
             Int64Array numbersArray = numbersBuilder.Build();
 
             SampleDataBuilder sampleDataBuilder = new SampleDataBuilder();
@@ -76,10 +79,9 @@ public static SampleDataBuilder GetSampleData()
                     new ColumnNetTypeArrowTypeValue("datetime", typeof(DateTimeOffset), typeof(TimestampType), new DateTimeOffset(new DateTime(2023, 9, 8, 12, 34, 56), TimeSpan.Zero)),
                     new ColumnNetTypeArrowTypeValue("timestamp", typeof(DateTimeOffset), typeof(TimestampType), new DateTimeOffset(new DateTime(2023, 9, 8, 12, 34, 56), TimeSpan.Zero)),
                     new ColumnNetTypeArrowTypeValue("point", typeof(string), typeof(StringType), "POINT(1 2)"),
-                    new ColumnNetTypeArrowTypeValue("numbers", typeof(long), typeof(Int64Type), numbersArray),
+                    new ColumnNetTypeArrowTypeValue("numbers", typeof(Int64Array), typeof(ListType), numbersArray),
                     new ColumnNetTypeArrowTypeValue("person", typeof(string), typeof(StringType), "{\"name\":\"John Doe\",\"age\":30}"),
                     new ColumnNetTypeArrowTypeValue("json", typeof(string), typeof(StringType), "{\"age\":29,\"name\":\"Jane Doe\"}")
-
                 }
             });
@@ -139,7 +141,7 @@ public static SampleDataBuilder GetSampleData()
                     new ColumnNetTypeArrowTypeValue("datetime", typeof(DateTimeOffset), typeof(TimestampType), null),
                     new ColumnNetTypeArrowTypeValue("timestamp", typeof(DateTimeOffset), typeof(TimestampType), null),
                     new ColumnNetTypeArrowTypeValue("point", typeof(string), typeof(StringType), null),
-                    new ColumnNetTypeArrowTypeValue("numbers", typeof(long), typeof(Int64Type), emptyNumbersArray),
+                    new ColumnNetTypeArrowTypeValue("numbers", typeof(Int64Array), typeof(ListType), emptyNumbersArray),
                     new ColumnNetTypeArrowTypeValue("person", typeof(string), typeof(StringType), "{\"name\":null,\"age\":null}")
                 }
             });
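Note on the patch above: the driver now maps a REPEATED BigQuery field to an Arrow ListType and returns the sliced element array from GetValue. Below is a minimal, self-contained sketch of that round trip using only the Apache.Arrow builder and slicing calls that already appear in BigQueryData.cs and BigQueryStatement.cs; the class name and Main wrapper are illustrative only.

using System;
using System.Collections.Generic;
using Apache.Arrow;
using Apache.Arrow.Types;

class ListSliceSketch
{
    static void Main()
    {
        // Build a list<int64> column with a single row [1, 2, 3],
        // mirroring what GetSampleData() now does for the "numbers" column.
        ListArray.Builder listBuilder = new ListArray.Builder(Int64Type.Default);
        Int64Array.Builder valueBuilder = (Int64Array.Builder)listBuilder.ValueBuilder;

        listBuilder.Append();                                    // start row 0
        valueBuilder.AppendRange(new List<long>() { 1, 2, 3 });  // its element values

        ListArray numbers = listBuilder.Build();

        // GetValue() in BigQueryStatement returns the sliced values for a list cell,
        // so a caller sees an Int64Array for the row rather than a single long.
        Int64Array row0 = (Int64Array)numbers.GetSlicedValues(0);
        Console.WriteLine(row0.Length); // 3
    }
}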
From 7f405f57a33a6183e26df128e68f156cd8c5aac9 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Thu, 14 Dec 2023 14:22:54 -0500
Subject: [PATCH 04/13] clean up

---
 csharp/src/Client/SchemaConverter.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csharp/src/Client/SchemaConverter.cs b/csharp/src/Client/SchemaConverter.cs
index 43eb327394..656cd0a789 100644
--- a/csharp/src/Client/SchemaConverter.cs
+++ b/csharp/src/Client/SchemaConverter.cs
@@ -16,7 +16,6 @@
  */
 
 using System;
-using System.Collections.Generic;
 using System.Data;
 using System.Data.Common;
 using System.Data.SqlTypes;

From 850bd410d5cbdd4a4e11af652ecb74bd0b057a9c Mon Sep 17 00:00:00 2001
From: David Coe
Date: Fri, 15 Dec 2023 10:53:28 -0500
Subject: [PATCH 05/13] update xdbc

---
 .../Drivers/BigQuery/BigQueryConnection.cs | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 8e33596671..1ea51ac752 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -18,6 +18,7 @@
 using System;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
+using System.Data.SqlTypes;
 using System.Linq;
 using System.Net.Http;
 using System.Text;
@@ -683,8 +684,44 @@ private XdbcDataType ToXdbcDataType(string type)
                     return XdbcDataType.XdbcDataType_XDBC_VARBINARY;
                 case "NUMERIC" or "DECIMAL" or "BIGNUMERIC" or "BIGDECIMAL":
                     return XdbcDataType.XdbcDataType_XDBC_NUMERIC;
-
                 default:
+
+                    int decimalMaxScale = 28;
+
+                    if(type.StartsWith("NUMERIC("))
+                    {
+                        ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);
+
+                        if (parsedDecimalValues.Scale <= decimalMaxScale)
+                            return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
+                        else
+                            return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
+                    }
+
+                    if (type.StartsWith("BIGNUMERIC("))
+                    {
+                        if(bool.Parse(this.properties[BigQueryParameters.LargeDecimalsAsString]))
+                        {
+                            return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
+                        }
+                        else
+                        {
+                            ParsedDecimalValues parsedDecimalValues = ParsePrecisionAndScale(type);
+
+                            if (parsedDecimalValues.Scale <= decimalMaxScale)
+                                return XdbcDataType.XdbcDataType_XDBC_DECIMAL;
+                            else
+                                return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
+                        }
+                    }
+
+                    if (type.StartsWith("STRUCT"))
+                        return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
+
+                    //if (type.StartsWith("ARRAY<"))
+                    //    return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
+
                     return XdbcDataType.XdbcDataType_XDBC_UNKNOWN_TYPE;
             }
         }
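Note on the patch above: the new ToXdbcDataType branch decides between a decimal and a string XDBC type based on the parsed scale, because the client-side decimal types cannot represent a scale above 28 (the driver's internal ParsePrecisionAndScale supplies that scale). A minimal sketch of that decision follows; the scale is assumed to be already parsed, and string results stand in for the XdbcDataType enum values so the snippet stays self-contained.

using System;

static class XdbcNumericMappingSketch
{
    // Sketch of the rule added in this patch, not the driver's actual implementation.
    public static string MapNumeric(string type, int scale, bool largeDecimalsAsString)
    {
        const int decimalMaxScale = 28;

        // Parameterized BIGNUMERIC can be forced to a string via the driver option.
        if (type.StartsWith("BIGNUMERIC(") && largeDecimalsAsString)
            return "XDBC_VARCHAR";

        // Otherwise the scale decides: narrow enough for a decimal, or fall back to a string.
        if (type.StartsWith("NUMERIC(") || type.StartsWith("BIGNUMERIC("))
            return scale <= decimalMaxScale ? "XDBC_DECIMAL" : "XDBC_VARCHAR";

        return "XDBC_UNKNOWN_TYPE";
    }

    static void Main()
    {
        Console.WriteLine(MapNumeric("NUMERIC(38,9)", 9, false));       // XDBC_DECIMAL
        Console.WriteLine(MapNumeric("BIGNUMERIC(76,38)", 38, false));  // XDBC_VARCHAR
        Console.WriteLine(MapNumeric("BIGNUMERIC(76,38)", 38, true));   // XDBC_VARCHAR
    }
}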
From 8884b367ded20eba9bfa066050547de6c3b27c60 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Fri, 15 Dec 2023 11:06:39 -0500
Subject: [PATCH 06/13] clean up

---
 csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 1ea51ac752..0dad3be2af 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -18,7 +18,6 @@
 using System;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
-using System.Data.SqlTypes;
 using System.Linq;
 using System.Net.Http;
 using System.Text;

From 5c18f9af76600fad1bbaa9aa14f07db785123048 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Fri, 15 Dec 2023 11:08:29 -0500
Subject: [PATCH 07/13] clean up

---
 csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 0dad3be2af..8d02c485f8 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -717,10 +717,6 @@ private XdbcDataType ToXdbcDataType(string type)
                     if (type.StartsWith("STRUCT"))
                         return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
 
-                    //if (type.StartsWith("ARRAY<"))
-                    //    return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
-
-
                     return XdbcDataType.XdbcDataType_XDBC_UNKNOWN_TYPE;
             }
         }
From b10c369269461d27c58e2ef5f63096e377485321 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Fri, 15 Dec 2023 17:04:06 -0500
Subject: [PATCH 08/13] update constraints functionality

---
 .../src/Apache.Arrow.Adbc/StandardSchemas.cs |  2 +-
 .../Drivers/BigQuery/BigQueryConnection.cs   |  2 +-
 .../Metadata/AdbcConstraint.cs               | 29 +++++++++++
 .../Metadata/AdbcTable.cs                    |  5 ++
 .../Metadata/AdbcUsageSchema.cs              | 30 +++++++++++
 .../Metadata/GetObjectsParser.cs             | 51 ++++++++++++++++++-
 6 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
 create mode 100644 csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcUsageSchema.cs

diff --git a/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs b/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
index 41f0c3eb49..aa1606d363 100644
--- a/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
+++ b/csharp/src/Apache.Arrow.Adbc/StandardSchemas.cs
@@ -94,7 +94,7 @@ public static class StandardSchemas
         {
             new Field("constraint_name", StringType.Default, false),
             new Field("constraint_type", StringType.Default, false),
-            new Field("constraint_column_usage",
+            new Field("constraint_column_names",
                 new ListType(
                     new Field("item", StringType.Default, true)
                 ),
diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 8d02c485f8..1d0ee7e221 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -537,7 +537,7 @@ private StructArray GetConstraintSchema(
                 nullBitmapBuffer.Append(true);
                 length++;
 
-                if (depth == GetObjectsDepth.All)
+                if (depth == GetObjectsDepth.All || depth == GetObjectsDepth.Tables)
                 {
                     constraintColumnNamesValues.Add(GetConstraintColumnNames(
                         catalog, dbSchema, table, constraintName));
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
new file mode 100644
index 0000000000..c55e205a50
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
@@ -0,0 +1,29 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Collections.Generic;
+
+namespace Apache.Arrow.Adbc.Tests.Metadata
+{
+    public class AdbcConstraint
+    {
+        public string Name { get; set; }
+        public string Type { get; set; }
+        public List<string> ColumnNames { get; set; }
+        public List<AdbcUsageSchema> ColumnUsage { get; set; }
+    }
+}
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs
index 8f99c919ba..f55b92e9a8 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcTable.cs
@@ -38,5 +38,10 @@ public class AdbcTable
         /// List of columns associated with the table.
         ///
         public List<AdbcColumn> Columns { get; set; }
+
+        ///
+        /// The constraints associated with the table.
+        ///
+        public List<AdbcConstraint> Constraints { get; set; }
     }
 }
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcUsageSchema.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcUsageSchema.cs
new file mode 100644
index 0000000000..c996a59b25
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcUsageSchema.cs
@@ -0,0 +1,30 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+namespace Apache.Arrow.Adbc.Tests.Metadata
+{
+    public class AdbcUsageSchema
+    {
+        public string FkCatalog { get; set; }
+
+        public string FkDbSchema { get; set; }
+
+        public string FkTable { get; set; }
+
+        public string FkColumnName { get; set; }
+    }
+}
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
index c783ba1928..c614092478 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
@@ -16,6 +16,8 @@
  */
 
 using System.Collections.Generic;
+using System.Linq;
+using System.Text.Unicode;
 
 namespace Apache.Arrow.Adbc.Tests.Metadata
 {
@@ -88,7 +90,8 @@ private static List ParseTables(StructArray tablesArray)
                 {
                     Name = tableNameArray.GetString(i),
                     Type = tableTypeArray.GetString(i),
-                    Columns = ParseColumns((StructArray)columnsArray.GetSlicedValues(i))
+                    Columns = ParseColumns((StructArray)columnsArray.GetSlicedValues(i)),
+                    Constraints = ParseConstraints((StructArray)tableConstraintsArray.GetSlicedValues(i))
                 });
             }
 
@@ -149,5 +152,51 @@ private static List ParseColumns(StructArray columnsArray)
 
             return columns;
         }
+
+        private static List<AdbcConstraint> ParseConstraints(StructArray constraintsArray)
+        {
+            if (constraintsArray == null) return null;
+
+            List<AdbcConstraint> constraints = new List<AdbcConstraint>();
+
+            StringArray name = (StringArray)constraintsArray.Fields[StandardSchemas.ConstraintSchema.FindIndex(f => f.Name == "constraint_name")]; // constraint_name | utf8
+            StringArray type = (StringArray)constraintsArray.Fields[StandardSchemas.ConstraintSchema.FindIndex(f => f.Name == "constraint_type")]; // constraint_type | utf8 not null
+            ListArray column_names = (ListArray)constraintsArray.Fields[StandardSchemas.ConstraintSchema.FindIndex(f => f.Name == "constraint_column_names")]; // constraint_column_names | list<utf8> not null
+            ListArray column_usage = (ListArray)constraintsArray.Fields[StandardSchemas.ConstraintSchema.FindIndex(f => f.Name == "constraint_column_usage")]; // constraint_column_usage | list<usage_schema>
+
+            for (int i = 0; i < constraintsArray.Length; i++)
+            {
+                AdbcConstraint c = new AdbcConstraint();
+                c.Name = name.GetString(i);
+                c.Type = type.GetString(i);
+
+                StringArray col_names = column_names.GetSlicedValues(i) as StringArray;
+                StructArray usage = column_usage.GetSlicedValues(i) as StructArray;
+
+                for(int j=0; j< column_names.Length; j++)
+                {
+                    c.ColumnNames?.Add(col_names.GetString(j));
+                }
+
+                for(int j=0;j<=usage.Length; j++)
+                {
+                    StringArray fkCatalog = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_catalog")]; // fk_catalog | utf8
+                    StringArray fkDbSchema = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_db_schema")]; //fk_db_schema | utf8
+                    StringArray fkTable = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_table")]; // fk_table | utf8 not null
+                    StringArray fkColumnName = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_column_name")]; // fk_column_name | utf8 not null
+
+                    AdbcUsageSchema adbcUsageSchema = new AdbcUsageSchema();
+                    adbcUsageSchema.FkCatalog = fkCatalog.GetString(j);
+                    adbcUsageSchema.FkDbSchema = fkDbSchema.GetString(j);
+                    adbcUsageSchema.FkTable = fkTable.GetString(j);
+                    adbcUsageSchema.FkColumnName = fkColumnName.GetString(j);
+                    c.ColumnUsage?.Add(adbcUsageSchema);
+                }
+
+                constraints.Add(c);
+            }
+
+            return constraints;
+        }
     }
 }
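Note on the patch above: once GetObjectsParser populates AdbcTable.Constraints, a test can walk the constraint, column-name, and column-usage metadata through the plain classes added here. A small illustrative sketch follows, using only the types and properties defined in this patch (the null checks reflect that, at this point in the series, the list properties are not yet initialized by a constructor).

using System;
using System.Collections.Generic;
using Apache.Arrow.Adbc.Tests.Metadata;

static class ConstraintDumpSketch
{
    // Illustrative only: prints the constraint metadata produced by GetObjectsParser.
    public static void Print(AdbcTable table)
    {
        foreach (AdbcConstraint constraint in table.Constraints ?? new List<AdbcConstraint>())
        {
            Console.WriteLine($"{table.Name}: {constraint.Name} ({constraint.Type})");

            foreach (string columnName in constraint.ColumnNames ?? new List<string>())
                Console.WriteLine($"  column: {columnName}");

            foreach (AdbcUsageSchema usage in constraint.ColumnUsage ?? new List<AdbcUsageSchema>())
                Console.WriteLine($"  references: {usage.FkCatalog}.{usage.FkDbSchema}.{usage.FkTable}.{usage.FkColumnName}");
        }
    }
}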
From 87af05c5f2127b1167d622b195a72e773e4cb456 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Mon, 8 Jan 2024 12:04:08 -0500
Subject: [PATCH 09/13] commit before merge

---
 .../Drivers/BigQuery/BigQueryConnection.cs | 22 ++++++++----
 .../Metadata/AdbcConstraint.cs             |  6 ++++
 .../Metadata/GetObjectsParser.cs           | 36 ++++++++++---------
 3 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 1d0ee7e221..9fa17ec031 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -588,7 +588,8 @@ private StringArray GetConstraintColumnNames(
 
             foreach (BigQueryRow row in result)
             {
-                constraintColumnNamesBuilder.Append(row["column_name"].ToString());
+                string column = row["column_name"].ToString();
+                constraintColumnNamesBuilder.Append(column);
             }
 
             return constraintColumnNamesBuilder.Build();
@@ -607,17 +608,24 @@ private StructArray GetConstraintsUsage(
             ArrowBuffer.BitmapBuilder nullBitmapBuffer = new ArrowBuffer.BitmapBuilder();
             int length = 0;
 
-            string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE table_name = '{2}' AND constraint_name = '{3}'",
-                Sanitize(catalog), Sanitize(dbSchema), Sanitize(table), Sanitize(constraintName));
+            // table_name = '{2}' AND
+            string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = '{2}'",
+                Sanitize(catalog), Sanitize(dbSchema), /*Sanitize(table),*/ Sanitize(constraintName));
 
             BigQueryResults result = this.client.ExecuteQuery(query, parameters: null);
 
             foreach (BigQueryRow row in result)
             {
-                constraintFkCatalogBuilder.Append(row["constraint_catalog"].ToString());
-                constraintFkDbSchemaBuilder.Append(row["constraint_schema"].ToString());
-                constraintFkTableBuilder.Append(row["table_name"].ToString());
-                constraintFkColumnNameBuilder.Append(row["column_name"].ToString());
+                string constraint_catalog = row["constraint_catalog"].ToString();
+                string constraint_schema = row["constraint_schema"].ToString();
+                string table_name = row["table_name"].ToString();
+                string column_name = row["column_name"].ToString();
+
+                constraintFkCatalogBuilder.Append(constraint_catalog);
+                constraintFkDbSchemaBuilder.Append(constraint_schema);
+                constraintFkTableBuilder.Append(table_name);
+                constraintFkColumnNameBuilder.Append(column_name);
+
                 nullBitmapBuffer.Append(true);
                 length++;
             }
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
index c55e205a50..387da01e70 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/AdbcConstraint.cs
@@ -21,6 +21,12 @@ namespace Apache.Arrow.Adbc.Tests.Metadata
 {
     public class AdbcConstraint
     {
+        public AdbcConstraint()
+        {
+            ColumnNames = new List<string>();
+            ColumnUsage = new List<AdbcUsageSchema>();
+        }
+
         public string Name { get; set; }
         public string Type { get; set; }
         public List<string> ColumnNames { get; set; }
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
index c614092478..047c124269 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
@@ -173,24 +173,28 @@ private static List ParseConstraints(StructArray constraintsArra
                 StringArray col_names = column_names.GetSlicedValues(i) as StringArray;
                 StructArray usage = column_usage.GetSlicedValues(i) as StructArray;
 
-                for(int j=0; j< column_names.Length; j++)
-                {
-                    c.ColumnNames?.Add(col_names.GetString(j));
-                }
+                //for(int j=0; j< column_names.Length; j++)
+                //{
+                //    if(column_names.IsValid(j))
+                //        c.ColumnNames.Add(col_names.GetString(j));
+                //}
 
-                for(int j=0;j<=usage.Length; j++)
+                if (usage != null)
                 {
-                    StringArray fkCatalog = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_catalog")]; // fk_catalog | utf8
-                    StringArray fkDbSchema = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_db_schema")]; //fk_db_schema | utf8
-                    StringArray fkTable = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_table")]; // fk_table | utf8 not null
-                    StringArray fkColumnName = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_column_name")]; // fk_column_name | utf8 not null
-
-                    AdbcUsageSchema adbcUsageSchema = new AdbcUsageSchema();
-                    adbcUsageSchema.FkCatalog = fkCatalog.GetString(j);
-                    adbcUsageSchema.FkDbSchema = fkDbSchema.GetString(j);
-                    adbcUsageSchema.FkTable = fkTable.GetString(j);
-                    adbcUsageSchema.FkColumnName = fkColumnName.GetString(j);
-                    c.ColumnUsage?.Add(adbcUsageSchema);
+                    for (int j = 0; j < usage.Length; j++)
+                    {
+                        StringArray fkCatalog = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_catalog")]; // fk_catalog | utf8
+                        StringArray fkDbSchema = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_db_schema")]; //fk_db_schema | utf8
+                        StringArray fkTable = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_table")]; // fk_table | utf8 not null
+                        StringArray fkColumnName = (StringArray)usage.Fields[StandardSchemas.UsageSchema.FindIndex(f => f.Name == "fk_column_name")]; // fk_column_name | utf8 not null
+
+                        AdbcUsageSchema adbcUsageSchema = new AdbcUsageSchema();
+                        adbcUsageSchema.FkCatalog = fkCatalog.GetString(j);
+                        adbcUsageSchema.FkDbSchema = fkDbSchema.GetString(j);
+                        adbcUsageSchema.FkTable = fkTable.GetString(j);
+                        adbcUsageSchema.FkColumnName = fkColumnName.GetString(j);
+                        c.ColumnUsage?.Add(adbcUsageSchema);
+                    }
                 }
 
                 constraints.Add(c);
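Note on the patch above: GetConstraintsUsage now queries INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE filtered only by constraint_name, with the table_name predicate dropped. The sketch below just echoes the string.Format composition from the patch so the resulting SQL is visible; the project, dataset, and constraint identifiers are hypothetical, and the driver's Sanitize() call is omitted.

using System;

static class ConstraintUsageQuerySketch
{
    static void Main()
    {
        // Hypothetical, already-sanitized identifiers.
        string catalog = "my-project";
        string dbSchema = "my_dataset";
        string constraintName = "orders_fk";

        // Mirrors the query built in GetConstraintsUsage after this patch.
        string query = string.Format(
            "SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = '{2}'",
            catalog, dbSchema, constraintName);

        Console.WriteLine(query);
        // SELECT * FROM `my-project`.`my_dataset`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = 'orders_fk'
    }
}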
From 5bbc3c144674720a1026954f2042ab12516a6e42 Mon Sep 17 00:00:00 2001
From: David Coe
Date: Wed, 10 Jan 2024 10:08:12 -0500
Subject: [PATCH 10/13] PR feedback

---
 csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 11 ++++++-----
 csharp/src/Drivers/BigQuery/BigQueryStatement.cs  |  2 +-
 .../Metadata/GetObjectsParser.cs                  |  6 ------
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 9fa17ec031..5c182d637c 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -608,9 +608,8 @@ private StructArray GetConstraintsUsage(
             ArrowBuffer.BitmapBuilder nullBitmapBuffer = new ArrowBuffer.BitmapBuilder();
             int length = 0;
 
-            // table_name = '{2}' AND
             string query = string.Format("SELECT * FROM `{0}`.`{1}`.INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE WHERE constraint_name = '{2}'",
-                Sanitize(catalog), Sanitize(dbSchema), /*Sanitize(table),*/ Sanitize(constraintName));
+                Sanitize(catalog), Sanitize(dbSchema), Sanitize(constraintName));
 
             BigQueryResults result = this.client.ExecuteQuery(query, parameters: null);
 
@@ -693,9 +692,11 @@ private XdbcDataType ToXdbcDataType(string type)
                     return XdbcDataType.XdbcDataType_XDBC_NUMERIC;
                 default:
 
+                    // in SqlDecimal, an OverflowException is thrown for decimals with scale > 28
+                    // so the XDBC type needs to map the SqlDecimal type
                     int decimalMaxScale = 28;
 
-                    if(type.StartsWith("NUMERIC("))
+                    if (type.StartsWith("NUMERIC("))
@@ -707,7 +708,7 @@ private XdbcDataType ToXdbcDataType(string type)
                     if (type.StartsWith("BIGNUMERIC("))
                     {
-                        if(bool.Parse(this.properties[BigQueryParameters.LargeDecimalsAsString]))
+                        if (bool.Parse(this.properties[BigQueryParameters.LargeDecimalsAsString]))
                         {
                             return XdbcDataType.XdbcDataType_XDBC_VARCHAR;
                         }
@@ -913,7 +914,7 @@ public override AdbcStatement CreateStatement()
                 throw new InvalidOperationException();
             }
 
-            if(this.client == null)
+            if (this.client == null)
             {
                 Open();
             }
diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index 50964a1571..009500b864 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -149,7 +149,7 @@ private IArrowType TranslateType(TableFieldSchema field)
 
         private IArrowType GetType(TableFieldSchema field, IArrowType type)
         {
-            if(field.Mode == "REPEATED")
+            if (field.Mode == "REPEATED")
                 return new ListType(type);
 
             return type;
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
index 047c124269..44af9bb0bb 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/Metadata/GetObjectsParser.cs
@@ -173,12 +173,6 @@ private static List ParseConstraints(StructArray constraintsArra
                 StringArray col_names = column_names.GetSlicedValues(i) as StringArray;
                 StructArray usage = column_usage.GetSlicedValues(i) as StructArray;
 
-                //for(int j=0; j< column_names.Length; j++)
-                //{
-                //    if(column_names.IsValid(j))
-                //        c.ColumnNames.Add(col_names.GetString(j));
-                //}
-
                 if (usage != null)
                 {
                     for (int j = 0; j < usage.Length; j++)

From 626184c486f211a99962ece066c6d825dfe5fa9d Mon Sep 17 00:00:00 2001
From: David Coe
Date: Thu, 11 Jan 2024 10:19:55 -0500
Subject: [PATCH 11/13] add support for column size and decimal digits

---
 .../Drivers/BigQuery/BigQueryConnection.cs | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index 5c182d637c..a231afa5ac 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -18,6 +18,7 @@
 using System;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
+using System.Diagnostics;
 using System.Linq;
 using System.Net.Http;
 using System.Text;
@@ -455,14 +456,29 @@ private StructArray GetColumnSchema(
 
             foreach (BigQueryRow row in result)
             {
+
+                row.Schema.Fields.Select(x => x.Name).ToList().ForEach(x => Debug.WriteLine($"{x} = {row[x]}"));
+                Debug.WriteLine("---------");
+
                 columnNameBuilder.Append(row["column_name"].ToString());
                 ordinalPositionBuilder.Append((int)(long)row["ordinal_position"]);
                 remarksBuilder.Append("");
                 xdbcDataTypeBuilder.AppendNull();
 
                 string dataType = ToTypeName(row["data_type"].ToString());
+
+                if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL") || dataType.StartsWith("BIGNUMERIC") || dataType.StartsWith("BIGDECIMAL"))
+                {
+                    ParsedDecimalValues values = ParsePrecisionAndScale(dataType);
+                    xdbcColumnSizeBuilder.Append(values.Precision);
+                    xdbcDecimalDigitsBuilder.Append(Convert.ToInt16(values.Scale));
+                }
+                else
+                {
+                    xdbcColumnSizeBuilder.AppendNull();
+                    xdbcDecimalDigitsBuilder.AppendNull();
+                }
+
                 xdbcTypeNameBuilder.Append(dataType);
-                xdbcColumnSizeBuilder.AppendNull();
-                xdbcDecimalDigitsBuilder.AppendNull();
                 xdbcNumPrecRadixBuilder.AppendNull();
                 xdbcNullableBuilder.AppendNull();
                 xdbcColumnDefBuilder.AppendNull();
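Note on the patch above: for decimal-like columns, GetColumnSchema now fills xdbc_column_size and xdbc_decimal_digits from the parsed precision and scale, and leaves both null for every other type. The sketch below illustrates that decision with a local regex as a stand-in for the driver's internal ParsePrecisionAndScale helper; it assumes the type name carries "(precision,scale)" the way INFORMATION_SCHEMA reports it.

using System;
using System.Text.RegularExpressions;

static class ColumnSizeSketch
{
    // Illustrative only; the real driver uses ParsePrecisionAndScale/ParsedDecimalValues.
    public static (int? ColumnSize, short? DecimalDigits) GetSizeAndDigits(string dataType)
    {
        if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL") ||
            dataType.StartsWith("BIGNUMERIC") || dataType.StartsWith("BIGDECIMAL"))
        {
            Match m = Regex.Match(dataType, @"\((\d+)\s*,\s*(\d+)\)");
            if (m.Success)
                return (int.Parse(m.Groups[1].Value), Convert.ToInt16(m.Groups[2].Value));
        }

        // Non-decimal types keep null column size / decimal digits,
        // matching the AppendNull() calls in the else branch above.
        return (null, null);
    }

    static void Main()
    {
        Console.WriteLine(GetSizeAndDigits("NUMERIC(38,9)")); // (38, 9)
        Console.WriteLine(GetSizeAndDigits("STRING"));        // (, )
    }
}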
From ee38c3b5e9c361de53921022ab99ee55d94a390b Mon Sep 17 00:00:00 2001
From: David Coe
Date: Thu, 11 Jan 2024 10:43:52 -0500
Subject: [PATCH 12/13] add support for xdbc data type

---
 csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index a231afa5ac..c2c92b9cbd 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -463,7 +463,7 @@ private StructArray GetColumnSchema(
                 columnNameBuilder.Append(row["column_name"].ToString());
                 ordinalPositionBuilder.Append((int)(long)row["ordinal_position"]);
                 remarksBuilder.Append("");
-                xdbcDataTypeBuilder.AppendNull();
+
                 string dataType = ToTypeName(row["data_type"].ToString());
 
                 if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL") || dataType.StartsWith("BIGNUMERIC") || dataType.StartsWith("BIGDECIMAL"))
@@ -471,11 +471,17 @@ private StructArray GetColumnSchema(
                     ParsedDecimalValues values = ParsePrecisionAndScale(dataType);
                     xdbcColumnSizeBuilder.Append(values.Precision);
                     xdbcDecimalDigitsBuilder.Append(Convert.ToInt16(values.Scale));
+
+                    if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL"))
+                        xdbcDataTypeBuilder.Append((int)ArrowTypeId.Decimal128);
+                    else
+                        xdbcDataTypeBuilder.Append((int)ArrowTypeId.Decimal256);
                 }
                 else
                 {
                     xdbcColumnSizeBuilder.AppendNull();
                     xdbcDecimalDigitsBuilder.AppendNull();
+                    xdbcDataTypeBuilder.AppendNull();
                 }
 
                 xdbcTypeNameBuilder.Append(dataType);

From ed3d1bcc5e70dab44e13590c8817cbd954b4a5fb Mon Sep 17 00:00:00 2001
From: David Coe
Date: Tue, 16 Jan 2024 11:37:45 -0500
Subject: [PATCH 13/13] PR feedback

---
 csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
index c2c92b9cbd..8e599231a8 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs
@@ -456,10 +456,6 @@ private StructArray GetColumnSchema(
 
             foreach (BigQueryRow row in result)
             {
-
-                row.Schema.Fields.Select(x => x.Name).ToList().ForEach(x => Debug.WriteLine($"{x} = {row[x]}"));
-                Debug.WriteLine("---------");
-
                 columnNameBuilder.Append(row["column_name"].ToString());
                 ordinalPositionBuilder.Append((int)(long)row["ordinal_position"]);
                 remarksBuilder.Append("");
@@ -471,19 +467,14 @@ private StructArray GetColumnSchema(
                     ParsedDecimalValues values = ParsePrecisionAndScale(dataType);
                     xdbcColumnSizeBuilder.Append(values.Precision);
                     xdbcDecimalDigitsBuilder.Append(Convert.ToInt16(values.Scale));
-
-                    if (dataType.StartsWith("NUMERIC") || dataType.StartsWith("DECIMAL"))
-                        xdbcDataTypeBuilder.Append((int)ArrowTypeId.Decimal128);
-                    else
-                        xdbcDataTypeBuilder.Append((int)ArrowTypeId.Decimal256);
                 }
                 else
                 {
                     xdbcColumnSizeBuilder.AppendNull();
                     xdbcDecimalDigitsBuilder.AppendNull();
-                    xdbcDataTypeBuilder.AppendNull();
                 }
 
+                xdbcDataTypeBuilder.AppendNull();
                 xdbcTypeNameBuilder.Append(dataType);
                 xdbcNumPrecRadixBuilder.AppendNull();
                 xdbcNullableBuilder.AppendNull();
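Note on the final patch: after this change xdbc_data_type is appended as null for every BigQuery column, so a consumer of GetObjects column metadata keys off xdbc_type_name together with xdbc_column_size and xdbc_decimal_digits. The sketch below shows that reading pattern; the ColumnMetadata record is an illustrative stand-in for one row of the GetColumnSchema output, not a driver type.

using System;

static class ColumnMetadataSketch
{
    // Stand-in for the per-column metadata fields populated by GetColumnSchema.
    public sealed class ColumnMetadata
    {
        public string TypeName { get; set; }
        public int? ColumnSize { get; set; }
        public short? DecimalDigits { get; set; }
    }

    // Decimal columns carry precision/scale; everything else is described by the type name alone.
    public static string Describe(ColumnMetadata column)
    {
        if (column.ColumnSize.HasValue && column.DecimalDigits.HasValue)
            return $"{column.TypeName} (precision {column.ColumnSize}, scale {column.DecimalDigits})";

        return column.TypeName;
    }

    static void Main()
    {
        Console.WriteLine(Describe(new ColumnMetadata { TypeName = "NUMERIC(38,9)", ColumnSize = 38, DecimalDigits = 9 }));
        Console.WriteLine(Describe(new ColumnMetadata { TypeName = "STRING" }));
    }
}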