diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ab79d69..eb6150f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ else() add_compile_options(-Wall -Wextra -Werror) endif() -find_package(Python COMPONENTS Interpreter Development.Module NumPy REQUIRED) +find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) # Detect the installed nanobind package and import it into CMake execute_process( diff --git a/NUMPY_LICENSE.txt b/NUMPY_LICENSE.txt deleted file mode 100644 index 4723d4ea..00000000 --- a/NUMPY_LICENSE.txt +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2005-2021, NumPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pyproject.toml b/pyproject.toml index 05037d6b..73a3ca67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,6 @@ requires = [ "scikit-build-core", "nanobind", - "oldest-supported-numpy", # only needed for datetime ] build-backend = "scikit_build_core.build" @@ -69,7 +68,7 @@ build = "cp39-*64 cp310-*64 cp311-*64 cp312-*64" skip = "*musllinux*" test-command = "pytest {project}/tests" -test-requires = ["pytest", "pandas>=2.0.0", "polars<1.3.0", "narwhals", "numpy"] +test-requires = ["pytest", "pandas>=2.0.0", "polars<1.3.0", "narwhals"] [tool.ruff] line-length = 88 diff --git a/src/pantab/CMakeLists.txt b/src/pantab/CMakeLists.txt index db79484d..6a3d2674 100644 --- a/src/pantab/CMakeLists.txt +++ b/src/pantab/CMakeLists.txt @@ -2,7 +2,6 @@ set(PANTAB_SOURCES libpantab.cpp reader.cpp writer.cpp - numpy_datetime.cpp ) if(WIN32) # Required so local installation of pantab can resolve to bundled @@ -17,7 +16,6 @@ nanobind_add_module( NOMINSIZE ${PANTAB_SOURCES} ) -target_include_directories(libpantab PUBLIC ${Python_NumPy_INCLUDE_DIRS}) target_link_libraries(libpantab PRIVATE Tableau::tableauhyperapi-cxx PRIVATE nanoarrow diff --git a/src/pantab/numpy_datetime.cpp b/src/pantab/numpy_datetime.cpp deleted file mode 100644 index 46fe7c38..00000000 --- a/src/pantab/numpy_datetime.cpp +++ /dev/null @@ -1,447 +0,0 @@ -/* - * This file implements core functionality for NumPy datetime. - * - * Written by Mark Wiebe (mwwiebe@gmail.com) - * Copyright (c) 2011 by Enthought, Inc. - * - * This file is derived from NumPy 1.20. See NUMPY_LICENSE.txt - */ - -#include "numpy_datetime.h" - -#define PY_SSIZE_T_CLEAN -#include - -#include - -#define NO_IMPORT_ARRAY -#define PY_ARRAY_UNIQUE_SYMBOL PANTAB_ARRAY_API -#include - -/* - * Computes the python `ret, d = divmod(d, unit)`. - * - * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch - * for subsequent calls to this command - it is able to deduce that `*d >= 0`. - */ -static inline npy_int64 extract_unit_64(npy_int64 *d, npy_int64 unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; -} - -/* Days per month, regular year and leap year */ -int _days_per_month_table[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* - * Returns 1 if the given year is a leap year, 0 otherwise. - */ -int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); -} - -/* - * Calculates the days offset from the 1970 epoch. - */ -npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. - */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } - - month_lengths = _days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; - - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } - - /* Add the days */ - days += dts->day - 1; - - return days; -} - -/* - * Modifies '*days_' to be the day offset within the year, - * and returns the year. - */ -static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - year = 400 * extract_unit_64(&days, days_per_400years); - - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } - } - - *days_ = days; - return year + 2000; -} - -/* - * Fills in the year, month, day in 'dts' based on the days - * offset from 1970. - */ -static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - int *month_lengths, i; - - dts->year = days_to_yearsdays(&days); - month_lengths = _days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = (int)days + 1; - return; - } else { - days -= month_lengths[i]; - } - } -} - -/* - * Converts a datetime from a datetimestruct to a datetime based - * on some metadata. The date is assumed to be valid. - * - * TODO: If meta->num is really big, there could be overflow - * - * Returns 0 on success, -1 on failure. - */ -int convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, - const npy_datetimestruct *dts, - npy_datetime *out) { - npy_datetime ret; - NPY_DATETIMEUNIT base = meta->base; - - /* If the datetimestruct is NaT, return NaT */ - if (dts->year == NPY_DATETIME_NAT) { - *out = NPY_DATETIME_NAT; - return 0; - } - - /* Cannot instantiate a datetime with generic units */ - if (meta->base == NPY_FR_GENERIC) { - PyErr_SetString(PyExc_ValueError, - "Cannot create a NumPy datetime other than NaT " - "with generic units"); - return -1; - } - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString(PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } - } - - /* Divide by the multiplier */ - if (meta->num > 1) { - if (ret >= 0) { - ret /= meta->num; - } else { - ret = (ret - meta->num + 1) / meta->num; - } - } - - *out = ret; - - return 0; -} - -/* - * Converts a datetime based on the given metadata into a datetimestruct - */ -int convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, - npy_datetime dt, - npy_datetimestruct *out) { - npy_int64 days; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* NaT is signaled in the year */ - if (dt == NPY_DATETIME_NAT) { - out->year = NPY_DATETIME_NAT; - return 0; - } - - /* Datetimes can't be in generic units */ - if (meta->base == NPY_FR_GENERIC) { - PyErr_SetString(PyExc_ValueError, - "Cannot convert a NumPy datetime value other than NaT " - "with generic units"); - return -1; - } - - /* TODO: Change to a mechanism that avoids the potential overflow */ - dt *= meta->num; - - /* - * Note that care must be taken with the / and % operators - * for negative values. - */ - switch (meta->base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit_64(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - days = extract_unit_64(&dt, 24LL); - set_datetimestruct_days(days, out); - out->hour = (int)dt; - break; - - case NPY_FR_m: - days = extract_unit_64(&dt, 60LL * 24); - set_datetimestruct_days(days, out); - out->hour = (int)extract_unit_64(&dt, 60LL); - out->min = (int)dt; - break; - - case NPY_FR_s: - days = extract_unit_64(&dt, 60LL * 60 * 24); - set_datetimestruct_days(days, out); - out->hour = (int)extract_unit_64(&dt, 60LL * 60); - out->min = (int)extract_unit_64(&dt, 60LL); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - days = extract_unit_64(&dt, 1000LL * 60 * 60 * 24); - set_datetimestruct_days(days, out); - out->hour = (int)extract_unit_64(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit_64(&dt, 1000LL * 60); - out->sec = (int)extract_unit_64(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - days = extract_unit_64(&dt, 1000LL * 1000 * 60 * 60 * 24); - set_datetimestruct_days(days, out); - out->hour = (int)extract_unit_64(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit_64(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit_64(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - days = extract_unit_64(&dt, 1000LL * 1000 * 1000 * 60 * 60 * 24); - set_datetimestruct_days(days, out); - out->hour = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit_64(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - days = extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60 * 24); - set_datetimestruct_days(days, out); - out->hour = - (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 60); - out->sec = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000); - out->us = (int)extract_unit_64(&dt, 1000LL * 1000); - out->ps = (int)(dt); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = - (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = - (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 60); - out->sec = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 1000); - out->us = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit_64(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = - (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000 * 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit_64(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit_64(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); - return -1; - } - - return 0; -} diff --git a/src/pantab/numpy_datetime.h b/src/pantab/numpy_datetime.h deleted file mode 100644 index ef1f1ccf..00000000 --- a/src/pantab/numpy_datetime.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is derived from NumPy 1.20. See NUMPY_LICENSE.txt - */ - -#ifndef _NPY_PRIVATE__DATETIME_H_ -#define _NPY_PRIVATE__DATETIME_H_ -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include - -/* - * Converts a datetime based on the given metadata into a datetimestruct - */ -int convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, - npy_datetime dt, - npy_datetimestruct *out); - -/* - * Converts a datetime from a datetimestruct to a datetime based - * on some metadata. The date is assumed to be valid. - * - * TODO: If meta->num is really big, there could be overflow - * - * Returns 0 on success, -1 on failure. - */ -int convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, - const npy_datetimestruct *dts, - npy_datetime *out); - -#endif diff --git a/src/pantab/writer.cpp b/src/pantab/writer.cpp index b2caf511..f744fc81 100644 --- a/src/pantab/writer.cpp +++ b/src/pantab/writer.cpp @@ -6,8 +6,6 @@ #include #include -#include "numpy_datetime.h" - static auto GetHyperTypeFromArrowSchema(struct ArrowSchema *schema, ArrowError *error) -> hyperapi::SqlType { @@ -270,45 +268,25 @@ class TimestampInsertHelper : public InsertHelper { array_view_->buffer_views[1].data.as_uint8 + (idx * elem_size), elem_size); - // using timestamp_t = - // typename std::conditional::type; - // TODO: need overflow checks here - npy_datetimestruct dts; - PyArray_DatetimeMetaData meta; if constexpr (TU == NANOARROW_TIME_UNIT_SECOND) { - meta = {NPY_FR_s, 1}; + value *= 1000000; } else if constexpr (TU == NANOARROW_TIME_UNIT_MILLI) { - meta = {NPY_FR_ms, 1}; - } else if constexpr (TU == NANOARROW_TIME_UNIT_MICRO) { - meta = {NPY_FR_us, 1}; + value *= 1000; } else if constexpr (TU == NANOARROW_TIME_UNIT_NANO) { - // we assume pandas is ns here but should check format - meta = {NPY_FR_ns, 1}; + value /= 1000; } - int ret = convert_datetime_to_datetimestruct(&meta, value, &dts); - if (ret != 0) { - throw std::invalid_argument("could not convert datetime value "); - } - const hyperapi::Date dt{static_cast(dts.year), - static_cast(dts.month), - static_cast(dts.day)}; - const hyperapi::Time time{static_cast(dts.hour), - static_cast(dts.min), - static_cast(dts.sec), dts.us}; - - if constexpr (TZAware) { - const hyperapi::OffsetTimestamp ts{dt, time, std::chrono::minutes{0}}; - hyperapi::internal::ValueInserter{inserter_}.addValue( - static_cast(ts)); + constexpr int64_t USEC_TABLEAU_TO_UNIX_EPOCH = 210866803200000000LL; + hyper_timestamp_t raw_timestamp = + static_cast(value + USEC_TABLEAU_TO_UNIX_EPOCH); - } else { - const hyperapi::Timestamp ts{dt, time}; - hyperapi::internal::ValueInserter{inserter_}.addValue( - static_cast(ts)); - } + using timestamp_t = + typename std::conditional::type; + const timestamp_t ts{raw_timestamp, {}}; + hyperapi::internal::ValueInserter{inserter_}.addValue( + static_cast(ts)); } };