Skip to content

Commit 7b182ee

Browse files
duanmeng and facebook-github-bot
authored and committed
Add crc32 Presto Functions (facebookincubator#3368)
Summary: [Add crc32 Presto Functions](https://prestodb.io/docs/current/functions/binary.html#crc32) crc32(binary) → bigint Computes the CRC-32 of binary. For general purpose hashing, use [xxhash64()](https://prestodb.io/docs/current/functions/binary.html#xxhash64), as it is much faster and produces a better quality hash. Pull Request resolved: facebookincubator#3368 Reviewed By: xiaoxmeng Differential Revision: D41592764 Pulled By: Yuhta fbshipit-source-id: 6202445e220655f23d98aec086bddb19ebedac16
1 parent 18153f8 commit 7b182ee

File tree

5 files changed

+42
-1
lines changed

5 files changed

+42
-1
lines changed

velox/docs/functions/binary.rst

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
Binary Functions
33
================
44

5+
.. function:: crc32(binary) -> bigint
6+
7+
Computes the crc32 checksum of ``binary``.
8+
59
.. function:: xxhash64(binary) -> varbinary
610

711
Computes the xxhash64 hash of ``binary``.

velox/functions/prestosql/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ target_link_libraries(
6363
md5
6464
velox_type_tz
6565
velox_presto_types
66-
velox_functions_util)
66+
velox_functions_util
67+
${FOLLY})
6768

6869
set_property(TARGET velox_functions_prestosql_impl PROPERTY JOB_POOL_COMPILE
6970
high_memory_pool)

velox/functions/prestosql/StringFunctions.h

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
#pragma once
1717

18+
#include <folly/hash/Checksum.h>
1819
#include <cstdint>
1920
#define XXH_INLINE_ALL
2021
#include <xxhash.h>
@@ -53,6 +54,20 @@ struct CodePointFunction {
5354
}
5455
};
5556

57+
/// crc32(varbinary) → bigint
58+
/// Return an int64_t checksum calculated using the crc32 method in zlib.
59+
template <typename T>
60+
struct CRC32Function {
61+
VELOX_DEFINE_FUNCTION_TYPES(T);
62+
63+
FOLLY_ALWAYS_INLINE
64+
bool call(out_type<int64_t>& result, const arg_type<Varchar>& input) {
65+
result = static_cast<int64_t>(folly::crc32_type(
66+
reinterpret_cast<const unsigned char*>(input.data()), input.size()));
67+
return true;
68+
}
69+
};
70+
5671
/// xxhash64(varbinary) → varbinary
5772
/// Return an 8-byte binary to hash64 of input (varbinary such as string)
5873
template <typename T>

velox/functions/prestosql/registration/StringFunctionsRegistration.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ void registerSimpleFunctions() {
5555
registerFunction<RPadFunction, Varchar, Varchar, int64_t, Varchar>({"rpad"});
5656

5757
// Register hash functions.
58+
registerFunction<CRC32Function, int64_t, Varbinary>({"crc32"});
5859
registerFunction<XxHash64Function, Varbinary, Varbinary>({"xxhash64"});
5960
registerFunction<Md5Function, Varbinary, Varbinary>({"md5"});
6061
registerFunction<Sha256Function, Varbinary, Varbinary>({"sha256"});

velox/functions/prestosql/tests/StringFunctionsTest.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -1384,6 +1384,26 @@ TEST_F(StringFunctionsTest, controlExprEncodingPropagation) {
13841384
test("if(1!=1, lower(C1), lower(C2))", false);
13851385
}
13861386

1387+
TEST_F(StringFunctionsTest, crc32) {
  // Evaluates "crc32(c0)" on a single optional input passed as VARBINARY.
  const auto checksumOf = [&](std::optional<std::string> input) {
    return evaluateOnce<int64_t, std::string>(
        "crc32(c0)", {input}, {VARBINARY()});
  };

  // A null input is expected to produce a null result.
  EXPECT_EQ(std::nullopt, checksumOf(std::nullopt));

  // Expected values below were produced with Python 3's zlib module:
  //   >>> import zlib
  //   >>> print(zlib.crc32(b"DEAD_BEEF"))
  //   2634114297
  //   >>> print(zlib.crc32(b"CRC32"))
  //   4128576900
  //   >>> print(zlib.crc32(b"velox is an open source unified execution engine."))
  //   2173230066
  EXPECT_EQ(2634114297L, checksumOf("DEAD_BEEF"));
  EXPECT_EQ(4128576900L, checksumOf("CRC32"));
  EXPECT_EQ(
      2173230066L,
      checksumOf("velox is an open source unified execution engine."));
}
1406+
13871407
TEST_F(StringFunctionsTest, xxhash64) {
13881408
const auto xxhash64 = [&](std::optional<std::string> value) {
13891409
return evaluateOnce<std::string, std::string>(

0 commit comments

Comments
 (0)